xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_optimizer.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2018 Valve Corporation
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
5*61046927SAndroid Build Coastguard Worker  */
6*61046927SAndroid Build Coastguard Worker 
7*61046927SAndroid Build Coastguard Worker #include "aco_builder.h"
8*61046927SAndroid Build Coastguard Worker #include "aco_ir.h"
9*61046927SAndroid Build Coastguard Worker 
10*61046927SAndroid Build Coastguard Worker #include "util/half_float.h"
11*61046927SAndroid Build Coastguard Worker #include "util/memstream.h"
12*61046927SAndroid Build Coastguard Worker 
13*61046927SAndroid Build Coastguard Worker #include <algorithm>
14*61046927SAndroid Build Coastguard Worker #include <array>
15*61046927SAndroid Build Coastguard Worker #include <vector>
16*61046927SAndroid Build Coastguard Worker 
17*61046927SAndroid Build Coastguard Worker namespace aco {
18*61046927SAndroid Build Coastguard Worker 
19*61046927SAndroid Build Coastguard Worker namespace {
/**
 * The optimizer works in 4 phases:
 * (1) The first pass collects information for each ssa-def,
 *     propagates reg->reg operands of the same type, inline constants
 *     and neg/abs input modifiers.
 * (2) The second pass combines instructions like mad, omod, clamp and
 *     propagates sgpr's on VALU instructions.
 *     This pass depends on information collected in the first pass.
 * (3) The third pass goes backwards, and selects instructions,
 *     i.e. decides if a mad instruction is profitable and eliminates dead code.
 * (4) The fourth pass cleans up the sequence: literals get applied and dead
 *     instructions are removed from the sequence.
 */
33*61046927SAndroid Build Coastguard Worker 
34*61046927SAndroid Build Coastguard Worker struct mad_info {
35*61046927SAndroid Build Coastguard Worker    aco_ptr<Instruction> add_instr;
36*61046927SAndroid Build Coastguard Worker    uint32_t mul_temp_id;
37*61046927SAndroid Build Coastguard Worker    uint16_t literal_mask;
38*61046927SAndroid Build Coastguard Worker    uint16_t fp16_mask;
39*61046927SAndroid Build Coastguard Worker 
mad_infoaco::__anon9e387afb0111::mad_info40*61046927SAndroid Build Coastguard Worker    mad_info(aco_ptr<Instruction> instr, uint32_t id)
41*61046927SAndroid Build Coastguard Worker        : add_instr(std::move(instr)), mul_temp_id(id), literal_mask(0), fp16_mask(0)
42*61046927SAndroid Build Coastguard Worker    {}
43*61046927SAndroid Build Coastguard Worker };
44*61046927SAndroid Build Coastguard Worker 
/* Bit flags stored in ssa_info::label. Every enumerator is a single bit; the
 * enumerators reach up to bit 39, so values do not fit in 32 bits. Bits 11,
 * 13-15 and 25 are not assigned. All initializers use 1ull so that no shift
 * is evaluated in (signed) int. */
enum Label {
   label_vec = 1ull << 0,
   label_constant_32bit = 1ull << 1,
   /* label_{abs,neg,mul,omod2,omod4,omod5,clamp} are used for both 16 and
    * 32-bit operations but this shouldn't cause any issues because we don't
    * look through any conversions */
   label_abs = 1ull << 2,
   label_neg = 1ull << 3,
   label_mul = 1ull << 4,
   label_temp = 1ull << 5,
   label_literal = 1ull << 6,
   label_mad = 1ull << 7,
   label_omod2 = 1ull << 8,
   label_omod4 = 1ull << 9,
   label_omod5 = 1ull << 10,
   label_clamp = 1ull << 12,
   label_b2f = 1ull << 16,
   label_add_sub = 1ull << 17,
   label_bitwise = 1ull << 18,
   label_minmax = 1ull << 19,
   label_vopc = 1ull << 20,
   label_uniform_bool = 1ull << 21,
   label_constant_64bit = 1ull << 22,
   label_uniform_bitwise = 1ull << 23,
   label_scc_invert = 1ull << 24,
   label_scc_needed = 1ull << 26,
   label_b2i = 1ull << 27,
   label_fcanonicalize = 1ull << 28,
   label_constant_16bit = 1ull << 29,
   label_usedef = 1ull << 30, /* generic label */
   label_vop3p = 1ull << 31,  /* must not be computed in int: would sign-extend */
   label_canonicalized = 1ull << 32,
   label_extract = 1ull << 33,
   label_insert = 1ull << 34,
   label_dpp16 = 1ull << 35,
   label_dpp8 = 1ull << 36,
   label_f2f32 = 1ull << 37,
   label_f2f16 = 1ull << 38,
   label_split = 1ull << 39,
};
85*61046927SAndroid Build Coastguard Worker 
86*61046927SAndroid Build Coastguard Worker static constexpr uint64_t instr_usedef_labels =
87*61046927SAndroid Build Coastguard Worker    label_vec | label_mul | label_add_sub | label_vop3p | label_bitwise | label_uniform_bitwise |
88*61046927SAndroid Build Coastguard Worker    label_minmax | label_vopc | label_usedef | label_extract | label_dpp16 | label_dpp8 |
89*61046927SAndroid Build Coastguard Worker    label_f2f32;
90*61046927SAndroid Build Coastguard Worker static constexpr uint64_t instr_mod_labels =
91*61046927SAndroid Build Coastguard Worker    label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert | label_f2f16;
92*61046927SAndroid Build Coastguard Worker 
93*61046927SAndroid Build Coastguard Worker static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels | label_split;
94*61046927SAndroid Build Coastguard Worker static constexpr uint64_t temp_labels = label_abs | label_neg | label_temp | label_b2f |
95*61046927SAndroid Build Coastguard Worker                                         label_uniform_bool | label_scc_invert | label_b2i |
96*61046927SAndroid Build Coastguard Worker                                         label_fcanonicalize;
97*61046927SAndroid Build Coastguard Worker static constexpr uint32_t val_labels =
98*61046927SAndroid Build Coastguard Worker    label_constant_32bit | label_constant_64bit | label_constant_16bit | label_literal | label_mad;
99*61046927SAndroid Build Coastguard Worker 
100*61046927SAndroid Build Coastguard Worker static_assert((instr_labels & temp_labels) == 0, "labels cannot intersect");
101*61046927SAndroid Build Coastguard Worker static_assert((instr_labels & val_labels) == 0, "labels cannot intersect");
102*61046927SAndroid Build Coastguard Worker static_assert((temp_labels & val_labels) == 0, "labels cannot intersect");
103*61046927SAndroid Build Coastguard Worker 
104*61046927SAndroid Build Coastguard Worker struct ssa_info {
105*61046927SAndroid Build Coastguard Worker    uint64_t label;
106*61046927SAndroid Build Coastguard Worker    union {
107*61046927SAndroid Build Coastguard Worker       uint32_t val;
108*61046927SAndroid Build Coastguard Worker       Temp temp;
109*61046927SAndroid Build Coastguard Worker       Instruction* instr;
110*61046927SAndroid Build Coastguard Worker    };
111*61046927SAndroid Build Coastguard Worker 
ssa_infoaco::__anon9e387afb0111::ssa_info112*61046927SAndroid Build Coastguard Worker    ssa_info() : label(0) {}
113*61046927SAndroid Build Coastguard Worker 
add_labelaco::__anon9e387afb0111::ssa_info114*61046927SAndroid Build Coastguard Worker    void add_label(Label new_label)
115*61046927SAndroid Build Coastguard Worker    {
116*61046927SAndroid Build Coastguard Worker       /* Since all the instr_usedef_labels use instr for the same thing
117*61046927SAndroid Build Coastguard Worker        * (indicating the defining instruction), there is usually no need to
118*61046927SAndroid Build Coastguard Worker        * clear any other instr labels. */
119*61046927SAndroid Build Coastguard Worker       if (new_label & instr_usedef_labels)
120*61046927SAndroid Build Coastguard Worker          label &= ~(instr_mod_labels | temp_labels | val_labels); /* instr, temp and val alias */
121*61046927SAndroid Build Coastguard Worker 
122*61046927SAndroid Build Coastguard Worker       if (new_label & instr_mod_labels) {
123*61046927SAndroid Build Coastguard Worker          label &= ~instr_labels;
124*61046927SAndroid Build Coastguard Worker          label &= ~(temp_labels | val_labels); /* instr, temp and val alias */
125*61046927SAndroid Build Coastguard Worker       }
126*61046927SAndroid Build Coastguard Worker 
127*61046927SAndroid Build Coastguard Worker       if (new_label & temp_labels) {
128*61046927SAndroid Build Coastguard Worker          label &= ~temp_labels;
129*61046927SAndroid Build Coastguard Worker          label &= ~(instr_labels | val_labels); /* instr, temp and val alias */
130*61046927SAndroid Build Coastguard Worker       }
131*61046927SAndroid Build Coastguard Worker 
132*61046927SAndroid Build Coastguard Worker       uint32_t const_labels =
133*61046927SAndroid Build Coastguard Worker          label_literal | label_constant_32bit | label_constant_64bit | label_constant_16bit;
134*61046927SAndroid Build Coastguard Worker       if (new_label & const_labels) {
135*61046927SAndroid Build Coastguard Worker          label &= ~val_labels | const_labels;
136*61046927SAndroid Build Coastguard Worker          label &= ~(instr_labels | temp_labels); /* instr, temp and val alias */
137*61046927SAndroid Build Coastguard Worker       } else if (new_label & val_labels) {
138*61046927SAndroid Build Coastguard Worker          label &= ~val_labels;
139*61046927SAndroid Build Coastguard Worker          label &= ~(instr_labels | temp_labels); /* instr, temp and val alias */
140*61046927SAndroid Build Coastguard Worker       }
141*61046927SAndroid Build Coastguard Worker 
142*61046927SAndroid Build Coastguard Worker       label |= new_label;
143*61046927SAndroid Build Coastguard Worker    }
144*61046927SAndroid Build Coastguard Worker 
set_vecaco::__anon9e387afb0111::ssa_info145*61046927SAndroid Build Coastguard Worker    void set_vec(Instruction* vec)
146*61046927SAndroid Build Coastguard Worker    {
147*61046927SAndroid Build Coastguard Worker       add_label(label_vec);
148*61046927SAndroid Build Coastguard Worker       instr = vec;
149*61046927SAndroid Build Coastguard Worker    }
150*61046927SAndroid Build Coastguard Worker 
is_vecaco::__anon9e387afb0111::ssa_info151*61046927SAndroid Build Coastguard Worker    bool is_vec() { return label & label_vec; }
152*61046927SAndroid Build Coastguard Worker 
set_constantaco::__anon9e387afb0111::ssa_info153*61046927SAndroid Build Coastguard Worker    void set_constant(amd_gfx_level gfx_level, uint64_t constant)
154*61046927SAndroid Build Coastguard Worker    {
155*61046927SAndroid Build Coastguard Worker       Operand op16 = Operand::c16(constant);
156*61046927SAndroid Build Coastguard Worker       Operand op32 = Operand::get_const(gfx_level, constant, 4);
157*61046927SAndroid Build Coastguard Worker       add_label(label_literal);
158*61046927SAndroid Build Coastguard Worker       val = constant;
159*61046927SAndroid Build Coastguard Worker 
160*61046927SAndroid Build Coastguard Worker       /* check that no upper bits are lost in case of packed 16bit constants */
161*61046927SAndroid Build Coastguard Worker       if (gfx_level >= GFX8 && !op16.isLiteral() &&
162*61046927SAndroid Build Coastguard Worker           op16.constantValue16(true) == ((constant >> 16) & 0xffff))
163*61046927SAndroid Build Coastguard Worker          add_label(label_constant_16bit);
164*61046927SAndroid Build Coastguard Worker 
165*61046927SAndroid Build Coastguard Worker       if (!op32.isLiteral())
166*61046927SAndroid Build Coastguard Worker          add_label(label_constant_32bit);
167*61046927SAndroid Build Coastguard Worker 
168*61046927SAndroid Build Coastguard Worker       if (Operand::is_constant_representable(constant, 8))
169*61046927SAndroid Build Coastguard Worker          add_label(label_constant_64bit);
170*61046927SAndroid Build Coastguard Worker 
171*61046927SAndroid Build Coastguard Worker       if (label & label_constant_64bit) {
172*61046927SAndroid Build Coastguard Worker          val = Operand::c64(constant).constantValue();
173*61046927SAndroid Build Coastguard Worker          if (val != constant)
174*61046927SAndroid Build Coastguard Worker             label &= ~(label_literal | label_constant_16bit | label_constant_32bit);
175*61046927SAndroid Build Coastguard Worker       }
176*61046927SAndroid Build Coastguard Worker    }
177*61046927SAndroid Build Coastguard Worker 
is_constantaco::__anon9e387afb0111::ssa_info178*61046927SAndroid Build Coastguard Worker    bool is_constant(unsigned bits)
179*61046927SAndroid Build Coastguard Worker    {
180*61046927SAndroid Build Coastguard Worker       switch (bits) {
181*61046927SAndroid Build Coastguard Worker       case 8: return label & label_literal;
182*61046927SAndroid Build Coastguard Worker       case 16: return label & label_constant_16bit;
183*61046927SAndroid Build Coastguard Worker       case 32: return label & label_constant_32bit;
184*61046927SAndroid Build Coastguard Worker       case 64: return label & label_constant_64bit;
185*61046927SAndroid Build Coastguard Worker       }
186*61046927SAndroid Build Coastguard Worker       return false;
187*61046927SAndroid Build Coastguard Worker    }
188*61046927SAndroid Build Coastguard Worker 
is_literalaco::__anon9e387afb0111::ssa_info189*61046927SAndroid Build Coastguard Worker    bool is_literal(unsigned bits)
190*61046927SAndroid Build Coastguard Worker    {
191*61046927SAndroid Build Coastguard Worker       bool is_lit = label & label_literal;
192*61046927SAndroid Build Coastguard Worker       switch (bits) {
193*61046927SAndroid Build Coastguard Worker       case 8: return false;
194*61046927SAndroid Build Coastguard Worker       case 16: return is_lit && ~(label & label_constant_16bit);
195*61046927SAndroid Build Coastguard Worker       case 32: return is_lit && ~(label & label_constant_32bit);
196*61046927SAndroid Build Coastguard Worker       case 64: return false;
197*61046927SAndroid Build Coastguard Worker       }
198*61046927SAndroid Build Coastguard Worker       return false;
199*61046927SAndroid Build Coastguard Worker    }
200*61046927SAndroid Build Coastguard Worker 
is_constant_or_literalaco::__anon9e387afb0111::ssa_info201*61046927SAndroid Build Coastguard Worker    bool is_constant_or_literal(unsigned bits)
202*61046927SAndroid Build Coastguard Worker    {
203*61046927SAndroid Build Coastguard Worker       if (bits == 64)
204*61046927SAndroid Build Coastguard Worker          return label & label_constant_64bit;
205*61046927SAndroid Build Coastguard Worker       else
206*61046927SAndroid Build Coastguard Worker          return label & label_literal;
207*61046927SAndroid Build Coastguard Worker    }
208*61046927SAndroid Build Coastguard Worker 
set_absaco::__anon9e387afb0111::ssa_info209*61046927SAndroid Build Coastguard Worker    void set_abs(Temp abs_temp)
210*61046927SAndroid Build Coastguard Worker    {
211*61046927SAndroid Build Coastguard Worker       add_label(label_abs);
212*61046927SAndroid Build Coastguard Worker       temp = abs_temp;
213*61046927SAndroid Build Coastguard Worker    }
214*61046927SAndroid Build Coastguard Worker 
is_absaco::__anon9e387afb0111::ssa_info215*61046927SAndroid Build Coastguard Worker    bool is_abs() { return label & label_abs; }
216*61046927SAndroid Build Coastguard Worker 
set_negaco::__anon9e387afb0111::ssa_info217*61046927SAndroid Build Coastguard Worker    void set_neg(Temp neg_temp)
218*61046927SAndroid Build Coastguard Worker    {
219*61046927SAndroid Build Coastguard Worker       add_label(label_neg);
220*61046927SAndroid Build Coastguard Worker       temp = neg_temp;
221*61046927SAndroid Build Coastguard Worker    }
222*61046927SAndroid Build Coastguard Worker 
is_negaco::__anon9e387afb0111::ssa_info223*61046927SAndroid Build Coastguard Worker    bool is_neg() { return label & label_neg; }
224*61046927SAndroid Build Coastguard Worker 
set_neg_absaco::__anon9e387afb0111::ssa_info225*61046927SAndroid Build Coastguard Worker    void set_neg_abs(Temp neg_abs_temp)
226*61046927SAndroid Build Coastguard Worker    {
227*61046927SAndroid Build Coastguard Worker       add_label((Label)((uint32_t)label_abs | (uint32_t)label_neg));
228*61046927SAndroid Build Coastguard Worker       temp = neg_abs_temp;
229*61046927SAndroid Build Coastguard Worker    }
230*61046927SAndroid Build Coastguard Worker 
set_mulaco::__anon9e387afb0111::ssa_info231*61046927SAndroid Build Coastguard Worker    void set_mul(Instruction* mul)
232*61046927SAndroid Build Coastguard Worker    {
233*61046927SAndroid Build Coastguard Worker       add_label(label_mul);
234*61046927SAndroid Build Coastguard Worker       instr = mul;
235*61046927SAndroid Build Coastguard Worker    }
236*61046927SAndroid Build Coastguard Worker 
is_mulaco::__anon9e387afb0111::ssa_info237*61046927SAndroid Build Coastguard Worker    bool is_mul() { return label & label_mul; }
238*61046927SAndroid Build Coastguard Worker 
set_tempaco::__anon9e387afb0111::ssa_info239*61046927SAndroid Build Coastguard Worker    void set_temp(Temp tmp)
240*61046927SAndroid Build Coastguard Worker    {
241*61046927SAndroid Build Coastguard Worker       add_label(label_temp);
242*61046927SAndroid Build Coastguard Worker       temp = tmp;
243*61046927SAndroid Build Coastguard Worker    }
244*61046927SAndroid Build Coastguard Worker 
is_tempaco::__anon9e387afb0111::ssa_info245*61046927SAndroid Build Coastguard Worker    bool is_temp() { return label & label_temp; }
246*61046927SAndroid Build Coastguard Worker 
set_madaco::__anon9e387afb0111::ssa_info247*61046927SAndroid Build Coastguard Worker    void set_mad(uint32_t mad_info_idx)
248*61046927SAndroid Build Coastguard Worker    {
249*61046927SAndroid Build Coastguard Worker       add_label(label_mad);
250*61046927SAndroid Build Coastguard Worker       val = mad_info_idx;
251*61046927SAndroid Build Coastguard Worker    }
252*61046927SAndroid Build Coastguard Worker 
is_madaco::__anon9e387afb0111::ssa_info253*61046927SAndroid Build Coastguard Worker    bool is_mad() { return label & label_mad; }
254*61046927SAndroid Build Coastguard Worker 
set_omod2aco::__anon9e387afb0111::ssa_info255*61046927SAndroid Build Coastguard Worker    void set_omod2(Instruction* mul)
256*61046927SAndroid Build Coastguard Worker    {
257*61046927SAndroid Build Coastguard Worker       if (label & temp_labels)
258*61046927SAndroid Build Coastguard Worker          return;
259*61046927SAndroid Build Coastguard Worker       add_label(label_omod2);
260*61046927SAndroid Build Coastguard Worker       instr = mul;
261*61046927SAndroid Build Coastguard Worker    }
262*61046927SAndroid Build Coastguard Worker 
is_omod2aco::__anon9e387afb0111::ssa_info263*61046927SAndroid Build Coastguard Worker    bool is_omod2() { return label & label_omod2; }
264*61046927SAndroid Build Coastguard Worker 
set_omod4aco::__anon9e387afb0111::ssa_info265*61046927SAndroid Build Coastguard Worker    void set_omod4(Instruction* mul)
266*61046927SAndroid Build Coastguard Worker    {
267*61046927SAndroid Build Coastguard Worker       if (label & temp_labels)
268*61046927SAndroid Build Coastguard Worker          return;
269*61046927SAndroid Build Coastguard Worker       add_label(label_omod4);
270*61046927SAndroid Build Coastguard Worker       instr = mul;
271*61046927SAndroid Build Coastguard Worker    }
272*61046927SAndroid Build Coastguard Worker 
is_omod4aco::__anon9e387afb0111::ssa_info273*61046927SAndroid Build Coastguard Worker    bool is_omod4() { return label & label_omod4; }
274*61046927SAndroid Build Coastguard Worker 
set_omod5aco::__anon9e387afb0111::ssa_info275*61046927SAndroid Build Coastguard Worker    void set_omod5(Instruction* mul)
276*61046927SAndroid Build Coastguard Worker    {
277*61046927SAndroid Build Coastguard Worker       if (label & temp_labels)
278*61046927SAndroid Build Coastguard Worker          return;
279*61046927SAndroid Build Coastguard Worker       add_label(label_omod5);
280*61046927SAndroid Build Coastguard Worker       instr = mul;
281*61046927SAndroid Build Coastguard Worker    }
282*61046927SAndroid Build Coastguard Worker 
is_omod5aco::__anon9e387afb0111::ssa_info283*61046927SAndroid Build Coastguard Worker    bool is_omod5() { return label & label_omod5; }
284*61046927SAndroid Build Coastguard Worker 
set_clampaco::__anon9e387afb0111::ssa_info285*61046927SAndroid Build Coastguard Worker    void set_clamp(Instruction* med3)
286*61046927SAndroid Build Coastguard Worker    {
287*61046927SAndroid Build Coastguard Worker       if (label & temp_labels)
288*61046927SAndroid Build Coastguard Worker          return;
289*61046927SAndroid Build Coastguard Worker       add_label(label_clamp);
290*61046927SAndroid Build Coastguard Worker       instr = med3;
291*61046927SAndroid Build Coastguard Worker    }
292*61046927SAndroid Build Coastguard Worker 
is_clampaco::__anon9e387afb0111::ssa_info293*61046927SAndroid Build Coastguard Worker    bool is_clamp() { return label & label_clamp; }
294*61046927SAndroid Build Coastguard Worker 
set_f2f16aco::__anon9e387afb0111::ssa_info295*61046927SAndroid Build Coastguard Worker    void set_f2f16(Instruction* conv)
296*61046927SAndroid Build Coastguard Worker    {
297*61046927SAndroid Build Coastguard Worker       if (label & temp_labels)
298*61046927SAndroid Build Coastguard Worker          return;
299*61046927SAndroid Build Coastguard Worker       add_label(label_f2f16);
300*61046927SAndroid Build Coastguard Worker       instr = conv;
301*61046927SAndroid Build Coastguard Worker    }
302*61046927SAndroid Build Coastguard Worker 
is_f2f16aco::__anon9e387afb0111::ssa_info303*61046927SAndroid Build Coastguard Worker    bool is_f2f16() { return label & label_f2f16; }
304*61046927SAndroid Build Coastguard Worker 
set_b2faco::__anon9e387afb0111::ssa_info305*61046927SAndroid Build Coastguard Worker    void set_b2f(Temp b2f_val)
306*61046927SAndroid Build Coastguard Worker    {
307*61046927SAndroid Build Coastguard Worker       add_label(label_b2f);
308*61046927SAndroid Build Coastguard Worker       temp = b2f_val;
309*61046927SAndroid Build Coastguard Worker    }
310*61046927SAndroid Build Coastguard Worker 
is_b2faco::__anon9e387afb0111::ssa_info311*61046927SAndroid Build Coastguard Worker    bool is_b2f() { return label & label_b2f; }
312*61046927SAndroid Build Coastguard Worker 
set_add_subaco::__anon9e387afb0111::ssa_info313*61046927SAndroid Build Coastguard Worker    void set_add_sub(Instruction* add_sub_instr)
314*61046927SAndroid Build Coastguard Worker    {
315*61046927SAndroid Build Coastguard Worker       add_label(label_add_sub);
316*61046927SAndroid Build Coastguard Worker       instr = add_sub_instr;
317*61046927SAndroid Build Coastguard Worker    }
318*61046927SAndroid Build Coastguard Worker 
is_add_subaco::__anon9e387afb0111::ssa_info319*61046927SAndroid Build Coastguard Worker    bool is_add_sub() { return label & label_add_sub; }
320*61046927SAndroid Build Coastguard Worker 
set_bitwiseaco::__anon9e387afb0111::ssa_info321*61046927SAndroid Build Coastguard Worker    void set_bitwise(Instruction* bitwise_instr)
322*61046927SAndroid Build Coastguard Worker    {
323*61046927SAndroid Build Coastguard Worker       add_label(label_bitwise);
324*61046927SAndroid Build Coastguard Worker       instr = bitwise_instr;
325*61046927SAndroid Build Coastguard Worker    }
326*61046927SAndroid Build Coastguard Worker 
is_bitwiseaco::__anon9e387afb0111::ssa_info327*61046927SAndroid Build Coastguard Worker    bool is_bitwise() { return label & label_bitwise; }
328*61046927SAndroid Build Coastguard Worker 
set_uniform_bitwiseaco::__anon9e387afb0111::ssa_info329*61046927SAndroid Build Coastguard Worker    void set_uniform_bitwise() { add_label(label_uniform_bitwise); }
330*61046927SAndroid Build Coastguard Worker 
is_uniform_bitwiseaco::__anon9e387afb0111::ssa_info331*61046927SAndroid Build Coastguard Worker    bool is_uniform_bitwise() { return label & label_uniform_bitwise; }
332*61046927SAndroid Build Coastguard Worker 
set_minmaxaco::__anon9e387afb0111::ssa_info333*61046927SAndroid Build Coastguard Worker    void set_minmax(Instruction* minmax_instr)
334*61046927SAndroid Build Coastguard Worker    {
335*61046927SAndroid Build Coastguard Worker       add_label(label_minmax);
336*61046927SAndroid Build Coastguard Worker       instr = minmax_instr;
337*61046927SAndroid Build Coastguard Worker    }
338*61046927SAndroid Build Coastguard Worker 
is_minmaxaco::__anon9e387afb0111::ssa_info339*61046927SAndroid Build Coastguard Worker    bool is_minmax() { return label & label_minmax; }
340*61046927SAndroid Build Coastguard Worker 
set_vopcaco::__anon9e387afb0111::ssa_info341*61046927SAndroid Build Coastguard Worker    void set_vopc(Instruction* vopc_instr)
342*61046927SAndroid Build Coastguard Worker    {
343*61046927SAndroid Build Coastguard Worker       add_label(label_vopc);
344*61046927SAndroid Build Coastguard Worker       instr = vopc_instr;
345*61046927SAndroid Build Coastguard Worker    }
346*61046927SAndroid Build Coastguard Worker 
is_vopcaco::__anon9e387afb0111::ssa_info347*61046927SAndroid Build Coastguard Worker    bool is_vopc() { return label & label_vopc; }
348*61046927SAndroid Build Coastguard Worker 
set_scc_neededaco::__anon9e387afb0111::ssa_info349*61046927SAndroid Build Coastguard Worker    void set_scc_needed() { add_label(label_scc_needed); }
350*61046927SAndroid Build Coastguard Worker 
is_scc_neededaco::__anon9e387afb0111::ssa_info351*61046927SAndroid Build Coastguard Worker    bool is_scc_needed() { return label & label_scc_needed; }
352*61046927SAndroid Build Coastguard Worker 
set_scc_invertaco::__anon9e387afb0111::ssa_info353*61046927SAndroid Build Coastguard Worker    void set_scc_invert(Temp scc_inv)
354*61046927SAndroid Build Coastguard Worker    {
355*61046927SAndroid Build Coastguard Worker       add_label(label_scc_invert);
356*61046927SAndroid Build Coastguard Worker       temp = scc_inv;
357*61046927SAndroid Build Coastguard Worker    }
358*61046927SAndroid Build Coastguard Worker 
is_scc_invertaco::__anon9e387afb0111::ssa_info359*61046927SAndroid Build Coastguard Worker    bool is_scc_invert() { return label & label_scc_invert; }
360*61046927SAndroid Build Coastguard Worker 
set_uniform_boolaco::__anon9e387afb0111::ssa_info361*61046927SAndroid Build Coastguard Worker    void set_uniform_bool(Temp uniform_bool)
362*61046927SAndroid Build Coastguard Worker    {
363*61046927SAndroid Build Coastguard Worker       add_label(label_uniform_bool);
364*61046927SAndroid Build Coastguard Worker       temp = uniform_bool;
365*61046927SAndroid Build Coastguard Worker    }
366*61046927SAndroid Build Coastguard Worker 
is_uniform_boolaco::__anon9e387afb0111::ssa_info367*61046927SAndroid Build Coastguard Worker    bool is_uniform_bool() { return label & label_uniform_bool; }
368*61046927SAndroid Build Coastguard Worker 
set_b2iaco::__anon9e387afb0111::ssa_info369*61046927SAndroid Build Coastguard Worker    void set_b2i(Temp b2i_val)
370*61046927SAndroid Build Coastguard Worker    {
371*61046927SAndroid Build Coastguard Worker       add_label(label_b2i);
372*61046927SAndroid Build Coastguard Worker       temp = b2i_val;
373*61046927SAndroid Build Coastguard Worker    }
374*61046927SAndroid Build Coastguard Worker 
is_b2iaco::__anon9e387afb0111::ssa_info375*61046927SAndroid Build Coastguard Worker    bool is_b2i() { return label & label_b2i; }
376*61046927SAndroid Build Coastguard Worker 
set_usedefaco::__anon9e387afb0111::ssa_info377*61046927SAndroid Build Coastguard Worker    void set_usedef(Instruction* label_instr)
378*61046927SAndroid Build Coastguard Worker    {
379*61046927SAndroid Build Coastguard Worker       add_label(label_usedef);
380*61046927SAndroid Build Coastguard Worker       instr = label_instr;
381*61046927SAndroid Build Coastguard Worker    }
382*61046927SAndroid Build Coastguard Worker 
is_usedefaco::__anon9e387afb0111::ssa_info383*61046927SAndroid Build Coastguard Worker    bool is_usedef() { return label & label_usedef; }
384*61046927SAndroid Build Coastguard Worker 
set_vop3paco::__anon9e387afb0111::ssa_info385*61046927SAndroid Build Coastguard Worker    void set_vop3p(Instruction* vop3p_instr)
386*61046927SAndroid Build Coastguard Worker    {
387*61046927SAndroid Build Coastguard Worker       add_label(label_vop3p);
388*61046927SAndroid Build Coastguard Worker       instr = vop3p_instr;
389*61046927SAndroid Build Coastguard Worker    }
390*61046927SAndroid Build Coastguard Worker 
is_vop3paco::__anon9e387afb0111::ssa_info391*61046927SAndroid Build Coastguard Worker    bool is_vop3p() { return label & label_vop3p; }
392*61046927SAndroid Build Coastguard Worker 
set_fcanonicalizeaco::__anon9e387afb0111::ssa_info393*61046927SAndroid Build Coastguard Worker    void set_fcanonicalize(Temp tmp)
394*61046927SAndroid Build Coastguard Worker    {
395*61046927SAndroid Build Coastguard Worker       add_label(label_fcanonicalize);
396*61046927SAndroid Build Coastguard Worker       temp = tmp;
397*61046927SAndroid Build Coastguard Worker    }
398*61046927SAndroid Build Coastguard Worker 
is_fcanonicalizeaco::__anon9e387afb0111::ssa_info399*61046927SAndroid Build Coastguard Worker    bool is_fcanonicalize() { return label & label_fcanonicalize; }
400*61046927SAndroid Build Coastguard Worker 
set_canonicalizedaco::__anon9e387afb0111::ssa_info401*61046927SAndroid Build Coastguard Worker    void set_canonicalized() { add_label(label_canonicalized); }
402*61046927SAndroid Build Coastguard Worker 
is_canonicalizedaco::__anon9e387afb0111::ssa_info403*61046927SAndroid Build Coastguard Worker    bool is_canonicalized() { return label & label_canonicalized; }
404*61046927SAndroid Build Coastguard Worker 
set_f2f32aco::__anon9e387afb0111::ssa_info405*61046927SAndroid Build Coastguard Worker    void set_f2f32(Instruction* cvt)
406*61046927SAndroid Build Coastguard Worker    {
407*61046927SAndroid Build Coastguard Worker       add_label(label_f2f32);
408*61046927SAndroid Build Coastguard Worker       instr = cvt;
409*61046927SAndroid Build Coastguard Worker    }
410*61046927SAndroid Build Coastguard Worker 
is_f2f32aco::__anon9e387afb0111::ssa_info411*61046927SAndroid Build Coastguard Worker    bool is_f2f32() { return label & label_f2f32; }
412*61046927SAndroid Build Coastguard Worker 
set_extractaco::__anon9e387afb0111::ssa_info413*61046927SAndroid Build Coastguard Worker    void set_extract(Instruction* extract)
414*61046927SAndroid Build Coastguard Worker    {
415*61046927SAndroid Build Coastguard Worker       add_label(label_extract);
416*61046927SAndroid Build Coastguard Worker       instr = extract;
417*61046927SAndroid Build Coastguard Worker    }
418*61046927SAndroid Build Coastguard Worker 
is_extractaco::__anon9e387afb0111::ssa_info419*61046927SAndroid Build Coastguard Worker    bool is_extract() { return label & label_extract; }
420*61046927SAndroid Build Coastguard Worker 
set_insertaco::__anon9e387afb0111::ssa_info421*61046927SAndroid Build Coastguard Worker    void set_insert(Instruction* insert)
422*61046927SAndroid Build Coastguard Worker    {
423*61046927SAndroid Build Coastguard Worker       if (label & temp_labels)
424*61046927SAndroid Build Coastguard Worker          return;
425*61046927SAndroid Build Coastguard Worker       add_label(label_insert);
426*61046927SAndroid Build Coastguard Worker       instr = insert;
427*61046927SAndroid Build Coastguard Worker    }
428*61046927SAndroid Build Coastguard Worker 
is_insertaco::__anon9e387afb0111::ssa_info429*61046927SAndroid Build Coastguard Worker    bool is_insert() { return label & label_insert; }
430*61046927SAndroid Build Coastguard Worker 
set_dpp16aco::__anon9e387afb0111::ssa_info431*61046927SAndroid Build Coastguard Worker    void set_dpp16(Instruction* mov)
432*61046927SAndroid Build Coastguard Worker    {
433*61046927SAndroid Build Coastguard Worker       add_label(label_dpp16);
434*61046927SAndroid Build Coastguard Worker       instr = mov;
435*61046927SAndroid Build Coastguard Worker    }
436*61046927SAndroid Build Coastguard Worker 
set_dpp8aco::__anon9e387afb0111::ssa_info437*61046927SAndroid Build Coastguard Worker    void set_dpp8(Instruction* mov)
438*61046927SAndroid Build Coastguard Worker    {
439*61046927SAndroid Build Coastguard Worker       add_label(label_dpp8);
440*61046927SAndroid Build Coastguard Worker       instr = mov;
441*61046927SAndroid Build Coastguard Worker    }
442*61046927SAndroid Build Coastguard Worker 
is_dppaco::__anon9e387afb0111::ssa_info443*61046927SAndroid Build Coastguard Worker    bool is_dpp() { return label & (label_dpp16 | label_dpp8); }
is_dpp16aco::__anon9e387afb0111::ssa_info444*61046927SAndroid Build Coastguard Worker    bool is_dpp16() { return label & label_dpp16; }
is_dpp8aco::__anon9e387afb0111::ssa_info445*61046927SAndroid Build Coastguard Worker    bool is_dpp8() { return label & label_dpp8; }
446*61046927SAndroid Build Coastguard Worker 
set_splitaco::__anon9e387afb0111::ssa_info447*61046927SAndroid Build Coastguard Worker    void set_split(Instruction* split)
448*61046927SAndroid Build Coastguard Worker    {
449*61046927SAndroid Build Coastguard Worker       add_label(label_split);
450*61046927SAndroid Build Coastguard Worker       instr = split;
451*61046927SAndroid Build Coastguard Worker    }
452*61046927SAndroid Build Coastguard Worker 
is_splitaco::__anon9e387afb0111::ssa_info453*61046927SAndroid Build Coastguard Worker    bool is_split() { return label & label_split; }
454*61046927SAndroid Build Coastguard Worker };
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker struct opt_ctx {
457*61046927SAndroid Build Coastguard Worker    Program* program;
458*61046927SAndroid Build Coastguard Worker    float_mode fp_mode;
459*61046927SAndroid Build Coastguard Worker    std::vector<aco_ptr<Instruction>> instructions;
460*61046927SAndroid Build Coastguard Worker    std::vector<ssa_info> info;
461*61046927SAndroid Build Coastguard Worker    std::pair<uint32_t, Temp> last_literal;
462*61046927SAndroid Build Coastguard Worker    std::vector<mad_info> mad_infos;
463*61046927SAndroid Build Coastguard Worker    std::vector<uint16_t> uses;
464*61046927SAndroid Build Coastguard Worker };
465*61046927SAndroid Build Coastguard Worker 
466*61046927SAndroid Build Coastguard Worker bool
can_use_VOP3(opt_ctx & ctx,const aco_ptr<Instruction> & instr)467*61046927SAndroid Build Coastguard Worker can_use_VOP3(opt_ctx& ctx, const aco_ptr<Instruction>& instr)
468*61046927SAndroid Build Coastguard Worker {
469*61046927SAndroid Build Coastguard Worker    if (instr->isVOP3())
470*61046927SAndroid Build Coastguard Worker       return true;
471*61046927SAndroid Build Coastguard Worker 
472*61046927SAndroid Build Coastguard Worker    if (instr->isVOP3P() || instr->isVINTERP_INREG())
473*61046927SAndroid Build Coastguard Worker       return false;
474*61046927SAndroid Build Coastguard Worker 
475*61046927SAndroid Build Coastguard Worker    if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->gfx_level < GFX10)
476*61046927SAndroid Build Coastguard Worker       return false;
477*61046927SAndroid Build Coastguard Worker 
478*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA())
479*61046927SAndroid Build Coastguard Worker       return false;
480*61046927SAndroid Build Coastguard Worker 
481*61046927SAndroid Build Coastguard Worker    if (instr->isDPP() && ctx.program->gfx_level < GFX11)
482*61046927SAndroid Build Coastguard Worker       return false;
483*61046927SAndroid Build Coastguard Worker 
484*61046927SAndroid Build Coastguard Worker    return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
485*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
486*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 &&
487*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 &&
488*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlane64_b32 &&
489*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readlane_b32 &&
490*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_writelane_b32 &&
491*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readfirstlane_b32;
492*61046927SAndroid Build Coastguard Worker }
493*61046927SAndroid Build Coastguard Worker 
494*61046927SAndroid Build Coastguard Worker bool
pseudo_propagate_temp(opt_ctx & ctx,aco_ptr<Instruction> & instr,Temp temp,unsigned index)495*61046927SAndroid Build Coastguard Worker pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsigned index)
496*61046927SAndroid Build Coastguard Worker {
497*61046927SAndroid Build Coastguard Worker    if (instr->definitions.empty())
498*61046927SAndroid Build Coastguard Worker       return false;
499*61046927SAndroid Build Coastguard Worker 
500*61046927SAndroid Build Coastguard Worker    const bool vgpr =
501*61046927SAndroid Build Coastguard Worker       instr->opcode == aco_opcode::p_as_uniform ||
502*61046927SAndroid Build Coastguard Worker       std::all_of(instr->definitions.begin(), instr->definitions.end(),
503*61046927SAndroid Build Coastguard Worker                   [](const Definition& def) { return def.regClass().type() == RegType::vgpr; });
504*61046927SAndroid Build Coastguard Worker 
505*61046927SAndroid Build Coastguard Worker    /* don't propagate VGPRs into SGPR instructions */
506*61046927SAndroid Build Coastguard Worker    if (temp.type() == RegType::vgpr && !vgpr)
507*61046927SAndroid Build Coastguard Worker       return false;
508*61046927SAndroid Build Coastguard Worker 
509*61046927SAndroid Build Coastguard Worker    bool can_accept_sgpr =
510*61046927SAndroid Build Coastguard Worker       ctx.program->gfx_level >= GFX9 ||
511*61046927SAndroid Build Coastguard Worker       std::none_of(instr->definitions.begin(), instr->definitions.end(),
512*61046927SAndroid Build Coastguard Worker                    [](const Definition& def) { return def.regClass().is_subdword(); });
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
515*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_phi:
516*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_linear_phi:
517*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_parallelcopy:
518*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_create_vector:
519*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_start_linear_vgpr:
520*61046927SAndroid Build Coastguard Worker       if (temp.bytes() != instr->operands[index].bytes())
521*61046927SAndroid Build Coastguard Worker          return false;
522*61046927SAndroid Build Coastguard Worker       break;
523*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract_vector:
524*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract:
525*61046927SAndroid Build Coastguard Worker       if (temp.type() == RegType::sgpr && !can_accept_sgpr)
526*61046927SAndroid Build Coastguard Worker          return false;
527*61046927SAndroid Build Coastguard Worker       break;
528*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_split_vector: {
529*61046927SAndroid Build Coastguard Worker       if (temp.type() == RegType::sgpr && !can_accept_sgpr)
530*61046927SAndroid Build Coastguard Worker          return false;
531*61046927SAndroid Build Coastguard Worker       /* don't increase the vector size */
532*61046927SAndroid Build Coastguard Worker       if (temp.bytes() > instr->operands[index].bytes())
533*61046927SAndroid Build Coastguard Worker          return false;
534*61046927SAndroid Build Coastguard Worker       /* We can decrease the vector size as smaller temporaries are only
535*61046927SAndroid Build Coastguard Worker        * propagated by p_as_uniform instructions.
536*61046927SAndroid Build Coastguard Worker        * If this propagation leads to invalid IR or hits the assertion below,
537*61046927SAndroid Build Coastguard Worker        * it means that some undefined bytes within a dword are begin accessed
538*61046927SAndroid Build Coastguard Worker        * and a bug in instruction_selection is likely. */
539*61046927SAndroid Build Coastguard Worker       int decrease = instr->operands[index].bytes() - temp.bytes();
540*61046927SAndroid Build Coastguard Worker       while (decrease > 0) {
541*61046927SAndroid Build Coastguard Worker          decrease -= instr->definitions.back().bytes();
542*61046927SAndroid Build Coastguard Worker          instr->definitions.pop_back();
543*61046927SAndroid Build Coastguard Worker       }
544*61046927SAndroid Build Coastguard Worker       assert(decrease == 0);
545*61046927SAndroid Build Coastguard Worker       break;
546*61046927SAndroid Build Coastguard Worker    }
547*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_as_uniform:
548*61046927SAndroid Build Coastguard Worker       if (temp.regClass() == instr->definitions[0].regClass())
549*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::p_parallelcopy;
550*61046927SAndroid Build Coastguard Worker       break;
551*61046927SAndroid Build Coastguard Worker    default: return false;
552*61046927SAndroid Build Coastguard Worker    }
553*61046927SAndroid Build Coastguard Worker 
554*61046927SAndroid Build Coastguard Worker    instr->operands[index].setTemp(temp);
555*61046927SAndroid Build Coastguard Worker    return true;
556*61046927SAndroid Build Coastguard Worker }
557*61046927SAndroid Build Coastguard Worker 
558*61046927SAndroid Build Coastguard Worker /* This expects the DPP modifier to be removed. */
559*61046927SAndroid Build Coastguard Worker bool
can_apply_sgprs(opt_ctx & ctx,aco_ptr<Instruction> & instr)560*61046927SAndroid Build Coastguard Worker can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
561*61046927SAndroid Build Coastguard Worker {
562*61046927SAndroid Build Coastguard Worker    assert(instr->isVALU());
563*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
564*61046927SAndroid Build Coastguard Worker       return false;
565*61046927SAndroid Build Coastguard Worker    return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
566*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readlane_b32 &&
567*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readlane_b32_e64 &&
568*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_writelane_b32 &&
569*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_writelane_b32_e64 &&
570*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlane16_b32 &&
571*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlanex16_b32 &&
572*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlane64_b32 &&
573*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p1_f32 &&
574*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_f32 &&
575*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_mov_f32 &&
576*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p1ll_f16 &&
577*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p1lv_f16 &&
578*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_legacy_f16 &&
579*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_f16 &&
580*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_hi_f16 &&
581*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p10_f32_inreg &&
582*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_f32_inreg &&
583*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p10_f16_f32_inreg &&
584*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_f16_f32_inreg &&
585*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p10_rtz_f16_f32_inreg &&
586*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_interp_p2_rtz_f16_f32_inreg &&
587*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_wmma_f32_16x16x16_f16 &&
588*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_wmma_f32_16x16x16_bf16 &&
589*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_wmma_f16_16x16x16_f16 &&
590*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_wmma_bf16_16x16x16_bf16 &&
591*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_wmma_i32_16x16x16_iu8 &&
592*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_wmma_i32_16x16x16_iu4;
593*61046927SAndroid Build Coastguard Worker }
594*61046927SAndroid Build Coastguard Worker 
595*61046927SAndroid Build Coastguard Worker /* only covers special cases */
596*61046927SAndroid Build Coastguard Worker bool
alu_can_accept_constant(const aco_ptr<Instruction> & instr,unsigned operand)597*61046927SAndroid Build Coastguard Worker alu_can_accept_constant(const aco_ptr<Instruction>& instr, unsigned operand)
598*61046927SAndroid Build Coastguard Worker {
599*61046927SAndroid Build Coastguard Worker    /* Fixed operands can't accept constants because we need them
600*61046927SAndroid Build Coastguard Worker     * to be in their fixed register.
601*61046927SAndroid Build Coastguard Worker     */
602*61046927SAndroid Build Coastguard Worker    assert(instr->operands.size() > operand);
603*61046927SAndroid Build Coastguard Worker    if (instr->operands[operand].isFixed())
604*61046927SAndroid Build Coastguard Worker       return false;
605*61046927SAndroid Build Coastguard Worker 
606*61046927SAndroid Build Coastguard Worker    /* SOPP instructions can't use constants. */
607*61046927SAndroid Build Coastguard Worker    if (instr->isSOPP())
608*61046927SAndroid Build Coastguard Worker       return false;
609*61046927SAndroid Build Coastguard Worker 
610*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
611*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_fmac_f16:
612*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_fmac_f32:
613*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mac_f32:
614*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_writelane_b32:
615*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_writelane_b32_e64:
616*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cndmask_b32: return operand != 2;
617*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_addk_i32:
618*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_mulk_i32:
619*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract_vector:
620*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_split_vector:
621*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_readlane_b32:
622*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_readlane_b32_e64:
623*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_readfirstlane_b32:
624*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract:
625*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_insert: return operand != 0;
626*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_bpermute_readlane:
627*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_bpermute_shared_vgpr:
628*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_bpermute_permlane:
629*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_interp_gfx11:
630*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_dual_src_export_gfx11:
631*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p1_f32:
632*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_f32:
633*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_mov_f32:
634*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p1ll_f16:
635*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p1lv_f16:
636*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_legacy_f16:
637*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p10_f32_inreg:
638*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_f32_inreg:
639*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p10_f16_f32_inreg:
640*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_f16_f32_inreg:
641*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p10_rtz_f16_f32_inreg:
642*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_rtz_f16_f32_inreg:
643*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_wmma_f32_16x16x16_f16:
644*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_wmma_f32_16x16x16_bf16:
645*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_wmma_f16_16x16x16_f16:
646*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_wmma_bf16_16x16x16_bf16:
647*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_wmma_i32_16x16x16_iu8:
648*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_wmma_i32_16x16x16_iu4: return false;
649*61046927SAndroid Build Coastguard Worker    default: return true;
650*61046927SAndroid Build Coastguard Worker    }
651*61046927SAndroid Build Coastguard Worker }
652*61046927SAndroid Build Coastguard Worker 
653*61046927SAndroid Build Coastguard Worker bool
valu_can_accept_vgpr(aco_ptr<Instruction> & instr,unsigned operand)654*61046927SAndroid Build Coastguard Worker valu_can_accept_vgpr(aco_ptr<Instruction>& instr, unsigned operand)
655*61046927SAndroid Build Coastguard Worker {
656*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_writelane_b32 ||
657*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_writelane_b32_e64)
658*61046927SAndroid Build Coastguard Worker       return operand == 2;
659*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_permlane16_b32 ||
660*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_permlanex16_b32 ||
661*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_readlane_b32 ||
662*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_readlane_b32_e64)
663*61046927SAndroid Build Coastguard Worker       return operand == 0;
664*61046927SAndroid Build Coastguard Worker    return instr_info.classes[(int)instr->opcode] != instr_class::valu_pseudo_scalar_trans;
665*61046927SAndroid Build Coastguard Worker }
666*61046927SAndroid Build Coastguard Worker 
667*61046927SAndroid Build Coastguard Worker /* check constant bus and literal limitations */
668*61046927SAndroid Build Coastguard Worker bool
check_vop3_operands(opt_ctx & ctx,unsigned num_operands,Operand * operands)669*61046927SAndroid Build Coastguard Worker check_vop3_operands(opt_ctx& ctx, unsigned num_operands, Operand* operands)
670*61046927SAndroid Build Coastguard Worker {
671*61046927SAndroid Build Coastguard Worker    int limit = ctx.program->gfx_level >= GFX10 ? 2 : 1;
672*61046927SAndroid Build Coastguard Worker    Operand literal32(s1);
673*61046927SAndroid Build Coastguard Worker    Operand literal64(s2);
674*61046927SAndroid Build Coastguard Worker    unsigned num_sgprs = 0;
675*61046927SAndroid Build Coastguard Worker    unsigned sgpr[] = {0, 0};
676*61046927SAndroid Build Coastguard Worker 
677*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < num_operands; i++) {
678*61046927SAndroid Build Coastguard Worker       Operand op = operands[i];
679*61046927SAndroid Build Coastguard Worker 
680*61046927SAndroid Build Coastguard Worker       if (op.hasRegClass() && op.regClass().type() == RegType::sgpr) {
681*61046927SAndroid Build Coastguard Worker          /* two reads of the same SGPR count as 1 to the limit */
682*61046927SAndroid Build Coastguard Worker          if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
683*61046927SAndroid Build Coastguard Worker             if (num_sgprs < 2)
684*61046927SAndroid Build Coastguard Worker                sgpr[num_sgprs++] = op.tempId();
685*61046927SAndroid Build Coastguard Worker             limit--;
686*61046927SAndroid Build Coastguard Worker             if (limit < 0)
687*61046927SAndroid Build Coastguard Worker                return false;
688*61046927SAndroid Build Coastguard Worker          }
689*61046927SAndroid Build Coastguard Worker       } else if (op.isLiteral()) {
690*61046927SAndroid Build Coastguard Worker          if (ctx.program->gfx_level < GFX10)
691*61046927SAndroid Build Coastguard Worker             return false;
692*61046927SAndroid Build Coastguard Worker 
693*61046927SAndroid Build Coastguard Worker          if (!literal32.isUndefined() && literal32.constantValue() != op.constantValue())
694*61046927SAndroid Build Coastguard Worker             return false;
695*61046927SAndroid Build Coastguard Worker          if (!literal64.isUndefined() && literal64.constantValue() != op.constantValue())
696*61046927SAndroid Build Coastguard Worker             return false;
697*61046927SAndroid Build Coastguard Worker 
698*61046927SAndroid Build Coastguard Worker          /* Any number of 32-bit literals counts as only 1 to the limit. Same
699*61046927SAndroid Build Coastguard Worker           * (but separately) for 64-bit literals. */
700*61046927SAndroid Build Coastguard Worker          if (op.size() == 1 && literal32.isUndefined()) {
701*61046927SAndroid Build Coastguard Worker             limit--;
702*61046927SAndroid Build Coastguard Worker             literal32 = op;
703*61046927SAndroid Build Coastguard Worker          } else if (op.size() == 2 && literal64.isUndefined()) {
704*61046927SAndroid Build Coastguard Worker             limit--;
705*61046927SAndroid Build Coastguard Worker             literal64 = op;
706*61046927SAndroid Build Coastguard Worker          }
707*61046927SAndroid Build Coastguard Worker 
708*61046927SAndroid Build Coastguard Worker          if (limit < 0)
709*61046927SAndroid Build Coastguard Worker             return false;
710*61046927SAndroid Build Coastguard Worker       }
711*61046927SAndroid Build Coastguard Worker    }
712*61046927SAndroid Build Coastguard Worker 
713*61046927SAndroid Build Coastguard Worker    return true;
714*61046927SAndroid Build Coastguard Worker }
715*61046927SAndroid Build Coastguard Worker 
716*61046927SAndroid Build Coastguard Worker bool
parse_base_offset(opt_ctx & ctx,Instruction * instr,unsigned op_index,Temp * base,uint32_t * offset,bool prevent_overflow)717*61046927SAndroid Build Coastguard Worker parse_base_offset(opt_ctx& ctx, Instruction* instr, unsigned op_index, Temp* base, uint32_t* offset,
718*61046927SAndroid Build Coastguard Worker                   bool prevent_overflow)
719*61046927SAndroid Build Coastguard Worker {
720*61046927SAndroid Build Coastguard Worker    Operand op = instr->operands[op_index];
721*61046927SAndroid Build Coastguard Worker 
722*61046927SAndroid Build Coastguard Worker    if (!op.isTemp())
723*61046927SAndroid Build Coastguard Worker       return false;
724*61046927SAndroid Build Coastguard Worker    Temp tmp = op.getTemp();
725*61046927SAndroid Build Coastguard Worker    if (!ctx.info[tmp.id()].is_add_sub())
726*61046927SAndroid Build Coastguard Worker       return false;
727*61046927SAndroid Build Coastguard Worker 
728*61046927SAndroid Build Coastguard Worker    Instruction* add_instr = ctx.info[tmp.id()].instr;
729*61046927SAndroid Build Coastguard Worker 
730*61046927SAndroid Build Coastguard Worker    unsigned mask = 0x3;
731*61046927SAndroid Build Coastguard Worker    bool is_sub = false;
732*61046927SAndroid Build Coastguard Worker    switch (add_instr->opcode) {
733*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_u32:
734*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_co_u32:
735*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_co_u32_e64:
736*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_add_i32:
737*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_add_u32: break;
738*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_u32:
739*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_i32:
740*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_co_u32:
741*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_co_u32_e64:
742*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_sub_u32:
743*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_sub_i32:
744*61046927SAndroid Build Coastguard Worker       mask = 0x2;
745*61046927SAndroid Build Coastguard Worker       is_sub = true;
746*61046927SAndroid Build Coastguard Worker       break;
747*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_u32:
748*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_co_u32:
749*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_co_u32_e64:
750*61046927SAndroid Build Coastguard Worker       mask = 0x1;
751*61046927SAndroid Build Coastguard Worker       is_sub = true;
752*61046927SAndroid Build Coastguard Worker       break;
753*61046927SAndroid Build Coastguard Worker    default: return false;
754*61046927SAndroid Build Coastguard Worker    }
755*61046927SAndroid Build Coastguard Worker    if (prevent_overflow && !add_instr->definitions[0].isNUW())
756*61046927SAndroid Build Coastguard Worker       return false;
757*61046927SAndroid Build Coastguard Worker 
758*61046927SAndroid Build Coastguard Worker    if (add_instr->usesModifiers())
759*61046927SAndroid Build Coastguard Worker       return false;
760*61046927SAndroid Build Coastguard Worker 
761*61046927SAndroid Build Coastguard Worker    u_foreach_bit (i, mask) {
762*61046927SAndroid Build Coastguard Worker       if (add_instr->operands[i].isConstant()) {
763*61046927SAndroid Build Coastguard Worker          *offset = add_instr->operands[i].constantValue() * (uint32_t)(is_sub ? -1 : 1);
764*61046927SAndroid Build Coastguard Worker       } else if (add_instr->operands[i].isTemp() &&
765*61046927SAndroid Build Coastguard Worker                  ctx.info[add_instr->operands[i].tempId()].is_constant_or_literal(32)) {
766*61046927SAndroid Build Coastguard Worker          *offset = ctx.info[add_instr->operands[i].tempId()].val * (uint32_t)(is_sub ? -1 : 1);
767*61046927SAndroid Build Coastguard Worker       } else {
768*61046927SAndroid Build Coastguard Worker          continue;
769*61046927SAndroid Build Coastguard Worker       }
770*61046927SAndroid Build Coastguard Worker       if (!add_instr->operands[!i].isTemp())
771*61046927SAndroid Build Coastguard Worker          continue;
772*61046927SAndroid Build Coastguard Worker 
773*61046927SAndroid Build Coastguard Worker       uint32_t offset2 = 0;
774*61046927SAndroid Build Coastguard Worker       if (parse_base_offset(ctx, add_instr, !i, base, &offset2, prevent_overflow)) {
775*61046927SAndroid Build Coastguard Worker          *offset += offset2;
776*61046927SAndroid Build Coastguard Worker       } else {
777*61046927SAndroid Build Coastguard Worker          *base = add_instr->operands[!i].getTemp();
778*61046927SAndroid Build Coastguard Worker       }
779*61046927SAndroid Build Coastguard Worker       return true;
780*61046927SAndroid Build Coastguard Worker    }
781*61046927SAndroid Build Coastguard Worker 
782*61046927SAndroid Build Coastguard Worker    return false;
783*61046927SAndroid Build Coastguard Worker }
784*61046927SAndroid Build Coastguard Worker 
785*61046927SAndroid Build Coastguard Worker void
skip_smem_offset_align(opt_ctx & ctx,SMEM_instruction * smem)786*61046927SAndroid Build Coastguard Worker skip_smem_offset_align(opt_ctx& ctx, SMEM_instruction* smem)
787*61046927SAndroid Build Coastguard Worker {
788*61046927SAndroid Build Coastguard Worker    bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4);
789*61046927SAndroid Build Coastguard Worker    if (soe && !smem->operands[1].isConstant())
790*61046927SAndroid Build Coastguard Worker       return;
791*61046927SAndroid Build Coastguard Worker    /* We don't need to check the constant offset because the address seems to be calculated with
792*61046927SAndroid Build Coastguard Worker     * (offset&-4 + const_offset&-4), not (offset+const_offset)&-4.
793*61046927SAndroid Build Coastguard Worker     */
794*61046927SAndroid Build Coastguard Worker 
795*61046927SAndroid Build Coastguard Worker    Operand& op = smem->operands[soe ? smem->operands.size() - 1 : 1];
796*61046927SAndroid Build Coastguard Worker    if (!op.isTemp() || !ctx.info[op.tempId()].is_bitwise())
797*61046927SAndroid Build Coastguard Worker       return;
798*61046927SAndroid Build Coastguard Worker 
799*61046927SAndroid Build Coastguard Worker    Instruction* bitwise_instr = ctx.info[op.tempId()].instr;
800*61046927SAndroid Build Coastguard Worker    if (bitwise_instr->opcode != aco_opcode::s_and_b32)
801*61046927SAndroid Build Coastguard Worker       return;
802*61046927SAndroid Build Coastguard Worker 
803*61046927SAndroid Build Coastguard Worker    if (bitwise_instr->operands[0].constantEquals(-4) &&
804*61046927SAndroid Build Coastguard Worker        bitwise_instr->operands[1].isOfType(op.regClass().type()))
805*61046927SAndroid Build Coastguard Worker       op.setTemp(bitwise_instr->operands[1].getTemp());
806*61046927SAndroid Build Coastguard Worker    else if (bitwise_instr->operands[1].constantEquals(-4) &&
807*61046927SAndroid Build Coastguard Worker             bitwise_instr->operands[0].isOfType(op.regClass().type()))
808*61046927SAndroid Build Coastguard Worker       op.setTemp(bitwise_instr->operands[0].getTemp());
809*61046927SAndroid Build Coastguard Worker }
810*61046927SAndroid Build Coastguard Worker 
811*61046927SAndroid Build Coastguard Worker void
smem_combine(opt_ctx & ctx,aco_ptr<Instruction> & instr)812*61046927SAndroid Build Coastguard Worker smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr)
813*61046927SAndroid Build Coastguard Worker {
814*61046927SAndroid Build Coastguard Worker    /* skip &-4 before offset additions: load((a + 16) & -4, 0) */
815*61046927SAndroid Build Coastguard Worker    if (!instr->operands.empty())
816*61046927SAndroid Build Coastguard Worker       skip_smem_offset_align(ctx, &instr->smem());
817*61046927SAndroid Build Coastguard Worker 
818*61046927SAndroid Build Coastguard Worker    /* propagate constants and combine additions */
819*61046927SAndroid Build Coastguard Worker    if (!instr->operands.empty() && instr->operands[1].isTemp()) {
820*61046927SAndroid Build Coastguard Worker       SMEM_instruction& smem = instr->smem();
821*61046927SAndroid Build Coastguard Worker       ssa_info info = ctx.info[instr->operands[1].tempId()];
822*61046927SAndroid Build Coastguard Worker 
823*61046927SAndroid Build Coastguard Worker       Temp base;
824*61046927SAndroid Build Coastguard Worker       uint32_t offset;
825*61046927SAndroid Build Coastguard Worker       if (info.is_constant_or_literal(32) &&
826*61046927SAndroid Build Coastguard Worker           ((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) ||
827*61046927SAndroid Build Coastguard Worker            (ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) ||
828*61046927SAndroid Build Coastguard Worker            (ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) {
829*61046927SAndroid Build Coastguard Worker          instr->operands[1] = Operand::c32(info.val);
830*61046927SAndroid Build Coastguard Worker       } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) &&
831*61046927SAndroid Build Coastguard Worker                  base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 &&
832*61046927SAndroid Build Coastguard Worker                  offset % 4u == 0) {
833*61046927SAndroid Build Coastguard Worker          bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
834*61046927SAndroid Build Coastguard Worker          if (soe) {
835*61046927SAndroid Build Coastguard Worker             if (ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) &&
836*61046927SAndroid Build Coastguard Worker                 ctx.info[smem.operands.back().tempId()].val == 0) {
837*61046927SAndroid Build Coastguard Worker                smem.operands[1] = Operand::c32(offset);
838*61046927SAndroid Build Coastguard Worker                smem.operands.back() = Operand(base);
839*61046927SAndroid Build Coastguard Worker             }
840*61046927SAndroid Build Coastguard Worker          } else {
841*61046927SAndroid Build Coastguard Worker             Instruction* new_instr = create_instruction(
842*61046927SAndroid Build Coastguard Worker                smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
843*61046927SAndroid Build Coastguard Worker             new_instr->operands[0] = smem.operands[0];
844*61046927SAndroid Build Coastguard Worker             new_instr->operands[1] = Operand::c32(offset);
845*61046927SAndroid Build Coastguard Worker             if (smem.definitions.empty())
846*61046927SAndroid Build Coastguard Worker                new_instr->operands[2] = smem.operands[2];
847*61046927SAndroid Build Coastguard Worker             new_instr->operands.back() = Operand(base);
848*61046927SAndroid Build Coastguard Worker             if (!smem.definitions.empty())
849*61046927SAndroid Build Coastguard Worker                new_instr->definitions[0] = smem.definitions[0];
850*61046927SAndroid Build Coastguard Worker             new_instr->smem().sync = smem.sync;
851*61046927SAndroid Build Coastguard Worker             new_instr->smem().cache = smem.cache;
852*61046927SAndroid Build Coastguard Worker             instr.reset(new_instr);
853*61046927SAndroid Build Coastguard Worker          }
854*61046927SAndroid Build Coastguard Worker       }
855*61046927SAndroid Build Coastguard Worker    }
856*61046927SAndroid Build Coastguard Worker 
857*61046927SAndroid Build Coastguard Worker    /* skip &-4 after offset additions: load(a & -4, 16) */
858*61046927SAndroid Build Coastguard Worker    if (!instr->operands.empty())
859*61046927SAndroid Build Coastguard Worker       skip_smem_offset_align(ctx, &instr->smem());
860*61046927SAndroid Build Coastguard Worker }
861*61046927SAndroid Build Coastguard Worker 
862*61046927SAndroid Build Coastguard Worker Operand
get_constant_op(opt_ctx & ctx,ssa_info info,uint32_t bits)863*61046927SAndroid Build Coastguard Worker get_constant_op(opt_ctx& ctx, ssa_info info, uint32_t bits)
864*61046927SAndroid Build Coastguard Worker {
865*61046927SAndroid Build Coastguard Worker    if (bits == 64)
866*61046927SAndroid Build Coastguard Worker       return Operand::c32_or_c64(info.val, true);
867*61046927SAndroid Build Coastguard Worker    return Operand::get_const(ctx.program->gfx_level, info.val, bits / 8u);
868*61046927SAndroid Build Coastguard Worker }
869*61046927SAndroid Build Coastguard Worker 
870*61046927SAndroid Build Coastguard Worker void
propagate_constants_vop3p(opt_ctx & ctx,aco_ptr<Instruction> & instr,ssa_info & info,unsigned i)871*61046927SAndroid Build Coastguard Worker propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info, unsigned i)
872*61046927SAndroid Build Coastguard Worker {
873*61046927SAndroid Build Coastguard Worker    if (!info.is_constant_or_literal(32))
874*61046927SAndroid Build Coastguard Worker       return;
875*61046927SAndroid Build Coastguard Worker 
876*61046927SAndroid Build Coastguard Worker    assert(instr->operands[i].isTemp());
877*61046927SAndroid Build Coastguard Worker    unsigned bits = get_operand_size(instr, i);
878*61046927SAndroid Build Coastguard Worker    if (info.is_constant(bits)) {
879*61046927SAndroid Build Coastguard Worker       instr->operands[i] = get_constant_op(ctx, info, bits);
880*61046927SAndroid Build Coastguard Worker       return;
881*61046927SAndroid Build Coastguard Worker    }
882*61046927SAndroid Build Coastguard Worker 
883*61046927SAndroid Build Coastguard Worker    /* The accumulation operand of dot product instructions ignores opsel. */
884*61046927SAndroid Build Coastguard Worker    bool cannot_use_opsel =
885*61046927SAndroid Build Coastguard Worker       (instr->opcode == aco_opcode::v_dot4_i32_i8 || instr->opcode == aco_opcode::v_dot2_i32_i16 ||
886*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_dot4_i32_iu8 || instr->opcode == aco_opcode::v_dot4_u32_u8 ||
887*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_dot2_u32_u16) &&
888*61046927SAndroid Build Coastguard Worker       i == 2;
889*61046927SAndroid Build Coastguard Worker    if (cannot_use_opsel)
890*61046927SAndroid Build Coastguard Worker       return;
891*61046927SAndroid Build Coastguard Worker 
892*61046927SAndroid Build Coastguard Worker    /* try to fold inline constants */
893*61046927SAndroid Build Coastguard Worker    VALU_instruction* vop3p = &instr->valu();
894*61046927SAndroid Build Coastguard Worker    bool opsel_lo = vop3p->opsel_lo[i];
895*61046927SAndroid Build Coastguard Worker    bool opsel_hi = vop3p->opsel_hi[i];
896*61046927SAndroid Build Coastguard Worker 
897*61046927SAndroid Build Coastguard Worker    Operand const_op[2];
898*61046927SAndroid Build Coastguard Worker    bool const_opsel[2] = {false, false};
899*61046927SAndroid Build Coastguard Worker    for (unsigned j = 0; j < 2; j++) {
900*61046927SAndroid Build Coastguard Worker       if ((unsigned)opsel_lo != j && (unsigned)opsel_hi != j)
901*61046927SAndroid Build Coastguard Worker          continue; /* this half is unused */
902*61046927SAndroid Build Coastguard Worker 
903*61046927SAndroid Build Coastguard Worker       uint16_t val = info.val >> (j ? 16 : 0);
904*61046927SAndroid Build Coastguard Worker       Operand op = Operand::get_const(ctx.program->gfx_level, val, bits / 8u);
905*61046927SAndroid Build Coastguard Worker       if (bits == 32 && op.isLiteral()) /* try sign extension */
906*61046927SAndroid Build Coastguard Worker          op = Operand::get_const(ctx.program->gfx_level, val | 0xffff0000, 4);
907*61046927SAndroid Build Coastguard Worker       if (bits == 32 && op.isLiteral()) { /* try shifting left */
908*61046927SAndroid Build Coastguard Worker          op = Operand::get_const(ctx.program->gfx_level, val << 16, 4);
909*61046927SAndroid Build Coastguard Worker          const_opsel[j] = true;
910*61046927SAndroid Build Coastguard Worker       }
911*61046927SAndroid Build Coastguard Worker       if (op.isLiteral())
912*61046927SAndroid Build Coastguard Worker          return;
913*61046927SAndroid Build Coastguard Worker       const_op[j] = op;
914*61046927SAndroid Build Coastguard Worker    }
915*61046927SAndroid Build Coastguard Worker 
916*61046927SAndroid Build Coastguard Worker    Operand const_lo = const_op[0];
917*61046927SAndroid Build Coastguard Worker    Operand const_hi = const_op[1];
918*61046927SAndroid Build Coastguard Worker    bool const_lo_opsel = const_opsel[0];
919*61046927SAndroid Build Coastguard Worker    bool const_hi_opsel = const_opsel[1];
920*61046927SAndroid Build Coastguard Worker 
921*61046927SAndroid Build Coastguard Worker    if (opsel_lo == opsel_hi) {
922*61046927SAndroid Build Coastguard Worker       /* use the single 16bit value */
923*61046927SAndroid Build Coastguard Worker       instr->operands[i] = opsel_lo ? const_hi : const_lo;
924*61046927SAndroid Build Coastguard Worker 
925*61046927SAndroid Build Coastguard Worker       /* opsel must point the same for both halves */
926*61046927SAndroid Build Coastguard Worker       opsel_lo = opsel_lo ? const_hi_opsel : const_lo_opsel;
927*61046927SAndroid Build Coastguard Worker       opsel_hi = opsel_lo;
928*61046927SAndroid Build Coastguard Worker    } else if (const_lo == const_hi) {
929*61046927SAndroid Build Coastguard Worker       /* both constants are the same */
930*61046927SAndroid Build Coastguard Worker       instr->operands[i] = const_lo;
931*61046927SAndroid Build Coastguard Worker 
932*61046927SAndroid Build Coastguard Worker       /* opsel must point the same for both halves */
933*61046927SAndroid Build Coastguard Worker       opsel_lo = const_lo_opsel;
934*61046927SAndroid Build Coastguard Worker       opsel_hi = const_lo_opsel;
935*61046927SAndroid Build Coastguard Worker    } else if (const_lo.constantValue16(const_lo_opsel) ==
936*61046927SAndroid Build Coastguard Worker               const_hi.constantValue16(!const_hi_opsel)) {
937*61046927SAndroid Build Coastguard Worker       instr->operands[i] = const_hi;
938*61046927SAndroid Build Coastguard Worker 
939*61046927SAndroid Build Coastguard Worker       /* redirect opsel selection */
940*61046927SAndroid Build Coastguard Worker       opsel_lo = opsel_lo ? const_hi_opsel : !const_hi_opsel;
941*61046927SAndroid Build Coastguard Worker       opsel_hi = opsel_hi ? const_hi_opsel : !const_hi_opsel;
942*61046927SAndroid Build Coastguard Worker    } else if (const_hi.constantValue16(const_hi_opsel) ==
943*61046927SAndroid Build Coastguard Worker               const_lo.constantValue16(!const_lo_opsel)) {
944*61046927SAndroid Build Coastguard Worker       instr->operands[i] = const_lo;
945*61046927SAndroid Build Coastguard Worker 
946*61046927SAndroid Build Coastguard Worker       /* redirect opsel selection */
947*61046927SAndroid Build Coastguard Worker       opsel_lo = opsel_lo ? !const_lo_opsel : const_lo_opsel;
948*61046927SAndroid Build Coastguard Worker       opsel_hi = opsel_hi ? !const_lo_opsel : const_lo_opsel;
949*61046927SAndroid Build Coastguard Worker    } else if (bits == 16 && const_lo.constantValue() == (const_hi.constantValue() ^ (1 << 15))) {
950*61046927SAndroid Build Coastguard Worker       assert(const_lo_opsel == false && const_hi_opsel == false);
951*61046927SAndroid Build Coastguard Worker 
952*61046927SAndroid Build Coastguard Worker       /* const_lo == -const_hi */
953*61046927SAndroid Build Coastguard Worker       if (!can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, i))
954*61046927SAndroid Build Coastguard Worker          return;
955*61046927SAndroid Build Coastguard Worker 
956*61046927SAndroid Build Coastguard Worker       instr->operands[i] = Operand::c16(const_lo.constantValue() & 0x7FFF);
957*61046927SAndroid Build Coastguard Worker       bool neg_lo = const_lo.constantValue() & (1 << 15);
958*61046927SAndroid Build Coastguard Worker       vop3p->neg_lo[i] ^= opsel_lo ^ neg_lo;
959*61046927SAndroid Build Coastguard Worker       vop3p->neg_hi[i] ^= opsel_hi ^ neg_lo;
960*61046927SAndroid Build Coastguard Worker 
961*61046927SAndroid Build Coastguard Worker       /* opsel must point to lo for both operands */
962*61046927SAndroid Build Coastguard Worker       opsel_lo = false;
963*61046927SAndroid Build Coastguard Worker       opsel_hi = false;
964*61046927SAndroid Build Coastguard Worker    }
965*61046927SAndroid Build Coastguard Worker 
966*61046927SAndroid Build Coastguard Worker    vop3p->opsel_lo[i] = opsel_lo;
967*61046927SAndroid Build Coastguard Worker    vop3p->opsel_hi[i] = opsel_hi;
968*61046927SAndroid Build Coastguard Worker }
969*61046927SAndroid Build Coastguard Worker 
970*61046927SAndroid Build Coastguard Worker bool
fixed_to_exec(Operand op)971*61046927SAndroid Build Coastguard Worker fixed_to_exec(Operand op)
972*61046927SAndroid Build Coastguard Worker {
973*61046927SAndroid Build Coastguard Worker    return op.isFixed() && op.physReg() == exec;
974*61046927SAndroid Build Coastguard Worker }
975*61046927SAndroid Build Coastguard Worker 
976*61046927SAndroid Build Coastguard Worker SubdwordSel
parse_extract(Instruction * instr)977*61046927SAndroid Build Coastguard Worker parse_extract(Instruction* instr)
978*61046927SAndroid Build Coastguard Worker {
979*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::p_extract) {
980*61046927SAndroid Build Coastguard Worker       unsigned size = instr->operands[2].constantValue() / 8;
981*61046927SAndroid Build Coastguard Worker       unsigned offset = instr->operands[1].constantValue() * size;
982*61046927SAndroid Build Coastguard Worker       bool sext = instr->operands[3].constantEquals(1);
983*61046927SAndroid Build Coastguard Worker       return SubdwordSel(size, offset, sext);
984*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_insert && instr->operands[1].constantEquals(0)) {
985*61046927SAndroid Build Coastguard Worker       return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
986*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_extract_vector) {
987*61046927SAndroid Build Coastguard Worker       unsigned size = instr->definitions[0].bytes();
988*61046927SAndroid Build Coastguard Worker       unsigned offset = instr->operands[1].constantValue() * size;
989*61046927SAndroid Build Coastguard Worker       if (size <= 2)
990*61046927SAndroid Build Coastguard Worker          return SubdwordSel(size, offset, false);
991*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_split_vector) {
992*61046927SAndroid Build Coastguard Worker       assert(instr->operands[0].bytes() == 4 && instr->definitions[1].bytes() == 2);
993*61046927SAndroid Build Coastguard Worker       return SubdwordSel(2, 2, false);
994*61046927SAndroid Build Coastguard Worker    }
995*61046927SAndroid Build Coastguard Worker 
996*61046927SAndroid Build Coastguard Worker    return SubdwordSel();
997*61046927SAndroid Build Coastguard Worker }
998*61046927SAndroid Build Coastguard Worker 
999*61046927SAndroid Build Coastguard Worker SubdwordSel
parse_insert(Instruction * instr)1000*61046927SAndroid Build Coastguard Worker parse_insert(Instruction* instr)
1001*61046927SAndroid Build Coastguard Worker {
1002*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::p_extract && instr->operands[3].constantEquals(0) &&
1003*61046927SAndroid Build Coastguard Worker        instr->operands[1].constantEquals(0)) {
1004*61046927SAndroid Build Coastguard Worker       return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword;
1005*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_insert) {
1006*61046927SAndroid Build Coastguard Worker       unsigned size = instr->operands[2].constantValue() / 8;
1007*61046927SAndroid Build Coastguard Worker       unsigned offset = instr->operands[1].constantValue() * size;
1008*61046927SAndroid Build Coastguard Worker       return SubdwordSel(size, offset, false);
1009*61046927SAndroid Build Coastguard Worker    } else {
1010*61046927SAndroid Build Coastguard Worker       return SubdwordSel();
1011*61046927SAndroid Build Coastguard Worker    }
1012*61046927SAndroid Build Coastguard Worker }
1013*61046927SAndroid Build Coastguard Worker 
1014*61046927SAndroid Build Coastguard Worker bool
can_apply_extract(opt_ctx & ctx,aco_ptr<Instruction> & instr,unsigned idx,ssa_info & info)1015*61046927SAndroid Build Coastguard Worker can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
1016*61046927SAndroid Build Coastguard Worker {
1017*61046927SAndroid Build Coastguard Worker    Temp tmp = info.instr->operands[0].getTemp();
1018*61046927SAndroid Build Coastguard Worker    SubdwordSel sel = parse_extract(info.instr);
1019*61046927SAndroid Build Coastguard Worker 
1020*61046927SAndroid Build Coastguard Worker    if (!sel) {
1021*61046927SAndroid Build Coastguard Worker       return false;
1022*61046927SAndroid Build Coastguard Worker    } else if (sel.size() == 4) {
1023*61046927SAndroid Build Coastguard Worker       return true;
1024*61046927SAndroid Build Coastguard Worker    } else if ((instr->opcode == aco_opcode::v_cvt_f32_u32 ||
1025*61046927SAndroid Build Coastguard Worker                instr->opcode == aco_opcode::v_cvt_f32_i32) &&
1026*61046927SAndroid Build Coastguard Worker               sel.size() == 1 && !sel.sign_extend()) {
1027*61046927SAndroid Build Coastguard Worker       return true;
1028*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && instr->operands[0].isConstant() &&
1029*61046927SAndroid Build Coastguard Worker               sel.offset() == 0 &&
1030*61046927SAndroid Build Coastguard Worker               ((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) ||
1031*61046927SAndroid Build Coastguard Worker                (sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
1032*61046927SAndroid Build Coastguard Worker       return true;
1033*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_mul_u32_u24 && ctx.program->gfx_level >= GFX10 &&
1034*61046927SAndroid Build Coastguard Worker               !instr->usesModifiers() && sel.size() == 2 && !sel.sign_extend() &&
1035*61046927SAndroid Build Coastguard Worker               (instr->operands[!idx].is16bit() ||
1036*61046927SAndroid Build Coastguard Worker                (instr->operands[!idx].isConstant() &&
1037*61046927SAndroid Build Coastguard Worker                 instr->operands[!idx].constantValue() <= UINT16_MAX))) {
1038*61046927SAndroid Build Coastguard Worker       return true;
1039*61046927SAndroid Build Coastguard Worker    } else if (idx < 2 && can_use_SDWA(ctx.program->gfx_level, instr, true) &&
1040*61046927SAndroid Build Coastguard Worker               (tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
1041*61046927SAndroid Build Coastguard Worker       if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
1042*61046927SAndroid Build Coastguard Worker          return false;
1043*61046927SAndroid Build Coastguard Worker       return true;
1044*61046927SAndroid Build Coastguard Worker    } else if (instr->isVALU() && sel.size() == 2 && !instr->valu().opsel[idx] &&
1045*61046927SAndroid Build Coastguard Worker               can_use_opsel(ctx.program->gfx_level, instr->opcode, idx)) {
1046*61046927SAndroid Build Coastguard Worker       return true;
1047*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_pack_ll_b32_b16 && sel.size() == 2 &&
1048*61046927SAndroid Build Coastguard Worker               (idx == 1 || ctx.program->gfx_level >= GFX11 || !sel.offset())) {
1049*61046927SAndroid Build Coastguard Worker       return true;
1050*61046927SAndroid Build Coastguard Worker    } else if (sel.size() == 2 &&
1051*61046927SAndroid Build Coastguard Worker               ((instr->opcode == aco_opcode::s_pack_lh_b32_b16 && idx == 0) ||
1052*61046927SAndroid Build Coastguard Worker                (instr->opcode == aco_opcode::s_pack_hl_b32_b16 && idx == 1))) {
1053*61046927SAndroid Build Coastguard Worker       return true;
1054*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_extract) {
1055*61046927SAndroid Build Coastguard Worker       SubdwordSel instrSel = parse_extract(instr.get());
1056*61046927SAndroid Build Coastguard Worker 
1057*61046927SAndroid Build Coastguard Worker       /* the outer offset must be within extracted range */
1058*61046927SAndroid Build Coastguard Worker       if (instrSel.offset() >= sel.size())
1059*61046927SAndroid Build Coastguard Worker          return false;
1060*61046927SAndroid Build Coastguard Worker 
1061*61046927SAndroid Build Coastguard Worker       /* don't remove the sign-extension when increasing the size further */
1062*61046927SAndroid Build Coastguard Worker       if (instrSel.size() > sel.size() && !instrSel.sign_extend() && sel.sign_extend())
1063*61046927SAndroid Build Coastguard Worker          return false;
1064*61046927SAndroid Build Coastguard Worker 
1065*61046927SAndroid Build Coastguard Worker       return true;
1066*61046927SAndroid Build Coastguard Worker    }
1067*61046927SAndroid Build Coastguard Worker 
1068*61046927SAndroid Build Coastguard Worker    return false;
1069*61046927SAndroid Build Coastguard Worker }
1070*61046927SAndroid Build Coastguard Worker 
1071*61046927SAndroid Build Coastguard Worker /* Combine an p_extract (or p_insert, in some cases) instruction with instr.
1072*61046927SAndroid Build Coastguard Worker  * instr(p_extract(...)) -> instr()
1073*61046927SAndroid Build Coastguard Worker  */
1074*61046927SAndroid Build Coastguard Worker void
apply_extract(opt_ctx & ctx,aco_ptr<Instruction> & instr,unsigned idx,ssa_info & info)1075*61046927SAndroid Build Coastguard Worker apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
1076*61046927SAndroid Build Coastguard Worker {
1077*61046927SAndroid Build Coastguard Worker    Temp tmp = info.instr->operands[0].getTemp();
1078*61046927SAndroid Build Coastguard Worker    SubdwordSel sel = parse_extract(info.instr);
1079*61046927SAndroid Build Coastguard Worker    assert(sel);
1080*61046927SAndroid Build Coastguard Worker 
1081*61046927SAndroid Build Coastguard Worker    instr->operands[idx].set16bit(false);
1082*61046927SAndroid Build Coastguard Worker    instr->operands[idx].set24bit(false);
1083*61046927SAndroid Build Coastguard Worker 
1084*61046927SAndroid Build Coastguard Worker    ctx.info[tmp.id()].label &= ~label_insert;
1085*61046927SAndroid Build Coastguard Worker 
1086*61046927SAndroid Build Coastguard Worker    if (sel.size() == 4) {
1087*61046927SAndroid Build Coastguard Worker       /* full dword selection */
1088*61046927SAndroid Build Coastguard Worker    } else if ((instr->opcode == aco_opcode::v_cvt_f32_u32 ||
1089*61046927SAndroid Build Coastguard Worker                instr->opcode == aco_opcode::v_cvt_f32_i32) &&
1090*61046927SAndroid Build Coastguard Worker               sel.size() == 1 && !sel.sign_extend()) {
1091*61046927SAndroid Build Coastguard Worker       switch (sel.offset()) {
1092*61046927SAndroid Build Coastguard Worker       case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
1093*61046927SAndroid Build Coastguard Worker       case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
1094*61046927SAndroid Build Coastguard Worker       case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
1095*61046927SAndroid Build Coastguard Worker       case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
1096*61046927SAndroid Build Coastguard Worker       }
1097*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && instr->operands[0].isConstant() &&
1098*61046927SAndroid Build Coastguard Worker               sel.offset() == 0 &&
1099*61046927SAndroid Build Coastguard Worker               ((sel.size() == 2 && instr->operands[0].constantValue() >= 16u) ||
1100*61046927SAndroid Build Coastguard Worker                (sel.size() == 1 && instr->operands[0].constantValue() >= 24u))) {
1101*61046927SAndroid Build Coastguard Worker       /* The undesirable upper bits are already shifted out. */
1102*61046927SAndroid Build Coastguard Worker       return;
1103*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_mul_u32_u24 && ctx.program->gfx_level >= GFX10 &&
1104*61046927SAndroid Build Coastguard Worker               !instr->usesModifiers() && sel.size() == 2 && !sel.sign_extend() &&
1105*61046927SAndroid Build Coastguard Worker               (instr->operands[!idx].is16bit() ||
1106*61046927SAndroid Build Coastguard Worker                instr->operands[!idx].constantValue() <= UINT16_MAX)) {
1107*61046927SAndroid Build Coastguard Worker       Instruction* mad = create_instruction(aco_opcode::v_mad_u32_u16, Format::VOP3, 3, 1);
1108*61046927SAndroid Build Coastguard Worker       mad->definitions[0] = instr->definitions[0];
1109*61046927SAndroid Build Coastguard Worker       mad->operands[0] = instr->operands[0];
1110*61046927SAndroid Build Coastguard Worker       mad->operands[1] = instr->operands[1];
1111*61046927SAndroid Build Coastguard Worker       mad->operands[2] = Operand::zero();
1112*61046927SAndroid Build Coastguard Worker       mad->valu().opsel[idx] = sel.offset();
1113*61046927SAndroid Build Coastguard Worker       mad->pass_flags = instr->pass_flags;
1114*61046927SAndroid Build Coastguard Worker       instr.reset(mad);
1115*61046927SAndroid Build Coastguard Worker    } else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
1116*61046927SAndroid Build Coastguard Worker               (tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
1117*61046927SAndroid Build Coastguard Worker       convert_to_SDWA(ctx.program->gfx_level, instr);
1118*61046927SAndroid Build Coastguard Worker       instr->sdwa().sel[idx] = sel;
1119*61046927SAndroid Build Coastguard Worker    } else if (instr->isVALU()) {
1120*61046927SAndroid Build Coastguard Worker       if (sel.offset()) {
1121*61046927SAndroid Build Coastguard Worker          instr->valu().opsel[idx] = true;
1122*61046927SAndroid Build Coastguard Worker 
1123*61046927SAndroid Build Coastguard Worker          /* VOP12C cannot use opsel with SGPRs. */
1124*61046927SAndroid Build Coastguard Worker          if (!instr->isVOP3() && !instr->isVINTERP_INREG() &&
1125*61046927SAndroid Build Coastguard Worker              !info.instr->operands[0].isOfType(RegType::vgpr))
1126*61046927SAndroid Build Coastguard Worker             instr->format = asVOP3(instr->format);
1127*61046927SAndroid Build Coastguard Worker       }
1128*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_pack_ll_b32_b16) {
1129*61046927SAndroid Build Coastguard Worker       if (sel.offset())
1130*61046927SAndroid Build Coastguard Worker          instr->opcode = idx ? aco_opcode::s_pack_lh_b32_b16 : aco_opcode::s_pack_hl_b32_b16;
1131*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_pack_lh_b32_b16 ||
1132*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::s_pack_hl_b32_b16) {
1133*61046927SAndroid Build Coastguard Worker       if (sel.offset())
1134*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::s_pack_hh_b32_b16;
1135*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_extract) {
1136*61046927SAndroid Build Coastguard Worker       SubdwordSel instrSel = parse_extract(instr.get());
1137*61046927SAndroid Build Coastguard Worker 
1138*61046927SAndroid Build Coastguard Worker       unsigned size = std::min(sel.size(), instrSel.size());
1139*61046927SAndroid Build Coastguard Worker       unsigned offset = sel.offset() + instrSel.offset();
1140*61046927SAndroid Build Coastguard Worker       unsigned sign_extend =
1141*61046927SAndroid Build Coastguard Worker          instrSel.sign_extend() && (sel.sign_extend() || instrSel.size() <= sel.size());
1142*61046927SAndroid Build Coastguard Worker 
1143*61046927SAndroid Build Coastguard Worker       instr->operands[1] = Operand::c32(offset / size);
1144*61046927SAndroid Build Coastguard Worker       instr->operands[2] = Operand::c32(size * 8u);
1145*61046927SAndroid Build Coastguard Worker       instr->operands[3] = Operand::c32(sign_extend);
1146*61046927SAndroid Build Coastguard Worker       return;
1147*61046927SAndroid Build Coastguard Worker    }
1148*61046927SAndroid Build Coastguard Worker 
1149*61046927SAndroid Build Coastguard Worker    /* These are the only labels worth keeping at the moment. */
1150*61046927SAndroid Build Coastguard Worker    for (Definition& def : instr->definitions) {
1151*61046927SAndroid Build Coastguard Worker       ctx.info[def.tempId()].label &=
1152*61046927SAndroid Build Coastguard Worker          (label_mul | label_minmax | label_usedef | label_vopc | label_f2f32 | instr_mod_labels);
1153*61046927SAndroid Build Coastguard Worker       if (ctx.info[def.tempId()].label & instr_usedef_labels)
1154*61046927SAndroid Build Coastguard Worker          ctx.info[def.tempId()].instr = instr.get();
1155*61046927SAndroid Build Coastguard Worker    }
1156*61046927SAndroid Build Coastguard Worker }
1157*61046927SAndroid Build Coastguard Worker 
1158*61046927SAndroid Build Coastguard Worker void
check_sdwa_extract(opt_ctx & ctx,aco_ptr<Instruction> & instr)1159*61046927SAndroid Build Coastguard Worker check_sdwa_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr)
1160*61046927SAndroid Build Coastguard Worker {
1161*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
1162*61046927SAndroid Build Coastguard Worker       Operand op = instr->operands[i];
1163*61046927SAndroid Build Coastguard Worker       if (!op.isTemp())
1164*61046927SAndroid Build Coastguard Worker          continue;
1165*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[op.tempId()];
1166*61046927SAndroid Build Coastguard Worker       if (info.is_extract() && (info.instr->operands[0].getTemp().type() == RegType::vgpr ||
1167*61046927SAndroid Build Coastguard Worker                                 op.getTemp().type() == RegType::sgpr)) {
1168*61046927SAndroid Build Coastguard Worker          if (!can_apply_extract(ctx, instr, i, info))
1169*61046927SAndroid Build Coastguard Worker             info.label &= ~label_extract;
1170*61046927SAndroid Build Coastguard Worker       }
1171*61046927SAndroid Build Coastguard Worker    }
1172*61046927SAndroid Build Coastguard Worker }
1173*61046927SAndroid Build Coastguard Worker 
1174*61046927SAndroid Build Coastguard Worker bool
does_fp_op_flush_denorms(opt_ctx & ctx,aco_opcode op)1175*61046927SAndroid Build Coastguard Worker does_fp_op_flush_denorms(opt_ctx& ctx, aco_opcode op)
1176*61046927SAndroid Build Coastguard Worker {
1177*61046927SAndroid Build Coastguard Worker    switch (op) {
1178*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f32:
1179*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f32:
1180*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_f32:
1181*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_f32:
1182*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_f32:
1183*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f16:
1184*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f16: return ctx.program->gfx_level > GFX8;
1185*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cndmask_b32:
1186*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cndmask_b16:
1187*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mov_b32:
1188*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mov_b16: return false;
1189*61046927SAndroid Build Coastguard Worker    default: return true;
1190*61046927SAndroid Build Coastguard Worker    }
1191*61046927SAndroid Build Coastguard Worker }
1192*61046927SAndroid Build Coastguard Worker 
1193*61046927SAndroid Build Coastguard Worker bool
can_eliminate_fcanonicalize(opt_ctx & ctx,aco_ptr<Instruction> & instr,Temp tmp,unsigned idx)1194*61046927SAndroid Build Coastguard Worker can_eliminate_fcanonicalize(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp tmp, unsigned idx)
1195*61046927SAndroid Build Coastguard Worker {
1196*61046927SAndroid Build Coastguard Worker    float_mode* fp = &ctx.fp_mode;
1197*61046927SAndroid Build Coastguard Worker    if (ctx.info[tmp.id()].is_canonicalized() ||
1198*61046927SAndroid Build Coastguard Worker        (tmp.bytes() == 4 ? fp->denorm32 : fp->denorm16_64) == fp_denorm_keep)
1199*61046927SAndroid Build Coastguard Worker       return true;
1200*61046927SAndroid Build Coastguard Worker 
1201*61046927SAndroid Build Coastguard Worker    aco_opcode op = instr->opcode;
1202*61046927SAndroid Build Coastguard Worker    return can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, idx) &&
1203*61046927SAndroid Build Coastguard Worker           does_fp_op_flush_denorms(ctx, op);
1204*61046927SAndroid Build Coastguard Worker }
1205*61046927SAndroid Build Coastguard Worker 
1206*61046927SAndroid Build Coastguard Worker bool
can_eliminate_and_exec(opt_ctx & ctx,Temp tmp,unsigned pass_flags)1207*61046927SAndroid Build Coastguard Worker can_eliminate_and_exec(opt_ctx& ctx, Temp tmp, unsigned pass_flags)
1208*61046927SAndroid Build Coastguard Worker {
1209*61046927SAndroid Build Coastguard Worker    if (ctx.info[tmp.id()].is_vopc()) {
1210*61046927SAndroid Build Coastguard Worker       Instruction* vopc_instr = ctx.info[tmp.id()].instr;
1211*61046927SAndroid Build Coastguard Worker       /* Remove superfluous s_and when the VOPC instruction uses the same exec and thus
1212*61046927SAndroid Build Coastguard Worker        * already produces the same result */
1213*61046927SAndroid Build Coastguard Worker       return vopc_instr->pass_flags == pass_flags;
1214*61046927SAndroid Build Coastguard Worker    }
1215*61046927SAndroid Build Coastguard Worker    if (ctx.info[tmp.id()].is_bitwise()) {
1216*61046927SAndroid Build Coastguard Worker       Instruction* instr = ctx.info[tmp.id()].instr;
1217*61046927SAndroid Build Coastguard Worker       if (instr->operands.size() != 2 || instr->pass_flags != pass_flags)
1218*61046927SAndroid Build Coastguard Worker          return false;
1219*61046927SAndroid Build Coastguard Worker       if (!(instr->operands[0].isTemp() && instr->operands[1].isTemp()))
1220*61046927SAndroid Build Coastguard Worker          return false;
1221*61046927SAndroid Build Coastguard Worker       if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) {
1222*61046927SAndroid Build Coastguard Worker          return can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), pass_flags) ||
1223*61046927SAndroid Build Coastguard Worker                 can_eliminate_and_exec(ctx, instr->operands[1].getTemp(), pass_flags);
1224*61046927SAndroid Build Coastguard Worker       } else {
1225*61046927SAndroid Build Coastguard Worker          return can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), pass_flags) &&
1226*61046927SAndroid Build Coastguard Worker                 can_eliminate_and_exec(ctx, instr->operands[1].getTemp(), pass_flags);
1227*61046927SAndroid Build Coastguard Worker       }
1228*61046927SAndroid Build Coastguard Worker    }
1229*61046927SAndroid Build Coastguard Worker    return false;
1230*61046927SAndroid Build Coastguard Worker }
1231*61046927SAndroid Build Coastguard Worker 
1232*61046927SAndroid Build Coastguard Worker bool
is_copy_label(opt_ctx & ctx,aco_ptr<Instruction> & instr,ssa_info & info,unsigned idx)1233*61046927SAndroid Build Coastguard Worker is_copy_label(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info, unsigned idx)
1234*61046927SAndroid Build Coastguard Worker {
1235*61046927SAndroid Build Coastguard Worker    return info.is_temp() ||
1236*61046927SAndroid Build Coastguard Worker           (info.is_fcanonicalize() && can_eliminate_fcanonicalize(ctx, instr, info.temp, idx));
1237*61046927SAndroid Build Coastguard Worker }
1238*61046927SAndroid Build Coastguard Worker 
1239*61046927SAndroid Build Coastguard Worker bool
is_op_canonicalized(opt_ctx & ctx,Operand op)1240*61046927SAndroid Build Coastguard Worker is_op_canonicalized(opt_ctx& ctx, Operand op)
1241*61046927SAndroid Build Coastguard Worker {
1242*61046927SAndroid Build Coastguard Worker    float_mode* fp = &ctx.fp_mode;
1243*61046927SAndroid Build Coastguard Worker    if ((op.isTemp() && ctx.info[op.tempId()].is_canonicalized()) ||
1244*61046927SAndroid Build Coastguard Worker        (op.bytes() == 4 ? fp->denorm32 : fp->denorm16_64) == fp_denorm_keep)
1245*61046927SAndroid Build Coastguard Worker       return true;
1246*61046927SAndroid Build Coastguard Worker 
1247*61046927SAndroid Build Coastguard Worker    if (op.isConstant() || (op.isTemp() && ctx.info[op.tempId()].is_constant_or_literal(32))) {
1248*61046927SAndroid Build Coastguard Worker       uint32_t val = op.isTemp() ? ctx.info[op.tempId()].val : op.constantValue();
1249*61046927SAndroid Build Coastguard Worker       if (op.bytes() == 2)
1250*61046927SAndroid Build Coastguard Worker          return (val & 0x7fff) == 0 || (val & 0x7fff) > 0x3ff;
1251*61046927SAndroid Build Coastguard Worker       else if (op.bytes() == 4)
1252*61046927SAndroid Build Coastguard Worker          return (val & 0x7fffffff) == 0 || (val & 0x7fffffff) > 0x7fffff;
1253*61046927SAndroid Build Coastguard Worker    }
1254*61046927SAndroid Build Coastguard Worker    return false;
1255*61046927SAndroid Build Coastguard Worker }
1256*61046927SAndroid Build Coastguard Worker 
1257*61046927SAndroid Build Coastguard Worker bool
is_scratch_offset_valid(opt_ctx & ctx,Instruction * instr,int64_t offset0,int64_t offset1)1258*61046927SAndroid Build Coastguard Worker is_scratch_offset_valid(opt_ctx& ctx, Instruction* instr, int64_t offset0, int64_t offset1)
1259*61046927SAndroid Build Coastguard Worker {
1260*61046927SAndroid Build Coastguard Worker    bool negative_unaligned_scratch_offset_bug = ctx.program->gfx_level == GFX10;
1261*61046927SAndroid Build Coastguard Worker    int32_t min = ctx.program->dev.scratch_global_offset_min;
1262*61046927SAndroid Build Coastguard Worker    int32_t max = ctx.program->dev.scratch_global_offset_max;
1263*61046927SAndroid Build Coastguard Worker 
1264*61046927SAndroid Build Coastguard Worker    int64_t offset = offset0 + offset1;
1265*61046927SAndroid Build Coastguard Worker 
1266*61046927SAndroid Build Coastguard Worker    bool has_vgpr_offset = instr && !instr->operands[0].isUndefined();
1267*61046927SAndroid Build Coastguard Worker    if (negative_unaligned_scratch_offset_bug && has_vgpr_offset && offset < 0 && offset % 4)
1268*61046927SAndroid Build Coastguard Worker       return false;
1269*61046927SAndroid Build Coastguard Worker 
1270*61046927SAndroid Build Coastguard Worker    return offset >= min && offset <= max;
1271*61046927SAndroid Build Coastguard Worker }
1272*61046927SAndroid Build Coastguard Worker 
1273*61046927SAndroid Build Coastguard Worker bool
detect_clamp(Instruction * instr,unsigned * clamped_idx)1274*61046927SAndroid Build Coastguard Worker detect_clamp(Instruction* instr, unsigned* clamped_idx)
1275*61046927SAndroid Build Coastguard Worker {
1276*61046927SAndroid Build Coastguard Worker    VALU_instruction& valu = instr->valu();
1277*61046927SAndroid Build Coastguard Worker    if (valu.omod != 0 || valu.opsel != 0)
1278*61046927SAndroid Build Coastguard Worker       return false;
1279*61046927SAndroid Build Coastguard Worker 
1280*61046927SAndroid Build Coastguard Worker    unsigned idx = 0;
1281*61046927SAndroid Build Coastguard Worker    bool found_zero = false, found_one = false;
1282*61046927SAndroid Build Coastguard Worker    bool is_fp16 = instr->opcode == aco_opcode::v_med3_f16;
1283*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 3; i++) {
1284*61046927SAndroid Build Coastguard Worker       if (!valu.neg[i] && instr->operands[i].constantEquals(0))
1285*61046927SAndroid Build Coastguard Worker          found_zero = true;
1286*61046927SAndroid Build Coastguard Worker       else if (!valu.neg[i] &&
1287*61046927SAndroid Build Coastguard Worker                instr->operands[i].constantEquals(is_fp16 ? 0x3c00 : 0x3f800000)) /* 1.0 */
1288*61046927SAndroid Build Coastguard Worker          found_one = true;
1289*61046927SAndroid Build Coastguard Worker       else
1290*61046927SAndroid Build Coastguard Worker          idx = i;
1291*61046927SAndroid Build Coastguard Worker    }
1292*61046927SAndroid Build Coastguard Worker    if (found_zero && found_one && instr->operands[idx].isTemp()) {
1293*61046927SAndroid Build Coastguard Worker       *clamped_idx = idx;
1294*61046927SAndroid Build Coastguard Worker       return true;
1295*61046927SAndroid Build Coastguard Worker    } else {
1296*61046927SAndroid Build Coastguard Worker       return false;
1297*61046927SAndroid Build Coastguard Worker    }
1298*61046927SAndroid Build Coastguard Worker }
1299*61046927SAndroid Build Coastguard Worker 
1300*61046927SAndroid Build Coastguard Worker void
label_instruction(opt_ctx & ctx,aco_ptr<Instruction> & instr)1301*61046927SAndroid Build Coastguard Worker label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
1302*61046927SAndroid Build Coastguard Worker {
1303*61046927SAndroid Build Coastguard Worker    if (instr->isSMEM())
1304*61046927SAndroid Build Coastguard Worker       smem_combine(ctx, instr);
1305*61046927SAndroid Build Coastguard Worker 
1306*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
1307*61046927SAndroid Build Coastguard Worker       if (!instr->operands[i].isTemp())
1308*61046927SAndroid Build Coastguard Worker          continue;
1309*61046927SAndroid Build Coastguard Worker 
1310*61046927SAndroid Build Coastguard Worker       ssa_info info = ctx.info[instr->operands[i].tempId()];
1311*61046927SAndroid Build Coastguard Worker       /* propagate reg->reg of same type */
1312*61046927SAndroid Build Coastguard Worker       while (info.is_temp() && info.temp.regClass() == instr->operands[i].getTemp().regClass()) {
1313*61046927SAndroid Build Coastguard Worker          instr->operands[i].setTemp(ctx.info[instr->operands[i].tempId()].temp);
1314*61046927SAndroid Build Coastguard Worker          info = ctx.info[info.temp.id()];
1315*61046927SAndroid Build Coastguard Worker       }
1316*61046927SAndroid Build Coastguard Worker 
1317*61046927SAndroid Build Coastguard Worker       /* PSEUDO: propagate temporaries */
1318*61046927SAndroid Build Coastguard Worker       if (instr->isPseudo()) {
1319*61046927SAndroid Build Coastguard Worker          while (info.is_temp()) {
1320*61046927SAndroid Build Coastguard Worker             pseudo_propagate_temp(ctx, instr, info.temp, i);
1321*61046927SAndroid Build Coastguard Worker             info = ctx.info[info.temp.id()];
1322*61046927SAndroid Build Coastguard Worker          }
1323*61046927SAndroid Build Coastguard Worker       }
1324*61046927SAndroid Build Coastguard Worker 
1325*61046927SAndroid Build Coastguard Worker       /* SALU / PSEUDO: propagate inline constants */
1326*61046927SAndroid Build Coastguard Worker       if (instr->isSALU() || instr->isPseudo()) {
1327*61046927SAndroid Build Coastguard Worker          unsigned bits = get_operand_size(instr, i);
1328*61046927SAndroid Build Coastguard Worker          if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) &&
1329*61046927SAndroid Build Coastguard Worker              alu_can_accept_constant(instr, i)) {
1330*61046927SAndroid Build Coastguard Worker             instr->operands[i] = get_constant_op(ctx, info, bits);
1331*61046927SAndroid Build Coastguard Worker             continue;
1332*61046927SAndroid Build Coastguard Worker          }
1333*61046927SAndroid Build Coastguard Worker       }
1334*61046927SAndroid Build Coastguard Worker 
1335*61046927SAndroid Build Coastguard Worker       /* VALU: propagate neg, abs & inline constants */
1336*61046927SAndroid Build Coastguard Worker       else if (instr->isVALU()) {
1337*61046927SAndroid Build Coastguard Worker          if (is_copy_label(ctx, instr, info, i) && info.temp.type() == RegType::vgpr &&
1338*61046927SAndroid Build Coastguard Worker              valu_can_accept_vgpr(instr, i)) {
1339*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(info.temp);
1340*61046927SAndroid Build Coastguard Worker             info = ctx.info[info.temp.id()];
1341*61046927SAndroid Build Coastguard Worker          }
1342*61046927SAndroid Build Coastguard Worker          /* applying SGPRs to VOP1 doesn't increase code size and DCE is helped by doing it earlier */
1343*61046927SAndroid Build Coastguard Worker          if (info.is_temp() && info.temp.type() == RegType::sgpr && can_apply_sgprs(ctx, instr) &&
1344*61046927SAndroid Build Coastguard Worker              instr->operands.size() == 1) {
1345*61046927SAndroid Build Coastguard Worker             instr->format = withoutDPP(instr->format);
1346*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(info.temp);
1347*61046927SAndroid Build Coastguard Worker             info = ctx.info[info.temp.id()];
1348*61046927SAndroid Build Coastguard Worker          }
1349*61046927SAndroid Build Coastguard Worker 
1350*61046927SAndroid Build Coastguard Worker          /* for instructions other than v_cndmask_b32, the size of the instruction should match the
1351*61046927SAndroid Build Coastguard Worker           * operand size */
1352*61046927SAndroid Build Coastguard Worker          bool can_use_mod =
1353*61046927SAndroid Build Coastguard Worker             instr->opcode != aco_opcode::v_cndmask_b32 || instr->operands[i].getTemp().bytes() == 4;
1354*61046927SAndroid Build Coastguard Worker          can_use_mod &= can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, i);
1355*61046927SAndroid Build Coastguard Worker 
1356*61046927SAndroid Build Coastguard Worker          bool packed_math = instr->isVOP3P() && instr->opcode != aco_opcode::v_fma_mix_f32 &&
1357*61046927SAndroid Build Coastguard Worker                             instr->opcode != aco_opcode::v_fma_mixlo_f16 &&
1358*61046927SAndroid Build Coastguard Worker                             instr->opcode != aco_opcode::v_fma_mixhi_f16;
1359*61046927SAndroid Build Coastguard Worker 
1360*61046927SAndroid Build Coastguard Worker          if (instr->isSDWA())
1361*61046927SAndroid Build Coastguard Worker             can_use_mod &= instr->sdwa().sel[i].size() == 4;
1362*61046927SAndroid Build Coastguard Worker          else if (instr->isVOP3P())
1363*61046927SAndroid Build Coastguard Worker             can_use_mod &= !packed_math || !info.is_abs();
1364*61046927SAndroid Build Coastguard Worker          else if (instr->isVINTERP_INREG())
1365*61046927SAndroid Build Coastguard Worker             can_use_mod &= !info.is_abs();
1366*61046927SAndroid Build Coastguard Worker          else
1367*61046927SAndroid Build Coastguard Worker             can_use_mod &= instr->isDPP16() || can_use_VOP3(ctx, instr);
1368*61046927SAndroid Build Coastguard Worker 
1369*61046927SAndroid Build Coastguard Worker          unsigned bits = get_operand_size(instr, i);
1370*61046927SAndroid Build Coastguard Worker          can_use_mod &= instr->operands[i].bytes() * 8 == bits;
1371*61046927SAndroid Build Coastguard Worker 
1372*61046927SAndroid Build Coastguard Worker          if (info.is_neg() && can_use_mod &&
1373*61046927SAndroid Build Coastguard Worker              can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) {
1374*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(info.temp);
1375*61046927SAndroid Build Coastguard Worker             if (!packed_math && instr->valu().abs[i]) {
1376*61046927SAndroid Build Coastguard Worker                /* fabs(fneg(a)) -> fabs(a) */
1377*61046927SAndroid Build Coastguard Worker             } else if (instr->opcode == aco_opcode::v_add_f32) {
1378*61046927SAndroid Build Coastguard Worker                instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
1379*61046927SAndroid Build Coastguard Worker             } else if (instr->opcode == aco_opcode::v_add_f16) {
1380*61046927SAndroid Build Coastguard Worker                instr->opcode = i ? aco_opcode::v_sub_f16 : aco_opcode::v_subrev_f16;
1381*61046927SAndroid Build Coastguard Worker             } else if (packed_math) {
1382*61046927SAndroid Build Coastguard Worker                /* Bit size compat should ensure this. */
1383*61046927SAndroid Build Coastguard Worker                assert(!instr->valu().opsel_lo[i] && !instr->valu().opsel_hi[i]);
1384*61046927SAndroid Build Coastguard Worker                instr->valu().neg_lo[i] ^= true;
1385*61046927SAndroid Build Coastguard Worker                instr->valu().neg_hi[i] ^= true;
1386*61046927SAndroid Build Coastguard Worker             } else {
1387*61046927SAndroid Build Coastguard Worker                if (!instr->isDPP16() && can_use_VOP3(ctx, instr))
1388*61046927SAndroid Build Coastguard Worker                   instr->format = asVOP3(instr->format);
1389*61046927SAndroid Build Coastguard Worker                instr->valu().neg[i] ^= true;
1390*61046927SAndroid Build Coastguard Worker             }
1391*61046927SAndroid Build Coastguard Worker          }
1392*61046927SAndroid Build Coastguard Worker          if (info.is_abs() && can_use_mod &&
1393*61046927SAndroid Build Coastguard Worker              can_eliminate_fcanonicalize(ctx, instr, info.temp, i)) {
1394*61046927SAndroid Build Coastguard Worker             if (!instr->isDPP16() && can_use_VOP3(ctx, instr))
1395*61046927SAndroid Build Coastguard Worker                instr->format = asVOP3(instr->format);
1396*61046927SAndroid Build Coastguard Worker             instr->operands[i] = Operand(info.temp);
1397*61046927SAndroid Build Coastguard Worker             instr->valu().abs[i] = true;
1398*61046927SAndroid Build Coastguard Worker             continue;
1399*61046927SAndroid Build Coastguard Worker          }
1400*61046927SAndroid Build Coastguard Worker 
1401*61046927SAndroid Build Coastguard Worker          if (instr->isVOP3P()) {
1402*61046927SAndroid Build Coastguard Worker             propagate_constants_vop3p(ctx, instr, info, i);
1403*61046927SAndroid Build Coastguard Worker             continue;
1404*61046927SAndroid Build Coastguard Worker          }
1405*61046927SAndroid Build Coastguard Worker 
1406*61046927SAndroid Build Coastguard Worker          if (info.is_constant(bits) && alu_can_accept_constant(instr, i) &&
1407*61046927SAndroid Build Coastguard Worker              (!instr->isSDWA() || ctx.program->gfx_level >= GFX9) && (!instr->isDPP() || i != 1)) {
1408*61046927SAndroid Build Coastguard Worker             Operand op = get_constant_op(ctx, info, bits);
1409*61046927SAndroid Build Coastguard Worker             if (i == 0 || instr->isSDWA() || instr->opcode == aco_opcode::v_readlane_b32 ||
1410*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::v_writelane_b32) {
1411*61046927SAndroid Build Coastguard Worker                instr->format = withoutDPP(instr->format);
1412*61046927SAndroid Build Coastguard Worker                instr->operands[i] = op;
1413*61046927SAndroid Build Coastguard Worker                continue;
1414*61046927SAndroid Build Coastguard Worker             } else if (!instr->isVOP3() && can_swap_operands(instr, &instr->opcode)) {
1415*61046927SAndroid Build Coastguard Worker                instr->operands[i] = op;
1416*61046927SAndroid Build Coastguard Worker                instr->valu().swapOperands(0, i);
1417*61046927SAndroid Build Coastguard Worker                continue;
1418*61046927SAndroid Build Coastguard Worker             } else if (can_use_VOP3(ctx, instr)) {
1419*61046927SAndroid Build Coastguard Worker                instr->format = asVOP3(instr->format);
1420*61046927SAndroid Build Coastguard Worker                instr->operands[i] = op;
1421*61046927SAndroid Build Coastguard Worker                continue;
1422*61046927SAndroid Build Coastguard Worker             }
1423*61046927SAndroid Build Coastguard Worker          }
1424*61046927SAndroid Build Coastguard Worker       }
1425*61046927SAndroid Build Coastguard Worker 
1426*61046927SAndroid Build Coastguard Worker       /* MUBUF: propagate constants and combine additions */
1427*61046927SAndroid Build Coastguard Worker       else if (instr->isMUBUF()) {
1428*61046927SAndroid Build Coastguard Worker          MUBUF_instruction& mubuf = instr->mubuf();
1429*61046927SAndroid Build Coastguard Worker          Temp base;
1430*61046927SAndroid Build Coastguard Worker          uint32_t offset;
1431*61046927SAndroid Build Coastguard Worker          while (info.is_temp())
1432*61046927SAndroid Build Coastguard Worker             info = ctx.info[info.temp.id()];
1433*61046927SAndroid Build Coastguard Worker 
1434*61046927SAndroid Build Coastguard Worker          bool swizzled = ctx.program->gfx_level >= GFX12 ? mubuf.cache.gfx12.swizzled
1435*61046927SAndroid Build Coastguard Worker                                                          : (mubuf.cache.value & ac_swizzled);
1436*61046927SAndroid Build Coastguard Worker          /* According to AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(), vaddr
1437*61046927SAndroid Build Coastguard Worker           * overflow for scratch accesses works only on GFX9+ and saddr overflow
1438*61046927SAndroid Build Coastguard Worker           * never works. Since swizzling is the only thing that separates
1439*61046927SAndroid Build Coastguard Worker           * scratch accesses and other accesses and swizzling changing how
1440*61046927SAndroid Build Coastguard Worker           * addressing works significantly, this probably applies to swizzled
1441*61046927SAndroid Build Coastguard Worker           * MUBUF accesses. */
1442*61046927SAndroid Build Coastguard Worker          bool vaddr_prevent_overflow = swizzled && ctx.program->gfx_level < GFX9;
1443*61046927SAndroid Build Coastguard Worker 
1444*61046927SAndroid Build Coastguard Worker          if (mubuf.offen && mubuf.idxen && i == 1 && info.is_vec() &&
1445*61046927SAndroid Build Coastguard Worker              info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
1446*61046927SAndroid Build Coastguard Worker              info.instr->operands[0].regClass() == v1 && info.instr->operands[1].isConstant() &&
1447*61046927SAndroid Build Coastguard Worker              mubuf.offset + info.instr->operands[1].constantValue() < 4096) {
1448*61046927SAndroid Build Coastguard Worker             instr->operands[1] = info.instr->operands[0];
1449*61046927SAndroid Build Coastguard Worker             mubuf.offset += info.instr->operands[1].constantValue();
1450*61046927SAndroid Build Coastguard Worker             mubuf.offen = false;
1451*61046927SAndroid Build Coastguard Worker             continue;
1452*61046927SAndroid Build Coastguard Worker          } else if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) &&
1453*61046927SAndroid Build Coastguard Worker                     mubuf.offset + info.val < 4096) {
1454*61046927SAndroid Build Coastguard Worker             assert(!mubuf.idxen);
1455*61046927SAndroid Build Coastguard Worker             instr->operands[1] = Operand(v1);
1456*61046927SAndroid Build Coastguard Worker             mubuf.offset += info.val;
1457*61046927SAndroid Build Coastguard Worker             mubuf.offen = false;
1458*61046927SAndroid Build Coastguard Worker             continue;
1459*61046927SAndroid Build Coastguard Worker          } else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
1460*61046927SAndroid Build Coastguard Worker             instr->operands[2] = Operand::c32(0);
1461*61046927SAndroid Build Coastguard Worker             mubuf.offset += info.val;
1462*61046927SAndroid Build Coastguard Worker             continue;
1463*61046927SAndroid Build Coastguard Worker          } else if (mubuf.offen && i == 1 &&
1464*61046927SAndroid Build Coastguard Worker                     parse_base_offset(ctx, instr.get(), i, &base, &offset,
1465*61046927SAndroid Build Coastguard Worker                                       vaddr_prevent_overflow) &&
1466*61046927SAndroid Build Coastguard Worker                     base.regClass() == v1 && mubuf.offset + offset < 4096) {
1467*61046927SAndroid Build Coastguard Worker             assert(!mubuf.idxen);
1468*61046927SAndroid Build Coastguard Worker             instr->operands[1].setTemp(base);
1469*61046927SAndroid Build Coastguard Worker             mubuf.offset += offset;
1470*61046927SAndroid Build Coastguard Worker             continue;
1471*61046927SAndroid Build Coastguard Worker          } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
1472*61046927SAndroid Build Coastguard Worker                     base.regClass() == s1 && mubuf.offset + offset < 4096 && !swizzled) {
1473*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(base);
1474*61046927SAndroid Build Coastguard Worker             mubuf.offset += offset;
1475*61046927SAndroid Build Coastguard Worker             continue;
1476*61046927SAndroid Build Coastguard Worker          }
1477*61046927SAndroid Build Coastguard Worker       }
1478*61046927SAndroid Build Coastguard Worker 
1479*61046927SAndroid Build Coastguard Worker       else if (instr->isMTBUF()) {
1480*61046927SAndroid Build Coastguard Worker          MTBUF_instruction& mtbuf = instr->mtbuf();
1481*61046927SAndroid Build Coastguard Worker          while (info.is_temp())
1482*61046927SAndroid Build Coastguard Worker             info = ctx.info[info.temp.id()];
1483*61046927SAndroid Build Coastguard Worker 
1484*61046927SAndroid Build Coastguard Worker          if (mtbuf.offen && mtbuf.idxen && i == 1 && info.is_vec() &&
1485*61046927SAndroid Build Coastguard Worker              info.instr->operands.size() == 2 && info.instr->operands[0].isTemp() &&
1486*61046927SAndroid Build Coastguard Worker              info.instr->operands[0].regClass() == v1 && info.instr->operands[1].isConstant() &&
1487*61046927SAndroid Build Coastguard Worker              mtbuf.offset + info.instr->operands[1].constantValue() < 4096) {
1488*61046927SAndroid Build Coastguard Worker             instr->operands[1] = info.instr->operands[0];
1489*61046927SAndroid Build Coastguard Worker             mtbuf.offset += info.instr->operands[1].constantValue();
1490*61046927SAndroid Build Coastguard Worker             mtbuf.offen = false;
1491*61046927SAndroid Build Coastguard Worker             continue;
1492*61046927SAndroid Build Coastguard Worker          }
1493*61046927SAndroid Build Coastguard Worker       }
1494*61046927SAndroid Build Coastguard Worker 
1495*61046927SAndroid Build Coastguard Worker       /* SCRATCH: propagate constants and combine additions */
1496*61046927SAndroid Build Coastguard Worker       else if (instr->isScratch()) {
1497*61046927SAndroid Build Coastguard Worker          FLAT_instruction& scratch = instr->scratch();
1498*61046927SAndroid Build Coastguard Worker          Temp base;
1499*61046927SAndroid Build Coastguard Worker          uint32_t offset;
1500*61046927SAndroid Build Coastguard Worker          while (info.is_temp())
1501*61046927SAndroid Build Coastguard Worker             info = ctx.info[info.temp.id()];
1502*61046927SAndroid Build Coastguard Worker 
1503*61046927SAndroid Build Coastguard Worker          /* The hardware probably does: 'scratch_base + u2u64(saddr) + i2i64(offset)'. This means
1504*61046927SAndroid Build Coastguard Worker           * we can't combine the addition if the unsigned addition overflows and offset is
1505*61046927SAndroid Build Coastguard Worker           * positive. In theory, there is also issues if
1506*61046927SAndroid Build Coastguard Worker           * 'ilt(offset, 0) && ige(saddr, 0) && ilt(saddr + offset, 0)', but that just
1507*61046927SAndroid Build Coastguard Worker           * replaces an already out-of-bounds access with a larger one since 'saddr + offset'
1508*61046927SAndroid Build Coastguard Worker           * would be larger than INT32_MAX.
1509*61046927SAndroid Build Coastguard Worker           */
1510*61046927SAndroid Build Coastguard Worker          if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
1511*61046927SAndroid Build Coastguard Worker              base.regClass() == instr->operands[i].regClass() &&
1512*61046927SAndroid Build Coastguard Worker              is_scratch_offset_valid(ctx, instr.get(), scratch.offset, (int32_t)offset)) {
1513*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(base);
1514*61046927SAndroid Build Coastguard Worker             scratch.offset += (int32_t)offset;
1515*61046927SAndroid Build Coastguard Worker             continue;
1516*61046927SAndroid Build Coastguard Worker          } else if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
1517*61046927SAndroid Build Coastguard Worker                     base.regClass() == instr->operands[i].regClass() && (int32_t)offset < 0 &&
1518*61046927SAndroid Build Coastguard Worker                     is_scratch_offset_valid(ctx, instr.get(), scratch.offset, (int32_t)offset)) {
1519*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(base);
1520*61046927SAndroid Build Coastguard Worker             scratch.offset += (int32_t)offset;
1521*61046927SAndroid Build Coastguard Worker             continue;
1522*61046927SAndroid Build Coastguard Worker          } else if (i <= 1 && info.is_constant_or_literal(32) &&
1523*61046927SAndroid Build Coastguard Worker                     ctx.program->gfx_level >= GFX10_3 &&
1524*61046927SAndroid Build Coastguard Worker                     is_scratch_offset_valid(ctx, NULL, scratch.offset, (int32_t)info.val)) {
1525*61046927SAndroid Build Coastguard Worker             /* GFX10.3+ can disable both SADDR and ADDR. */
1526*61046927SAndroid Build Coastguard Worker             instr->operands[i] = Operand(instr->operands[i].regClass());
1527*61046927SAndroid Build Coastguard Worker             scratch.offset += (int32_t)info.val;
1528*61046927SAndroid Build Coastguard Worker             continue;
1529*61046927SAndroid Build Coastguard Worker          }
1530*61046927SAndroid Build Coastguard Worker       }
1531*61046927SAndroid Build Coastguard Worker 
1532*61046927SAndroid Build Coastguard Worker       /* DS: combine additions */
1533*61046927SAndroid Build Coastguard Worker       else if (instr->isDS()) {
1534*61046927SAndroid Build Coastguard Worker 
1535*61046927SAndroid Build Coastguard Worker          DS_instruction& ds = instr->ds();
1536*61046927SAndroid Build Coastguard Worker          Temp base;
1537*61046927SAndroid Build Coastguard Worker          uint32_t offset;
1538*61046927SAndroid Build Coastguard Worker          bool has_usable_ds_offset = ctx.program->gfx_level >= GFX7;
1539*61046927SAndroid Build Coastguard Worker          if (has_usable_ds_offset && i == 0 &&
1540*61046927SAndroid Build Coastguard Worker              parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
1541*61046927SAndroid Build Coastguard Worker              base.regClass() == instr->operands[i].regClass() &&
1542*61046927SAndroid Build Coastguard Worker              instr->opcode != aco_opcode::ds_swizzle_b32) {
1543*61046927SAndroid Build Coastguard Worker             if (instr->opcode == aco_opcode::ds_write2_b32 ||
1544*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_read2_b32 ||
1545*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_write2_b64 ||
1546*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_read2_b64 ||
1547*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_write2st64_b32 ||
1548*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_read2st64_b32 ||
1549*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_write2st64_b64 ||
1550*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::ds_read2st64_b64) {
1551*61046927SAndroid Build Coastguard Worker                bool is64bit = instr->opcode == aco_opcode::ds_write2_b64 ||
1552*61046927SAndroid Build Coastguard Worker                               instr->opcode == aco_opcode::ds_read2_b64 ||
1553*61046927SAndroid Build Coastguard Worker                               instr->opcode == aco_opcode::ds_write2st64_b64 ||
1554*61046927SAndroid Build Coastguard Worker                               instr->opcode == aco_opcode::ds_read2st64_b64;
1555*61046927SAndroid Build Coastguard Worker                bool st64 = instr->opcode == aco_opcode::ds_write2st64_b32 ||
1556*61046927SAndroid Build Coastguard Worker                            instr->opcode == aco_opcode::ds_read2st64_b32 ||
1557*61046927SAndroid Build Coastguard Worker                            instr->opcode == aco_opcode::ds_write2st64_b64 ||
1558*61046927SAndroid Build Coastguard Worker                            instr->opcode == aco_opcode::ds_read2st64_b64;
1559*61046927SAndroid Build Coastguard Worker                unsigned shifts = (is64bit ? 3 : 2) + (st64 ? 6 : 0);
1560*61046927SAndroid Build Coastguard Worker                unsigned mask = BITFIELD_MASK(shifts);
1561*61046927SAndroid Build Coastguard Worker 
1562*61046927SAndroid Build Coastguard Worker                if ((offset & mask) == 0 && ds.offset0 + (offset >> shifts) <= 255 &&
1563*61046927SAndroid Build Coastguard Worker                    ds.offset1 + (offset >> shifts) <= 255) {
1564*61046927SAndroid Build Coastguard Worker                   instr->operands[i].setTemp(base);
1565*61046927SAndroid Build Coastguard Worker                   ds.offset0 += offset >> shifts;
1566*61046927SAndroid Build Coastguard Worker                   ds.offset1 += offset >> shifts;
1567*61046927SAndroid Build Coastguard Worker                }
1568*61046927SAndroid Build Coastguard Worker             } else {
1569*61046927SAndroid Build Coastguard Worker                if (ds.offset0 + offset <= 65535) {
1570*61046927SAndroid Build Coastguard Worker                   instr->operands[i].setTemp(base);
1571*61046927SAndroid Build Coastguard Worker                   ds.offset0 += offset;
1572*61046927SAndroid Build Coastguard Worker                }
1573*61046927SAndroid Build Coastguard Worker             }
1574*61046927SAndroid Build Coastguard Worker          }
1575*61046927SAndroid Build Coastguard Worker       }
1576*61046927SAndroid Build Coastguard Worker 
1577*61046927SAndroid Build Coastguard Worker       else if (instr->isBranch()) {
1578*61046927SAndroid Build Coastguard Worker          if (ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
1579*61046927SAndroid Build Coastguard Worker             /* Flip the branch instruction to get rid of the scc_invert instruction */
1580*61046927SAndroid Build Coastguard Worker             instr->opcode = instr->opcode == aco_opcode::p_cbranch_z ? aco_opcode::p_cbranch_nz
1581*61046927SAndroid Build Coastguard Worker                                                                      : aco_opcode::p_cbranch_z;
1582*61046927SAndroid Build Coastguard Worker             instr->operands[0].setTemp(ctx.info[instr->operands[0].tempId()].temp);
1583*61046927SAndroid Build Coastguard Worker          }
1584*61046927SAndroid Build Coastguard Worker       }
1585*61046927SAndroid Build Coastguard Worker    }
1586*61046927SAndroid Build Coastguard Worker 
1587*61046927SAndroid Build Coastguard Worker    /* if this instruction doesn't define anything, return */
1588*61046927SAndroid Build Coastguard Worker    if (instr->definitions.empty()) {
1589*61046927SAndroid Build Coastguard Worker       check_sdwa_extract(ctx, instr);
1590*61046927SAndroid Build Coastguard Worker       return;
1591*61046927SAndroid Build Coastguard Worker    }
1592*61046927SAndroid Build Coastguard Worker 
1593*61046927SAndroid Build Coastguard Worker    if (instr->isVALU() || instr->isVINTRP()) {
1594*61046927SAndroid Build Coastguard Worker       if (instr_info.can_use_output_modifiers[(int)instr->opcode] || instr->isVINTRP() ||
1595*61046927SAndroid Build Coastguard Worker           instr->opcode == aco_opcode::v_cndmask_b32) {
1596*61046927SAndroid Build Coastguard Worker          bool canonicalized = true;
1597*61046927SAndroid Build Coastguard Worker          if (!does_fp_op_flush_denorms(ctx, instr->opcode)) {
1598*61046927SAndroid Build Coastguard Worker             unsigned ops = instr->opcode == aco_opcode::v_cndmask_b32 ? 2 : instr->operands.size();
1599*61046927SAndroid Build Coastguard Worker             for (unsigned i = 0; canonicalized && (i < ops); i++)
1600*61046927SAndroid Build Coastguard Worker                canonicalized = is_op_canonicalized(ctx, instr->operands[i]);
1601*61046927SAndroid Build Coastguard Worker          }
1602*61046927SAndroid Build Coastguard Worker          if (canonicalized)
1603*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_canonicalized();
1604*61046927SAndroid Build Coastguard Worker       }
1605*61046927SAndroid Build Coastguard Worker 
1606*61046927SAndroid Build Coastguard Worker       if (instr->isVOPC()) {
1607*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_vopc(instr.get());
1608*61046927SAndroid Build Coastguard Worker          check_sdwa_extract(ctx, instr);
1609*61046927SAndroid Build Coastguard Worker          return;
1610*61046927SAndroid Build Coastguard Worker       }
1611*61046927SAndroid Build Coastguard Worker       if (instr->isVOP3P()) {
1612*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
1613*61046927SAndroid Build Coastguard Worker          return;
1614*61046927SAndroid Build Coastguard Worker       }
1615*61046927SAndroid Build Coastguard Worker    }
1616*61046927SAndroid Build Coastguard Worker 
1617*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
1618*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_create_vector: {
1619*61046927SAndroid Build Coastguard Worker       bool copy_prop = instr->operands.size() == 1 && instr->operands[0].isTemp() &&
1620*61046927SAndroid Build Coastguard Worker                        instr->operands[0].regClass() == instr->definitions[0].regClass();
1621*61046927SAndroid Build Coastguard Worker       if (copy_prop) {
1622*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1623*61046927SAndroid Build Coastguard Worker          break;
1624*61046927SAndroid Build Coastguard Worker       }
1625*61046927SAndroid Build Coastguard Worker 
1626*61046927SAndroid Build Coastguard Worker       /* expand vector operands */
1627*61046927SAndroid Build Coastguard Worker       std::vector<Operand> ops;
1628*61046927SAndroid Build Coastguard Worker       unsigned offset = 0;
1629*61046927SAndroid Build Coastguard Worker       for (const Operand& op : instr->operands) {
1630*61046927SAndroid Build Coastguard Worker          /* ensure that any expanded operands are properly aligned */
1631*61046927SAndroid Build Coastguard Worker          bool aligned = offset % 4 == 0 || op.bytes() < 4;
1632*61046927SAndroid Build Coastguard Worker          offset += op.bytes();
1633*61046927SAndroid Build Coastguard Worker          if (aligned && op.isTemp() && ctx.info[op.tempId()].is_vec()) {
1634*61046927SAndroid Build Coastguard Worker             Instruction* vec = ctx.info[op.tempId()].instr;
1635*61046927SAndroid Build Coastguard Worker             for (const Operand& vec_op : vec->operands)
1636*61046927SAndroid Build Coastguard Worker                ops.emplace_back(vec_op);
1637*61046927SAndroid Build Coastguard Worker          } else {
1638*61046927SAndroid Build Coastguard Worker             ops.emplace_back(op);
1639*61046927SAndroid Build Coastguard Worker          }
1640*61046927SAndroid Build Coastguard Worker       }
1641*61046927SAndroid Build Coastguard Worker 
1642*61046927SAndroid Build Coastguard Worker       /* combine expanded operands to new vector */
1643*61046927SAndroid Build Coastguard Worker       if (ops.size() != instr->operands.size()) {
1644*61046927SAndroid Build Coastguard Worker          assert(ops.size() > instr->operands.size());
1645*61046927SAndroid Build Coastguard Worker          Definition def = instr->definitions[0];
1646*61046927SAndroid Build Coastguard Worker          instr.reset(
1647*61046927SAndroid Build Coastguard Worker             create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, ops.size(), 1));
1648*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < ops.size(); i++) {
1649*61046927SAndroid Build Coastguard Worker             if (ops[i].isTemp() && ctx.info[ops[i].tempId()].is_temp() &&
1650*61046927SAndroid Build Coastguard Worker                 ops[i].regClass() == ctx.info[ops[i].tempId()].temp.regClass())
1651*61046927SAndroid Build Coastguard Worker                ops[i].setTemp(ctx.info[ops[i].tempId()].temp);
1652*61046927SAndroid Build Coastguard Worker             instr->operands[i] = ops[i];
1653*61046927SAndroid Build Coastguard Worker          }
1654*61046927SAndroid Build Coastguard Worker          instr->definitions[0] = def;
1655*61046927SAndroid Build Coastguard Worker       } else {
1656*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < ops.size(); i++) {
1657*61046927SAndroid Build Coastguard Worker             assert(instr->operands[i] == ops[i]);
1658*61046927SAndroid Build Coastguard Worker          }
1659*61046927SAndroid Build Coastguard Worker       }
1660*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
1661*61046927SAndroid Build Coastguard Worker 
1662*61046927SAndroid Build Coastguard Worker       if (instr->operands.size() == 2) {
1663*61046927SAndroid Build Coastguard Worker          /* check if this is created from split_vector */
1664*61046927SAndroid Build Coastguard Worker          if (instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_split()) {
1665*61046927SAndroid Build Coastguard Worker             Instruction* split = ctx.info[instr->operands[1].tempId()].instr;
1666*61046927SAndroid Build Coastguard Worker             if (instr->operands[0].isTemp() &&
1667*61046927SAndroid Build Coastguard Worker                 instr->operands[0].getTemp() == split->definitions[0].getTemp())
1668*61046927SAndroid Build Coastguard Worker                ctx.info[instr->definitions[0].tempId()].set_temp(split->operands[0].getTemp());
1669*61046927SAndroid Build Coastguard Worker          }
1670*61046927SAndroid Build Coastguard Worker       }
1671*61046927SAndroid Build Coastguard Worker       break;
1672*61046927SAndroid Build Coastguard Worker    }
1673*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_split_vector: {
1674*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[instr->operands[0].tempId()];
1675*61046927SAndroid Build Coastguard Worker 
1676*61046927SAndroid Build Coastguard Worker       if (info.is_constant_or_literal(32)) {
1677*61046927SAndroid Build Coastguard Worker          uint64_t val = info.val;
1678*61046927SAndroid Build Coastguard Worker          for (Definition def : instr->definitions) {
1679*61046927SAndroid Build Coastguard Worker             uint32_t mask = u_bit_consecutive(0, def.bytes() * 8u);
1680*61046927SAndroid Build Coastguard Worker             ctx.info[def.tempId()].set_constant(ctx.program->gfx_level, val & mask);
1681*61046927SAndroid Build Coastguard Worker             val >>= def.bytes() * 8u;
1682*61046927SAndroid Build Coastguard Worker          }
1683*61046927SAndroid Build Coastguard Worker          break;
1684*61046927SAndroid Build Coastguard Worker       } else if (!info.is_vec()) {
1685*61046927SAndroid Build Coastguard Worker          if (instr->definitions.size() == 2 && instr->operands[0].isTemp() &&
1686*61046927SAndroid Build Coastguard Worker              instr->definitions[0].bytes() == instr->definitions[1].bytes()) {
1687*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[1].tempId()].set_split(instr.get());
1688*61046927SAndroid Build Coastguard Worker             if (instr->operands[0].bytes() == 4) {
1689*61046927SAndroid Build Coastguard Worker                /* D16 subdword split */
1690*61046927SAndroid Build Coastguard Worker                ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1691*61046927SAndroid Build Coastguard Worker                ctx.info[instr->definitions[1].tempId()].set_extract(instr.get());
1692*61046927SAndroid Build Coastguard Worker             }
1693*61046927SAndroid Build Coastguard Worker          }
1694*61046927SAndroid Build Coastguard Worker          break;
1695*61046927SAndroid Build Coastguard Worker       }
1696*61046927SAndroid Build Coastguard Worker 
1697*61046927SAndroid Build Coastguard Worker       Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
1698*61046927SAndroid Build Coastguard Worker       unsigned split_offset = 0;
1699*61046927SAndroid Build Coastguard Worker       unsigned vec_offset = 0;
1700*61046927SAndroid Build Coastguard Worker       unsigned vec_index = 0;
1701*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < instr->definitions.size();
1702*61046927SAndroid Build Coastguard Worker            split_offset += instr->definitions[i++].bytes()) {
1703*61046927SAndroid Build Coastguard Worker          while (vec_offset < split_offset && vec_index < vec->operands.size())
1704*61046927SAndroid Build Coastguard Worker             vec_offset += vec->operands[vec_index++].bytes();
1705*61046927SAndroid Build Coastguard Worker 
1706*61046927SAndroid Build Coastguard Worker          if (vec_offset != split_offset ||
1707*61046927SAndroid Build Coastguard Worker              vec->operands[vec_index].bytes() != instr->definitions[i].bytes())
1708*61046927SAndroid Build Coastguard Worker             continue;
1709*61046927SAndroid Build Coastguard Worker 
1710*61046927SAndroid Build Coastguard Worker          Operand vec_op = vec->operands[vec_index];
1711*61046927SAndroid Build Coastguard Worker          if (vec_op.isConstant()) {
1712*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[i].tempId()].set_constant(ctx.program->gfx_level,
1713*61046927SAndroid Build Coastguard Worker                                                                   vec_op.constantValue64());
1714*61046927SAndroid Build Coastguard Worker          } else if (vec_op.isTemp()) {
1715*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[i].tempId()].set_temp(vec_op.getTemp());
1716*61046927SAndroid Build Coastguard Worker          }
1717*61046927SAndroid Build Coastguard Worker       }
1718*61046927SAndroid Build Coastguard Worker       break;
1719*61046927SAndroid Build Coastguard Worker    }
1720*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract_vector: { /* mov */
1721*61046927SAndroid Build Coastguard Worker       const unsigned index = instr->operands[1].constantValue();
1722*61046927SAndroid Build Coastguard Worker 
1723*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isTemp()) {
1724*61046927SAndroid Build Coastguard Worker          ssa_info& info = ctx.info[instr->operands[0].tempId()];
1725*61046927SAndroid Build Coastguard Worker          const unsigned dst_offset = index * instr->definitions[0].bytes();
1726*61046927SAndroid Build Coastguard Worker 
1727*61046927SAndroid Build Coastguard Worker          if (info.is_vec()) {
1728*61046927SAndroid Build Coastguard Worker             /* check if we index directly into a vector element */
1729*61046927SAndroid Build Coastguard Worker             Instruction* vec = info.instr;
1730*61046927SAndroid Build Coastguard Worker             unsigned offset = 0;
1731*61046927SAndroid Build Coastguard Worker 
1732*61046927SAndroid Build Coastguard Worker             for (const Operand& op : vec->operands) {
1733*61046927SAndroid Build Coastguard Worker                if (offset < dst_offset) {
1734*61046927SAndroid Build Coastguard Worker                   offset += op.bytes();
1735*61046927SAndroid Build Coastguard Worker                   continue;
1736*61046927SAndroid Build Coastguard Worker                } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) {
1737*61046927SAndroid Build Coastguard Worker                   break;
1738*61046927SAndroid Build Coastguard Worker                }
1739*61046927SAndroid Build Coastguard Worker                instr->operands[0] = op;
1740*61046927SAndroid Build Coastguard Worker                break;
1741*61046927SAndroid Build Coastguard Worker             }
1742*61046927SAndroid Build Coastguard Worker          } else if (info.is_constant_or_literal(32)) {
1743*61046927SAndroid Build Coastguard Worker             /* propagate constants */
1744*61046927SAndroid Build Coastguard Worker             uint32_t mask = u_bit_consecutive(0, instr->definitions[0].bytes() * 8u);
1745*61046927SAndroid Build Coastguard Worker             uint32_t val = (info.val >> (dst_offset * 8u)) & mask;
1746*61046927SAndroid Build Coastguard Worker             instr->operands[0] =
1747*61046927SAndroid Build Coastguard Worker                Operand::get_const(ctx.program->gfx_level, val, instr->definitions[0].bytes());
1748*61046927SAndroid Build Coastguard Worker             ;
1749*61046927SAndroid Build Coastguard Worker          }
1750*61046927SAndroid Build Coastguard Worker       }
1751*61046927SAndroid Build Coastguard Worker 
1752*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].bytes() != instr->definitions[0].bytes()) {
1753*61046927SAndroid Build Coastguard Worker          if (instr->operands[0].size() != 1 || !instr->operands[0].isTemp())
1754*61046927SAndroid Build Coastguard Worker             break;
1755*61046927SAndroid Build Coastguard Worker 
1756*61046927SAndroid Build Coastguard Worker          if (index == 0)
1757*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1758*61046927SAndroid Build Coastguard Worker          else
1759*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
1760*61046927SAndroid Build Coastguard Worker          break;
1761*61046927SAndroid Build Coastguard Worker       }
1762*61046927SAndroid Build Coastguard Worker 
1763*61046927SAndroid Build Coastguard Worker       /* convert this extract into a copy instruction */
1764*61046927SAndroid Build Coastguard Worker       instr->opcode = aco_opcode::p_parallelcopy;
1765*61046927SAndroid Build Coastguard Worker       instr->operands.pop_back();
1766*61046927SAndroid Build Coastguard Worker       FALLTHROUGH;
1767*61046927SAndroid Build Coastguard Worker    }
1768*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_parallelcopy: /* propagate */
1769*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_vec() &&
1770*61046927SAndroid Build Coastguard Worker           instr->operands[0].regClass() != instr->definitions[0].regClass()) {
1771*61046927SAndroid Build Coastguard Worker          /* We might not be able to copy-propagate if it's a SGPR->VGPR copy, so
1772*61046927SAndroid Build Coastguard Worker           * duplicate the vector instead.
1773*61046927SAndroid Build Coastguard Worker           */
1774*61046927SAndroid Build Coastguard Worker          Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
1775*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> old_copy = std::move(instr);
1776*61046927SAndroid Build Coastguard Worker 
1777*61046927SAndroid Build Coastguard Worker          instr.reset(create_instruction(aco_opcode::p_create_vector, Format::PSEUDO,
1778*61046927SAndroid Build Coastguard Worker                                         vec->operands.size(), 1));
1779*61046927SAndroid Build Coastguard Worker          instr->definitions[0] = old_copy->definitions[0];
1780*61046927SAndroid Build Coastguard Worker          std::copy(vec->operands.begin(), vec->operands.end(), instr->operands.begin());
1781*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < vec->operands.size(); i++) {
1782*61046927SAndroid Build Coastguard Worker             Operand& op = instr->operands[i];
1783*61046927SAndroid Build Coastguard Worker             if (op.isTemp() && ctx.info[op.tempId()].is_temp() &&
1784*61046927SAndroid Build Coastguard Worker                 ctx.info[op.tempId()].temp.type() == instr->definitions[0].regClass().type())
1785*61046927SAndroid Build Coastguard Worker                op.setTemp(ctx.info[op.tempId()].temp);
1786*61046927SAndroid Build Coastguard Worker          }
1787*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_vec(instr.get());
1788*61046927SAndroid Build Coastguard Worker          break;
1789*61046927SAndroid Build Coastguard Worker       }
1790*61046927SAndroid Build Coastguard Worker       FALLTHROUGH;
1791*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_as_uniform:
1792*61046927SAndroid Build Coastguard Worker       if (instr->definitions[0].isFixed()) {
1793*61046927SAndroid Build Coastguard Worker          /* don't copy-propagate copies into fixed registers */
1794*61046927SAndroid Build Coastguard Worker       } else if (instr->operands[0].isConstant()) {
1795*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_constant(
1796*61046927SAndroid Build Coastguard Worker             ctx.program->gfx_level, instr->operands[0].constantValue64());
1797*61046927SAndroid Build Coastguard Worker       } else if (instr->operands[0].isTemp()) {
1798*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1799*61046927SAndroid Build Coastguard Worker          if (ctx.info[instr->operands[0].tempId()].is_canonicalized())
1800*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_canonicalized();
1801*61046927SAndroid Build Coastguard Worker       } else {
1802*61046927SAndroid Build Coastguard Worker          assert(instr->operands[0].isFixed());
1803*61046927SAndroid Build Coastguard Worker       }
1804*61046927SAndroid Build Coastguard Worker       break;
1805*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mov_b32:
1806*61046927SAndroid Build Coastguard Worker       if (instr->isDPP16()) {
1807*61046927SAndroid Build Coastguard Worker          /* anything else doesn't make sense in SSA */
1808*61046927SAndroid Build Coastguard Worker          assert(instr->dpp16().row_mask == 0xf && instr->dpp16().bank_mask == 0xf);
1809*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_dpp16(instr.get());
1810*61046927SAndroid Build Coastguard Worker       } else if (instr->isDPP8()) {
1811*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_dpp8(instr.get());
1812*61046927SAndroid Build Coastguard Worker       }
1813*61046927SAndroid Build Coastguard Worker       break;
1814*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_is_helper:
1815*61046927SAndroid Build Coastguard Worker       if (!ctx.program->needs_wqm)
1816*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
1817*61046927SAndroid Build Coastguard Worker       break;
1818*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f64_e64:
1819*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f64: ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); break;
1820*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f16:
1821*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f32:
1822*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_legacy_f32: { /* omod */
1823*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
1824*61046927SAndroid Build Coastguard Worker 
1825*61046927SAndroid Build Coastguard Worker       /* TODO: try to move the negate/abs modifier to the consumer instead */
1826*61046927SAndroid Build Coastguard Worker       bool uses_mods = instr->usesModifiers();
1827*61046927SAndroid Build Coastguard Worker       bool fp16 = instr->opcode == aco_opcode::v_mul_f16;
1828*61046927SAndroid Build Coastguard Worker 
1829*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 2; i++) {
1830*61046927SAndroid Build Coastguard Worker          if (instr->operands[!i].isConstant() && instr->operands[i].isTemp()) {
1831*61046927SAndroid Build Coastguard Worker             if (!instr->isDPP() && !instr->isSDWA() && !instr->valu().opsel &&
1832*61046927SAndroid Build Coastguard Worker                 (instr->operands[!i].constantEquals(fp16 ? 0x3c00 : 0x3f800000) ||   /* 1.0 */
1833*61046927SAndroid Build Coastguard Worker                  instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u))) { /* -1.0 */
1834*61046927SAndroid Build Coastguard Worker                bool neg1 = instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u);
1835*61046927SAndroid Build Coastguard Worker 
1836*61046927SAndroid Build Coastguard Worker                VALU_instruction* valu = &instr->valu();
1837*61046927SAndroid Build Coastguard Worker                if (valu->abs[!i] || valu->neg[!i] || valu->omod)
1838*61046927SAndroid Build Coastguard Worker                   continue;
1839*61046927SAndroid Build Coastguard Worker 
1840*61046927SAndroid Build Coastguard Worker                bool abs = valu->abs[i];
1841*61046927SAndroid Build Coastguard Worker                bool neg = neg1 ^ valu->neg[i];
1842*61046927SAndroid Build Coastguard Worker                Temp other = instr->operands[i].getTemp();
1843*61046927SAndroid Build Coastguard Worker 
1844*61046927SAndroid Build Coastguard Worker                if (valu->clamp) {
1845*61046927SAndroid Build Coastguard Worker                   if (!abs && !neg && other.type() == RegType::vgpr)
1846*61046927SAndroid Build Coastguard Worker                      ctx.info[other.id()].set_clamp(instr.get());
1847*61046927SAndroid Build Coastguard Worker                   continue;
1848*61046927SAndroid Build Coastguard Worker                }
1849*61046927SAndroid Build Coastguard Worker 
1850*61046927SAndroid Build Coastguard Worker                if (abs && neg && other.type() == RegType::vgpr)
1851*61046927SAndroid Build Coastguard Worker                   ctx.info[instr->definitions[0].tempId()].set_neg_abs(other);
1852*61046927SAndroid Build Coastguard Worker                else if (abs && !neg && other.type() == RegType::vgpr)
1853*61046927SAndroid Build Coastguard Worker                   ctx.info[instr->definitions[0].tempId()].set_abs(other);
1854*61046927SAndroid Build Coastguard Worker                else if (!abs && neg && other.type() == RegType::vgpr)
1855*61046927SAndroid Build Coastguard Worker                   ctx.info[instr->definitions[0].tempId()].set_neg(other);
1856*61046927SAndroid Build Coastguard Worker                else if (!abs && !neg)
1857*61046927SAndroid Build Coastguard Worker                   ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(other);
1858*61046927SAndroid Build Coastguard Worker             } else if (uses_mods || (instr->definitions[0].isSZPreserve() &&
1859*61046927SAndroid Build Coastguard Worker                                      instr->opcode != aco_opcode::v_mul_legacy_f32)) {
1860*61046927SAndroid Build Coastguard Worker                continue; /* omod uses a legacy multiplication. */
1861*61046927SAndroid Build Coastguard Worker             } else if (instr->operands[!i].constantValue() == 0u &&
1862*61046927SAndroid Build Coastguard Worker                        ((!instr->definitions[0].isNaNPreserve() &&
1863*61046927SAndroid Build Coastguard Worker                          !instr->definitions[0].isInfPreserve()) ||
1864*61046927SAndroid Build Coastguard Worker                         instr->opcode == aco_opcode::v_mul_legacy_f32)) { /* 0.0 */
1865*61046927SAndroid Build Coastguard Worker                ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->gfx_level, 0u);
1866*61046927SAndroid Build Coastguard Worker             } else if ((fp16 ? ctx.fp_mode.denorm16_64 : ctx.fp_mode.denorm32) != fp_denorm_flush) {
1867*61046927SAndroid Build Coastguard Worker                /* omod has no effect if denormals are enabled. */
1868*61046927SAndroid Build Coastguard Worker                continue;
1869*61046927SAndroid Build Coastguard Worker             } else if (instr->operands[!i].constantValue() ==
1870*61046927SAndroid Build Coastguard Worker                        (fp16 ? 0x4000 : 0x40000000)) { /* 2.0 */
1871*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[i].tempId()].set_omod2(instr.get());
1872*61046927SAndroid Build Coastguard Worker             } else if (instr->operands[!i].constantValue() ==
1873*61046927SAndroid Build Coastguard Worker                        (fp16 ? 0x4400 : 0x40800000)) { /* 4.0 */
1874*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[i].tempId()].set_omod4(instr.get());
1875*61046927SAndroid Build Coastguard Worker             } else if (instr->operands[!i].constantValue() ==
1876*61046927SAndroid Build Coastguard Worker                        (fp16 ? 0x3800 : 0x3f000000)) { /* 0.5 */
1877*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[i].tempId()].set_omod5(instr.get());
1878*61046927SAndroid Build Coastguard Worker             } else {
1879*61046927SAndroid Build Coastguard Worker                continue;
1880*61046927SAndroid Build Coastguard Worker             }
1881*61046927SAndroid Build Coastguard Worker             break;
1882*61046927SAndroid Build Coastguard Worker          }
1883*61046927SAndroid Build Coastguard Worker       }
1884*61046927SAndroid Build Coastguard Worker       break;
1885*61046927SAndroid Build Coastguard Worker    }
1886*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_lo_u16:
1887*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_lo_u16_e64:
1888*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_u32_u24:
1889*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
1890*61046927SAndroid Build Coastguard Worker       break;
1891*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_f16:
1892*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_f32: { /* clamp */
1893*61046927SAndroid Build Coastguard Worker       unsigned idx;
1894*61046927SAndroid Build Coastguard Worker       if (detect_clamp(instr.get(), &idx) && !instr->valu().abs && !instr->valu().neg)
1895*61046927SAndroid Build Coastguard Worker          ctx.info[instr->operands[idx].tempId()].set_clamp(instr.get());
1896*61046927SAndroid Build Coastguard Worker       break;
1897*61046927SAndroid Build Coastguard Worker    }
1898*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cndmask_b32:
1899*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].constantEquals(0) && instr->operands[1].constantEquals(0x3f800000u))
1900*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_b2f(instr->operands[2].getTemp());
1901*61046927SAndroid Build Coastguard Worker       else if (instr->operands[0].constantEquals(0) && instr->operands[1].constantEquals(1))
1902*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_b2i(instr->operands[2].getTemp());
1903*61046927SAndroid Build Coastguard Worker 
1904*61046927SAndroid Build Coastguard Worker       break;
1905*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_u32:
1906*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_co_u32:
1907*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_co_u32_e64:
1908*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_add_i32:
1909*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_add_u32:
1910*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subbrev_co_u32:
1911*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_u32:
1912*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_i32:
1913*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_co_u32:
1914*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_co_u32_e64:
1915*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_sub_u32:
1916*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_sub_i32:
1917*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_u32:
1918*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_co_u32:
1919*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_co_u32_e64:
1920*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get());
1921*61046927SAndroid Build Coastguard Worker       break;
1922*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_not_b32:
1923*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_not_b64:
1924*61046927SAndroid Build Coastguard Worker       if (!instr->operands[0].isTemp()) {
1925*61046927SAndroid Build Coastguard Worker       } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
1926*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1927*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[1].tempId()].set_scc_invert(
1928*61046927SAndroid Build Coastguard Worker             ctx.info[instr->operands[0].tempId()].temp);
1929*61046927SAndroid Build Coastguard Worker       } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) {
1930*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1931*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[1].tempId()].set_scc_invert(
1932*61046927SAndroid Build Coastguard Worker             ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1933*61046927SAndroid Build Coastguard Worker       }
1934*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
1935*61046927SAndroid Build Coastguard Worker       break;
1936*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b32:
1937*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b64:
1938*61046927SAndroid Build Coastguard Worker       if (fixed_to_exec(instr->operands[1]) && instr->operands[0].isTemp()) {
1939*61046927SAndroid Build Coastguard Worker          if (ctx.info[instr->operands[0].tempId()].is_uniform_bool()) {
1940*61046927SAndroid Build Coastguard Worker             /* Try to get rid of the superfluous s_cselect + s_and_b64 that comes from turning a
1941*61046927SAndroid Build Coastguard Worker              * uniform bool into divergent */
1942*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[1].tempId()].set_temp(
1943*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[0].tempId()].temp);
1944*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_uniform_bool(
1945*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[0].tempId()].temp);
1946*61046927SAndroid Build Coastguard Worker             break;
1947*61046927SAndroid Build Coastguard Worker          } else if (ctx.info[instr->operands[0].tempId()].is_uniform_bitwise()) {
1948*61046927SAndroid Build Coastguard Worker             /* Try to get rid of the superfluous s_and_b64, since the uniform bitwise instruction
1949*61046927SAndroid Build Coastguard Worker              * already produces the same SCC */
1950*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[1].tempId()].set_temp(
1951*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1952*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_uniform_bool(
1953*61046927SAndroid Build Coastguard Worker                ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
1954*61046927SAndroid Build Coastguard Worker             break;
1955*61046927SAndroid Build Coastguard Worker          } else if ((ctx.program->stage.num_sw_stages() > 1 ||
1956*61046927SAndroid Build Coastguard Worker                      ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) &&
1957*61046927SAndroid Build Coastguard Worker                     instr->pass_flags == 1) {
1958*61046927SAndroid Build Coastguard Worker             /* In case of merged shaders, pass_flags=1 means that all lanes are active (exec=-1), so
1959*61046927SAndroid Build Coastguard Worker              * s_and is unnecessary. */
1960*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
1961*61046927SAndroid Build Coastguard Worker             break;
1962*61046927SAndroid Build Coastguard Worker          }
1963*61046927SAndroid Build Coastguard Worker       }
1964*61046927SAndroid Build Coastguard Worker       FALLTHROUGH;
1965*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b32:
1966*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b64:
1967*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b32:
1968*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b64:
1969*61046927SAndroid Build Coastguard Worker       if (std::all_of(instr->operands.begin(), instr->operands.end(),
1970*61046927SAndroid Build Coastguard Worker                       [&ctx](const Operand& op)
1971*61046927SAndroid Build Coastguard Worker                       {
1972*61046927SAndroid Build Coastguard Worker                          return op.isTemp() && (ctx.info[op.tempId()].is_uniform_bool() ||
1973*61046927SAndroid Build Coastguard Worker                                                 ctx.info[op.tempId()].is_uniform_bitwise());
1974*61046927SAndroid Build Coastguard Worker                       })) {
1975*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
1976*61046927SAndroid Build Coastguard Worker       }
1977*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
1978*61046927SAndroid Build Coastguard Worker       break;
1979*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_lshl_b32:
1980*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_or_b32:
1981*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_lshlrev_b32:
1982*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_bcnt_u32_b32:
1983*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_and_b32:
1984*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xor_b32:
1985*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_not_b32:
1986*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
1987*61046927SAndroid Build Coastguard Worker       break;
1988*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f32:
1989*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f16:
1990*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u32:
1991*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i32:
1992*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u16:
1993*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i16:
1994*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u16_e64:
1995*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i16_e64:
1996*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f32:
1997*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f16:
1998*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u32:
1999*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i32:
2000*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u16:
2001*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i16:
2002*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u16_e64:
2003*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i16_e64:
2004*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_minmax(instr.get());
2005*61046927SAndroid Build Coastguard Worker       break;
2006*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cselect_b64:
2007*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cselect_b32:
2008*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].constantEquals((unsigned)-1) && instr->operands[1].constantEquals(0)) {
2009*61046927SAndroid Build Coastguard Worker          /* Found a cselect that operates on a uniform bool that comes from eg. s_cmp */
2010*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_uniform_bool(instr->operands[2].getTemp());
2011*61046927SAndroid Build Coastguard Worker       }
2012*61046927SAndroid Build Coastguard Worker       if (instr->operands[2].isTemp() && ctx.info[instr->operands[2].tempId()].is_scc_invert()) {
2013*61046927SAndroid Build Coastguard Worker          /* Flip the operands to get rid of the scc_invert instruction */
2014*61046927SAndroid Build Coastguard Worker          std::swap(instr->operands[0], instr->operands[1]);
2015*61046927SAndroid Build Coastguard Worker          instr->operands[2].setTemp(ctx.info[instr->operands[2].tempId()].temp);
2016*61046927SAndroid Build Coastguard Worker       }
2017*61046927SAndroid Build Coastguard Worker       break;
2018*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_mul_i32:
2019*61046927SAndroid Build Coastguard Worker       /* Testing every uint32_t shows that 0x3f800000*n is never a denormal.
2020*61046927SAndroid Build Coastguard Worker        * This pattern is created from a uniform nir_op_b2f. */
2021*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].constantEquals(0x3f800000u))
2022*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_canonicalized();
2023*61046927SAndroid Build Coastguard Worker       break;
2024*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract: {
2025*61046927SAndroid Build Coastguard Worker       if (instr->definitions[0].bytes() == 4 && instr->operands[0].isTemp()) {
2026*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
2027*61046927SAndroid Build Coastguard Worker          if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()))
2028*61046927SAndroid Build Coastguard Worker             ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
2029*61046927SAndroid Build Coastguard Worker       }
2030*61046927SAndroid Build Coastguard Worker       break;
2031*61046927SAndroid Build Coastguard Worker    }
2032*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_insert: {
2033*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].bytes() == 4 && instr->operands[0].isTemp()) {
2034*61046927SAndroid Build Coastguard Worker          if (instr->operands[0].regClass() == v1)
2035*61046927SAndroid Build Coastguard Worker             ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
2036*61046927SAndroid Build Coastguard Worker          if (parse_extract(instr.get()))
2037*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
2038*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
2039*61046927SAndroid Build Coastguard Worker       }
2040*61046927SAndroid Build Coastguard Worker       break;
2041*61046927SAndroid Build Coastguard Worker    }
2042*61046927SAndroid Build Coastguard Worker    case aco_opcode::ds_read_u8:
2043*61046927SAndroid Build Coastguard Worker    case aco_opcode::ds_read_u8_d16:
2044*61046927SAndroid Build Coastguard Worker    case aco_opcode::ds_read_u16:
2045*61046927SAndroid Build Coastguard Worker    case aco_opcode::ds_read_u16_d16: {
2046*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
2047*61046927SAndroid Build Coastguard Worker       break;
2048*61046927SAndroid Build Coastguard Worker    }
2049*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_f32: {
2050*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isTemp()) {
2051*61046927SAndroid Build Coastguard Worker          ssa_info& info = ctx.info[instr->operands[0].tempId()];
2052*61046927SAndroid Build Coastguard Worker          if (!info.is_dpp() || info.instr->pass_flags != instr->pass_flags)
2053*61046927SAndroid Build Coastguard Worker             info.set_f2f16(instr.get());
2054*61046927SAndroid Build Coastguard Worker       }
2055*61046927SAndroid Build Coastguard Worker       break;
2056*61046927SAndroid Build Coastguard Worker    }
2057*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f32_f16: {
2058*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isTemp())
2059*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_f2f32(instr.get());
2060*61046927SAndroid Build Coastguard Worker       break;
2061*61046927SAndroid Build Coastguard Worker    }
2062*61046927SAndroid Build Coastguard Worker    default: break;
2063*61046927SAndroid Build Coastguard Worker    }
2064*61046927SAndroid Build Coastguard Worker 
2065*61046927SAndroid Build Coastguard Worker    /* Don't remove label_extract if we can't apply the extract to
2066*61046927SAndroid Build Coastguard Worker     * neg/abs instructions because we'll likely combine it into another valu. */
2067*61046927SAndroid Build Coastguard Worker    if (!(ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)))
2068*61046927SAndroid Build Coastguard Worker       check_sdwa_extract(ctx, instr);
2069*61046927SAndroid Build Coastguard Worker }
2070*61046927SAndroid Build Coastguard Worker 
2071*61046927SAndroid Build Coastguard Worker unsigned
original_temp_id(opt_ctx & ctx,Temp tmp)2072*61046927SAndroid Build Coastguard Worker original_temp_id(opt_ctx& ctx, Temp tmp)
2073*61046927SAndroid Build Coastguard Worker {
2074*61046927SAndroid Build Coastguard Worker    if (ctx.info[tmp.id()].is_temp())
2075*61046927SAndroid Build Coastguard Worker       return ctx.info[tmp.id()].temp.id();
2076*61046927SAndroid Build Coastguard Worker    else
2077*61046927SAndroid Build Coastguard Worker       return tmp.id();
2078*61046927SAndroid Build Coastguard Worker }
2079*61046927SAndroid Build Coastguard Worker 
2080*61046927SAndroid Build Coastguard Worker void
decrease_op_uses_if_dead(opt_ctx & ctx,Instruction * instr)2081*61046927SAndroid Build Coastguard Worker decrease_op_uses_if_dead(opt_ctx& ctx, Instruction* instr)
2082*61046927SAndroid Build Coastguard Worker {
2083*61046927SAndroid Build Coastguard Worker    if (is_dead(ctx.uses, instr)) {
2084*61046927SAndroid Build Coastguard Worker       for (const Operand& op : instr->operands) {
2085*61046927SAndroid Build Coastguard Worker          if (op.isTemp())
2086*61046927SAndroid Build Coastguard Worker             ctx.uses[op.tempId()]--;
2087*61046927SAndroid Build Coastguard Worker       }
2088*61046927SAndroid Build Coastguard Worker    }
2089*61046927SAndroid Build Coastguard Worker }
2090*61046927SAndroid Build Coastguard Worker 
2091*61046927SAndroid Build Coastguard Worker void
decrease_uses(opt_ctx & ctx,Instruction * instr)2092*61046927SAndroid Build Coastguard Worker decrease_uses(opt_ctx& ctx, Instruction* instr)
2093*61046927SAndroid Build Coastguard Worker {
2094*61046927SAndroid Build Coastguard Worker    ctx.uses[instr->definitions[0].tempId()]--;
2095*61046927SAndroid Build Coastguard Worker    decrease_op_uses_if_dead(ctx, instr);
2096*61046927SAndroid Build Coastguard Worker }
2097*61046927SAndroid Build Coastguard Worker 
2098*61046927SAndroid Build Coastguard Worker Operand
copy_operand(opt_ctx & ctx,Operand op)2099*61046927SAndroid Build Coastguard Worker copy_operand(opt_ctx& ctx, Operand op)
2100*61046927SAndroid Build Coastguard Worker {
2101*61046927SAndroid Build Coastguard Worker    if (op.isTemp())
2102*61046927SAndroid Build Coastguard Worker       ctx.uses[op.tempId()]++;
2103*61046927SAndroid Build Coastguard Worker    return op;
2104*61046927SAndroid Build Coastguard Worker }
2105*61046927SAndroid Build Coastguard Worker 
2106*61046927SAndroid Build Coastguard Worker Instruction*
follow_operand(opt_ctx & ctx,Operand op,bool ignore_uses=false)2107*61046927SAndroid Build Coastguard Worker follow_operand(opt_ctx& ctx, Operand op, bool ignore_uses = false)
2108*61046927SAndroid Build Coastguard Worker {
2109*61046927SAndroid Build Coastguard Worker    if (!op.isTemp() || !(ctx.info[op.tempId()].label & instr_usedef_labels))
2110*61046927SAndroid Build Coastguard Worker       return nullptr;
2111*61046927SAndroid Build Coastguard Worker    if (!ignore_uses && ctx.uses[op.tempId()] > 1)
2112*61046927SAndroid Build Coastguard Worker       return nullptr;
2113*61046927SAndroid Build Coastguard Worker 
2114*61046927SAndroid Build Coastguard Worker    Instruction* instr = ctx.info[op.tempId()].instr;
2115*61046927SAndroid Build Coastguard Worker 
2116*61046927SAndroid Build Coastguard Worker    if (instr->definitions.size() == 2) {
2117*61046927SAndroid Build Coastguard Worker       unsigned idx = ctx.info[op.tempId()].label & label_split ? 1 : 0;
2118*61046927SAndroid Build Coastguard Worker       assert(instr->definitions[idx].isTemp() && instr->definitions[idx].tempId() == op.tempId());
2119*61046927SAndroid Build Coastguard Worker       if (instr->definitions[!idx].isTemp() && ctx.uses[instr->definitions[!idx].tempId()])
2120*61046927SAndroid Build Coastguard Worker          return nullptr;
2121*61046927SAndroid Build Coastguard Worker    }
2122*61046927SAndroid Build Coastguard Worker 
2123*61046927SAndroid Build Coastguard Worker    for (Operand& operand : instr->operands) {
2124*61046927SAndroid Build Coastguard Worker       if (fixed_to_exec(operand))
2125*61046927SAndroid Build Coastguard Worker          return nullptr;
2126*61046927SAndroid Build Coastguard Worker    }
2127*61046927SAndroid Build Coastguard Worker 
2128*61046927SAndroid Build Coastguard Worker    return instr;
2129*61046927SAndroid Build Coastguard Worker }
2130*61046927SAndroid Build Coastguard Worker 
2131*61046927SAndroid Build Coastguard Worker bool
is_operand_constant(opt_ctx & ctx,Operand op,unsigned bit_size,uint64_t * value)2132*61046927SAndroid Build Coastguard Worker is_operand_constant(opt_ctx& ctx, Operand op, unsigned bit_size, uint64_t* value)
2133*61046927SAndroid Build Coastguard Worker {
2134*61046927SAndroid Build Coastguard Worker    if (op.isConstant()) {
2135*61046927SAndroid Build Coastguard Worker       *value = op.constantValue64();
2136*61046927SAndroid Build Coastguard Worker       return true;
2137*61046927SAndroid Build Coastguard Worker    } else if (op.isTemp()) {
2138*61046927SAndroid Build Coastguard Worker       unsigned id = original_temp_id(ctx, op.getTemp());
2139*61046927SAndroid Build Coastguard Worker       if (!ctx.info[id].is_constant_or_literal(bit_size))
2140*61046927SAndroid Build Coastguard Worker          return false;
2141*61046927SAndroid Build Coastguard Worker       *value = get_constant_op(ctx, ctx.info[id], bit_size).constantValue64();
2142*61046927SAndroid Build Coastguard Worker       return true;
2143*61046927SAndroid Build Coastguard Worker    }
2144*61046927SAndroid Build Coastguard Worker    return false;
2145*61046927SAndroid Build Coastguard Worker }
2146*61046927SAndroid Build Coastguard Worker 
2147*61046927SAndroid Build Coastguard Worker /* s_not(cmp(a, b)) -> get_vcmp_inverse(cmp)(a, b) */
2148*61046927SAndroid Build Coastguard Worker bool
combine_inverse_comparison(opt_ctx & ctx,aco_ptr<Instruction> & instr)2149*61046927SAndroid Build Coastguard Worker combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2150*61046927SAndroid Build Coastguard Worker {
2151*61046927SAndroid Build Coastguard Worker    if (ctx.uses[instr->definitions[1].tempId()])
2152*61046927SAndroid Build Coastguard Worker       return false;
2153*61046927SAndroid Build Coastguard Worker    if (!instr->operands[0].isTemp() || ctx.uses[instr->operands[0].tempId()] != 1)
2154*61046927SAndroid Build Coastguard Worker       return false;
2155*61046927SAndroid Build Coastguard Worker 
2156*61046927SAndroid Build Coastguard Worker    Instruction* cmp = follow_operand(ctx, instr->operands[0]);
2157*61046927SAndroid Build Coastguard Worker    if (!cmp)
2158*61046927SAndroid Build Coastguard Worker       return false;
2159*61046927SAndroid Build Coastguard Worker 
2160*61046927SAndroid Build Coastguard Worker    aco_opcode new_opcode = get_vcmp_inverse(cmp->opcode);
2161*61046927SAndroid Build Coastguard Worker    if (new_opcode == aco_opcode::num_opcodes)
2162*61046927SAndroid Build Coastguard Worker       return false;
2163*61046927SAndroid Build Coastguard Worker 
2164*61046927SAndroid Build Coastguard Worker    /* Invert compare instruction and assign this instruction's definition */
2165*61046927SAndroid Build Coastguard Worker    cmp->opcode = new_opcode;
2166*61046927SAndroid Build Coastguard Worker    ctx.info[instr->definitions[0].tempId()] = ctx.info[cmp->definitions[0].tempId()];
2167*61046927SAndroid Build Coastguard Worker    std::swap(instr->definitions[0], cmp->definitions[0]);
2168*61046927SAndroid Build Coastguard Worker 
2169*61046927SAndroid Build Coastguard Worker    ctx.uses[instr->operands[0].tempId()]--;
2170*61046927SAndroid Build Coastguard Worker    return true;
2171*61046927SAndroid Build Coastguard Worker }
2172*61046927SAndroid Build Coastguard Worker 
2173*61046927SAndroid Build Coastguard Worker /* op1(op2(1, 2), 0) if swap = false
2174*61046927SAndroid Build Coastguard Worker  * op1(0, op2(1, 2)) if swap = true */
2175*61046927SAndroid Build Coastguard Worker bool
match_op3_for_vop3(opt_ctx & ctx,aco_opcode op1,aco_opcode op2,Instruction * op1_instr,bool swap,const char * shuffle_str,Operand operands[3],bitarray8 & neg,bitarray8 & abs,bitarray8 & opsel,bool * op1_clamp,uint8_t * op1_omod,bool * inbetween_neg,bool * inbetween_abs,bool * inbetween_opsel,bool * precise)2176*61046927SAndroid Build Coastguard Worker match_op3_for_vop3(opt_ctx& ctx, aco_opcode op1, aco_opcode op2, Instruction* op1_instr, bool swap,
2177*61046927SAndroid Build Coastguard Worker                    const char* shuffle_str, Operand operands[3], bitarray8& neg, bitarray8& abs,
2178*61046927SAndroid Build Coastguard Worker                    bitarray8& opsel, bool* op1_clamp, uint8_t* op1_omod, bool* inbetween_neg,
2179*61046927SAndroid Build Coastguard Worker                    bool* inbetween_abs, bool* inbetween_opsel, bool* precise)
2180*61046927SAndroid Build Coastguard Worker {
2181*61046927SAndroid Build Coastguard Worker    /* checks */
2182*61046927SAndroid Build Coastguard Worker    if (op1_instr->opcode != op1)
2183*61046927SAndroid Build Coastguard Worker       return false;
2184*61046927SAndroid Build Coastguard Worker 
2185*61046927SAndroid Build Coastguard Worker    Instruction* op2_instr = follow_operand(ctx, op1_instr->operands[swap]);
2186*61046927SAndroid Build Coastguard Worker    if (!op2_instr || op2_instr->opcode != op2)
2187*61046927SAndroid Build Coastguard Worker       return false;
2188*61046927SAndroid Build Coastguard Worker 
2189*61046927SAndroid Build Coastguard Worker    VALU_instruction* op1_valu = op1_instr->isVALU() ? &op1_instr->valu() : NULL;
2190*61046927SAndroid Build Coastguard Worker    VALU_instruction* op2_valu = op2_instr->isVALU() ? &op2_instr->valu() : NULL;
2191*61046927SAndroid Build Coastguard Worker 
2192*61046927SAndroid Build Coastguard Worker    if (op1_instr->isSDWA() || op2_instr->isSDWA())
2193*61046927SAndroid Build Coastguard Worker       return false;
2194*61046927SAndroid Build Coastguard Worker    if (op1_instr->isDPP() || op2_instr->isDPP())
2195*61046927SAndroid Build Coastguard Worker       return false;
2196*61046927SAndroid Build Coastguard Worker 
2197*61046927SAndroid Build Coastguard Worker    /* don't support inbetween clamp/omod */
2198*61046927SAndroid Build Coastguard Worker    if (op2_valu && (op2_valu->clamp || op2_valu->omod))
2199*61046927SAndroid Build Coastguard Worker       return false;
2200*61046927SAndroid Build Coastguard Worker 
2201*61046927SAndroid Build Coastguard Worker    /* get operands and modifiers and check inbetween modifiers */
2202*61046927SAndroid Build Coastguard Worker    *op1_clamp = op1_valu ? (bool)op1_valu->clamp : false;
2203*61046927SAndroid Build Coastguard Worker    *op1_omod = op1_valu ? (unsigned)op1_valu->omod : 0u;
2204*61046927SAndroid Build Coastguard Worker 
2205*61046927SAndroid Build Coastguard Worker    if (inbetween_neg)
2206*61046927SAndroid Build Coastguard Worker       *inbetween_neg = op1_valu ? op1_valu->neg[swap] : false;
2207*61046927SAndroid Build Coastguard Worker    else if (op1_valu && op1_valu->neg[swap])
2208*61046927SAndroid Build Coastguard Worker       return false;
2209*61046927SAndroid Build Coastguard Worker 
2210*61046927SAndroid Build Coastguard Worker    if (inbetween_abs)
2211*61046927SAndroid Build Coastguard Worker       *inbetween_abs = op1_valu ? op1_valu->abs[swap] : false;
2212*61046927SAndroid Build Coastguard Worker    else if (op1_valu && op1_valu->abs[swap])
2213*61046927SAndroid Build Coastguard Worker       return false;
2214*61046927SAndroid Build Coastguard Worker 
2215*61046927SAndroid Build Coastguard Worker    if (inbetween_opsel)
2216*61046927SAndroid Build Coastguard Worker       *inbetween_opsel = op1_valu ? op1_valu->opsel[swap] : false;
2217*61046927SAndroid Build Coastguard Worker    else if (op1_valu && op1_valu->opsel[swap])
2218*61046927SAndroid Build Coastguard Worker       return false;
2219*61046927SAndroid Build Coastguard Worker 
2220*61046927SAndroid Build Coastguard Worker    *precise = op1_instr->definitions[0].isPrecise() || op2_instr->definitions[0].isPrecise();
2221*61046927SAndroid Build Coastguard Worker 
2222*61046927SAndroid Build Coastguard Worker    int shuffle[3];
2223*61046927SAndroid Build Coastguard Worker    shuffle[shuffle_str[0] - '0'] = 0;
2224*61046927SAndroid Build Coastguard Worker    shuffle[shuffle_str[1] - '0'] = 1;
2225*61046927SAndroid Build Coastguard Worker    shuffle[shuffle_str[2] - '0'] = 2;
2226*61046927SAndroid Build Coastguard Worker 
2227*61046927SAndroid Build Coastguard Worker    operands[shuffle[0]] = op1_instr->operands[!swap];
2228*61046927SAndroid Build Coastguard Worker    neg[shuffle[0]] = op1_valu ? op1_valu->neg[!swap] : false;
2229*61046927SAndroid Build Coastguard Worker    abs[shuffle[0]] = op1_valu ? op1_valu->abs[!swap] : false;
2230*61046927SAndroid Build Coastguard Worker    opsel[shuffle[0]] = op1_valu ? op1_valu->opsel[!swap] : false;
2231*61046927SAndroid Build Coastguard Worker 
2232*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2233*61046927SAndroid Build Coastguard Worker       operands[shuffle[i + 1]] = op2_instr->operands[i];
2234*61046927SAndroid Build Coastguard Worker       neg[shuffle[i + 1]] = op2_valu ? op2_valu->neg[i] : false;
2235*61046927SAndroid Build Coastguard Worker       abs[shuffle[i + 1]] = op2_valu ? op2_valu->abs[i] : false;
2236*61046927SAndroid Build Coastguard Worker       opsel[shuffle[i + 1]] = op2_valu ? op2_valu->opsel[i] : false;
2237*61046927SAndroid Build Coastguard Worker    }
2238*61046927SAndroid Build Coastguard Worker 
2239*61046927SAndroid Build Coastguard Worker    /* check operands */
2240*61046927SAndroid Build Coastguard Worker    if (!check_vop3_operands(ctx, 3, operands))
2241*61046927SAndroid Build Coastguard Worker       return false;
2242*61046927SAndroid Build Coastguard Worker 
2243*61046927SAndroid Build Coastguard Worker    return true;
2244*61046927SAndroid Build Coastguard Worker }
2245*61046927SAndroid Build Coastguard Worker 
2246*61046927SAndroid Build Coastguard Worker void
create_vop3_for_op3(opt_ctx & ctx,aco_opcode opcode,aco_ptr<Instruction> & instr,Operand operands[3],uint8_t neg,uint8_t abs,uint8_t opsel,bool clamp,unsigned omod)2247*61046927SAndroid Build Coastguard Worker create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr,
2248*61046927SAndroid Build Coastguard Worker                     Operand operands[3], uint8_t neg, uint8_t abs, uint8_t opsel, bool clamp,
2249*61046927SAndroid Build Coastguard Worker                     unsigned omod)
2250*61046927SAndroid Build Coastguard Worker {
2251*61046927SAndroid Build Coastguard Worker    Instruction* new_instr = create_instruction(opcode, Format::VOP3, 3, 1);
2252*61046927SAndroid Build Coastguard Worker    new_instr->valu().neg = neg;
2253*61046927SAndroid Build Coastguard Worker    new_instr->valu().abs = abs;
2254*61046927SAndroid Build Coastguard Worker    new_instr->valu().clamp = clamp;
2255*61046927SAndroid Build Coastguard Worker    new_instr->valu().omod = omod;
2256*61046927SAndroid Build Coastguard Worker    new_instr->valu().opsel = opsel;
2257*61046927SAndroid Build Coastguard Worker    new_instr->operands[0] = operands[0];
2258*61046927SAndroid Build Coastguard Worker    new_instr->operands[1] = operands[1];
2259*61046927SAndroid Build Coastguard Worker    new_instr->operands[2] = operands[2];
2260*61046927SAndroid Build Coastguard Worker    new_instr->definitions[0] = instr->definitions[0];
2261*61046927SAndroid Build Coastguard Worker    new_instr->pass_flags = instr->pass_flags;
2262*61046927SAndroid Build Coastguard Worker    ctx.info[instr->definitions[0].tempId()].label = 0;
2263*61046927SAndroid Build Coastguard Worker 
2264*61046927SAndroid Build Coastguard Worker    instr.reset(new_instr);
2265*61046927SAndroid Build Coastguard Worker }
2266*61046927SAndroid Build Coastguard Worker 
2267*61046927SAndroid Build Coastguard Worker bool
combine_three_valu_op(opt_ctx & ctx,aco_ptr<Instruction> & instr,aco_opcode op2,aco_opcode new_op,const char * shuffle,uint8_t ops)2268*61046927SAndroid Build Coastguard Worker combine_three_valu_op(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode op2, aco_opcode new_op,
2269*61046927SAndroid Build Coastguard Worker                       const char* shuffle, uint8_t ops)
2270*61046927SAndroid Build Coastguard Worker {
2271*61046927SAndroid Build Coastguard Worker    for (unsigned swap = 0; swap < 2; swap++) {
2272*61046927SAndroid Build Coastguard Worker       if (!((1 << swap) & ops))
2273*61046927SAndroid Build Coastguard Worker          continue;
2274*61046927SAndroid Build Coastguard Worker 
2275*61046927SAndroid Build Coastguard Worker       Operand operands[3];
2276*61046927SAndroid Build Coastguard Worker       bool clamp, precise;
2277*61046927SAndroid Build Coastguard Worker       bitarray8 neg = 0, abs = 0, opsel = 0;
2278*61046927SAndroid Build Coastguard Worker       uint8_t omod = 0;
2279*61046927SAndroid Build Coastguard Worker       if (match_op3_for_vop3(ctx, instr->opcode, op2, instr.get(), swap, shuffle, operands, neg,
2280*61046927SAndroid Build Coastguard Worker                              abs, opsel, &clamp, &omod, NULL, NULL, NULL, &precise)) {
2281*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[swap].tempId()]--;
2282*61046927SAndroid Build Coastguard Worker          create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
2283*61046927SAndroid Build Coastguard Worker          return true;
2284*61046927SAndroid Build Coastguard Worker       }
2285*61046927SAndroid Build Coastguard Worker    }
2286*61046927SAndroid Build Coastguard Worker    return false;
2287*61046927SAndroid Build Coastguard Worker }
2288*61046927SAndroid Build Coastguard Worker 
2289*61046927SAndroid Build Coastguard Worker /* creates v_lshl_add_u32, v_lshl_or_b32 or v_and_or_b32 */
2290*61046927SAndroid Build Coastguard Worker bool
combine_add_or_then_and_lshl(opt_ctx & ctx,aco_ptr<Instruction> & instr)2291*61046927SAndroid Build Coastguard Worker combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2292*61046927SAndroid Build Coastguard Worker {
2293*61046927SAndroid Build Coastguard Worker    bool is_or = instr->opcode == aco_opcode::v_or_b32;
2294*61046927SAndroid Build Coastguard Worker    aco_opcode new_op_lshl = is_or ? aco_opcode::v_lshl_or_b32 : aco_opcode::v_lshl_add_u32;
2295*61046927SAndroid Build Coastguard Worker 
2296*61046927SAndroid Build Coastguard Worker    if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::s_and_b32, aco_opcode::v_and_or_b32,
2297*61046927SAndroid Build Coastguard Worker                                       "120", 1 | 2))
2298*61046927SAndroid Build Coastguard Worker       return true;
2299*61046927SAndroid Build Coastguard Worker    if (is_or && combine_three_valu_op(ctx, instr, aco_opcode::v_and_b32, aco_opcode::v_and_or_b32,
2300*61046927SAndroid Build Coastguard Worker                                       "120", 1 | 2))
2301*61046927SAndroid Build Coastguard Worker       return true;
2302*61046927SAndroid Build Coastguard Worker    if (combine_three_valu_op(ctx, instr, aco_opcode::s_lshl_b32, new_op_lshl, "120", 1 | 2))
2303*61046927SAndroid Build Coastguard Worker       return true;
2304*61046927SAndroid Build Coastguard Worker    if (combine_three_valu_op(ctx, instr, aco_opcode::v_lshlrev_b32, new_op_lshl, "210", 1 | 2))
2305*61046927SAndroid Build Coastguard Worker       return true;
2306*61046927SAndroid Build Coastguard Worker 
2307*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA() || instr->isDPP())
2308*61046927SAndroid Build Coastguard Worker       return false;
2309*61046927SAndroid Build Coastguard Worker 
2310*61046927SAndroid Build Coastguard Worker    /* v_or_b32(p_extract(a, 0, 8/16, 0), b) -> v_and_or_b32(a, 0xff/0xffff, b)
2311*61046927SAndroid Build Coastguard Worker     * v_or_b32(p_insert(a, 0, 8/16), b) -> v_and_or_b32(a, 0xff/0xffff, b)
2312*61046927SAndroid Build Coastguard Worker     * v_or_b32(p_insert(a, 24/16, 8/16), b) -> v_lshl_or_b32(a, 24/16, b)
2313*61046927SAndroid Build Coastguard Worker     * v_add_u32(p_insert(a, 24/16, 8/16), b) -> v_lshl_add_b32(a, 24/16, b)
2314*61046927SAndroid Build Coastguard Worker     */
2315*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2316*61046927SAndroid Build Coastguard Worker       Instruction* extins = follow_operand(ctx, instr->operands[i]);
2317*61046927SAndroid Build Coastguard Worker       if (!extins)
2318*61046927SAndroid Build Coastguard Worker          continue;
2319*61046927SAndroid Build Coastguard Worker 
2320*61046927SAndroid Build Coastguard Worker       aco_opcode op;
2321*61046927SAndroid Build Coastguard Worker       Operand operands[3];
2322*61046927SAndroid Build Coastguard Worker 
2323*61046927SAndroid Build Coastguard Worker       if (extins->opcode == aco_opcode::p_insert &&
2324*61046927SAndroid Build Coastguard Worker           (extins->operands[1].constantValue() + 1) * extins->operands[2].constantValue() == 32) {
2325*61046927SAndroid Build Coastguard Worker          op = new_op_lshl;
2326*61046927SAndroid Build Coastguard Worker          operands[1] =
2327*61046927SAndroid Build Coastguard Worker             Operand::c32(extins->operands[1].constantValue() * extins->operands[2].constantValue());
2328*61046927SAndroid Build Coastguard Worker       } else if (is_or &&
2329*61046927SAndroid Build Coastguard Worker                  (extins->opcode == aco_opcode::p_insert ||
2330*61046927SAndroid Build Coastguard Worker                   (extins->opcode == aco_opcode::p_extract &&
2331*61046927SAndroid Build Coastguard Worker                    extins->operands[3].constantEquals(0))) &&
2332*61046927SAndroid Build Coastguard Worker                  extins->operands[1].constantEquals(0)) {
2333*61046927SAndroid Build Coastguard Worker          op = aco_opcode::v_and_or_b32;
2334*61046927SAndroid Build Coastguard Worker          operands[1] = Operand::c32(extins->operands[2].constantEquals(8) ? 0xffu : 0xffffu);
2335*61046927SAndroid Build Coastguard Worker       } else {
2336*61046927SAndroid Build Coastguard Worker          continue;
2337*61046927SAndroid Build Coastguard Worker       }
2338*61046927SAndroid Build Coastguard Worker 
2339*61046927SAndroid Build Coastguard Worker       operands[0] = extins->operands[0];
2340*61046927SAndroid Build Coastguard Worker       operands[2] = instr->operands[!i];
2341*61046927SAndroid Build Coastguard Worker 
2342*61046927SAndroid Build Coastguard Worker       if (!check_vop3_operands(ctx, 3, operands))
2343*61046927SAndroid Build Coastguard Worker          continue;
2344*61046927SAndroid Build Coastguard Worker 
2345*61046927SAndroid Build Coastguard Worker       uint8_t neg = 0, abs = 0, opsel = 0, omod = 0;
2346*61046927SAndroid Build Coastguard Worker       bool clamp = false;
2347*61046927SAndroid Build Coastguard Worker       if (instr->isVOP3())
2348*61046927SAndroid Build Coastguard Worker          clamp = instr->valu().clamp;
2349*61046927SAndroid Build Coastguard Worker 
2350*61046927SAndroid Build Coastguard Worker       ctx.uses[instr->operands[i].tempId()]--;
2351*61046927SAndroid Build Coastguard Worker       create_vop3_for_op3(ctx, op, instr, operands, neg, abs, opsel, clamp, omod);
2352*61046927SAndroid Build Coastguard Worker       return true;
2353*61046927SAndroid Build Coastguard Worker    }
2354*61046927SAndroid Build Coastguard Worker 
2355*61046927SAndroid Build Coastguard Worker    return false;
2356*61046927SAndroid Build Coastguard Worker }
2357*61046927SAndroid Build Coastguard Worker 
2358*61046927SAndroid Build Coastguard Worker /* v_xor(a, s_not(b)) -> v_xnor(a, b)
2359*61046927SAndroid Build Coastguard Worker  * v_xor(a, v_not(b)) -> v_xnor(a, b)
2360*61046927SAndroid Build Coastguard Worker  */
2361*61046927SAndroid Build Coastguard Worker bool
combine_xor_not(opt_ctx & ctx,aco_ptr<Instruction> & instr)2362*61046927SAndroid Build Coastguard Worker combine_xor_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2363*61046927SAndroid Build Coastguard Worker {
2364*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
2365*61046927SAndroid Build Coastguard Worker       return false;
2366*61046927SAndroid Build Coastguard Worker 
2367*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2368*61046927SAndroid Build Coastguard Worker       Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
2369*61046927SAndroid Build Coastguard Worker       if (!op_instr ||
2370*61046927SAndroid Build Coastguard Worker           (op_instr->opcode != aco_opcode::v_not_b32 &&
2371*61046927SAndroid Build Coastguard Worker            op_instr->opcode != aco_opcode::s_not_b32) ||
2372*61046927SAndroid Build Coastguard Worker           op_instr->usesModifiers() || op_instr->operands[0].isLiteral())
2373*61046927SAndroid Build Coastguard Worker          continue;
2374*61046927SAndroid Build Coastguard Worker 
2375*61046927SAndroid Build Coastguard Worker       instr->opcode = aco_opcode::v_xnor_b32;
2376*61046927SAndroid Build Coastguard Worker       instr->operands[i] = copy_operand(ctx, op_instr->operands[0]);
2377*61046927SAndroid Build Coastguard Worker       decrease_uses(ctx, op_instr);
2378*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isOfType(RegType::vgpr))
2379*61046927SAndroid Build Coastguard Worker          std::swap(instr->operands[0], instr->operands[1]);
2380*61046927SAndroid Build Coastguard Worker       if (!instr->operands[1].isOfType(RegType::vgpr))
2381*61046927SAndroid Build Coastguard Worker          instr->format = asVOP3(instr->format);
2382*61046927SAndroid Build Coastguard Worker 
2383*61046927SAndroid Build Coastguard Worker       return true;
2384*61046927SAndroid Build Coastguard Worker    }
2385*61046927SAndroid Build Coastguard Worker 
2386*61046927SAndroid Build Coastguard Worker    return false;
2387*61046927SAndroid Build Coastguard Worker }
2388*61046927SAndroid Build Coastguard Worker 
2389*61046927SAndroid Build Coastguard Worker /* v_not(v_xor(a, b)) -> v_xnor(a, b) */
2390*61046927SAndroid Build Coastguard Worker bool
combine_not_xor(opt_ctx & ctx,aco_ptr<Instruction> & instr)2391*61046927SAndroid Build Coastguard Worker combine_not_xor(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2392*61046927SAndroid Build Coastguard Worker {
2393*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
2394*61046927SAndroid Build Coastguard Worker       return false;
2395*61046927SAndroid Build Coastguard Worker 
2396*61046927SAndroid Build Coastguard Worker    Instruction* op_instr = follow_operand(ctx, instr->operands[0]);
2397*61046927SAndroid Build Coastguard Worker    if (!op_instr || op_instr->opcode != aco_opcode::v_xor_b32 || op_instr->isSDWA())
2398*61046927SAndroid Build Coastguard Worker       return false;
2399*61046927SAndroid Build Coastguard Worker 
2400*61046927SAndroid Build Coastguard Worker    ctx.uses[instr->operands[0].tempId()]--;
2401*61046927SAndroid Build Coastguard Worker    std::swap(instr->definitions[0], op_instr->definitions[0]);
2402*61046927SAndroid Build Coastguard Worker    op_instr->opcode = aco_opcode::v_xnor_b32;
2403*61046927SAndroid Build Coastguard Worker    ctx.info[op_instr->definitions[0].tempId()].label = 0;
2404*61046927SAndroid Build Coastguard Worker 
2405*61046927SAndroid Build Coastguard Worker    return true;
2406*61046927SAndroid Build Coastguard Worker }
2407*61046927SAndroid Build Coastguard Worker 
2408*61046927SAndroid Build Coastguard Worker bool
combine_minmax(opt_ctx & ctx,aco_ptr<Instruction> & instr,aco_opcode opposite,aco_opcode op3src,aco_opcode minmax)2409*61046927SAndroid Build Coastguard Worker combine_minmax(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode opposite, aco_opcode op3src,
2410*61046927SAndroid Build Coastguard Worker                aco_opcode minmax)
2411*61046927SAndroid Build Coastguard Worker {
2412*61046927SAndroid Build Coastguard Worker    /* TODO: this can handle SDWA min/max instructions by using opsel */
2413*61046927SAndroid Build Coastguard Worker 
2414*61046927SAndroid Build Coastguard Worker    /* min(min(a, b), c) -> min3(a, b, c)
2415*61046927SAndroid Build Coastguard Worker     * max(max(a, b), c) -> max3(a, b, c)
2416*61046927SAndroid Build Coastguard Worker     * gfx11: min(-min(a, b), c) -> maxmin(-a, -b, c)
2417*61046927SAndroid Build Coastguard Worker     * gfx11: max(-max(a, b), c) -> minmax(-a, -b, c)
2418*61046927SAndroid Build Coastguard Worker     */
2419*61046927SAndroid Build Coastguard Worker    for (unsigned swap = 0; swap < 2; swap++) {
2420*61046927SAndroid Build Coastguard Worker       Operand operands[3];
2421*61046927SAndroid Build Coastguard Worker       bool clamp, precise;
2422*61046927SAndroid Build Coastguard Worker       bitarray8 opsel = 0, neg = 0, abs = 0;
2423*61046927SAndroid Build Coastguard Worker       uint8_t omod = 0;
2424*61046927SAndroid Build Coastguard Worker       bool inbetween_neg;
2425*61046927SAndroid Build Coastguard Worker       if (match_op3_for_vop3(ctx, instr->opcode, instr->opcode, instr.get(), swap, "120", operands,
2426*61046927SAndroid Build Coastguard Worker                              neg, abs, opsel, &clamp, &omod, &inbetween_neg, NULL, NULL,
2427*61046927SAndroid Build Coastguard Worker                              &precise) &&
2428*61046927SAndroid Build Coastguard Worker           (!inbetween_neg ||
2429*61046927SAndroid Build Coastguard Worker            (minmax != aco_opcode::num_opcodes && ctx.program->gfx_level >= GFX11))) {
2430*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[swap].tempId()]--;
2431*61046927SAndroid Build Coastguard Worker          if (inbetween_neg) {
2432*61046927SAndroid Build Coastguard Worker             neg[0] = !neg[0];
2433*61046927SAndroid Build Coastguard Worker             neg[1] = !neg[1];
2434*61046927SAndroid Build Coastguard Worker             create_vop3_for_op3(ctx, minmax, instr, operands, neg, abs, opsel, clamp, omod);
2435*61046927SAndroid Build Coastguard Worker          } else {
2436*61046927SAndroid Build Coastguard Worker             create_vop3_for_op3(ctx, op3src, instr, operands, neg, abs, opsel, clamp, omod);
2437*61046927SAndroid Build Coastguard Worker          }
2438*61046927SAndroid Build Coastguard Worker          return true;
2439*61046927SAndroid Build Coastguard Worker       }
2440*61046927SAndroid Build Coastguard Worker    }
2441*61046927SAndroid Build Coastguard Worker 
2442*61046927SAndroid Build Coastguard Worker    /* min(-max(a, b), c) -> min3(-a, -b, c)
2443*61046927SAndroid Build Coastguard Worker     * max(-min(a, b), c) -> max3(-a, -b, c)
2444*61046927SAndroid Build Coastguard Worker     * gfx11: min(max(a, b), c) -> maxmin(a, b, c)
2445*61046927SAndroid Build Coastguard Worker     * gfx11: max(min(a, b), c) -> minmax(a, b, c)
2446*61046927SAndroid Build Coastguard Worker     */
2447*61046927SAndroid Build Coastguard Worker    for (unsigned swap = 0; swap < 2; swap++) {
2448*61046927SAndroid Build Coastguard Worker       Operand operands[3];
2449*61046927SAndroid Build Coastguard Worker       bool clamp, precise;
2450*61046927SAndroid Build Coastguard Worker       bitarray8 opsel = 0, neg = 0, abs = 0;
2451*61046927SAndroid Build Coastguard Worker       uint8_t omod = 0;
2452*61046927SAndroid Build Coastguard Worker       bool inbetween_neg;
2453*61046927SAndroid Build Coastguard Worker       if (match_op3_for_vop3(ctx, instr->opcode, opposite, instr.get(), swap, "120", operands, neg,
2454*61046927SAndroid Build Coastguard Worker                              abs, opsel, &clamp, &omod, &inbetween_neg, NULL, NULL, &precise) &&
2455*61046927SAndroid Build Coastguard Worker           (inbetween_neg ||
2456*61046927SAndroid Build Coastguard Worker            (minmax != aco_opcode::num_opcodes && ctx.program->gfx_level >= GFX11))) {
2457*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[swap].tempId()]--;
2458*61046927SAndroid Build Coastguard Worker          if (inbetween_neg) {
2459*61046927SAndroid Build Coastguard Worker             neg[0] = !neg[0];
2460*61046927SAndroid Build Coastguard Worker             neg[1] = !neg[1];
2461*61046927SAndroid Build Coastguard Worker             create_vop3_for_op3(ctx, op3src, instr, operands, neg, abs, opsel, clamp, omod);
2462*61046927SAndroid Build Coastguard Worker          } else {
2463*61046927SAndroid Build Coastguard Worker             create_vop3_for_op3(ctx, minmax, instr, operands, neg, abs, opsel, clamp, omod);
2464*61046927SAndroid Build Coastguard Worker          }
2465*61046927SAndroid Build Coastguard Worker          return true;
2466*61046927SAndroid Build Coastguard Worker       }
2467*61046927SAndroid Build Coastguard Worker    }
2468*61046927SAndroid Build Coastguard Worker    return false;
2469*61046927SAndroid Build Coastguard Worker }
2470*61046927SAndroid Build Coastguard Worker 
2471*61046927SAndroid Build Coastguard Worker /* s_not_b32(s_and_b32(a, b)) -> s_nand_b32(a, b)
2472*61046927SAndroid Build Coastguard Worker  * s_not_b32(s_or_b32(a, b)) -> s_nor_b32(a, b)
2473*61046927SAndroid Build Coastguard Worker  * s_not_b32(s_xor_b32(a, b)) -> s_xnor_b32(a, b)
2474*61046927SAndroid Build Coastguard Worker  * s_not_b64(s_and_b64(a, b)) -> s_nand_b64(a, b)
2475*61046927SAndroid Build Coastguard Worker  * s_not_b64(s_or_b64(a, b)) -> s_nor_b64(a, b)
2476*61046927SAndroid Build Coastguard Worker  * s_not_b64(s_xor_b64(a, b)) -> s_xnor_b64(a, b) */
2477*61046927SAndroid Build Coastguard Worker bool
combine_salu_not_bitwise(opt_ctx & ctx,aco_ptr<Instruction> & instr)2478*61046927SAndroid Build Coastguard Worker combine_salu_not_bitwise(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2479*61046927SAndroid Build Coastguard Worker {
2480*61046927SAndroid Build Coastguard Worker    /* checks */
2481*61046927SAndroid Build Coastguard Worker    if (!instr->operands[0].isTemp())
2482*61046927SAndroid Build Coastguard Worker       return false;
2483*61046927SAndroid Build Coastguard Worker    if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
2484*61046927SAndroid Build Coastguard Worker       return false;
2485*61046927SAndroid Build Coastguard Worker 
2486*61046927SAndroid Build Coastguard Worker    Instruction* op2_instr = follow_operand(ctx, instr->operands[0]);
2487*61046927SAndroid Build Coastguard Worker    if (!op2_instr)
2488*61046927SAndroid Build Coastguard Worker       return false;
2489*61046927SAndroid Build Coastguard Worker    switch (op2_instr->opcode) {
2490*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b32:
2491*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b32:
2492*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b32:
2493*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b64:
2494*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b64:
2495*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b64: break;
2496*61046927SAndroid Build Coastguard Worker    default: return false;
2497*61046927SAndroid Build Coastguard Worker    }
2498*61046927SAndroid Build Coastguard Worker 
2499*61046927SAndroid Build Coastguard Worker    /* create instruction */
2500*61046927SAndroid Build Coastguard Worker    std::swap(instr->definitions[0], op2_instr->definitions[0]);
2501*61046927SAndroid Build Coastguard Worker    std::swap(instr->definitions[1], op2_instr->definitions[1]);
2502*61046927SAndroid Build Coastguard Worker    ctx.uses[instr->operands[0].tempId()]--;
2503*61046927SAndroid Build Coastguard Worker    ctx.info[op2_instr->definitions[0].tempId()].label = 0;
2504*61046927SAndroid Build Coastguard Worker 
2505*61046927SAndroid Build Coastguard Worker    switch (op2_instr->opcode) {
2506*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b32: op2_instr->opcode = aco_opcode::s_nand_b32; break;
2507*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b32: op2_instr->opcode = aco_opcode::s_nor_b32; break;
2508*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b32: op2_instr->opcode = aco_opcode::s_xnor_b32; break;
2509*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b64: op2_instr->opcode = aco_opcode::s_nand_b64; break;
2510*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b64: op2_instr->opcode = aco_opcode::s_nor_b64; break;
2511*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b64: op2_instr->opcode = aco_opcode::s_xnor_b64; break;
2512*61046927SAndroid Build Coastguard Worker    default: break;
2513*61046927SAndroid Build Coastguard Worker    }
2514*61046927SAndroid Build Coastguard Worker 
2515*61046927SAndroid Build Coastguard Worker    return true;
2516*61046927SAndroid Build Coastguard Worker }
2517*61046927SAndroid Build Coastguard Worker 
2518*61046927SAndroid Build Coastguard Worker /* s_and_b32(a, s_not_b32(b)) -> s_andn2_b32(a, b)
2519*61046927SAndroid Build Coastguard Worker  * s_or_b32(a, s_not_b32(b)) -> s_orn2_b32(a, b)
2520*61046927SAndroid Build Coastguard Worker  * s_and_b64(a, s_not_b64(b)) -> s_andn2_b64(a, b)
2521*61046927SAndroid Build Coastguard Worker  * s_or_b64(a, s_not_b64(b)) -> s_orn2_b64(a, b) */
2522*61046927SAndroid Build Coastguard Worker bool
combine_salu_n2(opt_ctx & ctx,aco_ptr<Instruction> & instr)2523*61046927SAndroid Build Coastguard Worker combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2524*61046927SAndroid Build Coastguard Worker {
2525*61046927SAndroid Build Coastguard Worker    if (instr->definitions[0].isTemp() && ctx.info[instr->definitions[0].tempId()].is_uniform_bool())
2526*61046927SAndroid Build Coastguard Worker       return false;
2527*61046927SAndroid Build Coastguard Worker 
2528*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2529*61046927SAndroid Build Coastguard Worker       Instruction* op2_instr = follow_operand(ctx, instr->operands[i]);
2530*61046927SAndroid Build Coastguard Worker       if (!op2_instr || (op2_instr->opcode != aco_opcode::s_not_b32 &&
2531*61046927SAndroid Build Coastguard Worker                          op2_instr->opcode != aco_opcode::s_not_b64))
2532*61046927SAndroid Build Coastguard Worker          continue;
2533*61046927SAndroid Build Coastguard Worker       if (ctx.uses[op2_instr->definitions[1].tempId()])
2534*61046927SAndroid Build Coastguard Worker          continue;
2535*61046927SAndroid Build Coastguard Worker 
2536*61046927SAndroid Build Coastguard Worker       if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
2537*61046927SAndroid Build Coastguard Worker           instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
2538*61046927SAndroid Build Coastguard Worker          continue;
2539*61046927SAndroid Build Coastguard Worker 
2540*61046927SAndroid Build Coastguard Worker       ctx.uses[instr->operands[i].tempId()]--;
2541*61046927SAndroid Build Coastguard Worker       instr->operands[0] = instr->operands[!i];
2542*61046927SAndroid Build Coastguard Worker       instr->operands[1] = op2_instr->operands[0];
2543*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].label = 0;
2544*61046927SAndroid Build Coastguard Worker 
2545*61046927SAndroid Build Coastguard Worker       switch (instr->opcode) {
2546*61046927SAndroid Build Coastguard Worker       case aco_opcode::s_and_b32: instr->opcode = aco_opcode::s_andn2_b32; break;
2547*61046927SAndroid Build Coastguard Worker       case aco_opcode::s_or_b32: instr->opcode = aco_opcode::s_orn2_b32; break;
2548*61046927SAndroid Build Coastguard Worker       case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_andn2_b64; break;
2549*61046927SAndroid Build Coastguard Worker       case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_orn2_b64; break;
2550*61046927SAndroid Build Coastguard Worker       default: break;
2551*61046927SAndroid Build Coastguard Worker       }
2552*61046927SAndroid Build Coastguard Worker 
2553*61046927SAndroid Build Coastguard Worker       return true;
2554*61046927SAndroid Build Coastguard Worker    }
2555*61046927SAndroid Build Coastguard Worker    return false;
2556*61046927SAndroid Build Coastguard Worker }
2557*61046927SAndroid Build Coastguard Worker 
2558*61046927SAndroid Build Coastguard Worker /* s_add_{i32,u32}(a, s_lshl_b32(b, <n>)) -> s_lshl<n>_add_u32(a, b) */
2559*61046927SAndroid Build Coastguard Worker bool
combine_salu_lshl_add(opt_ctx & ctx,aco_ptr<Instruction> & instr)2560*61046927SAndroid Build Coastguard Worker combine_salu_lshl_add(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2561*61046927SAndroid Build Coastguard Worker {
2562*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::s_add_i32 && ctx.uses[instr->definitions[1].tempId()])
2563*61046927SAndroid Build Coastguard Worker       return false;
2564*61046927SAndroid Build Coastguard Worker 
2565*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2566*61046927SAndroid Build Coastguard Worker       Instruction* op2_instr = follow_operand(ctx, instr->operands[i], true);
2567*61046927SAndroid Build Coastguard Worker       if (!op2_instr || op2_instr->opcode != aco_opcode::s_lshl_b32 ||
2568*61046927SAndroid Build Coastguard Worker           ctx.uses[op2_instr->definitions[1].tempId()])
2569*61046927SAndroid Build Coastguard Worker          continue;
2570*61046927SAndroid Build Coastguard Worker       if (!op2_instr->operands[1].isConstant())
2571*61046927SAndroid Build Coastguard Worker          continue;
2572*61046927SAndroid Build Coastguard Worker 
2573*61046927SAndroid Build Coastguard Worker       uint32_t shift = op2_instr->operands[1].constantValue();
2574*61046927SAndroid Build Coastguard Worker       if (shift < 1 || shift > 4)
2575*61046927SAndroid Build Coastguard Worker          continue;
2576*61046927SAndroid Build Coastguard Worker 
2577*61046927SAndroid Build Coastguard Worker       if (instr->operands[!i].isLiteral() && op2_instr->operands[0].isLiteral() &&
2578*61046927SAndroid Build Coastguard Worker           instr->operands[!i].constantValue() != op2_instr->operands[0].constantValue())
2579*61046927SAndroid Build Coastguard Worker          continue;
2580*61046927SAndroid Build Coastguard Worker 
2581*61046927SAndroid Build Coastguard Worker       instr->operands[1] = instr->operands[!i];
2582*61046927SAndroid Build Coastguard Worker       instr->operands[0] = copy_operand(ctx, op2_instr->operands[0]);
2583*61046927SAndroid Build Coastguard Worker       decrease_uses(ctx, op2_instr);
2584*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].label = 0;
2585*61046927SAndroid Build Coastguard Worker 
2586*61046927SAndroid Build Coastguard Worker       instr->opcode = std::array<aco_opcode, 4>{
2587*61046927SAndroid Build Coastguard Worker          aco_opcode::s_lshl1_add_u32, aco_opcode::s_lshl2_add_u32, aco_opcode::s_lshl3_add_u32,
2588*61046927SAndroid Build Coastguard Worker          aco_opcode::s_lshl4_add_u32}[shift - 1];
2589*61046927SAndroid Build Coastguard Worker 
2590*61046927SAndroid Build Coastguard Worker       return true;
2591*61046927SAndroid Build Coastguard Worker    }
2592*61046927SAndroid Build Coastguard Worker    return false;
2593*61046927SAndroid Build Coastguard Worker }
2594*61046927SAndroid Build Coastguard Worker 
2595*61046927SAndroid Build Coastguard Worker /* s_abs_i32(s_sub_[iu]32(a, b)) -> s_absdiff_i32(a, b)
2596*61046927SAndroid Build Coastguard Worker  * s_abs_i32(s_add_[iu]32(a, #b)) -> s_absdiff_i32(a, -b)
2597*61046927SAndroid Build Coastguard Worker  */
2598*61046927SAndroid Build Coastguard Worker bool
combine_sabsdiff(opt_ctx & ctx,aco_ptr<Instruction> & instr)2599*61046927SAndroid Build Coastguard Worker combine_sabsdiff(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2600*61046927SAndroid Build Coastguard Worker {
2601*61046927SAndroid Build Coastguard Worker    if (!instr->operands[0].isTemp() || !ctx.info[instr->operands[0].tempId()].is_add_sub())
2602*61046927SAndroid Build Coastguard Worker       return false;
2603*61046927SAndroid Build Coastguard Worker 
2604*61046927SAndroid Build Coastguard Worker    Instruction* op_instr = follow_operand(ctx, instr->operands[0], false);
2605*61046927SAndroid Build Coastguard Worker    if (!op_instr)
2606*61046927SAndroid Build Coastguard Worker       return false;
2607*61046927SAndroid Build Coastguard Worker 
2608*61046927SAndroid Build Coastguard Worker    if (op_instr->opcode == aco_opcode::s_add_i32 || op_instr->opcode == aco_opcode::s_add_u32) {
2609*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 2; i++) {
2610*61046927SAndroid Build Coastguard Worker          uint64_t constant;
2611*61046927SAndroid Build Coastguard Worker          if (op_instr->operands[!i].isLiteral() ||
2612*61046927SAndroid Build Coastguard Worker              !is_operand_constant(ctx, op_instr->operands[i], 32, &constant))
2613*61046927SAndroid Build Coastguard Worker             continue;
2614*61046927SAndroid Build Coastguard Worker 
2615*61046927SAndroid Build Coastguard Worker          if (op_instr->operands[i].isTemp())
2616*61046927SAndroid Build Coastguard Worker             ctx.uses[op_instr->operands[i].tempId()]--;
2617*61046927SAndroid Build Coastguard Worker          op_instr->operands[0] = op_instr->operands[!i];
2618*61046927SAndroid Build Coastguard Worker          op_instr->operands[1] = Operand::c32(-int32_t(constant));
2619*61046927SAndroid Build Coastguard Worker          goto use_absdiff;
2620*61046927SAndroid Build Coastguard Worker       }
2621*61046927SAndroid Build Coastguard Worker       return false;
2622*61046927SAndroid Build Coastguard Worker    }
2623*61046927SAndroid Build Coastguard Worker 
2624*61046927SAndroid Build Coastguard Worker use_absdiff:
2625*61046927SAndroid Build Coastguard Worker    op_instr->opcode = aco_opcode::s_absdiff_i32;
2626*61046927SAndroid Build Coastguard Worker    std::swap(instr->definitions[0], op_instr->definitions[0]);
2627*61046927SAndroid Build Coastguard Worker    std::swap(instr->definitions[1], op_instr->definitions[1]);
2628*61046927SAndroid Build Coastguard Worker    ctx.uses[instr->operands[0].tempId()]--;
2629*61046927SAndroid Build Coastguard Worker    ctx.info[op_instr->definitions[0].tempId()].label = 0;
2630*61046927SAndroid Build Coastguard Worker 
2631*61046927SAndroid Build Coastguard Worker    return true;
2632*61046927SAndroid Build Coastguard Worker }
2633*61046927SAndroid Build Coastguard Worker 
2634*61046927SAndroid Build Coastguard Worker bool
combine_add_sub_b2i(opt_ctx & ctx,aco_ptr<Instruction> & instr,aco_opcode new_op,uint8_t ops)2635*61046927SAndroid Build Coastguard Worker combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op, uint8_t ops)
2636*61046927SAndroid Build Coastguard Worker {
2637*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
2638*61046927SAndroid Build Coastguard Worker       return false;
2639*61046927SAndroid Build Coastguard Worker 
2640*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2641*61046927SAndroid Build Coastguard Worker       if (!((1 << i) & ops))
2642*61046927SAndroid Build Coastguard Worker          continue;
2643*61046927SAndroid Build Coastguard Worker       if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2i() &&
2644*61046927SAndroid Build Coastguard Worker           ctx.uses[instr->operands[i].tempId()] == 1) {
2645*61046927SAndroid Build Coastguard Worker 
2646*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> new_instr;
2647*61046927SAndroid Build Coastguard Worker          if (instr->operands[!i].isTemp() &&
2648*61046927SAndroid Build Coastguard Worker              instr->operands[!i].getTemp().type() == RegType::vgpr) {
2649*61046927SAndroid Build Coastguard Worker             new_instr.reset(create_instruction(new_op, Format::VOP2, 3, 2));
2650*61046927SAndroid Build Coastguard Worker          } else if (ctx.program->gfx_level >= GFX10 ||
2651*61046927SAndroid Build Coastguard Worker                     (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
2652*61046927SAndroid Build Coastguard Worker             new_instr.reset(create_instruction(new_op, asVOP3(Format::VOP2), 3, 2));
2653*61046927SAndroid Build Coastguard Worker          } else {
2654*61046927SAndroid Build Coastguard Worker             return false;
2655*61046927SAndroid Build Coastguard Worker          }
2656*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[i].tempId()]--;
2657*61046927SAndroid Build Coastguard Worker          new_instr->definitions[0] = instr->definitions[0];
2658*61046927SAndroid Build Coastguard Worker          if (instr->definitions.size() == 2) {
2659*61046927SAndroid Build Coastguard Worker             new_instr->definitions[1] = instr->definitions[1];
2660*61046927SAndroid Build Coastguard Worker          } else {
2661*61046927SAndroid Build Coastguard Worker             new_instr->definitions[1] =
2662*61046927SAndroid Build Coastguard Worker                Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
2663*61046927SAndroid Build Coastguard Worker             /* Make sure the uses vector is large enough and the number of
2664*61046927SAndroid Build Coastguard Worker              * uses properly initialized to 0.
2665*61046927SAndroid Build Coastguard Worker              */
2666*61046927SAndroid Build Coastguard Worker             ctx.uses.push_back(0);
2667*61046927SAndroid Build Coastguard Worker             ctx.info.push_back(ssa_info{});
2668*61046927SAndroid Build Coastguard Worker          }
2669*61046927SAndroid Build Coastguard Worker          new_instr->operands[0] = Operand::zero();
2670*61046927SAndroid Build Coastguard Worker          new_instr->operands[1] = instr->operands[!i];
2671*61046927SAndroid Build Coastguard Worker          new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
2672*61046927SAndroid Build Coastguard Worker          new_instr->pass_flags = instr->pass_flags;
2673*61046927SAndroid Build Coastguard Worker          instr = std::move(new_instr);
2674*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_add_sub(instr.get());
2675*61046927SAndroid Build Coastguard Worker          return true;
2676*61046927SAndroid Build Coastguard Worker       }
2677*61046927SAndroid Build Coastguard Worker    }
2678*61046927SAndroid Build Coastguard Worker 
2679*61046927SAndroid Build Coastguard Worker    return false;
2680*61046927SAndroid Build Coastguard Worker }
2681*61046927SAndroid Build Coastguard Worker 
2682*61046927SAndroid Build Coastguard Worker bool
combine_add_bcnt(opt_ctx & ctx,aco_ptr<Instruction> & instr)2683*61046927SAndroid Build Coastguard Worker combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2684*61046927SAndroid Build Coastguard Worker {
2685*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
2686*61046927SAndroid Build Coastguard Worker       return false;
2687*61046927SAndroid Build Coastguard Worker 
2688*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
2689*61046927SAndroid Build Coastguard Worker       Instruction* op_instr = follow_operand(ctx, instr->operands[i]);
2690*61046927SAndroid Build Coastguard Worker       if (op_instr && op_instr->opcode == aco_opcode::v_bcnt_u32_b32 &&
2691*61046927SAndroid Build Coastguard Worker           !op_instr->usesModifiers() && op_instr->operands[0].isTemp() &&
2692*61046927SAndroid Build Coastguard Worker           op_instr->operands[0].getTemp().type() == RegType::vgpr &&
2693*61046927SAndroid Build Coastguard Worker           op_instr->operands[1].constantEquals(0)) {
2694*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> new_instr{
2695*61046927SAndroid Build Coastguard Worker             create_instruction(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
2696*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[i].tempId()]--;
2697*61046927SAndroid Build Coastguard Worker          new_instr->operands[0] = op_instr->operands[0];
2698*61046927SAndroid Build Coastguard Worker          new_instr->operands[1] = instr->operands[!i];
2699*61046927SAndroid Build Coastguard Worker          new_instr->definitions[0] = instr->definitions[0];
2700*61046927SAndroid Build Coastguard Worker          new_instr->pass_flags = instr->pass_flags;
2701*61046927SAndroid Build Coastguard Worker          instr = std::move(new_instr);
2702*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].label = 0;
2703*61046927SAndroid Build Coastguard Worker 
2704*61046927SAndroid Build Coastguard Worker          return true;
2705*61046927SAndroid Build Coastguard Worker       }
2706*61046927SAndroid Build Coastguard Worker    }
2707*61046927SAndroid Build Coastguard Worker 
2708*61046927SAndroid Build Coastguard Worker    return false;
2709*61046927SAndroid Build Coastguard Worker }
2710*61046927SAndroid Build Coastguard Worker 
2711*61046927SAndroid Build Coastguard Worker bool
get_minmax_info(aco_opcode op,aco_opcode * min,aco_opcode * max,aco_opcode * min3,aco_opcode * max3,aco_opcode * med3,aco_opcode * minmax,bool * some_gfx9_only)2712*61046927SAndroid Build Coastguard Worker get_minmax_info(aco_opcode op, aco_opcode* min, aco_opcode* max, aco_opcode* min3, aco_opcode* max3,
2713*61046927SAndroid Build Coastguard Worker                 aco_opcode* med3, aco_opcode* minmax, bool* some_gfx9_only)
2714*61046927SAndroid Build Coastguard Worker {
2715*61046927SAndroid Build Coastguard Worker    switch (op) {
2716*61046927SAndroid Build Coastguard Worker #define MINMAX(type, gfx9)                                                                         \
2717*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_##type:                                                                  \
2718*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_##type:                                                                  \
2719*61046927SAndroid Build Coastguard Worker       *min = aco_opcode::v_min_##type;                                                             \
2720*61046927SAndroid Build Coastguard Worker       *max = aco_opcode::v_max_##type;                                                             \
2721*61046927SAndroid Build Coastguard Worker       *med3 = aco_opcode::v_med3_##type;                                                           \
2722*61046927SAndroid Build Coastguard Worker       *min3 = aco_opcode::v_min3_##type;                                                           \
2723*61046927SAndroid Build Coastguard Worker       *max3 = aco_opcode::v_max3_##type;                                                           \
2724*61046927SAndroid Build Coastguard Worker       *minmax = op == *min ? aco_opcode::v_maxmin_##type : aco_opcode::v_minmax_##type;            \
2725*61046927SAndroid Build Coastguard Worker       *some_gfx9_only = gfx9;                                                                      \
2726*61046927SAndroid Build Coastguard Worker       return true;
2727*61046927SAndroid Build Coastguard Worker #define MINMAX_INT16(type, gfx9)                                                                   \
2728*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_##type:                                                                  \
2729*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_##type:                                                                  \
2730*61046927SAndroid Build Coastguard Worker       *min = aco_opcode::v_min_##type;                                                             \
2731*61046927SAndroid Build Coastguard Worker       *max = aco_opcode::v_max_##type;                                                             \
2732*61046927SAndroid Build Coastguard Worker       *med3 = aco_opcode::v_med3_##type;                                                           \
2733*61046927SAndroid Build Coastguard Worker       *min3 = aco_opcode::v_min3_##type;                                                           \
2734*61046927SAndroid Build Coastguard Worker       *max3 = aco_opcode::v_max3_##type;                                                           \
2735*61046927SAndroid Build Coastguard Worker       *minmax = aco_opcode::num_opcodes;                                                           \
2736*61046927SAndroid Build Coastguard Worker       *some_gfx9_only = gfx9;                                                                      \
2737*61046927SAndroid Build Coastguard Worker       return true;
2738*61046927SAndroid Build Coastguard Worker #define MINMAX_INT16_E64(type, gfx9)                                                               \
2739*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_##type##_e64:                                                            \
2740*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_##type##_e64:                                                            \
2741*61046927SAndroid Build Coastguard Worker       *min = aco_opcode::v_min_##type##_e64;                                                       \
2742*61046927SAndroid Build Coastguard Worker       *max = aco_opcode::v_max_##type##_e64;                                                       \
2743*61046927SAndroid Build Coastguard Worker       *med3 = aco_opcode::v_med3_##type;                                                           \
2744*61046927SAndroid Build Coastguard Worker       *min3 = aco_opcode::v_min3_##type;                                                           \
2745*61046927SAndroid Build Coastguard Worker       *max3 = aco_opcode::v_max3_##type;                                                           \
2746*61046927SAndroid Build Coastguard Worker       *minmax = aco_opcode::num_opcodes;                                                           \
2747*61046927SAndroid Build Coastguard Worker       *some_gfx9_only = gfx9;                                                                      \
2748*61046927SAndroid Build Coastguard Worker       return true;
2749*61046927SAndroid Build Coastguard Worker       MINMAX(f32, false)
2750*61046927SAndroid Build Coastguard Worker       MINMAX(u32, false)
2751*61046927SAndroid Build Coastguard Worker       MINMAX(i32, false)
2752*61046927SAndroid Build Coastguard Worker       MINMAX(f16, true)
2753*61046927SAndroid Build Coastguard Worker       MINMAX_INT16(u16, true)
2754*61046927SAndroid Build Coastguard Worker       MINMAX_INT16(i16, true)
2755*61046927SAndroid Build Coastguard Worker       MINMAX_INT16_E64(u16, true)
2756*61046927SAndroid Build Coastguard Worker       MINMAX_INT16_E64(i16, true)
2757*61046927SAndroid Build Coastguard Worker #undef MINMAX_INT16_E64
2758*61046927SAndroid Build Coastguard Worker #undef MINMAX_INT16
2759*61046927SAndroid Build Coastguard Worker #undef MINMAX
2760*61046927SAndroid Build Coastguard Worker    default: return false;
2761*61046927SAndroid Build Coastguard Worker    }
2762*61046927SAndroid Build Coastguard Worker }
2763*61046927SAndroid Build Coastguard Worker 
2764*61046927SAndroid Build Coastguard Worker /* when ub > lb:
2765*61046927SAndroid Build Coastguard Worker  * v_min_{f,u,i}{16,32}(v_max_{f,u,i}{16,32}(a, lb), ub) -> v_med3_{f,u,i}{16,32}(a, lb, ub)
2766*61046927SAndroid Build Coastguard Worker  * v_max_{f,u,i}{16,32}(v_min_{f,u,i}{16,32}(a, ub), lb) -> v_med3_{f,u,i}{16,32}(a, lb, ub)
2767*61046927SAndroid Build Coastguard Worker  */
2768*61046927SAndroid Build Coastguard Worker bool
combine_clamp(opt_ctx & ctx,aco_ptr<Instruction> & instr,aco_opcode min,aco_opcode max,aco_opcode med)2769*61046927SAndroid Build Coastguard Worker combine_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode min, aco_opcode max,
2770*61046927SAndroid Build Coastguard Worker               aco_opcode med)
2771*61046927SAndroid Build Coastguard Worker {
2772*61046927SAndroid Build Coastguard Worker    /* TODO: GLSL's clamp(x, minVal, maxVal) and SPIR-V's
2773*61046927SAndroid Build Coastguard Worker     * FClamp(x, minVal, maxVal)/NClamp(x, minVal, maxVal) are undefined if
2774*61046927SAndroid Build Coastguard Worker     * minVal > maxVal, which means we can always select it to a v_med3_f32 */
2775*61046927SAndroid Build Coastguard Worker    aco_opcode other_op;
2776*61046927SAndroid Build Coastguard Worker    if (instr->opcode == min)
2777*61046927SAndroid Build Coastguard Worker       other_op = max;
2778*61046927SAndroid Build Coastguard Worker    else if (instr->opcode == max)
2779*61046927SAndroid Build Coastguard Worker       other_op = min;
2780*61046927SAndroid Build Coastguard Worker    else
2781*61046927SAndroid Build Coastguard Worker       return false;
2782*61046927SAndroid Build Coastguard Worker 
2783*61046927SAndroid Build Coastguard Worker    for (unsigned swap = 0; swap < 2; swap++) {
2784*61046927SAndroid Build Coastguard Worker       Operand operands[3];
2785*61046927SAndroid Build Coastguard Worker       bool clamp, precise;
2786*61046927SAndroid Build Coastguard Worker       bitarray8 opsel = 0, neg = 0, abs = 0;
2787*61046927SAndroid Build Coastguard Worker       uint8_t omod = 0;
2788*61046927SAndroid Build Coastguard Worker       if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap, "012", operands, neg,
2789*61046927SAndroid Build Coastguard Worker                              abs, opsel, &clamp, &omod, NULL, NULL, NULL, &precise)) {
2790*61046927SAndroid Build Coastguard Worker          /* max(min(src, upper), lower) returns upper if src is NaN, but
2791*61046927SAndroid Build Coastguard Worker           * med3(src, lower, upper) returns lower.
2792*61046927SAndroid Build Coastguard Worker           */
2793*61046927SAndroid Build Coastguard Worker          if (precise && instr->opcode != min &&
2794*61046927SAndroid Build Coastguard Worker              (min == aco_opcode::v_min_f16 || min == aco_opcode::v_min_f32))
2795*61046927SAndroid Build Coastguard Worker             continue;
2796*61046927SAndroid Build Coastguard Worker 
2797*61046927SAndroid Build Coastguard Worker          int const0_idx = -1, const1_idx = -1;
2798*61046927SAndroid Build Coastguard Worker          uint32_t const0 = 0, const1 = 0;
2799*61046927SAndroid Build Coastguard Worker          for (int i = 0; i < 3; i++) {
2800*61046927SAndroid Build Coastguard Worker             uint32_t val;
2801*61046927SAndroid Build Coastguard Worker             bool hi16 = opsel & (1 << i);
2802*61046927SAndroid Build Coastguard Worker             if (operands[i].isConstant()) {
2803*61046927SAndroid Build Coastguard Worker                val = hi16 ? operands[i].constantValue16(true) : operands[i].constantValue();
2804*61046927SAndroid Build Coastguard Worker             } else if (operands[i].isTemp() &&
2805*61046927SAndroid Build Coastguard Worker                        ctx.info[operands[i].tempId()].is_constant_or_literal(32)) {
2806*61046927SAndroid Build Coastguard Worker                val = ctx.info[operands[i].tempId()].val >> (hi16 ? 16 : 0);
2807*61046927SAndroid Build Coastguard Worker             } else {
2808*61046927SAndroid Build Coastguard Worker                continue;
2809*61046927SAndroid Build Coastguard Worker             }
2810*61046927SAndroid Build Coastguard Worker             if (const0_idx >= 0) {
2811*61046927SAndroid Build Coastguard Worker                const1_idx = i;
2812*61046927SAndroid Build Coastguard Worker                const1 = val;
2813*61046927SAndroid Build Coastguard Worker             } else {
2814*61046927SAndroid Build Coastguard Worker                const0_idx = i;
2815*61046927SAndroid Build Coastguard Worker                const0 = val;
2816*61046927SAndroid Build Coastguard Worker             }
2817*61046927SAndroid Build Coastguard Worker          }
2818*61046927SAndroid Build Coastguard Worker          if (const0_idx < 0 || const1_idx < 0)
2819*61046927SAndroid Build Coastguard Worker             continue;
2820*61046927SAndroid Build Coastguard Worker 
2821*61046927SAndroid Build Coastguard Worker          int lower_idx = const0_idx;
2822*61046927SAndroid Build Coastguard Worker          switch (min) {
2823*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_f32:
2824*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_f16: {
2825*61046927SAndroid Build Coastguard Worker             float const0_f, const1_f;
2826*61046927SAndroid Build Coastguard Worker             if (min == aco_opcode::v_min_f32) {
2827*61046927SAndroid Build Coastguard Worker                memcpy(&const0_f, &const0, 4);
2828*61046927SAndroid Build Coastguard Worker                memcpy(&const1_f, &const1, 4);
2829*61046927SAndroid Build Coastguard Worker             } else {
2830*61046927SAndroid Build Coastguard Worker                const0_f = _mesa_half_to_float(const0);
2831*61046927SAndroid Build Coastguard Worker                const1_f = _mesa_half_to_float(const1);
2832*61046927SAndroid Build Coastguard Worker             }
2833*61046927SAndroid Build Coastguard Worker             if (abs[const0_idx])
2834*61046927SAndroid Build Coastguard Worker                const0_f = fabsf(const0_f);
2835*61046927SAndroid Build Coastguard Worker             if (abs[const1_idx])
2836*61046927SAndroid Build Coastguard Worker                const1_f = fabsf(const1_f);
2837*61046927SAndroid Build Coastguard Worker             if (neg[const0_idx])
2838*61046927SAndroid Build Coastguard Worker                const0_f = -const0_f;
2839*61046927SAndroid Build Coastguard Worker             if (neg[const1_idx])
2840*61046927SAndroid Build Coastguard Worker                const1_f = -const1_f;
2841*61046927SAndroid Build Coastguard Worker             lower_idx = const0_f < const1_f ? const0_idx : const1_idx;
2842*61046927SAndroid Build Coastguard Worker             break;
2843*61046927SAndroid Build Coastguard Worker          }
2844*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_u32: {
2845*61046927SAndroid Build Coastguard Worker             lower_idx = const0 < const1 ? const0_idx : const1_idx;
2846*61046927SAndroid Build Coastguard Worker             break;
2847*61046927SAndroid Build Coastguard Worker          }
2848*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_u16:
2849*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_u16_e64: {
2850*61046927SAndroid Build Coastguard Worker             lower_idx = (uint16_t)const0 < (uint16_t)const1 ? const0_idx : const1_idx;
2851*61046927SAndroid Build Coastguard Worker             break;
2852*61046927SAndroid Build Coastguard Worker          }
2853*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_i32: {
2854*61046927SAndroid Build Coastguard Worker             int32_t const0_i =
2855*61046927SAndroid Build Coastguard Worker                const0 & 0x80000000u ? -2147483648 + (int32_t)(const0 & 0x7fffffffu) : const0;
2856*61046927SAndroid Build Coastguard Worker             int32_t const1_i =
2857*61046927SAndroid Build Coastguard Worker                const1 & 0x80000000u ? -2147483648 + (int32_t)(const1 & 0x7fffffffu) : const1;
2858*61046927SAndroid Build Coastguard Worker             lower_idx = const0_i < const1_i ? const0_idx : const1_idx;
2859*61046927SAndroid Build Coastguard Worker             break;
2860*61046927SAndroid Build Coastguard Worker          }
2861*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_i16:
2862*61046927SAndroid Build Coastguard Worker          case aco_opcode::v_min_i16_e64: {
2863*61046927SAndroid Build Coastguard Worker             int16_t const0_i = const0 & 0x8000u ? -32768 + (int16_t)(const0 & 0x7fffu) : const0;
2864*61046927SAndroid Build Coastguard Worker             int16_t const1_i = const1 & 0x8000u ? -32768 + (int16_t)(const1 & 0x7fffu) : const1;
2865*61046927SAndroid Build Coastguard Worker             lower_idx = const0_i < const1_i ? const0_idx : const1_idx;
2866*61046927SAndroid Build Coastguard Worker             break;
2867*61046927SAndroid Build Coastguard Worker          }
2868*61046927SAndroid Build Coastguard Worker          default: break;
2869*61046927SAndroid Build Coastguard Worker          }
2870*61046927SAndroid Build Coastguard Worker          int upper_idx = lower_idx == const0_idx ? const1_idx : const0_idx;
2871*61046927SAndroid Build Coastguard Worker 
2872*61046927SAndroid Build Coastguard Worker          if (instr->opcode == min) {
2873*61046927SAndroid Build Coastguard Worker             if (upper_idx != 0 || lower_idx == 0)
2874*61046927SAndroid Build Coastguard Worker                return false;
2875*61046927SAndroid Build Coastguard Worker          } else {
2876*61046927SAndroid Build Coastguard Worker             if (upper_idx == 0 || lower_idx != 0)
2877*61046927SAndroid Build Coastguard Worker                return false;
2878*61046927SAndroid Build Coastguard Worker          }
2879*61046927SAndroid Build Coastguard Worker 
2880*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[swap].tempId()]--;
2881*61046927SAndroid Build Coastguard Worker          create_vop3_for_op3(ctx, med, instr, operands, neg, abs, opsel, clamp, omod);
2882*61046927SAndroid Build Coastguard Worker 
2883*61046927SAndroid Build Coastguard Worker          return true;
2884*61046927SAndroid Build Coastguard Worker       }
2885*61046927SAndroid Build Coastguard Worker    }
2886*61046927SAndroid Build Coastguard Worker 
2887*61046927SAndroid Build Coastguard Worker    return false;
2888*61046927SAndroid Build Coastguard Worker }
2889*61046927SAndroid Build Coastguard Worker 
2890*61046927SAndroid Build Coastguard Worker void
apply_sgprs(opt_ctx & ctx,aco_ptr<Instruction> & instr)2891*61046927SAndroid Build Coastguard Worker apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2892*61046927SAndroid Build Coastguard Worker {
2893*61046927SAndroid Build Coastguard Worker    bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64_e64 ||
2894*61046927SAndroid Build Coastguard Worker                      instr->opcode == aco_opcode::v_lshlrev_b64 ||
2895*61046927SAndroid Build Coastguard Worker                      instr->opcode == aco_opcode::v_lshrrev_b64 ||
2896*61046927SAndroid Build Coastguard Worker                      instr->opcode == aco_opcode::v_ashrrev_i64;
2897*61046927SAndroid Build Coastguard Worker 
2898*61046927SAndroid Build Coastguard Worker    /* find candidates and create the set of sgprs already read */
2899*61046927SAndroid Build Coastguard Worker    unsigned sgpr_ids[2] = {0, 0};
2900*61046927SAndroid Build Coastguard Worker    uint32_t operand_mask = 0;
2901*61046927SAndroid Build Coastguard Worker    bool has_literal = false;
2902*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
2903*61046927SAndroid Build Coastguard Worker       if (instr->operands[i].isLiteral())
2904*61046927SAndroid Build Coastguard Worker          has_literal = true;
2905*61046927SAndroid Build Coastguard Worker       if (!instr->operands[i].isTemp())
2906*61046927SAndroid Build Coastguard Worker          continue;
2907*61046927SAndroid Build Coastguard Worker       if (instr->operands[i].getTemp().type() == RegType::sgpr) {
2908*61046927SAndroid Build Coastguard Worker          if (instr->operands[i].tempId() != sgpr_ids[0])
2909*61046927SAndroid Build Coastguard Worker             sgpr_ids[!!sgpr_ids[0]] = instr->operands[i].tempId();
2910*61046927SAndroid Build Coastguard Worker       }
2911*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[instr->operands[i].tempId()];
2912*61046927SAndroid Build Coastguard Worker       if (is_copy_label(ctx, instr, info, i) && info.temp.type() == RegType::sgpr)
2913*61046927SAndroid Build Coastguard Worker          operand_mask |= 1u << i;
2914*61046927SAndroid Build Coastguard Worker       if (info.is_extract() && info.instr->operands[0].getTemp().type() == RegType::sgpr)
2915*61046927SAndroid Build Coastguard Worker          operand_mask |= 1u << i;
2916*61046927SAndroid Build Coastguard Worker    }
2917*61046927SAndroid Build Coastguard Worker    unsigned max_sgprs = 1;
2918*61046927SAndroid Build Coastguard Worker    if (ctx.program->gfx_level >= GFX10 && !is_shift64)
2919*61046927SAndroid Build Coastguard Worker       max_sgprs = 2;
2920*61046927SAndroid Build Coastguard Worker    if (has_literal)
2921*61046927SAndroid Build Coastguard Worker       max_sgprs--;
2922*61046927SAndroid Build Coastguard Worker 
2923*61046927SAndroid Build Coastguard Worker    unsigned num_sgprs = !!sgpr_ids[0] + !!sgpr_ids[1];
2924*61046927SAndroid Build Coastguard Worker 
2925*61046927SAndroid Build Coastguard Worker    /* keep on applying sgprs until there is nothing left to be done */
2926*61046927SAndroid Build Coastguard Worker    while (operand_mask) {
2927*61046927SAndroid Build Coastguard Worker       uint32_t sgpr_idx = 0;
2928*61046927SAndroid Build Coastguard Worker       uint32_t sgpr_info_id = 0;
2929*61046927SAndroid Build Coastguard Worker       uint32_t mask = operand_mask;
2930*61046927SAndroid Build Coastguard Worker       /* choose a sgpr */
2931*61046927SAndroid Build Coastguard Worker       while (mask) {
2932*61046927SAndroid Build Coastguard Worker          unsigned i = u_bit_scan(&mask);
2933*61046927SAndroid Build Coastguard Worker          uint16_t uses = ctx.uses[instr->operands[i].tempId()];
2934*61046927SAndroid Build Coastguard Worker          if (sgpr_info_id == 0 || uses < ctx.uses[sgpr_info_id]) {
2935*61046927SAndroid Build Coastguard Worker             sgpr_idx = i;
2936*61046927SAndroid Build Coastguard Worker             sgpr_info_id = instr->operands[i].tempId();
2937*61046927SAndroid Build Coastguard Worker          }
2938*61046927SAndroid Build Coastguard Worker       }
2939*61046927SAndroid Build Coastguard Worker       operand_mask &= ~(1u << sgpr_idx);
2940*61046927SAndroid Build Coastguard Worker 
2941*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[sgpr_info_id];
2942*61046927SAndroid Build Coastguard Worker 
2943*61046927SAndroid Build Coastguard Worker       /* Applying two sgprs require making it VOP3, so don't do it unless it's
2944*61046927SAndroid Build Coastguard Worker        * definitively beneficial.
2945*61046927SAndroid Build Coastguard Worker        * TODO: this is too conservative because later the use count could be reduced to 1 */
2946*61046927SAndroid Build Coastguard Worker       if (!info.is_extract() && num_sgprs && ctx.uses[sgpr_info_id] > 1 && !instr->isVOP3() &&
2947*61046927SAndroid Build Coastguard Worker           !instr->isSDWA() && instr->format != Format::VOP3P)
2948*61046927SAndroid Build Coastguard Worker          break;
2949*61046927SAndroid Build Coastguard Worker 
2950*61046927SAndroid Build Coastguard Worker       Temp sgpr = info.is_extract() ? info.instr->operands[0].getTemp() : info.temp;
2951*61046927SAndroid Build Coastguard Worker       bool new_sgpr = sgpr.id() != sgpr_ids[0] && sgpr.id() != sgpr_ids[1];
2952*61046927SAndroid Build Coastguard Worker       if (new_sgpr && num_sgprs >= max_sgprs)
2953*61046927SAndroid Build Coastguard Worker          continue;
2954*61046927SAndroid Build Coastguard Worker 
2955*61046927SAndroid Build Coastguard Worker       if (sgpr_idx == 0)
2956*61046927SAndroid Build Coastguard Worker          instr->format = withoutDPP(instr->format);
2957*61046927SAndroid Build Coastguard Worker 
2958*61046927SAndroid Build Coastguard Worker       if (sgpr_idx == 1 && instr->isDPP())
2959*61046927SAndroid Build Coastguard Worker          continue;
2960*61046927SAndroid Build Coastguard Worker 
2961*61046927SAndroid Build Coastguard Worker       if (sgpr_idx == 0 || instr->isVOP3() || instr->isSDWA() || instr->isVOP3P() ||
2962*61046927SAndroid Build Coastguard Worker           info.is_extract()) {
2963*61046927SAndroid Build Coastguard Worker          /* can_apply_extract() checks SGPR encoding restrictions */
2964*61046927SAndroid Build Coastguard Worker          if (info.is_extract() && can_apply_extract(ctx, instr, sgpr_idx, info))
2965*61046927SAndroid Build Coastguard Worker             apply_extract(ctx, instr, sgpr_idx, info);
2966*61046927SAndroid Build Coastguard Worker          else if (info.is_extract())
2967*61046927SAndroid Build Coastguard Worker             continue;
2968*61046927SAndroid Build Coastguard Worker          instr->operands[sgpr_idx] = Operand(sgpr);
2969*61046927SAndroid Build Coastguard Worker       } else if (can_swap_operands(instr, &instr->opcode) && !instr->valu().opsel[sgpr_idx]) {
2970*61046927SAndroid Build Coastguard Worker          instr->operands[sgpr_idx] = instr->operands[0];
2971*61046927SAndroid Build Coastguard Worker          instr->operands[0] = Operand(sgpr);
2972*61046927SAndroid Build Coastguard Worker          instr->valu().opsel[0].swap(instr->valu().opsel[sgpr_idx]);
2973*61046927SAndroid Build Coastguard Worker          /* swap bits using a 4-entry LUT */
2974*61046927SAndroid Build Coastguard Worker          uint32_t swapped = (0x3120 >> (operand_mask & 0x3)) & 0xf;
2975*61046927SAndroid Build Coastguard Worker          operand_mask = (operand_mask & ~0x3) | swapped;
2976*61046927SAndroid Build Coastguard Worker       } else if (can_use_VOP3(ctx, instr) && !info.is_extract()) {
2977*61046927SAndroid Build Coastguard Worker          instr->format = asVOP3(instr->format);
2978*61046927SAndroid Build Coastguard Worker          instr->operands[sgpr_idx] = Operand(sgpr);
2979*61046927SAndroid Build Coastguard Worker       } else {
2980*61046927SAndroid Build Coastguard Worker          continue;
2981*61046927SAndroid Build Coastguard Worker       }
2982*61046927SAndroid Build Coastguard Worker 
2983*61046927SAndroid Build Coastguard Worker       if (new_sgpr)
2984*61046927SAndroid Build Coastguard Worker          sgpr_ids[num_sgprs++] = sgpr.id();
2985*61046927SAndroid Build Coastguard Worker       ctx.uses[sgpr_info_id]--;
2986*61046927SAndroid Build Coastguard Worker       ctx.uses[sgpr.id()]++;
2987*61046927SAndroid Build Coastguard Worker 
2988*61046927SAndroid Build Coastguard Worker       /* TODO: handle when it's a VGPR */
2989*61046927SAndroid Build Coastguard Worker       if ((ctx.info[sgpr.id()].label & (label_extract | label_temp)) &&
2990*61046927SAndroid Build Coastguard Worker           ctx.info[sgpr.id()].temp.type() == RegType::sgpr)
2991*61046927SAndroid Build Coastguard Worker          operand_mask |= 1u << sgpr_idx;
2992*61046927SAndroid Build Coastguard Worker    }
2993*61046927SAndroid Build Coastguard Worker }
2994*61046927SAndroid Build Coastguard Worker 
2995*61046927SAndroid Build Coastguard Worker bool
interp_can_become_fma(opt_ctx & ctx,aco_ptr<Instruction> & instr)2996*61046927SAndroid Build Coastguard Worker interp_can_become_fma(opt_ctx& ctx, aco_ptr<Instruction>& instr)
2997*61046927SAndroid Build Coastguard Worker {
2998*61046927SAndroid Build Coastguard Worker    if (instr->opcode != aco_opcode::v_interp_p2_f32_inreg)
2999*61046927SAndroid Build Coastguard Worker       return false;
3000*61046927SAndroid Build Coastguard Worker 
3001*61046927SAndroid Build Coastguard Worker    instr->opcode = aco_opcode::v_fma_f32;
3002*61046927SAndroid Build Coastguard Worker    instr->format = Format::VOP3;
3003*61046927SAndroid Build Coastguard Worker    bool dpp_allowed = can_use_DPP(ctx.program->gfx_level, instr, false);
3004*61046927SAndroid Build Coastguard Worker    instr->opcode = aco_opcode::v_interp_p2_f32_inreg;
3005*61046927SAndroid Build Coastguard Worker    instr->format = Format::VINTERP_INREG;
3006*61046927SAndroid Build Coastguard Worker 
3007*61046927SAndroid Build Coastguard Worker    return dpp_allowed;
3008*61046927SAndroid Build Coastguard Worker }
3009*61046927SAndroid Build Coastguard Worker 
3010*61046927SAndroid Build Coastguard Worker void
interp_p2_f32_inreg_to_fma_dpp(aco_ptr<Instruction> & instr)3011*61046927SAndroid Build Coastguard Worker interp_p2_f32_inreg_to_fma_dpp(aco_ptr<Instruction>& instr)
3012*61046927SAndroid Build Coastguard Worker {
3013*61046927SAndroid Build Coastguard Worker    static_assert(sizeof(DPP16_instruction) == sizeof(VINTERP_inreg_instruction),
3014*61046927SAndroid Build Coastguard Worker                  "Invalid instr cast.");
3015*61046927SAndroid Build Coastguard Worker    instr->format = asVOP3(Format::DPP16);
3016*61046927SAndroid Build Coastguard Worker    instr->opcode = aco_opcode::v_fma_f32;
3017*61046927SAndroid Build Coastguard Worker    instr->dpp16().dpp_ctrl = dpp_quad_perm(2, 2, 2, 2);
3018*61046927SAndroid Build Coastguard Worker    instr->dpp16().row_mask = 0xf;
3019*61046927SAndroid Build Coastguard Worker    instr->dpp16().bank_mask = 0xf;
3020*61046927SAndroid Build Coastguard Worker    instr->dpp16().bound_ctrl = 0;
3021*61046927SAndroid Build Coastguard Worker    instr->dpp16().fetch_inactive = 1;
3022*61046927SAndroid Build Coastguard Worker }
3023*61046927SAndroid Build Coastguard Worker 
3024*61046927SAndroid Build Coastguard Worker /* apply omod / clamp modifiers if the def is used only once and the instruction can have modifiers */
3025*61046927SAndroid Build Coastguard Worker bool
apply_omod_clamp(opt_ctx & ctx,aco_ptr<Instruction> & instr)3026*61046927SAndroid Build Coastguard Worker apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3027*61046927SAndroid Build Coastguard Worker {
3028*61046927SAndroid Build Coastguard Worker    if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1 ||
3029*61046927SAndroid Build Coastguard Worker        !instr_info.can_use_output_modifiers[(int)instr->opcode])
3030*61046927SAndroid Build Coastguard Worker       return false;
3031*61046927SAndroid Build Coastguard Worker 
3032*61046927SAndroid Build Coastguard Worker    bool can_vop3 = can_use_VOP3(ctx, instr);
3033*61046927SAndroid Build Coastguard Worker    bool is_mad_mix =
3034*61046927SAndroid Build Coastguard Worker       instr->opcode == aco_opcode::v_fma_mix_f32 || instr->opcode == aco_opcode::v_fma_mixlo_f16;
3035*61046927SAndroid Build Coastguard Worker    bool needs_vop3 = !instr->isSDWA() && !instr->isVINTERP_INREG() && !is_mad_mix;
3036*61046927SAndroid Build Coastguard Worker    if (needs_vop3 && !can_vop3)
3037*61046927SAndroid Build Coastguard Worker       return false;
3038*61046927SAndroid Build Coastguard Worker 
3039*61046927SAndroid Build Coastguard Worker    /* SDWA omod is GFX9+. */
3040*61046927SAndroid Build Coastguard Worker    bool can_use_omod = (can_vop3 || ctx.program->gfx_level >= GFX9) && !instr->isVOP3P() &&
3041*61046927SAndroid Build Coastguard Worker                        (!instr->isVINTERP_INREG() || interp_can_become_fma(ctx, instr));
3042*61046927SAndroid Build Coastguard Worker 
3043*61046927SAndroid Build Coastguard Worker    ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3044*61046927SAndroid Build Coastguard Worker 
3045*61046927SAndroid Build Coastguard Worker    uint64_t omod_labels = label_omod2 | label_omod4 | label_omod5;
3046*61046927SAndroid Build Coastguard Worker    if (!def_info.is_clamp() && !(can_use_omod && (def_info.label & omod_labels)))
3047*61046927SAndroid Build Coastguard Worker       return false;
3048*61046927SAndroid Build Coastguard Worker    /* if the omod/clamp instruction is dead, then the single user of this
3049*61046927SAndroid Build Coastguard Worker     * instruction is a different instruction */
3050*61046927SAndroid Build Coastguard Worker    if (!ctx.uses[def_info.instr->definitions[0].tempId()])
3051*61046927SAndroid Build Coastguard Worker       return false;
3052*61046927SAndroid Build Coastguard Worker 
3053*61046927SAndroid Build Coastguard Worker    if (def_info.instr->definitions[0].bytes() != instr->definitions[0].bytes())
3054*61046927SAndroid Build Coastguard Worker       return false;
3055*61046927SAndroid Build Coastguard Worker 
3056*61046927SAndroid Build Coastguard Worker    /* MADs/FMAs are created later, so we don't have to update the original add */
3057*61046927SAndroid Build Coastguard Worker    assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
3058*61046927SAndroid Build Coastguard Worker 
3059*61046927SAndroid Build Coastguard Worker    if (!def_info.is_clamp() && (instr->valu().clamp || instr->valu().omod))
3060*61046927SAndroid Build Coastguard Worker       return false;
3061*61046927SAndroid Build Coastguard Worker 
3062*61046927SAndroid Build Coastguard Worker    if (needs_vop3)
3063*61046927SAndroid Build Coastguard Worker       instr->format = asVOP3(instr->format);
3064*61046927SAndroid Build Coastguard Worker 
3065*61046927SAndroid Build Coastguard Worker    if (!def_info.is_clamp() && instr->opcode == aco_opcode::v_interp_p2_f32_inreg)
3066*61046927SAndroid Build Coastguard Worker       interp_p2_f32_inreg_to_fma_dpp(instr);
3067*61046927SAndroid Build Coastguard Worker 
3068*61046927SAndroid Build Coastguard Worker    if (def_info.is_omod2())
3069*61046927SAndroid Build Coastguard Worker       instr->valu().omod = 1;
3070*61046927SAndroid Build Coastguard Worker    else if (def_info.is_omod4())
3071*61046927SAndroid Build Coastguard Worker       instr->valu().omod = 2;
3072*61046927SAndroid Build Coastguard Worker    else if (def_info.is_omod5())
3073*61046927SAndroid Build Coastguard Worker       instr->valu().omod = 3;
3074*61046927SAndroid Build Coastguard Worker    else if (def_info.is_clamp())
3075*61046927SAndroid Build Coastguard Worker       instr->valu().clamp = true;
3076*61046927SAndroid Build Coastguard Worker 
3077*61046927SAndroid Build Coastguard Worker    instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
3078*61046927SAndroid Build Coastguard Worker    ctx.info[instr->definitions[0].tempId()].label &= label_clamp | label_insert | label_f2f16;
3079*61046927SAndroid Build Coastguard Worker    ctx.uses[def_info.instr->definitions[0].tempId()]--;
3080*61046927SAndroid Build Coastguard Worker 
3081*61046927SAndroid Build Coastguard Worker    return true;
3082*61046927SAndroid Build Coastguard Worker }
3083*61046927SAndroid Build Coastguard Worker 
3084*61046927SAndroid Build Coastguard Worker /* Combine an p_insert (or p_extract, in some cases) instruction with instr.
3085*61046927SAndroid Build Coastguard Worker  * p_insert(instr(...)) -> instr_insert().
3086*61046927SAndroid Build Coastguard Worker  */
3087*61046927SAndroid Build Coastguard Worker bool
apply_insert(opt_ctx & ctx,aco_ptr<Instruction> & instr)3088*61046927SAndroid Build Coastguard Worker apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3089*61046927SAndroid Build Coastguard Worker {
3090*61046927SAndroid Build Coastguard Worker    if (instr->definitions.empty() || ctx.uses[instr->definitions[0].tempId()] != 1)
3091*61046927SAndroid Build Coastguard Worker       return false;
3092*61046927SAndroid Build Coastguard Worker 
3093*61046927SAndroid Build Coastguard Worker    ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3094*61046927SAndroid Build Coastguard Worker    if (!def_info.is_insert())
3095*61046927SAndroid Build Coastguard Worker       return false;
3096*61046927SAndroid Build Coastguard Worker    /* if the insert instruction is dead, then the single user of this
3097*61046927SAndroid Build Coastguard Worker     * instruction is a different instruction */
3098*61046927SAndroid Build Coastguard Worker    if (!ctx.uses[def_info.instr->definitions[0].tempId()])
3099*61046927SAndroid Build Coastguard Worker       return false;
3100*61046927SAndroid Build Coastguard Worker 
3101*61046927SAndroid Build Coastguard Worker    /* MADs/FMAs are created later, so we don't have to update the original add */
3102*61046927SAndroid Build Coastguard Worker    assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
3103*61046927SAndroid Build Coastguard Worker 
3104*61046927SAndroid Build Coastguard Worker    SubdwordSel sel = parse_insert(def_info.instr);
3105*61046927SAndroid Build Coastguard Worker    assert(sel);
3106*61046927SAndroid Build Coastguard Worker 
3107*61046927SAndroid Build Coastguard Worker    if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
3108*61046927SAndroid Build Coastguard Worker       return false;
3109*61046927SAndroid Build Coastguard Worker 
3110*61046927SAndroid Build Coastguard Worker    convert_to_SDWA(ctx.program->gfx_level, instr);
3111*61046927SAndroid Build Coastguard Worker    if (instr->sdwa().dst_sel.size() != 4)
3112*61046927SAndroid Build Coastguard Worker       return false;
3113*61046927SAndroid Build Coastguard Worker    instr->sdwa().dst_sel = sel;
3114*61046927SAndroid Build Coastguard Worker 
3115*61046927SAndroid Build Coastguard Worker    instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
3116*61046927SAndroid Build Coastguard Worker    ctx.info[instr->definitions[0].tempId()].label = 0;
3117*61046927SAndroid Build Coastguard Worker    ctx.uses[def_info.instr->definitions[0].tempId()]--;
3118*61046927SAndroid Build Coastguard Worker 
3119*61046927SAndroid Build Coastguard Worker    return true;
3120*61046927SAndroid Build Coastguard Worker }
3121*61046927SAndroid Build Coastguard Worker 
3122*61046927SAndroid Build Coastguard Worker /* Remove superfluous extract after ds_read like so:
3123*61046927SAndroid Build Coastguard Worker  * p_extract(ds_read_uN(), 0, N, 0) -> ds_read_uN()
3124*61046927SAndroid Build Coastguard Worker  */
3125*61046927SAndroid Build Coastguard Worker bool
apply_ds_extract(opt_ctx & ctx,aco_ptr<Instruction> & extract)3126*61046927SAndroid Build Coastguard Worker apply_ds_extract(opt_ctx& ctx, aco_ptr<Instruction>& extract)
3127*61046927SAndroid Build Coastguard Worker {
3128*61046927SAndroid Build Coastguard Worker    /* Check if p_extract has a usedef operand and is the only user. */
3129*61046927SAndroid Build Coastguard Worker    if (!ctx.info[extract->operands[0].tempId()].is_usedef() ||
3130*61046927SAndroid Build Coastguard Worker        ctx.uses[extract->operands[0].tempId()] > 1)
3131*61046927SAndroid Build Coastguard Worker       return false;
3132*61046927SAndroid Build Coastguard Worker 
3133*61046927SAndroid Build Coastguard Worker    /* Check if the usedef is a DS instruction. */
3134*61046927SAndroid Build Coastguard Worker    Instruction* ds = ctx.info[extract->operands[0].tempId()].instr;
3135*61046927SAndroid Build Coastguard Worker    if (ds->format != Format::DS)
3136*61046927SAndroid Build Coastguard Worker       return false;
3137*61046927SAndroid Build Coastguard Worker 
3138*61046927SAndroid Build Coastguard Worker    unsigned extract_idx = extract->operands[1].constantValue();
3139*61046927SAndroid Build Coastguard Worker    unsigned bits_extracted = extract->operands[2].constantValue();
3140*61046927SAndroid Build Coastguard Worker    unsigned sign_ext = extract->operands[3].constantValue();
3141*61046927SAndroid Build Coastguard Worker    unsigned dst_bitsize = extract->definitions[0].bytes() * 8u;
3142*61046927SAndroid Build Coastguard Worker 
3143*61046927SAndroid Build Coastguard Worker    /* TODO: These are doable, but probably don't occur too often. */
3144*61046927SAndroid Build Coastguard Worker    if (extract_idx || sign_ext || dst_bitsize != 32)
3145*61046927SAndroid Build Coastguard Worker       return false;
3146*61046927SAndroid Build Coastguard Worker 
3147*61046927SAndroid Build Coastguard Worker    unsigned bits_loaded = 0;
3148*61046927SAndroid Build Coastguard Worker    if (ds->opcode == aco_opcode::ds_read_u8 || ds->opcode == aco_opcode::ds_read_u8_d16)
3149*61046927SAndroid Build Coastguard Worker       bits_loaded = 8;
3150*61046927SAndroid Build Coastguard Worker    else if (ds->opcode == aco_opcode::ds_read_u16 || ds->opcode == aco_opcode::ds_read_u16_d16)
3151*61046927SAndroid Build Coastguard Worker       bits_loaded = 16;
3152*61046927SAndroid Build Coastguard Worker    else
3153*61046927SAndroid Build Coastguard Worker       return false;
3154*61046927SAndroid Build Coastguard Worker 
3155*61046927SAndroid Build Coastguard Worker    /* Shrink the DS load if the extracted bit size is smaller. */
3156*61046927SAndroid Build Coastguard Worker    bits_loaded = MIN2(bits_loaded, bits_extracted);
3157*61046927SAndroid Build Coastguard Worker 
3158*61046927SAndroid Build Coastguard Worker    /* Change the DS opcode so it writes the full register. */
3159*61046927SAndroid Build Coastguard Worker    if (bits_loaded == 8)
3160*61046927SAndroid Build Coastguard Worker       ds->opcode = aco_opcode::ds_read_u8;
3161*61046927SAndroid Build Coastguard Worker    else if (bits_loaded == 16)
3162*61046927SAndroid Build Coastguard Worker       ds->opcode = aco_opcode::ds_read_u16;
3163*61046927SAndroid Build Coastguard Worker    else
3164*61046927SAndroid Build Coastguard Worker       unreachable("Forgot to add DS opcode above.");
3165*61046927SAndroid Build Coastguard Worker 
3166*61046927SAndroid Build Coastguard Worker    /* The DS now produces the exact same thing as the extract, remove the extract. */
3167*61046927SAndroid Build Coastguard Worker    std::swap(ds->definitions[0], extract->definitions[0]);
3168*61046927SAndroid Build Coastguard Worker    ctx.uses[extract->definitions[0].tempId()] = 0;
3169*61046927SAndroid Build Coastguard Worker    ctx.info[ds->definitions[0].tempId()].label = 0;
3170*61046927SAndroid Build Coastguard Worker    return true;
3171*61046927SAndroid Build Coastguard Worker }
3172*61046927SAndroid Build Coastguard Worker 
3173*61046927SAndroid Build Coastguard Worker /* v_and(a, v_subbrev_co(0, 0, vcc)) -> v_cndmask(0, a, vcc) */
3174*61046927SAndroid Build Coastguard Worker bool
combine_and_subbrev(opt_ctx & ctx,aco_ptr<Instruction> & instr)3175*61046927SAndroid Build Coastguard Worker combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3176*61046927SAndroid Build Coastguard Worker {
3177*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
3178*61046927SAndroid Build Coastguard Worker       return false;
3179*61046927SAndroid Build Coastguard Worker 
3180*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
3181*61046927SAndroid Build Coastguard Worker       Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
3182*61046927SAndroid Build Coastguard Worker       if (op_instr && op_instr->opcode == aco_opcode::v_subbrev_co_u32 &&
3183*61046927SAndroid Build Coastguard Worker           op_instr->operands[0].constantEquals(0) && op_instr->operands[1].constantEquals(0) &&
3184*61046927SAndroid Build Coastguard Worker           !op_instr->usesModifiers()) {
3185*61046927SAndroid Build Coastguard Worker 
3186*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> new_instr;
3187*61046927SAndroid Build Coastguard Worker          if (instr->operands[!i].isTemp() &&
3188*61046927SAndroid Build Coastguard Worker              instr->operands[!i].getTemp().type() == RegType::vgpr) {
3189*61046927SAndroid Build Coastguard Worker             new_instr.reset(create_instruction(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
3190*61046927SAndroid Build Coastguard Worker          } else if (ctx.program->gfx_level >= GFX10 ||
3191*61046927SAndroid Build Coastguard Worker                     (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
3192*61046927SAndroid Build Coastguard Worker             new_instr.reset(
3193*61046927SAndroid Build Coastguard Worker                create_instruction(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
3194*61046927SAndroid Build Coastguard Worker          } else {
3195*61046927SAndroid Build Coastguard Worker             return false;
3196*61046927SAndroid Build Coastguard Worker          }
3197*61046927SAndroid Build Coastguard Worker 
3198*61046927SAndroid Build Coastguard Worker          new_instr->operands[0] = Operand::zero();
3199*61046927SAndroid Build Coastguard Worker          new_instr->operands[1] = instr->operands[!i];
3200*61046927SAndroid Build Coastguard Worker          new_instr->operands[2] = copy_operand(ctx, op_instr->operands[2]);
3201*61046927SAndroid Build Coastguard Worker          new_instr->definitions[0] = instr->definitions[0];
3202*61046927SAndroid Build Coastguard Worker          new_instr->pass_flags = instr->pass_flags;
3203*61046927SAndroid Build Coastguard Worker          instr = std::move(new_instr);
3204*61046927SAndroid Build Coastguard Worker          decrease_uses(ctx, op_instr);
3205*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].label = 0;
3206*61046927SAndroid Build Coastguard Worker          return true;
3207*61046927SAndroid Build Coastguard Worker       }
3208*61046927SAndroid Build Coastguard Worker    }
3209*61046927SAndroid Build Coastguard Worker 
3210*61046927SAndroid Build Coastguard Worker    return false;
3211*61046927SAndroid Build Coastguard Worker }
3212*61046927SAndroid Build Coastguard Worker 
3213*61046927SAndroid Build Coastguard Worker /* v_and(a, not(b)) -> v_bfi_b32(b, 0, a)
3214*61046927SAndroid Build Coastguard Worker  * v_or(a, not(b)) -> v_bfi_b32(b, a, -1)
3215*61046927SAndroid Build Coastguard Worker  */
3216*61046927SAndroid Build Coastguard Worker bool
combine_v_andor_not(opt_ctx & ctx,aco_ptr<Instruction> & instr)3217*61046927SAndroid Build Coastguard Worker combine_v_andor_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3218*61046927SAndroid Build Coastguard Worker {
3219*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
3220*61046927SAndroid Build Coastguard Worker       return false;
3221*61046927SAndroid Build Coastguard Worker 
3222*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
3223*61046927SAndroid Build Coastguard Worker       Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
3224*61046927SAndroid Build Coastguard Worker       if (op_instr && !op_instr->usesModifiers() &&
3225*61046927SAndroid Build Coastguard Worker           (op_instr->opcode == aco_opcode::v_not_b32 ||
3226*61046927SAndroid Build Coastguard Worker            op_instr->opcode == aco_opcode::s_not_b32)) {
3227*61046927SAndroid Build Coastguard Worker 
3228*61046927SAndroid Build Coastguard Worker          Operand ops[3] = {
3229*61046927SAndroid Build Coastguard Worker             op_instr->operands[0],
3230*61046927SAndroid Build Coastguard Worker             Operand::zero(),
3231*61046927SAndroid Build Coastguard Worker             instr->operands[!i],
3232*61046927SAndroid Build Coastguard Worker          };
3233*61046927SAndroid Build Coastguard Worker          if (instr->opcode == aco_opcode::v_or_b32) {
3234*61046927SAndroid Build Coastguard Worker             ops[1] = instr->operands[!i];
3235*61046927SAndroid Build Coastguard Worker             ops[2] = Operand::c32(-1);
3236*61046927SAndroid Build Coastguard Worker          }
3237*61046927SAndroid Build Coastguard Worker          if (!check_vop3_operands(ctx, 3, ops))
3238*61046927SAndroid Build Coastguard Worker             continue;
3239*61046927SAndroid Build Coastguard Worker 
3240*61046927SAndroid Build Coastguard Worker          Instruction* new_instr = create_instruction(aco_opcode::v_bfi_b32, Format::VOP3, 3, 1);
3241*61046927SAndroid Build Coastguard Worker 
3242*61046927SAndroid Build Coastguard Worker          if (op_instr->operands[0].isTemp())
3243*61046927SAndroid Build Coastguard Worker             ctx.uses[op_instr->operands[0].tempId()]++;
3244*61046927SAndroid Build Coastguard Worker          for (unsigned j = 0; j < 3; j++)
3245*61046927SAndroid Build Coastguard Worker             new_instr->operands[j] = ops[j];
3246*61046927SAndroid Build Coastguard Worker          new_instr->definitions[0] = instr->definitions[0];
3247*61046927SAndroid Build Coastguard Worker          new_instr->pass_flags = instr->pass_flags;
3248*61046927SAndroid Build Coastguard Worker          instr.reset(new_instr);
3249*61046927SAndroid Build Coastguard Worker          decrease_uses(ctx, op_instr);
3250*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].label = 0;
3251*61046927SAndroid Build Coastguard Worker          return true;
3252*61046927SAndroid Build Coastguard Worker       }
3253*61046927SAndroid Build Coastguard Worker    }
3254*61046927SAndroid Build Coastguard Worker 
3255*61046927SAndroid Build Coastguard Worker    return false;
3256*61046927SAndroid Build Coastguard Worker }
3257*61046927SAndroid Build Coastguard Worker 
3258*61046927SAndroid Build Coastguard Worker /* v_add_co(c, s_lshl(a, b)) -> v_mad_u32_u24(a, 1<<b, c)
3259*61046927SAndroid Build Coastguard Worker  * v_add_co(c, v_lshlrev(a, b)) -> v_mad_u32_u24(b, 1<<a, c)
3260*61046927SAndroid Build Coastguard Worker  * v_sub(c, s_lshl(a, b)) -> v_mad_i32_i24(a, -(1<<b), c)
3261*61046927SAndroid Build Coastguard Worker  * v_sub(c, v_lshlrev(a, b)) -> v_mad_i32_i24(b, -(1<<a), c)
3262*61046927SAndroid Build Coastguard Worker  */
3263*61046927SAndroid Build Coastguard Worker bool
combine_add_lshl(opt_ctx & ctx,aco_ptr<Instruction> & instr,bool is_sub)3264*61046927SAndroid Build Coastguard Worker combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
3265*61046927SAndroid Build Coastguard Worker {
3266*61046927SAndroid Build Coastguard Worker    if (instr->usesModifiers())
3267*61046927SAndroid Build Coastguard Worker       return false;
3268*61046927SAndroid Build Coastguard Worker 
3269*61046927SAndroid Build Coastguard Worker    /* Substractions: start at operand 1 to avoid mixup such as
3270*61046927SAndroid Build Coastguard Worker     * turning v_sub(v_lshlrev(a, b), c) into v_mad_i32_i24(b, -(1<<a), c)
3271*61046927SAndroid Build Coastguard Worker     */
3272*61046927SAndroid Build Coastguard Worker    unsigned start_op_idx = is_sub ? 1 : 0;
3273*61046927SAndroid Build Coastguard Worker 
3274*61046927SAndroid Build Coastguard Worker    /* Don't allow 24-bit operands on subtraction because
3275*61046927SAndroid Build Coastguard Worker     * v_mad_i32_i24 applies a sign extension.
3276*61046927SAndroid Build Coastguard Worker     */
3277*61046927SAndroid Build Coastguard Worker    bool allow_24bit = !is_sub;
3278*61046927SAndroid Build Coastguard Worker 
3279*61046927SAndroid Build Coastguard Worker    for (unsigned i = start_op_idx; i < 2; i++) {
3280*61046927SAndroid Build Coastguard Worker       Instruction* op_instr = follow_operand(ctx, instr->operands[i]);
3281*61046927SAndroid Build Coastguard Worker       if (!op_instr)
3282*61046927SAndroid Build Coastguard Worker          continue;
3283*61046927SAndroid Build Coastguard Worker 
3284*61046927SAndroid Build Coastguard Worker       if (op_instr->opcode != aco_opcode::s_lshl_b32 &&
3285*61046927SAndroid Build Coastguard Worker           op_instr->opcode != aco_opcode::v_lshlrev_b32)
3286*61046927SAndroid Build Coastguard Worker          continue;
3287*61046927SAndroid Build Coastguard Worker 
3288*61046927SAndroid Build Coastguard Worker       int shift_op_idx = op_instr->opcode == aco_opcode::s_lshl_b32 ? 1 : 0;
3289*61046927SAndroid Build Coastguard Worker 
3290*61046927SAndroid Build Coastguard Worker       if (op_instr->operands[shift_op_idx].isConstant() &&
3291*61046927SAndroid Build Coastguard Worker           ((allow_24bit && op_instr->operands[!shift_op_idx].is24bit()) ||
3292*61046927SAndroid Build Coastguard Worker            op_instr->operands[!shift_op_idx].is16bit())) {
3293*61046927SAndroid Build Coastguard Worker          uint32_t multiplier = 1 << (op_instr->operands[shift_op_idx].constantValue() % 32u);
3294*61046927SAndroid Build Coastguard Worker          if (is_sub)
3295*61046927SAndroid Build Coastguard Worker             multiplier = -multiplier;
3296*61046927SAndroid Build Coastguard Worker          if (is_sub ? (multiplier < 0xff800000) : (multiplier > 0xffffff))
3297*61046927SAndroid Build Coastguard Worker             continue;
3298*61046927SAndroid Build Coastguard Worker 
3299*61046927SAndroid Build Coastguard Worker          Operand ops[3] = {
3300*61046927SAndroid Build Coastguard Worker             op_instr->operands[!shift_op_idx],
3301*61046927SAndroid Build Coastguard Worker             Operand::c32(multiplier),
3302*61046927SAndroid Build Coastguard Worker             instr->operands[!i],
3303*61046927SAndroid Build Coastguard Worker          };
3304*61046927SAndroid Build Coastguard Worker          if (!check_vop3_operands(ctx, 3, ops))
3305*61046927SAndroid Build Coastguard Worker             return false;
3306*61046927SAndroid Build Coastguard Worker 
3307*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[i].tempId()]--;
3308*61046927SAndroid Build Coastguard Worker 
3309*61046927SAndroid Build Coastguard Worker          aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24;
3310*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> new_instr{create_instruction(mad_op, Format::VOP3, 3, 1)};
3311*61046927SAndroid Build Coastguard Worker          for (unsigned op_idx = 0; op_idx < 3; ++op_idx)
3312*61046927SAndroid Build Coastguard Worker             new_instr->operands[op_idx] = ops[op_idx];
3313*61046927SAndroid Build Coastguard Worker          new_instr->definitions[0] = instr->definitions[0];
3314*61046927SAndroid Build Coastguard Worker          new_instr->pass_flags = instr->pass_flags;
3315*61046927SAndroid Build Coastguard Worker          instr = std::move(new_instr);
3316*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].label = 0;
3317*61046927SAndroid Build Coastguard Worker          return true;
3318*61046927SAndroid Build Coastguard Worker       }
3319*61046927SAndroid Build Coastguard Worker    }
3320*61046927SAndroid Build Coastguard Worker 
3321*61046927SAndroid Build Coastguard Worker    return false;
3322*61046927SAndroid Build Coastguard Worker }
3323*61046927SAndroid Build Coastguard Worker 
3324*61046927SAndroid Build Coastguard Worker void
propagate_swizzles(VALU_instruction * instr,bool opsel_lo,bool opsel_hi)3325*61046927SAndroid Build Coastguard Worker propagate_swizzles(VALU_instruction* instr, bool opsel_lo, bool opsel_hi)
3326*61046927SAndroid Build Coastguard Worker {
3327*61046927SAndroid Build Coastguard Worker    /* propagate swizzles which apply to a result down to the instruction's operands:
3328*61046927SAndroid Build Coastguard Worker     * result = a.xy + b.xx -> result.yx = a.yx + b.xx */
3329*61046927SAndroid Build Coastguard Worker    uint8_t tmp_lo = instr->opsel_lo;
3330*61046927SAndroid Build Coastguard Worker    uint8_t tmp_hi = instr->opsel_hi;
3331*61046927SAndroid Build Coastguard Worker    uint8_t neg_lo = instr->neg_lo;
3332*61046927SAndroid Build Coastguard Worker    uint8_t neg_hi = instr->neg_hi;
3333*61046927SAndroid Build Coastguard Worker    if (opsel_lo == 1) {
3334*61046927SAndroid Build Coastguard Worker       instr->opsel_lo = tmp_hi;
3335*61046927SAndroid Build Coastguard Worker       instr->neg_lo = neg_hi;
3336*61046927SAndroid Build Coastguard Worker    }
3337*61046927SAndroid Build Coastguard Worker    if (opsel_hi == 0) {
3338*61046927SAndroid Build Coastguard Worker       instr->opsel_hi = tmp_lo;
3339*61046927SAndroid Build Coastguard Worker       instr->neg_hi = neg_lo;
3340*61046927SAndroid Build Coastguard Worker    }
3341*61046927SAndroid Build Coastguard Worker }
3342*61046927SAndroid Build Coastguard Worker 
3343*61046927SAndroid Build Coastguard Worker void
combine_vop3p(opt_ctx & ctx,aco_ptr<Instruction> & instr)3344*61046927SAndroid Build Coastguard Worker combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3345*61046927SAndroid Build Coastguard Worker {
3346*61046927SAndroid Build Coastguard Worker    VALU_instruction* vop3p = &instr->valu();
3347*61046927SAndroid Build Coastguard Worker 
3348*61046927SAndroid Build Coastguard Worker    /* apply clamp */
3349*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) &&
3350*61046927SAndroid Build Coastguard Worker        vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1 &&
3351*61046927SAndroid Build Coastguard Worker        !vop3p->opsel_lo[1] && !vop3p->opsel_hi[1]) {
3352*61046927SAndroid Build Coastguard Worker 
3353*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[instr->operands[0].tempId()];
3354*61046927SAndroid Build Coastguard Worker       if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
3355*61046927SAndroid Build Coastguard Worker          VALU_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->valu();
3356*61046927SAndroid Build Coastguard Worker          candidate->clamp = true;
3357*61046927SAndroid Build Coastguard Worker          propagate_swizzles(candidate, vop3p->opsel_lo[0], vop3p->opsel_hi[0]);
3358*61046927SAndroid Build Coastguard Worker          instr->definitions[0].swapTemp(candidate->definitions[0]);
3359*61046927SAndroid Build Coastguard Worker          ctx.info[candidate->definitions[0].tempId()].instr = candidate;
3360*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->definitions[0].tempId()]--;
3361*61046927SAndroid Build Coastguard Worker          return;
3362*61046927SAndroid Build Coastguard Worker       }
3363*61046927SAndroid Build Coastguard Worker    }
3364*61046927SAndroid Build Coastguard Worker 
3365*61046927SAndroid Build Coastguard Worker    /* check for fneg modifiers */
3366*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
3367*61046927SAndroid Build Coastguard Worker       if (!can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, i))
3368*61046927SAndroid Build Coastguard Worker          continue;
3369*61046927SAndroid Build Coastguard Worker       Operand& op = instr->operands[i];
3370*61046927SAndroid Build Coastguard Worker       if (!op.isTemp())
3371*61046927SAndroid Build Coastguard Worker          continue;
3372*61046927SAndroid Build Coastguard Worker 
3373*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[op.tempId()];
3374*61046927SAndroid Build Coastguard Worker       if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 &&
3375*61046927SAndroid Build Coastguard Worker           (info.instr->operands[0].constantEquals(0x3C00) ||
3376*61046927SAndroid Build Coastguard Worker            info.instr->operands[1].constantEquals(0x3C00))) {
3377*61046927SAndroid Build Coastguard Worker 
3378*61046927SAndroid Build Coastguard Worker          VALU_instruction* fneg = &info.instr->valu();
3379*61046927SAndroid Build Coastguard Worker 
3380*61046927SAndroid Build Coastguard Worker          unsigned fneg_src = fneg->operands[0].constantEquals(0x3C00);
3381*61046927SAndroid Build Coastguard Worker 
3382*61046927SAndroid Build Coastguard Worker          if (fneg->opsel_lo[1 - fneg_src] || fneg->opsel_hi[1 - fneg_src])
3383*61046927SAndroid Build Coastguard Worker             continue;
3384*61046927SAndroid Build Coastguard Worker 
3385*61046927SAndroid Build Coastguard Worker          Operand ops[3];
3386*61046927SAndroid Build Coastguard Worker          for (unsigned j = 0; j < instr->operands.size(); j++)
3387*61046927SAndroid Build Coastguard Worker             ops[j] = instr->operands[j];
3388*61046927SAndroid Build Coastguard Worker          ops[i] = fneg->operands[fneg_src];
3389*61046927SAndroid Build Coastguard Worker          if (!check_vop3_operands(ctx, instr->operands.size(), ops))
3390*61046927SAndroid Build Coastguard Worker             continue;
3391*61046927SAndroid Build Coastguard Worker 
3392*61046927SAndroid Build Coastguard Worker          if (fneg->clamp)
3393*61046927SAndroid Build Coastguard Worker             continue;
3394*61046927SAndroid Build Coastguard Worker          instr->operands[i] = fneg->operands[fneg_src];
3395*61046927SAndroid Build Coastguard Worker 
3396*61046927SAndroid Build Coastguard Worker          /* opsel_lo/hi is either 0 or 1:
3397*61046927SAndroid Build Coastguard Worker           * if 0 - pick selection from fneg->lo
3398*61046927SAndroid Build Coastguard Worker           * if 1 - pick selection from fneg->hi
3399*61046927SAndroid Build Coastguard Worker           */
3400*61046927SAndroid Build Coastguard Worker          bool opsel_lo = vop3p->opsel_lo[i];
3401*61046927SAndroid Build Coastguard Worker          bool opsel_hi = vop3p->opsel_hi[i];
3402*61046927SAndroid Build Coastguard Worker          bool neg_lo = fneg->neg_lo[0] ^ fneg->neg_lo[1];
3403*61046927SAndroid Build Coastguard Worker          bool neg_hi = fneg->neg_hi[0] ^ fneg->neg_hi[1];
3404*61046927SAndroid Build Coastguard Worker          vop3p->neg_lo[i] ^= opsel_lo ? neg_hi : neg_lo;
3405*61046927SAndroid Build Coastguard Worker          vop3p->neg_hi[i] ^= opsel_hi ? neg_hi : neg_lo;
3406*61046927SAndroid Build Coastguard Worker          vop3p->opsel_lo[i] ^= opsel_lo ? !fneg->opsel_hi[fneg_src] : fneg->opsel_lo[fneg_src];
3407*61046927SAndroid Build Coastguard Worker          vop3p->opsel_hi[i] ^= opsel_hi ? !fneg->opsel_hi[fneg_src] : fneg->opsel_lo[fneg_src];
3408*61046927SAndroid Build Coastguard Worker 
3409*61046927SAndroid Build Coastguard Worker          if (--ctx.uses[fneg->definitions[0].tempId()])
3410*61046927SAndroid Build Coastguard Worker             ctx.uses[fneg->operands[fneg_src].tempId()]++;
3411*61046927SAndroid Build Coastguard Worker       }
3412*61046927SAndroid Build Coastguard Worker    }
3413*61046927SAndroid Build Coastguard Worker 
3414*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_pk_add_f16 || instr->opcode == aco_opcode::v_pk_add_u16) {
3415*61046927SAndroid Build Coastguard Worker       bool fadd = instr->opcode == aco_opcode::v_pk_add_f16;
3416*61046927SAndroid Build Coastguard Worker       if (fadd && instr->definitions[0].isPrecise())
3417*61046927SAndroid Build Coastguard Worker          return;
3418*61046927SAndroid Build Coastguard Worker       if (!fadd && instr->valu().clamp)
3419*61046927SAndroid Build Coastguard Worker          return;
3420*61046927SAndroid Build Coastguard Worker 
3421*61046927SAndroid Build Coastguard Worker       Instruction* mul_instr = nullptr;
3422*61046927SAndroid Build Coastguard Worker       unsigned add_op_idx = 0;
3423*61046927SAndroid Build Coastguard Worker       bitarray8 mul_neg_lo = 0, mul_neg_hi = 0, mul_opsel_lo = 0, mul_opsel_hi = 0;
3424*61046927SAndroid Build Coastguard Worker       uint32_t uses = UINT32_MAX;
3425*61046927SAndroid Build Coastguard Worker 
3426*61046927SAndroid Build Coastguard Worker       /* find the 'best' mul instruction to combine with the add */
3427*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 2; i++) {
3428*61046927SAndroid Build Coastguard Worker          Instruction* op_instr = follow_operand(ctx, instr->operands[i], true);
3429*61046927SAndroid Build Coastguard Worker          if (!op_instr)
3430*61046927SAndroid Build Coastguard Worker             continue;
3431*61046927SAndroid Build Coastguard Worker 
3432*61046927SAndroid Build Coastguard Worker          if (ctx.info[instr->operands[i].tempId()].is_vop3p()) {
3433*61046927SAndroid Build Coastguard Worker             if (fadd) {
3434*61046927SAndroid Build Coastguard Worker                if (op_instr->opcode != aco_opcode::v_pk_mul_f16 ||
3435*61046927SAndroid Build Coastguard Worker                    op_instr->definitions[0].isPrecise())
3436*61046927SAndroid Build Coastguard Worker                   continue;
3437*61046927SAndroid Build Coastguard Worker             } else {
3438*61046927SAndroid Build Coastguard Worker                if (op_instr->opcode != aco_opcode::v_pk_mul_lo_u16)
3439*61046927SAndroid Build Coastguard Worker                   continue;
3440*61046927SAndroid Build Coastguard Worker             }
3441*61046927SAndroid Build Coastguard Worker 
3442*61046927SAndroid Build Coastguard Worker             Operand op[3] = {op_instr->operands[0], op_instr->operands[1], instr->operands[1 - i]};
3443*61046927SAndroid Build Coastguard Worker             if (ctx.uses[instr->operands[i].tempId()] >= uses || !check_vop3_operands(ctx, 3, op))
3444*61046927SAndroid Build Coastguard Worker                continue;
3445*61046927SAndroid Build Coastguard Worker 
3446*61046927SAndroid Build Coastguard Worker             /* no clamp allowed between mul and add */
3447*61046927SAndroid Build Coastguard Worker             if (op_instr->valu().clamp)
3448*61046927SAndroid Build Coastguard Worker                continue;
3449*61046927SAndroid Build Coastguard Worker 
3450*61046927SAndroid Build Coastguard Worker             mul_instr = op_instr;
3451*61046927SAndroid Build Coastguard Worker             add_op_idx = 1 - i;
3452*61046927SAndroid Build Coastguard Worker             uses = ctx.uses[instr->operands[i].tempId()];
3453*61046927SAndroid Build Coastguard Worker             mul_neg_lo = mul_instr->valu().neg_lo;
3454*61046927SAndroid Build Coastguard Worker             mul_neg_hi = mul_instr->valu().neg_hi;
3455*61046927SAndroid Build Coastguard Worker             mul_opsel_lo = mul_instr->valu().opsel_lo;
3456*61046927SAndroid Build Coastguard Worker             mul_opsel_hi = mul_instr->valu().opsel_hi;
3457*61046927SAndroid Build Coastguard Worker          } else if (instr->operands[i].bytes() == 2) {
3458*61046927SAndroid Build Coastguard Worker             if ((fadd && (op_instr->opcode != aco_opcode::v_mul_f16 ||
3459*61046927SAndroid Build Coastguard Worker                           op_instr->definitions[0].isPrecise())) ||
3460*61046927SAndroid Build Coastguard Worker                 (!fadd && op_instr->opcode != aco_opcode::v_mul_lo_u16 &&
3461*61046927SAndroid Build Coastguard Worker                  op_instr->opcode != aco_opcode::v_mul_lo_u16_e64))
3462*61046927SAndroid Build Coastguard Worker                continue;
3463*61046927SAndroid Build Coastguard Worker 
3464*61046927SAndroid Build Coastguard Worker             if (op_instr->valu().clamp || op_instr->valu().omod || op_instr->valu().abs)
3465*61046927SAndroid Build Coastguard Worker                continue;
3466*61046927SAndroid Build Coastguard Worker 
3467*61046927SAndroid Build Coastguard Worker             if (op_instr->isDPP() || (op_instr->isSDWA() && (op_instr->sdwa().sel[0].size() < 2 ||
3468*61046927SAndroid Build Coastguard Worker                                                              op_instr->sdwa().sel[1].size() < 2)))
3469*61046927SAndroid Build Coastguard Worker                continue;
3470*61046927SAndroid Build Coastguard Worker 
3471*61046927SAndroid Build Coastguard Worker             Operand op[3] = {op_instr->operands[0], op_instr->operands[1], instr->operands[1 - i]};
3472*61046927SAndroid Build Coastguard Worker             if (ctx.uses[instr->operands[i].tempId()] >= uses || !check_vop3_operands(ctx, 3, op))
3473*61046927SAndroid Build Coastguard Worker                continue;
3474*61046927SAndroid Build Coastguard Worker 
3475*61046927SAndroid Build Coastguard Worker             mul_instr = op_instr;
3476*61046927SAndroid Build Coastguard Worker             add_op_idx = 1 - i;
3477*61046927SAndroid Build Coastguard Worker             uses = ctx.uses[instr->operands[i].tempId()];
3478*61046927SAndroid Build Coastguard Worker             mul_neg_lo = mul_instr->valu().neg;
3479*61046927SAndroid Build Coastguard Worker             mul_neg_hi = mul_instr->valu().neg;
3480*61046927SAndroid Build Coastguard Worker             if (mul_instr->isSDWA()) {
3481*61046927SAndroid Build Coastguard Worker                for (unsigned j = 0; j < 2; j++)
3482*61046927SAndroid Build Coastguard Worker                   mul_opsel_lo[j] = mul_instr->sdwa().sel[j].offset();
3483*61046927SAndroid Build Coastguard Worker             } else {
3484*61046927SAndroid Build Coastguard Worker                mul_opsel_lo = mul_instr->valu().opsel;
3485*61046927SAndroid Build Coastguard Worker             }
3486*61046927SAndroid Build Coastguard Worker             mul_opsel_hi = mul_opsel_lo;
3487*61046927SAndroid Build Coastguard Worker          }
3488*61046927SAndroid Build Coastguard Worker       }
3489*61046927SAndroid Build Coastguard Worker 
3490*61046927SAndroid Build Coastguard Worker       if (!mul_instr)
3491*61046927SAndroid Build Coastguard Worker          return;
3492*61046927SAndroid Build Coastguard Worker 
3493*61046927SAndroid Build Coastguard Worker       /* turn mul + packed add into v_pk_fma_f16 */
3494*61046927SAndroid Build Coastguard Worker       aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16;
3495*61046927SAndroid Build Coastguard Worker       aco_ptr<Instruction> fma{create_instruction(mad, Format::VOP3P, 3, 1)};
3496*61046927SAndroid Build Coastguard Worker       fma->operands[0] = copy_operand(ctx, mul_instr->operands[0]);
3497*61046927SAndroid Build Coastguard Worker       fma->operands[1] = copy_operand(ctx, mul_instr->operands[1]);
3498*61046927SAndroid Build Coastguard Worker       fma->operands[2] = instr->operands[add_op_idx];
3499*61046927SAndroid Build Coastguard Worker       fma->valu().clamp = vop3p->clamp;
3500*61046927SAndroid Build Coastguard Worker       fma->valu().neg_lo = mul_neg_lo;
3501*61046927SAndroid Build Coastguard Worker       fma->valu().neg_hi = mul_neg_hi;
3502*61046927SAndroid Build Coastguard Worker       fma->valu().opsel_lo = mul_opsel_lo;
3503*61046927SAndroid Build Coastguard Worker       fma->valu().opsel_hi = mul_opsel_hi;
3504*61046927SAndroid Build Coastguard Worker       propagate_swizzles(&fma->valu(), vop3p->opsel_lo[1 - add_op_idx],
3505*61046927SAndroid Build Coastguard Worker                          vop3p->opsel_hi[1 - add_op_idx]);
3506*61046927SAndroid Build Coastguard Worker       fma->valu().opsel_lo[2] = vop3p->opsel_lo[add_op_idx];
3507*61046927SAndroid Build Coastguard Worker       fma->valu().opsel_hi[2] = vop3p->opsel_hi[add_op_idx];
3508*61046927SAndroid Build Coastguard Worker       fma->valu().neg_lo[2] = vop3p->neg_lo[add_op_idx];
3509*61046927SAndroid Build Coastguard Worker       fma->valu().neg_hi[2] = vop3p->neg_hi[add_op_idx];
3510*61046927SAndroid Build Coastguard Worker       fma->valu().neg_lo[1] = fma->valu().neg_lo[1] ^ vop3p->neg_lo[1 - add_op_idx];
3511*61046927SAndroid Build Coastguard Worker       fma->valu().neg_hi[1] = fma->valu().neg_hi[1] ^ vop3p->neg_hi[1 - add_op_idx];
3512*61046927SAndroid Build Coastguard Worker       fma->definitions[0] = instr->definitions[0];
3513*61046927SAndroid Build Coastguard Worker       fma->pass_flags = instr->pass_flags;
3514*61046927SAndroid Build Coastguard Worker       instr = std::move(fma);
3515*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
3516*61046927SAndroid Build Coastguard Worker       decrease_uses(ctx, mul_instr);
3517*61046927SAndroid Build Coastguard Worker       return;
3518*61046927SAndroid Build Coastguard Worker    }
3519*61046927SAndroid Build Coastguard Worker }
3520*61046927SAndroid Build Coastguard Worker 
3521*61046927SAndroid Build Coastguard Worker bool
can_use_mad_mix(opt_ctx & ctx,aco_ptr<Instruction> & instr)3522*61046927SAndroid Build Coastguard Worker can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3523*61046927SAndroid Build Coastguard Worker {
3524*61046927SAndroid Build Coastguard Worker    if (ctx.program->gfx_level < GFX9)
3525*61046927SAndroid Build Coastguard Worker       return false;
3526*61046927SAndroid Build Coastguard Worker 
3527*61046927SAndroid Build Coastguard Worker    /* v_mad_mix* on GFX9 always flushes denormals for 16-bit inputs/outputs */
3528*61046927SAndroid Build Coastguard Worker    if (ctx.program->gfx_level == GFX9 && ctx.fp_mode.denorm16_64)
3529*61046927SAndroid Build Coastguard Worker       return false;
3530*61046927SAndroid Build Coastguard Worker 
3531*61046927SAndroid Build Coastguard Worker    if (instr->valu().omod)
3532*61046927SAndroid Build Coastguard Worker       return false;
3533*61046927SAndroid Build Coastguard Worker 
3534*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
3535*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_f32:
3536*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_f32:
3537*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_f32:
3538*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f32: return !instr->isSDWA() && !instr->isDPP();
3539*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_f32:
3540*61046927SAndroid Build Coastguard Worker       return ctx.program->dev.fused_mad_mix || !instr->definitions[0].isPrecise();
3541*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mix_f32:
3542*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mixlo_f16: return true;
3543*61046927SAndroid Build Coastguard Worker    default: return false;
3544*61046927SAndroid Build Coastguard Worker    }
3545*61046927SAndroid Build Coastguard Worker }
3546*61046927SAndroid Build Coastguard Worker 
3547*61046927SAndroid Build Coastguard Worker void
to_mad_mix(opt_ctx & ctx,aco_ptr<Instruction> & instr)3548*61046927SAndroid Build Coastguard Worker to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3549*61046927SAndroid Build Coastguard Worker {
3550*61046927SAndroid Build Coastguard Worker    ctx.info[instr->definitions[0].tempId()].label &= label_f2f16 | label_clamp | label_mul;
3551*61046927SAndroid Build Coastguard Worker 
3552*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_fma_f32) {
3553*61046927SAndroid Build Coastguard Worker       instr->format = (Format)((uint32_t)withoutVOP3(instr->format) | (uint32_t)(Format::VOP3P));
3554*61046927SAndroid Build Coastguard Worker       instr->opcode = aco_opcode::v_fma_mix_f32;
3555*61046927SAndroid Build Coastguard Worker       return;
3556*61046927SAndroid Build Coastguard Worker    }
3557*61046927SAndroid Build Coastguard Worker 
3558*61046927SAndroid Build Coastguard Worker    bool is_add = instr->opcode != aco_opcode::v_mul_f32;
3559*61046927SAndroid Build Coastguard Worker 
3560*61046927SAndroid Build Coastguard Worker    aco_ptr<Instruction> vop3p{create_instruction(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
3561*61046927SAndroid Build Coastguard Worker 
3562*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
3563*61046927SAndroid Build Coastguard Worker       vop3p->operands[is_add + i] = instr->operands[i];
3564*61046927SAndroid Build Coastguard Worker       vop3p->valu().neg_lo[is_add + i] = instr->valu().neg[i];
3565*61046927SAndroid Build Coastguard Worker       vop3p->valu().neg_hi[is_add + i] = instr->valu().abs[i];
3566*61046927SAndroid Build Coastguard Worker    }
3567*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_mul_f32) {
3568*61046927SAndroid Build Coastguard Worker       vop3p->operands[2] = Operand::zero();
3569*61046927SAndroid Build Coastguard Worker       vop3p->valu().neg_lo[2] = true;
3570*61046927SAndroid Build Coastguard Worker    } else if (is_add) {
3571*61046927SAndroid Build Coastguard Worker       vop3p->operands[0] = Operand::c32(0x3f800000);
3572*61046927SAndroid Build Coastguard Worker       if (instr->opcode == aco_opcode::v_sub_f32)
3573*61046927SAndroid Build Coastguard Worker          vop3p->valu().neg_lo[2] ^= true;
3574*61046927SAndroid Build Coastguard Worker       else if (instr->opcode == aco_opcode::v_subrev_f32)
3575*61046927SAndroid Build Coastguard Worker          vop3p->valu().neg_lo[1] ^= true;
3576*61046927SAndroid Build Coastguard Worker    }
3577*61046927SAndroid Build Coastguard Worker    vop3p->definitions[0] = instr->definitions[0];
3578*61046927SAndroid Build Coastguard Worker    vop3p->valu().clamp = instr->valu().clamp;
3579*61046927SAndroid Build Coastguard Worker    vop3p->pass_flags = instr->pass_flags;
3580*61046927SAndroid Build Coastguard Worker    instr = std::move(vop3p);
3581*61046927SAndroid Build Coastguard Worker 
3582*61046927SAndroid Build Coastguard Worker    if (ctx.info[instr->definitions[0].tempId()].label & label_mul)
3583*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].instr = instr.get();
3584*61046927SAndroid Build Coastguard Worker }
3585*61046927SAndroid Build Coastguard Worker 
3586*61046927SAndroid Build Coastguard Worker bool
combine_output_conversion(opt_ctx & ctx,aco_ptr<Instruction> & instr)3587*61046927SAndroid Build Coastguard Worker combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3588*61046927SAndroid Build Coastguard Worker {
3589*61046927SAndroid Build Coastguard Worker    ssa_info& def_info = ctx.info[instr->definitions[0].tempId()];
3590*61046927SAndroid Build Coastguard Worker    if (!def_info.is_f2f16())
3591*61046927SAndroid Build Coastguard Worker       return false;
3592*61046927SAndroid Build Coastguard Worker    Instruction* conv = def_info.instr;
3593*61046927SAndroid Build Coastguard Worker 
3594*61046927SAndroid Build Coastguard Worker    if (!ctx.uses[conv->definitions[0].tempId()] || ctx.uses[instr->definitions[0].tempId()] != 1)
3595*61046927SAndroid Build Coastguard Worker       return false;
3596*61046927SAndroid Build Coastguard Worker 
3597*61046927SAndroid Build Coastguard Worker    if (conv->usesModifiers())
3598*61046927SAndroid Build Coastguard Worker       return false;
3599*61046927SAndroid Build Coastguard Worker 
3600*61046927SAndroid Build Coastguard Worker    if (interp_can_become_fma(ctx, instr))
3601*61046927SAndroid Build Coastguard Worker       interp_p2_f32_inreg_to_fma_dpp(instr);
3602*61046927SAndroid Build Coastguard Worker 
3603*61046927SAndroid Build Coastguard Worker    if (!can_use_mad_mix(ctx, instr))
3604*61046927SAndroid Build Coastguard Worker       return false;
3605*61046927SAndroid Build Coastguard Worker 
3606*61046927SAndroid Build Coastguard Worker    if (!instr->isVOP3P())
3607*61046927SAndroid Build Coastguard Worker       to_mad_mix(ctx, instr);
3608*61046927SAndroid Build Coastguard Worker 
3609*61046927SAndroid Build Coastguard Worker    instr->opcode = aco_opcode::v_fma_mixlo_f16;
3610*61046927SAndroid Build Coastguard Worker    instr->definitions[0].swapTemp(conv->definitions[0]);
3611*61046927SAndroid Build Coastguard Worker    if (conv->definitions[0].isPrecise())
3612*61046927SAndroid Build Coastguard Worker       instr->definitions[0].setPrecise(true);
3613*61046927SAndroid Build Coastguard Worker    ctx.info[instr->definitions[0].tempId()].label &= label_clamp;
3614*61046927SAndroid Build Coastguard Worker    ctx.uses[conv->definitions[0].tempId()]--;
3615*61046927SAndroid Build Coastguard Worker 
3616*61046927SAndroid Build Coastguard Worker    return true;
3617*61046927SAndroid Build Coastguard Worker }
3618*61046927SAndroid Build Coastguard Worker 
3619*61046927SAndroid Build Coastguard Worker void
combine_mad_mix(opt_ctx & ctx,aco_ptr<Instruction> & instr)3620*61046927SAndroid Build Coastguard Worker combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3621*61046927SAndroid Build Coastguard Worker {
3622*61046927SAndroid Build Coastguard Worker    if (!can_use_mad_mix(ctx, instr))
3623*61046927SAndroid Build Coastguard Worker       return;
3624*61046927SAndroid Build Coastguard Worker 
3625*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
3626*61046927SAndroid Build Coastguard Worker       if (!instr->operands[i].isTemp())
3627*61046927SAndroid Build Coastguard Worker          continue;
3628*61046927SAndroid Build Coastguard Worker       Temp tmp = instr->operands[i].getTemp();
3629*61046927SAndroid Build Coastguard Worker       if (!ctx.info[tmp.id()].is_f2f32())
3630*61046927SAndroid Build Coastguard Worker          continue;
3631*61046927SAndroid Build Coastguard Worker 
3632*61046927SAndroid Build Coastguard Worker       Instruction* conv = ctx.info[tmp.id()].instr;
3633*61046927SAndroid Build Coastguard Worker       if (conv->valu().clamp || conv->valu().omod) {
3634*61046927SAndroid Build Coastguard Worker          continue;
3635*61046927SAndroid Build Coastguard Worker       } else if (conv->isSDWA() &&
3636*61046927SAndroid Build Coastguard Worker                  (conv->sdwa().dst_sel.size() != 4 || conv->sdwa().sel[0].size() != 2)) {
3637*61046927SAndroid Build Coastguard Worker          continue;
3638*61046927SAndroid Build Coastguard Worker       } else if (conv->isDPP()) {
3639*61046927SAndroid Build Coastguard Worker          continue;
3640*61046927SAndroid Build Coastguard Worker       }
3641*61046927SAndroid Build Coastguard Worker 
3642*61046927SAndroid Build Coastguard Worker       if (get_operand_size(instr, i) != 32)
3643*61046927SAndroid Build Coastguard Worker          continue;
3644*61046927SAndroid Build Coastguard Worker 
3645*61046927SAndroid Build Coastguard Worker       /* Conversion to VOP3P will add inline constant operands, but that shouldn't affect
3646*61046927SAndroid Build Coastguard Worker        * check_vop3_operands(). */
3647*61046927SAndroid Build Coastguard Worker       Operand op[3];
3648*61046927SAndroid Build Coastguard Worker       for (unsigned j = 0; j < instr->operands.size(); j++)
3649*61046927SAndroid Build Coastguard Worker          op[j] = instr->operands[j];
3650*61046927SAndroid Build Coastguard Worker       op[i] = conv->operands[0];
3651*61046927SAndroid Build Coastguard Worker       if (!check_vop3_operands(ctx, instr->operands.size(), op))
3652*61046927SAndroid Build Coastguard Worker          continue;
3653*61046927SAndroid Build Coastguard Worker       if (!conv->operands[0].isOfType(RegType::vgpr) && instr->isDPP())
3654*61046927SAndroid Build Coastguard Worker          continue;
3655*61046927SAndroid Build Coastguard Worker 
3656*61046927SAndroid Build Coastguard Worker       if (!instr->isVOP3P()) {
3657*61046927SAndroid Build Coastguard Worker          bool is_add =
3658*61046927SAndroid Build Coastguard Worker             instr->opcode != aco_opcode::v_mul_f32 && instr->opcode != aco_opcode::v_fma_f32;
3659*61046927SAndroid Build Coastguard Worker          to_mad_mix(ctx, instr);
3660*61046927SAndroid Build Coastguard Worker          i += is_add;
3661*61046927SAndroid Build Coastguard Worker       }
3662*61046927SAndroid Build Coastguard Worker 
3663*61046927SAndroid Build Coastguard Worker       if (--ctx.uses[tmp.id()])
3664*61046927SAndroid Build Coastguard Worker          ctx.uses[conv->operands[0].tempId()]++;
3665*61046927SAndroid Build Coastguard Worker       instr->operands[i].setTemp(conv->operands[0].getTemp());
3666*61046927SAndroid Build Coastguard Worker       if (conv->definitions[0].isPrecise())
3667*61046927SAndroid Build Coastguard Worker          instr->definitions[0].setPrecise(true);
3668*61046927SAndroid Build Coastguard Worker       instr->valu().opsel_hi[i] = true;
3669*61046927SAndroid Build Coastguard Worker       if (conv->isSDWA() && conv->sdwa().sel[0].offset() == 2)
3670*61046927SAndroid Build Coastguard Worker          instr->valu().opsel_lo[i] = true;
3671*61046927SAndroid Build Coastguard Worker       else
3672*61046927SAndroid Build Coastguard Worker          instr->valu().opsel_lo[i] = conv->valu().opsel[0];
3673*61046927SAndroid Build Coastguard Worker       bool neg = conv->valu().neg[0];
3674*61046927SAndroid Build Coastguard Worker       bool abs = conv->valu().abs[0];
3675*61046927SAndroid Build Coastguard Worker       if (!instr->valu().abs[i]) {
3676*61046927SAndroid Build Coastguard Worker          instr->valu().neg[i] ^= neg;
3677*61046927SAndroid Build Coastguard Worker          instr->valu().abs[i] = abs;
3678*61046927SAndroid Build Coastguard Worker       }
3679*61046927SAndroid Build Coastguard Worker    }
3680*61046927SAndroid Build Coastguard Worker }
3681*61046927SAndroid Build Coastguard Worker 
// TODO: we could possibly move the whole label_instruction pass to combine_instruction:
// this would mean that we'd have to fix up the instruction uses during value propagation
3684*61046927SAndroid Build Coastguard Worker 
3685*61046927SAndroid Build Coastguard Worker /* also returns true for inf */
3686*61046927SAndroid Build Coastguard Worker bool
is_pow_of_two(opt_ctx & ctx,Operand op)3687*61046927SAndroid Build Coastguard Worker is_pow_of_two(opt_ctx& ctx, Operand op)
3688*61046927SAndroid Build Coastguard Worker {
3689*61046927SAndroid Build Coastguard Worker    if (op.isTemp() && ctx.info[op.tempId()].is_constant_or_literal(op.bytes() * 8))
3690*61046927SAndroid Build Coastguard Worker       return is_pow_of_two(ctx, get_constant_op(ctx, ctx.info[op.tempId()], op.bytes() * 8));
3691*61046927SAndroid Build Coastguard Worker    else if (!op.isConstant())
3692*61046927SAndroid Build Coastguard Worker       return false;
3693*61046927SAndroid Build Coastguard Worker 
3694*61046927SAndroid Build Coastguard Worker    uint64_t val = op.constantValue64();
3695*61046927SAndroid Build Coastguard Worker 
3696*61046927SAndroid Build Coastguard Worker    if (op.bytes() == 4) {
3697*61046927SAndroid Build Coastguard Worker       uint32_t exponent = (val & 0x7f800000) >> 23;
3698*61046927SAndroid Build Coastguard Worker       uint32_t fraction = val & 0x007fffff;
3699*61046927SAndroid Build Coastguard Worker       return (exponent >= 127) && (fraction == 0);
3700*61046927SAndroid Build Coastguard Worker    } else if (op.bytes() == 2) {
3701*61046927SAndroid Build Coastguard Worker       uint32_t exponent = (val & 0x7c00) >> 10;
3702*61046927SAndroid Build Coastguard Worker       uint32_t fraction = val & 0x03ff;
3703*61046927SAndroid Build Coastguard Worker       return (exponent >= 15) && (fraction == 0);
3704*61046927SAndroid Build Coastguard Worker    } else {
3705*61046927SAndroid Build Coastguard Worker       assert(op.bytes() == 8);
3706*61046927SAndroid Build Coastguard Worker       uint64_t exponent = (val & UINT64_C(0x7ff0000000000000)) >> 52;
3707*61046927SAndroid Build Coastguard Worker       uint64_t fraction = val & UINT64_C(0x000fffffffffffff);
3708*61046927SAndroid Build Coastguard Worker       return (exponent >= 1023) && (fraction == 0);
3709*61046927SAndroid Build Coastguard Worker    }
3710*61046927SAndroid Build Coastguard Worker }
3711*61046927SAndroid Build Coastguard Worker 
3712*61046927SAndroid Build Coastguard Worker void
combine_instruction(opt_ctx & ctx,aco_ptr<Instruction> & instr)3713*61046927SAndroid Build Coastguard Worker combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
3714*61046927SAndroid Build Coastguard Worker {
3715*61046927SAndroid Build Coastguard Worker    if (instr->definitions.empty() || is_dead(ctx.uses, instr.get()))
3716*61046927SAndroid Build Coastguard Worker       return;
3717*61046927SAndroid Build Coastguard Worker 
3718*61046927SAndroid Build Coastguard Worker    if (instr->isVALU() || instr->isSALU()) {
3719*61046927SAndroid Build Coastguard Worker       /* Apply SDWA. Do this after label_instruction() so it can remove
3720*61046927SAndroid Build Coastguard Worker        * label_extract if not all instructions can take SDWA. */
3721*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < instr->operands.size(); i++) {
3722*61046927SAndroid Build Coastguard Worker          Operand& op = instr->operands[i];
3723*61046927SAndroid Build Coastguard Worker          if (!op.isTemp())
3724*61046927SAndroid Build Coastguard Worker             continue;
3725*61046927SAndroid Build Coastguard Worker          ssa_info& info = ctx.info[op.tempId()];
3726*61046927SAndroid Build Coastguard Worker          if (!info.is_extract())
3727*61046927SAndroid Build Coastguard Worker             continue;
3728*61046927SAndroid Build Coastguard Worker          /* if there are that many uses, there are likely better combinations */
3729*61046927SAndroid Build Coastguard Worker          // TODO: delay applying extract to a point where we know better
3730*61046927SAndroid Build Coastguard Worker          if (ctx.uses[op.tempId()] > 4) {
3731*61046927SAndroid Build Coastguard Worker             info.label &= ~label_extract;
3732*61046927SAndroid Build Coastguard Worker             continue;
3733*61046927SAndroid Build Coastguard Worker          }
3734*61046927SAndroid Build Coastguard Worker          if (info.is_extract() &&
3735*61046927SAndroid Build Coastguard Worker              (info.instr->operands[0].getTemp().type() == RegType::vgpr ||
3736*61046927SAndroid Build Coastguard Worker               instr->operands[i].getTemp().type() == RegType::sgpr) &&
3737*61046927SAndroid Build Coastguard Worker              can_apply_extract(ctx, instr, i, info)) {
3738*61046927SAndroid Build Coastguard Worker             /* Increase use count of the extract's operand if the extract still has uses. */
3739*61046927SAndroid Build Coastguard Worker             apply_extract(ctx, instr, i, info);
3740*61046927SAndroid Build Coastguard Worker             if (--ctx.uses[instr->operands[i].tempId()])
3741*61046927SAndroid Build Coastguard Worker                ctx.uses[info.instr->operands[0].tempId()]++;
3742*61046927SAndroid Build Coastguard Worker             instr->operands[i].setTemp(info.instr->operands[0].getTemp());
3743*61046927SAndroid Build Coastguard Worker          }
3744*61046927SAndroid Build Coastguard Worker       }
3745*61046927SAndroid Build Coastguard Worker    }
3746*61046927SAndroid Build Coastguard Worker 
3747*61046927SAndroid Build Coastguard Worker    if (instr->isVALU()) {
3748*61046927SAndroid Build Coastguard Worker       if (can_apply_sgprs(ctx, instr))
3749*61046927SAndroid Build Coastguard Worker          apply_sgprs(ctx, instr);
3750*61046927SAndroid Build Coastguard Worker       combine_mad_mix(ctx, instr);
3751*61046927SAndroid Build Coastguard Worker       while (apply_omod_clamp(ctx, instr) || combine_output_conversion(ctx, instr))
3752*61046927SAndroid Build Coastguard Worker          ;
3753*61046927SAndroid Build Coastguard Worker       apply_insert(ctx, instr);
3754*61046927SAndroid Build Coastguard Worker    }
3755*61046927SAndroid Build Coastguard Worker 
3756*61046927SAndroid Build Coastguard Worker    if (instr->isVOP3P() && instr->opcode != aco_opcode::v_fma_mix_f32 &&
3757*61046927SAndroid Build Coastguard Worker        instr->opcode != aco_opcode::v_fma_mixlo_f16)
3758*61046927SAndroid Build Coastguard Worker       return combine_vop3p(ctx, instr);
3759*61046927SAndroid Build Coastguard Worker 
3760*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA() || instr->isDPP())
3761*61046927SAndroid Build Coastguard Worker       return;
3762*61046927SAndroid Build Coastguard Worker 
3763*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::p_extract) {
3764*61046927SAndroid Build Coastguard Worker       ssa_info& info = ctx.info[instr->operands[0].tempId()];
3765*61046927SAndroid Build Coastguard Worker       if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
3766*61046927SAndroid Build Coastguard Worker          apply_extract(ctx, instr, 0, info);
3767*61046927SAndroid Build Coastguard Worker          if (--ctx.uses[instr->operands[0].tempId()])
3768*61046927SAndroid Build Coastguard Worker             ctx.uses[info.instr->operands[0].tempId()]++;
3769*61046927SAndroid Build Coastguard Worker          instr->operands[0].setTemp(info.instr->operands[0].getTemp());
3770*61046927SAndroid Build Coastguard Worker       }
3771*61046927SAndroid Build Coastguard Worker 
3772*61046927SAndroid Build Coastguard Worker       apply_ds_extract(ctx, instr);
3773*61046927SAndroid Build Coastguard Worker    }
3774*61046927SAndroid Build Coastguard Worker 
3775*61046927SAndroid Build Coastguard Worker    /* TODO: There are still some peephole optimizations that could be done:
3776*61046927SAndroid Build Coastguard Worker     * - abs(a - b) -> s_absdiff_i32
3777*61046927SAndroid Build Coastguard Worker     * - various patterns for s_bitcmp{0,1}_b32 and s_bitset{0,1}_b32
3778*61046927SAndroid Build Coastguard Worker     * - patterns for v_alignbit_b32 and v_alignbyte_b32
3779*61046927SAndroid Build Coastguard Worker     * These probably aren't too interesting, though.
3780*61046927SAndroid Build Coastguard Worker     * There are also patterns for v_cmp_class_f{16,32,64}. This is difficult but
3781*61046927SAndroid Build Coastguard Worker     * probably more useful than the previously mentioned optimizations.
3782*61046927SAndroid Build Coastguard Worker     * The various comparison optimizations also currently only work with 32-bit
3783*61046927SAndroid Build Coastguard Worker     * floats. */
3784*61046927SAndroid Build Coastguard Worker 
3785*61046927SAndroid Build Coastguard Worker    /* neg(mul(a, b)) -> mul(neg(a), b), abs(mul(a, b)) -> mul(abs(a), abs(b)) */
3786*61046927SAndroid Build Coastguard Worker    if ((ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)) &&
3787*61046927SAndroid Build Coastguard Worker        ctx.uses[instr->operands[1].tempId()] == 1) {
3788*61046927SAndroid Build Coastguard Worker       Temp val = ctx.info[instr->definitions[0].tempId()].temp;
3789*61046927SAndroid Build Coastguard Worker 
3790*61046927SAndroid Build Coastguard Worker       if (!ctx.info[val.id()].is_mul())
3791*61046927SAndroid Build Coastguard Worker          return;
3792*61046927SAndroid Build Coastguard Worker 
3793*61046927SAndroid Build Coastguard Worker       Instruction* mul_instr = ctx.info[val.id()].instr;
3794*61046927SAndroid Build Coastguard Worker 
3795*61046927SAndroid Build Coastguard Worker       if (mul_instr->operands[0].isLiteral())
3796*61046927SAndroid Build Coastguard Worker          return;
3797*61046927SAndroid Build Coastguard Worker       if (mul_instr->valu().clamp)
3798*61046927SAndroid Build Coastguard Worker          return;
3799*61046927SAndroid Build Coastguard Worker       if (mul_instr->isSDWA() || mul_instr->isDPP())
3800*61046927SAndroid Build Coastguard Worker          return;
3801*61046927SAndroid Build Coastguard Worker       if (mul_instr->opcode == aco_opcode::v_mul_legacy_f32 &&
3802*61046927SAndroid Build Coastguard Worker           mul_instr->definitions[0].isSZPreserve())
3803*61046927SAndroid Build Coastguard Worker          return;
3804*61046927SAndroid Build Coastguard Worker       if (mul_instr->definitions[0].bytes() != instr->definitions[0].bytes())
3805*61046927SAndroid Build Coastguard Worker          return;
3806*61046927SAndroid Build Coastguard Worker 
3807*61046927SAndroid Build Coastguard Worker       /* convert to mul(neg(a), b), mul(abs(a), abs(b)) or mul(neg(abs(a)), abs(b)) */
3808*61046927SAndroid Build Coastguard Worker       ctx.uses[mul_instr->definitions[0].tempId()]--;
3809*61046927SAndroid Build Coastguard Worker       Definition def = instr->definitions[0];
3810*61046927SAndroid Build Coastguard Worker       bool is_neg = ctx.info[instr->definitions[0].tempId()].is_neg();
3811*61046927SAndroid Build Coastguard Worker       bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
3812*61046927SAndroid Build Coastguard Worker       uint32_t pass_flags = instr->pass_flags;
3813*61046927SAndroid Build Coastguard Worker       Format format = mul_instr->format == Format::VOP2 ? asVOP3(Format::VOP2) : mul_instr->format;
3814*61046927SAndroid Build Coastguard Worker       instr.reset(create_instruction(mul_instr->opcode, format, mul_instr->operands.size(), 1));
3815*61046927SAndroid Build Coastguard Worker       std::copy(mul_instr->operands.cbegin(), mul_instr->operands.cend(), instr->operands.begin());
3816*61046927SAndroid Build Coastguard Worker       instr->pass_flags = pass_flags;
3817*61046927SAndroid Build Coastguard Worker       instr->definitions[0] = def;
3818*61046927SAndroid Build Coastguard Worker       VALU_instruction& new_mul = instr->valu();
3819*61046927SAndroid Build Coastguard Worker       VALU_instruction& mul = mul_instr->valu();
3820*61046927SAndroid Build Coastguard Worker       new_mul.neg = mul.neg;
3821*61046927SAndroid Build Coastguard Worker       new_mul.abs = mul.abs;
3822*61046927SAndroid Build Coastguard Worker       new_mul.omod = mul.omod;
3823*61046927SAndroid Build Coastguard Worker       new_mul.opsel = mul.opsel;
3824*61046927SAndroid Build Coastguard Worker       new_mul.opsel_lo = mul.opsel_lo;
3825*61046927SAndroid Build Coastguard Worker       new_mul.opsel_hi = mul.opsel_hi;
3826*61046927SAndroid Build Coastguard Worker       if (is_abs) {
3827*61046927SAndroid Build Coastguard Worker          new_mul.neg[0] = new_mul.neg[1] = false;
3828*61046927SAndroid Build Coastguard Worker          new_mul.abs[0] = new_mul.abs[1] = true;
3829*61046927SAndroid Build Coastguard Worker       }
3830*61046927SAndroid Build Coastguard Worker       new_mul.neg[0] ^= is_neg;
3831*61046927SAndroid Build Coastguard Worker       new_mul.clamp = false;
3832*61046927SAndroid Build Coastguard Worker 
3833*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
3834*61046927SAndroid Build Coastguard Worker       return;
3835*61046927SAndroid Build Coastguard Worker    }
3836*61046927SAndroid Build Coastguard Worker 
3837*61046927SAndroid Build Coastguard Worker    /* combine mul+add -> mad */
3838*61046927SAndroid Build Coastguard Worker    bool is_add_mix =
3839*61046927SAndroid Build Coastguard Worker       (instr->opcode == aco_opcode::v_fma_mix_f32 ||
3840*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_fma_mixlo_f16) &&
3841*61046927SAndroid Build Coastguard Worker       !instr->valu().neg_lo[0] &&
3842*61046927SAndroid Build Coastguard Worker       ((instr->operands[0].constantEquals(0x3f800000) && !instr->valu().opsel_hi[0]) ||
3843*61046927SAndroid Build Coastguard Worker        (instr->operands[0].constantEquals(0x3C00) && instr->valu().opsel_hi[0] &&
3844*61046927SAndroid Build Coastguard Worker         !instr->valu().opsel_lo[0]));
3845*61046927SAndroid Build Coastguard Worker    bool mad32 = instr->opcode == aco_opcode::v_add_f32 || instr->opcode == aco_opcode::v_sub_f32 ||
3846*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::v_subrev_f32;
3847*61046927SAndroid Build Coastguard Worker    bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 ||
3848*61046927SAndroid Build Coastguard Worker                 instr->opcode == aco_opcode::v_subrev_f16;
3849*61046927SAndroid Build Coastguard Worker    bool mad64 =
3850*61046927SAndroid Build Coastguard Worker       instr->opcode == aco_opcode::v_add_f64_e64 || instr->opcode == aco_opcode::v_add_f64;
3851*61046927SAndroid Build Coastguard Worker    if (is_add_mix || mad16 || mad32 || mad64) {
3852*61046927SAndroid Build Coastguard Worker       Instruction* mul_instr = nullptr;
3853*61046927SAndroid Build Coastguard Worker       unsigned add_op_idx = 0;
3854*61046927SAndroid Build Coastguard Worker       uint32_t uses = UINT32_MAX;
3855*61046927SAndroid Build Coastguard Worker       bool emit_fma = false;
3856*61046927SAndroid Build Coastguard Worker       /* find the 'best' mul instruction to combine with the add */
3857*61046927SAndroid Build Coastguard Worker       for (unsigned i = is_add_mix ? 1 : 0; i < instr->operands.size(); i++) {
3858*61046927SAndroid Build Coastguard Worker          if (!instr->operands[i].isTemp() || !ctx.info[instr->operands[i].tempId()].is_mul())
3859*61046927SAndroid Build Coastguard Worker             continue;
3860*61046927SAndroid Build Coastguard Worker          ssa_info& info = ctx.info[instr->operands[i].tempId()];
3861*61046927SAndroid Build Coastguard Worker 
3862*61046927SAndroid Build Coastguard Worker          /* no clamp/omod allowed between mul and add */
3863*61046927SAndroid Build Coastguard Worker          if (info.instr->isVOP3() && (info.instr->valu().clamp || info.instr->valu().omod))
3864*61046927SAndroid Build Coastguard Worker             continue;
3865*61046927SAndroid Build Coastguard Worker          if (info.instr->isVOP3P() && info.instr->valu().clamp)
3866*61046927SAndroid Build Coastguard Worker             continue;
3867*61046927SAndroid Build Coastguard Worker          /* v_fma_mix_f32/etc can't do omod */
3868*61046927SAndroid Build Coastguard Worker          if (info.instr->isVOP3P() && instr->isVOP3() && instr->valu().omod)
3869*61046927SAndroid Build Coastguard Worker             continue;
3870*61046927SAndroid Build Coastguard Worker          /* don't promote fp16 to fp32 or remove fp32->fp16->fp32 conversions */
3871*61046927SAndroid Build Coastguard Worker          if (is_add_mix && info.instr->definitions[0].bytes() == 2)
3872*61046927SAndroid Build Coastguard Worker             continue;
3873*61046927SAndroid Build Coastguard Worker 
3874*61046927SAndroid Build Coastguard Worker          if (get_operand_size(instr, i) != info.instr->definitions[0].bytes() * 8)
3875*61046927SAndroid Build Coastguard Worker             continue;
3876*61046927SAndroid Build Coastguard Worker 
3877*61046927SAndroid Build Coastguard Worker          bool legacy = info.instr->opcode == aco_opcode::v_mul_legacy_f32;
3878*61046927SAndroid Build Coastguard Worker          bool mad_mix = is_add_mix || info.instr->isVOP3P();
3879*61046927SAndroid Build Coastguard Worker 
3880*61046927SAndroid Build Coastguard Worker          /* Multiplication by power-of-two should never need rounding. 1/power-of-two also works,
3881*61046927SAndroid Build Coastguard Worker           * but using fma removes denormal flushing (0xfffffe * 0.5 + 0x810001a2).
3882*61046927SAndroid Build Coastguard Worker           */
3883*61046927SAndroid Build Coastguard Worker          bool is_fma_precise = is_pow_of_two(ctx, info.instr->operands[0]) ||
3884*61046927SAndroid Build Coastguard Worker                                is_pow_of_two(ctx, info.instr->operands[1]);
3885*61046927SAndroid Build Coastguard Worker 
3886*61046927SAndroid Build Coastguard Worker          bool has_fma = mad16 || mad64 || (legacy && ctx.program->gfx_level >= GFX10_3) ||
3887*61046927SAndroid Build Coastguard Worker                         (mad32 && !legacy && !mad_mix && ctx.program->dev.has_fast_fma32) ||
3888*61046927SAndroid Build Coastguard Worker                         (mad_mix && ctx.program->dev.fused_mad_mix);
3889*61046927SAndroid Build Coastguard Worker          bool has_mad = mad_mix ? !ctx.program->dev.fused_mad_mix
3890*61046927SAndroid Build Coastguard Worker                                 : ((mad32 && ctx.program->gfx_level < GFX10_3) ||
3891*61046927SAndroid Build Coastguard Worker                                    (mad16 && ctx.program->gfx_level <= GFX9));
3892*61046927SAndroid Build Coastguard Worker          bool can_use_fma =
3893*61046927SAndroid Build Coastguard Worker             has_fma &&
3894*61046927SAndroid Build Coastguard Worker             (!(info.instr->definitions[0].isPrecise() || instr->definitions[0].isPrecise()) ||
3895*61046927SAndroid Build Coastguard Worker              is_fma_precise);
3896*61046927SAndroid Build Coastguard Worker          bool can_use_mad =
3897*61046927SAndroid Build Coastguard Worker             has_mad && (mad_mix || mad32 ? ctx.fp_mode.denorm32 : ctx.fp_mode.denorm16_64) == 0;
3898*61046927SAndroid Build Coastguard Worker          if (mad_mix && legacy)
3899*61046927SAndroid Build Coastguard Worker             continue;
3900*61046927SAndroid Build Coastguard Worker          if (!can_use_fma && !can_use_mad)
3901*61046927SAndroid Build Coastguard Worker             continue;
3902*61046927SAndroid Build Coastguard Worker 
3903*61046927SAndroid Build Coastguard Worker          unsigned candidate_add_op_idx = is_add_mix ? (3 - i) : (1 - i);
3904*61046927SAndroid Build Coastguard Worker          Operand op[3] = {info.instr->operands[0], info.instr->operands[1],
3905*61046927SAndroid Build Coastguard Worker                           instr->operands[candidate_add_op_idx]};
3906*61046927SAndroid Build Coastguard Worker          if (info.instr->isSDWA() || info.instr->isDPP() || !check_vop3_operands(ctx, 3, op) ||
3907*61046927SAndroid Build Coastguard Worker              ctx.uses[instr->operands[i].tempId()] > uses)
3908*61046927SAndroid Build Coastguard Worker             continue;
3909*61046927SAndroid Build Coastguard Worker 
3910*61046927SAndroid Build Coastguard Worker          if (ctx.uses[instr->operands[i].tempId()] == uses) {
3911*61046927SAndroid Build Coastguard Worker             unsigned cur_idx = mul_instr->definitions[0].tempId();
3912*61046927SAndroid Build Coastguard Worker             unsigned new_idx = info.instr->definitions[0].tempId();
3913*61046927SAndroid Build Coastguard Worker             if (cur_idx > new_idx)
3914*61046927SAndroid Build Coastguard Worker                continue;
3915*61046927SAndroid Build Coastguard Worker          }
3916*61046927SAndroid Build Coastguard Worker 
3917*61046927SAndroid Build Coastguard Worker          mul_instr = info.instr;
3918*61046927SAndroid Build Coastguard Worker          add_op_idx = candidate_add_op_idx;
3919*61046927SAndroid Build Coastguard Worker          uses = ctx.uses[instr->operands[i].tempId()];
3920*61046927SAndroid Build Coastguard Worker          emit_fma = !can_use_mad;
3921*61046927SAndroid Build Coastguard Worker       }
3922*61046927SAndroid Build Coastguard Worker 
3923*61046927SAndroid Build Coastguard Worker       if (mul_instr) {
3924*61046927SAndroid Build Coastguard Worker          /* turn mul+add into v_mad/v_fma */
3925*61046927SAndroid Build Coastguard Worker          Operand op[3] = {mul_instr->operands[0], mul_instr->operands[1],
3926*61046927SAndroid Build Coastguard Worker                           instr->operands[add_op_idx]};
3927*61046927SAndroid Build Coastguard Worker          ctx.uses[mul_instr->definitions[0].tempId()]--;
3928*61046927SAndroid Build Coastguard Worker          if (ctx.uses[mul_instr->definitions[0].tempId()]) {
3929*61046927SAndroid Build Coastguard Worker             if (op[0].isTemp())
3930*61046927SAndroid Build Coastguard Worker                ctx.uses[op[0].tempId()]++;
3931*61046927SAndroid Build Coastguard Worker             if (op[1].isTemp())
3932*61046927SAndroid Build Coastguard Worker                ctx.uses[op[1].tempId()]++;
3933*61046927SAndroid Build Coastguard Worker          }
3934*61046927SAndroid Build Coastguard Worker 
3935*61046927SAndroid Build Coastguard Worker          bool neg[3] = {false, false, false};
3936*61046927SAndroid Build Coastguard Worker          bool abs[3] = {false, false, false};
3937*61046927SAndroid Build Coastguard Worker          unsigned omod = 0;
3938*61046927SAndroid Build Coastguard Worker          bool clamp = false;
3939*61046927SAndroid Build Coastguard Worker          bitarray8 opsel_lo = 0;
3940*61046927SAndroid Build Coastguard Worker          bitarray8 opsel_hi = 0;
3941*61046927SAndroid Build Coastguard Worker          bitarray8 opsel = 0;
3942*61046927SAndroid Build Coastguard Worker          unsigned mul_op_idx = (instr->isVOP3P() ? 3 : 1) - add_op_idx;
3943*61046927SAndroid Build Coastguard Worker 
3944*61046927SAndroid Build Coastguard Worker          VALU_instruction& valu_mul = mul_instr->valu();
3945*61046927SAndroid Build Coastguard Worker          neg[0] = valu_mul.neg[0];
3946*61046927SAndroid Build Coastguard Worker          neg[1] = valu_mul.neg[1];
3947*61046927SAndroid Build Coastguard Worker          abs[0] = valu_mul.abs[0];
3948*61046927SAndroid Build Coastguard Worker          abs[1] = valu_mul.abs[1];
3949*61046927SAndroid Build Coastguard Worker          opsel_lo = valu_mul.opsel_lo & 0x3;
3950*61046927SAndroid Build Coastguard Worker          opsel_hi = valu_mul.opsel_hi & 0x3;
3951*61046927SAndroid Build Coastguard Worker          opsel = valu_mul.opsel & 0x3;
3952*61046927SAndroid Build Coastguard Worker 
3953*61046927SAndroid Build Coastguard Worker          VALU_instruction& valu = instr->valu();
3954*61046927SAndroid Build Coastguard Worker          neg[2] = valu.neg[add_op_idx];
3955*61046927SAndroid Build Coastguard Worker          abs[2] = valu.abs[add_op_idx];
3956*61046927SAndroid Build Coastguard Worker          opsel_lo[2] = valu.opsel_lo[add_op_idx];
3957*61046927SAndroid Build Coastguard Worker          opsel_hi[2] = valu.opsel_hi[add_op_idx];
3958*61046927SAndroid Build Coastguard Worker          opsel[2] = valu.opsel[add_op_idx];
3959*61046927SAndroid Build Coastguard Worker          opsel[3] = valu.opsel[3];
3960*61046927SAndroid Build Coastguard Worker          omod = valu.omod;
3961*61046927SAndroid Build Coastguard Worker          clamp = valu.clamp;
3962*61046927SAndroid Build Coastguard Worker          /* abs of the multiplication result */
3963*61046927SAndroid Build Coastguard Worker          if (valu.abs[mul_op_idx]) {
3964*61046927SAndroid Build Coastguard Worker             neg[0] = false;
3965*61046927SAndroid Build Coastguard Worker             neg[1] = false;
3966*61046927SAndroid Build Coastguard Worker             abs[0] = true;
3967*61046927SAndroid Build Coastguard Worker             abs[1] = true;
3968*61046927SAndroid Build Coastguard Worker          }
3969*61046927SAndroid Build Coastguard Worker          /* neg of the multiplication result */
3970*61046927SAndroid Build Coastguard Worker          neg[1] ^= valu.neg[mul_op_idx];
3971*61046927SAndroid Build Coastguard Worker 
3972*61046927SAndroid Build Coastguard Worker          if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16)
3973*61046927SAndroid Build Coastguard Worker             neg[1 + add_op_idx] = neg[1 + add_op_idx] ^ true;
3974*61046927SAndroid Build Coastguard Worker          else if (instr->opcode == aco_opcode::v_subrev_f32 ||
3975*61046927SAndroid Build Coastguard Worker                   instr->opcode == aco_opcode::v_subrev_f16)
3976*61046927SAndroid Build Coastguard Worker             neg[2 - add_op_idx] = neg[2 - add_op_idx] ^ true;
3977*61046927SAndroid Build Coastguard Worker 
3978*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> add_instr = std::move(instr);
3979*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> mad;
3980*61046927SAndroid Build Coastguard Worker          if (add_instr->isVOP3P() || mul_instr->isVOP3P()) {
3981*61046927SAndroid Build Coastguard Worker             assert(!omod);
3982*61046927SAndroid Build Coastguard Worker             assert(!opsel);
3983*61046927SAndroid Build Coastguard Worker 
3984*61046927SAndroid Build Coastguard Worker             aco_opcode mad_op = add_instr->definitions[0].bytes() == 2 ? aco_opcode::v_fma_mixlo_f16
3985*61046927SAndroid Build Coastguard Worker                                                                        : aco_opcode::v_fma_mix_f32;
3986*61046927SAndroid Build Coastguard Worker             mad.reset(create_instruction(mad_op, Format::VOP3P, 3, 1));
3987*61046927SAndroid Build Coastguard Worker          } else {
3988*61046927SAndroid Build Coastguard Worker             assert(!opsel_lo);
3989*61046927SAndroid Build Coastguard Worker             assert(!opsel_hi);
3990*61046927SAndroid Build Coastguard Worker 
3991*61046927SAndroid Build Coastguard Worker             aco_opcode mad_op = emit_fma ? aco_opcode::v_fma_f32 : aco_opcode::v_mad_f32;
3992*61046927SAndroid Build Coastguard Worker             if (mul_instr->opcode == aco_opcode::v_mul_legacy_f32) {
3993*61046927SAndroid Build Coastguard Worker                assert(emit_fma == (ctx.program->gfx_level >= GFX10_3));
3994*61046927SAndroid Build Coastguard Worker                mad_op = emit_fma ? aco_opcode::v_fma_legacy_f32 : aco_opcode::v_mad_legacy_f32;
3995*61046927SAndroid Build Coastguard Worker             } else if (mad16) {
3996*61046927SAndroid Build Coastguard Worker                mad_op = emit_fma ? (ctx.program->gfx_level == GFX8 ? aco_opcode::v_fma_legacy_f16
3997*61046927SAndroid Build Coastguard Worker                                                                    : aco_opcode::v_fma_f16)
3998*61046927SAndroid Build Coastguard Worker                                  : (ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_f16
3999*61046927SAndroid Build Coastguard Worker                                                                    : aco_opcode::v_mad_f16);
4000*61046927SAndroid Build Coastguard Worker             } else if (mad64) {
4001*61046927SAndroid Build Coastguard Worker                mad_op = aco_opcode::v_fma_f64;
4002*61046927SAndroid Build Coastguard Worker             }
4003*61046927SAndroid Build Coastguard Worker 
4004*61046927SAndroid Build Coastguard Worker             mad.reset(create_instruction(mad_op, Format::VOP3, 3, 1));
4005*61046927SAndroid Build Coastguard Worker          }
4006*61046927SAndroid Build Coastguard Worker 
4007*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < 3; i++) {
4008*61046927SAndroid Build Coastguard Worker             mad->operands[i] = op[i];
4009*61046927SAndroid Build Coastguard Worker             mad->valu().neg[i] = neg[i];
4010*61046927SAndroid Build Coastguard Worker             mad->valu().abs[i] = abs[i];
4011*61046927SAndroid Build Coastguard Worker          }
4012*61046927SAndroid Build Coastguard Worker          mad->valu().omod = omod;
4013*61046927SAndroid Build Coastguard Worker          mad->valu().clamp = clamp;
4014*61046927SAndroid Build Coastguard Worker          mad->valu().opsel_lo = opsel_lo;
4015*61046927SAndroid Build Coastguard Worker          mad->valu().opsel_hi = opsel_hi;
4016*61046927SAndroid Build Coastguard Worker          mad->valu().opsel = opsel;
4017*61046927SAndroid Build Coastguard Worker          mad->definitions[0] = add_instr->definitions[0];
4018*61046927SAndroid Build Coastguard Worker          mad->definitions[0].setPrecise(add_instr->definitions[0].isPrecise() ||
4019*61046927SAndroid Build Coastguard Worker                                         mul_instr->definitions[0].isPrecise());
4020*61046927SAndroid Build Coastguard Worker          mad->pass_flags = add_instr->pass_flags;
4021*61046927SAndroid Build Coastguard Worker 
4022*61046927SAndroid Build Coastguard Worker          instr = std::move(mad);
4023*61046927SAndroid Build Coastguard Worker 
4024*61046927SAndroid Build Coastguard Worker          /* mark this ssa_def to be re-checked for profitability and literals */
4025*61046927SAndroid Build Coastguard Worker          ctx.mad_infos.emplace_back(std::move(add_instr), mul_instr->definitions[0].tempId());
4026*61046927SAndroid Build Coastguard Worker          ctx.info[instr->definitions[0].tempId()].set_mad(ctx.mad_infos.size() - 1);
4027*61046927SAndroid Build Coastguard Worker          return;
4028*61046927SAndroid Build Coastguard Worker       }
4029*61046927SAndroid Build Coastguard Worker    }
4030*61046927SAndroid Build Coastguard Worker    /* v_mul_f32(v_cndmask_b32(0, 1.0, cond), a) -> v_cndmask_b32(0, a, cond) */
4031*61046927SAndroid Build Coastguard Worker    else if (((instr->opcode == aco_opcode::v_mul_f32 && !instr->definitions[0].isNaNPreserve() &&
4032*61046927SAndroid Build Coastguard Worker               !instr->definitions[0].isInfPreserve()) ||
4033*61046927SAndroid Build Coastguard Worker              (instr->opcode == aco_opcode::v_mul_legacy_f32 &&
4034*61046927SAndroid Build Coastguard Worker               !instr->definitions[0].isSZPreserve())) &&
4035*61046927SAndroid Build Coastguard Worker             !instr->usesModifiers() && !ctx.fp_mode.must_flush_denorms32) {
4036*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 2; i++) {
4037*61046927SAndroid Build Coastguard Worker          if (instr->operands[i].isTemp() && ctx.info[instr->operands[i].tempId()].is_b2f() &&
4038*61046927SAndroid Build Coastguard Worker              ctx.uses[instr->operands[i].tempId()] == 1 && instr->operands[!i].isTemp() &&
4039*61046927SAndroid Build Coastguard Worker              instr->operands[!i].getTemp().type() == RegType::vgpr) {
4040*61046927SAndroid Build Coastguard Worker             ctx.uses[instr->operands[i].tempId()]--;
4041*61046927SAndroid Build Coastguard Worker             ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
4042*61046927SAndroid Build Coastguard Worker 
4043*61046927SAndroid Build Coastguard Worker             aco_ptr<Instruction> new_instr{
4044*61046927SAndroid Build Coastguard Worker                create_instruction(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
4045*61046927SAndroid Build Coastguard Worker             new_instr->operands[0] = Operand::zero();
4046*61046927SAndroid Build Coastguard Worker             new_instr->operands[1] = instr->operands[!i];
4047*61046927SAndroid Build Coastguard Worker             new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
4048*61046927SAndroid Build Coastguard Worker             new_instr->definitions[0] = instr->definitions[0];
4049*61046927SAndroid Build Coastguard Worker             new_instr->pass_flags = instr->pass_flags;
4050*61046927SAndroid Build Coastguard Worker             instr = std::move(new_instr);
4051*61046927SAndroid Build Coastguard Worker             ctx.info[instr->definitions[0].tempId()].label = 0;
4052*61046927SAndroid Build Coastguard Worker             return;
4053*61046927SAndroid Build Coastguard Worker          }
4054*61046927SAndroid Build Coastguard Worker       }
4055*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_or_b32 && ctx.program->gfx_level >= GFX9) {
4056*61046927SAndroid Build Coastguard Worker       if (combine_three_valu_op(ctx, instr, aco_opcode::s_or_b32, aco_opcode::v_or3_b32, "012",
4057*61046927SAndroid Build Coastguard Worker                                 1 | 2)) {
4058*61046927SAndroid Build Coastguard Worker       } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32,
4059*61046927SAndroid Build Coastguard Worker                                        "012", 1 | 2)) {
4060*61046927SAndroid Build Coastguard Worker       } else if (combine_add_or_then_and_lshl(ctx, instr)) {
4061*61046927SAndroid Build Coastguard Worker       } else if (combine_v_andor_not(ctx, instr)) {
4062*61046927SAndroid Build Coastguard Worker       }
4063*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) {
4064*61046927SAndroid Build Coastguard Worker       if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012",
4065*61046927SAndroid Build Coastguard Worker                                 1 | 2)) {
4066*61046927SAndroid Build Coastguard Worker       } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xor3_b32,
4067*61046927SAndroid Build Coastguard Worker                                        "012", 1 | 2)) {
4068*61046927SAndroid Build Coastguard Worker       } else if (combine_xor_not(ctx, instr)) {
4069*61046927SAndroid Build Coastguard Worker       }
4070*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level >= GFX10) {
4071*61046927SAndroid Build Coastguard Worker       combine_not_xor(ctx, instr);
4072*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_add_u16 && !instr->valu().clamp) {
4073*61046927SAndroid Build Coastguard Worker       combine_three_valu_op(
4074*61046927SAndroid Build Coastguard Worker          ctx, instr, aco_opcode::v_mul_lo_u16,
4075*61046927SAndroid Build Coastguard Worker          ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_u16 : aco_opcode::v_mad_u16,
4076*61046927SAndroid Build Coastguard Worker          "120", 1 | 2);
4077*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_add_u16_e64 && !instr->valu().clamp) {
4078*61046927SAndroid Build Coastguard Worker       combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120",
4079*61046927SAndroid Build Coastguard Worker                             1 | 2);
4080*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_add_u32 && !instr->usesModifiers()) {
4081*61046927SAndroid Build Coastguard Worker       if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
4082*61046927SAndroid Build Coastguard Worker       } else if (combine_add_bcnt(ctx, instr)) {
4083*61046927SAndroid Build Coastguard Worker       } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
4084*61046927SAndroid Build Coastguard Worker                                        aco_opcode::v_mad_u32_u24, "120", 1 | 2)) {
4085*61046927SAndroid Build Coastguard Worker       } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_i32_i24,
4086*61046927SAndroid Build Coastguard Worker                                        aco_opcode::v_mad_i32_i24, "120", 1 | 2)) {
4087*61046927SAndroid Build Coastguard Worker       } else if (ctx.program->gfx_level >= GFX9) {
4088*61046927SAndroid Build Coastguard Worker          if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120",
4089*61046927SAndroid Build Coastguard Worker                                    1 | 2)) {
4090*61046927SAndroid Build Coastguard Worker          } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32,
4091*61046927SAndroid Build Coastguard Worker                                           "120", 1 | 2)) {
4092*61046927SAndroid Build Coastguard Worker          } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_i32, aco_opcode::v_add3_u32,
4093*61046927SAndroid Build Coastguard Worker                                           "012", 1 | 2)) {
4094*61046927SAndroid Build Coastguard Worker          } else if (combine_three_valu_op(ctx, instr, aco_opcode::s_add_u32, aco_opcode::v_add3_u32,
4095*61046927SAndroid Build Coastguard Worker                                           "012", 1 | 2)) {
4096*61046927SAndroid Build Coastguard Worker          } else if (combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add3_u32,
4097*61046927SAndroid Build Coastguard Worker                                           "012", 1 | 2)) {
4098*61046927SAndroid Build Coastguard Worker          } else if (combine_add_or_then_and_lshl(ctx, instr)) {
4099*61046927SAndroid Build Coastguard Worker          }
4100*61046927SAndroid Build Coastguard Worker       }
4101*61046927SAndroid Build Coastguard Worker    } else if ((instr->opcode == aco_opcode::v_add_co_u32 ||
4102*61046927SAndroid Build Coastguard Worker                instr->opcode == aco_opcode::v_add_co_u32_e64) &&
4103*61046927SAndroid Build Coastguard Worker               !instr->usesModifiers()) {
4104*61046927SAndroid Build Coastguard Worker       bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0;
4105*61046927SAndroid Build Coastguard Worker       if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
4106*61046927SAndroid Build Coastguard Worker       } else if (!carry_out && combine_add_bcnt(ctx, instr)) {
4107*61046927SAndroid Build Coastguard Worker       } else if (!carry_out && combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
4108*61046927SAndroid Build Coastguard Worker                                                      aco_opcode::v_mad_u32_u24, "120", 1 | 2)) {
4109*61046927SAndroid Build Coastguard Worker       } else if (!carry_out && combine_three_valu_op(ctx, instr, aco_opcode::v_mul_i32_i24,
4110*61046927SAndroid Build Coastguard Worker                                                      aco_opcode::v_mad_i32_i24, "120", 1 | 2)) {
4111*61046927SAndroid Build Coastguard Worker       } else if (!carry_out && combine_add_lshl(ctx, instr, false)) {
4112*61046927SAndroid Build Coastguard Worker       }
4113*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_sub_u32 || instr->opcode == aco_opcode::v_sub_co_u32 ||
4114*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::v_sub_co_u32_e64) {
4115*61046927SAndroid Build Coastguard Worker       bool carry_out =
4116*61046927SAndroid Build Coastguard Worker          instr->opcode != aco_opcode::v_sub_u32 && ctx.uses[instr->definitions[1].tempId()] > 0;
4117*61046927SAndroid Build Coastguard Worker       if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 2)) {
4118*61046927SAndroid Build Coastguard Worker       } else if (!carry_out && combine_add_lshl(ctx, instr, true)) {
4119*61046927SAndroid Build Coastguard Worker       }
4120*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_subrev_u32 ||
4121*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::v_subrev_co_u32 ||
4122*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::v_subrev_co_u32_e64) {
4123*61046927SAndroid Build Coastguard Worker       combine_add_sub_b2i(ctx, instr, aco_opcode::v_subbrev_co_u32, 1);
4124*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_lshlrev_b32 && ctx.program->gfx_level >= GFX9) {
4125*61046927SAndroid Build Coastguard Worker       combine_three_valu_op(ctx, instr, aco_opcode::v_add_u32, aco_opcode::v_add_lshl_u32, "120",
4126*61046927SAndroid Build Coastguard Worker                             2);
4127*61046927SAndroid Build Coastguard Worker    } else if ((instr->opcode == aco_opcode::s_add_u32 || instr->opcode == aco_opcode::s_add_i32) &&
4128*61046927SAndroid Build Coastguard Worker               ctx.program->gfx_level >= GFX9) {
4129*61046927SAndroid Build Coastguard Worker       combine_salu_lshl_add(ctx, instr);
4130*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_not_b32 || instr->opcode == aco_opcode::s_not_b64) {
4131*61046927SAndroid Build Coastguard Worker       if (!combine_salu_not_bitwise(ctx, instr))
4132*61046927SAndroid Build Coastguard Worker          combine_inverse_comparison(ctx, instr);
4133*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_or_b32 ||
4134*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::s_and_b64 || instr->opcode == aco_opcode::s_or_b64) {
4135*61046927SAndroid Build Coastguard Worker       combine_salu_n2(ctx, instr);
4136*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_abs_i32) {
4137*61046927SAndroid Build Coastguard Worker       combine_sabsdiff(ctx, instr);
4138*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_and_b32) {
4139*61046927SAndroid Build Coastguard Worker       if (combine_and_subbrev(ctx, instr)) {
4140*61046927SAndroid Build Coastguard Worker       } else if (combine_v_andor_not(ctx, instr)) {
4141*61046927SAndroid Build Coastguard Worker       }
4142*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) {
4143*61046927SAndroid Build Coastguard Worker       /* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32.
4144*61046927SAndroid Build Coastguard Worker        * since ctx.uses[mad_info::mul_temp_id] is always 0, we don't have to worry about
4145*61046927SAndroid Build Coastguard Worker        * select_instruction() using mad_info::add_instr.
4146*61046927SAndroid Build Coastguard Worker        */
4147*61046927SAndroid Build Coastguard Worker       ctx.mad_infos.emplace_back(nullptr, 0);
4148*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].set_mad(ctx.mad_infos.size() - 1);
4149*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::v_med3_f32 || instr->opcode == aco_opcode::v_med3_f16) {
4150*61046927SAndroid Build Coastguard Worker       /* Optimize v_med3 to v_add so that it can be dual issued on GFX11. We start with v_med3 in
4151*61046927SAndroid Build Coastguard Worker        * case omod can be applied.
4152*61046927SAndroid Build Coastguard Worker        */
4153*61046927SAndroid Build Coastguard Worker       unsigned idx;
4154*61046927SAndroid Build Coastguard Worker       if (detect_clamp(instr.get(), &idx)) {
4155*61046927SAndroid Build Coastguard Worker          instr->format = asVOP3(Format::VOP2);
4156*61046927SAndroid Build Coastguard Worker          instr->operands[0] = instr->operands[idx];
4157*61046927SAndroid Build Coastguard Worker          instr->operands[1] = Operand::zero();
4158*61046927SAndroid Build Coastguard Worker          instr->opcode =
4159*61046927SAndroid Build Coastguard Worker             instr->opcode == aco_opcode::v_med3_f32 ? aco_opcode::v_add_f32 : aco_opcode::v_add_f16;
4160*61046927SAndroid Build Coastguard Worker          instr->valu().clamp = true;
4161*61046927SAndroid Build Coastguard Worker          instr->valu().abs = (uint8_t)instr->valu().abs[idx];
4162*61046927SAndroid Build Coastguard Worker          instr->valu().neg = (uint8_t)instr->valu().neg[idx];
4163*61046927SAndroid Build Coastguard Worker          instr->operands.pop_back();
4164*61046927SAndroid Build Coastguard Worker       }
4165*61046927SAndroid Build Coastguard Worker    } else {
4166*61046927SAndroid Build Coastguard Worker       aco_opcode min, max, min3, max3, med3, minmax;
4167*61046927SAndroid Build Coastguard Worker       bool some_gfx9_only;
4168*61046927SAndroid Build Coastguard Worker       if (get_minmax_info(instr->opcode, &min, &max, &min3, &max3, &med3, &minmax,
4169*61046927SAndroid Build Coastguard Worker                           &some_gfx9_only) &&
4170*61046927SAndroid Build Coastguard Worker           (!some_gfx9_only || ctx.program->gfx_level >= GFX9)) {
4171*61046927SAndroid Build Coastguard Worker          if (combine_minmax(ctx, instr, instr->opcode == min ? max : min,
4172*61046927SAndroid Build Coastguard Worker                             instr->opcode == min ? min3 : max3, minmax)) {
4173*61046927SAndroid Build Coastguard Worker          } else {
4174*61046927SAndroid Build Coastguard Worker             combine_clamp(ctx, instr, min, max, med3);
4175*61046927SAndroid Build Coastguard Worker          }
4176*61046927SAndroid Build Coastguard Worker       }
4177*61046927SAndroid Build Coastguard Worker    }
4178*61046927SAndroid Build Coastguard Worker }
4179*61046927SAndroid Build Coastguard Worker 
4180*61046927SAndroid Build Coastguard Worker struct remat_entry {
4181*61046927SAndroid Build Coastguard Worker    Instruction* instr;
4182*61046927SAndroid Build Coastguard Worker    uint32_t block;
4183*61046927SAndroid Build Coastguard Worker };
4184*61046927SAndroid Build Coastguard Worker 
4185*61046927SAndroid Build Coastguard Worker inline bool
is_constant(Instruction * instr)4186*61046927SAndroid Build Coastguard Worker is_constant(Instruction* instr)
4187*61046927SAndroid Build Coastguard Worker {
4188*61046927SAndroid Build Coastguard Worker    if (instr->opcode != aco_opcode::p_parallelcopy || instr->operands.size() != 1)
4189*61046927SAndroid Build Coastguard Worker       return false;
4190*61046927SAndroid Build Coastguard Worker 
4191*61046927SAndroid Build Coastguard Worker    return instr->operands[0].isConstant() && instr->definitions[0].isTemp();
4192*61046927SAndroid Build Coastguard Worker }
4193*61046927SAndroid Build Coastguard Worker 
4194*61046927SAndroid Build Coastguard Worker void
remat_constants_instr(opt_ctx & ctx,aco::map<Temp,remat_entry> & constants,Instruction * instr,uint32_t block_idx)4195*61046927SAndroid Build Coastguard Worker remat_constants_instr(opt_ctx& ctx, aco::map<Temp, remat_entry>& constants, Instruction* instr,
4196*61046927SAndroid Build Coastguard Worker                       uint32_t block_idx)
4197*61046927SAndroid Build Coastguard Worker {
4198*61046927SAndroid Build Coastguard Worker    for (Operand& op : instr->operands) {
4199*61046927SAndroid Build Coastguard Worker       if (!op.isTemp())
4200*61046927SAndroid Build Coastguard Worker          continue;
4201*61046927SAndroid Build Coastguard Worker 
4202*61046927SAndroid Build Coastguard Worker       auto it = constants.find(op.getTemp());
4203*61046927SAndroid Build Coastguard Worker       if (it == constants.end())
4204*61046927SAndroid Build Coastguard Worker          continue;
4205*61046927SAndroid Build Coastguard Worker 
4206*61046927SAndroid Build Coastguard Worker       /* Check if we already emitted the same constant in this block. */
4207*61046927SAndroid Build Coastguard Worker       if (it->second.block != block_idx) {
4208*61046927SAndroid Build Coastguard Worker          /* Rematerialize the constant. */
4209*61046927SAndroid Build Coastguard Worker          Builder bld(ctx.program, &ctx.instructions);
4210*61046927SAndroid Build Coastguard Worker          Operand const_op = it->second.instr->operands[0];
4211*61046927SAndroid Build Coastguard Worker          it->second.instr = bld.copy(bld.def(op.regClass()), const_op);
4212*61046927SAndroid Build Coastguard Worker          it->second.block = block_idx;
4213*61046927SAndroid Build Coastguard Worker          ctx.uses.push_back(0);
4214*61046927SAndroid Build Coastguard Worker          ctx.info.push_back(ctx.info[op.tempId()]);
4215*61046927SAndroid Build Coastguard Worker       }
4216*61046927SAndroid Build Coastguard Worker 
4217*61046927SAndroid Build Coastguard Worker       /* Use the rematerialized constant and update information about latest use. */
4218*61046927SAndroid Build Coastguard Worker       if (op.getTemp() != it->second.instr->definitions[0].getTemp()) {
4219*61046927SAndroid Build Coastguard Worker          ctx.uses[op.tempId()]--;
4220*61046927SAndroid Build Coastguard Worker          op.setTemp(it->second.instr->definitions[0].getTemp());
4221*61046927SAndroid Build Coastguard Worker          ctx.uses[op.tempId()]++;
4222*61046927SAndroid Build Coastguard Worker       }
4223*61046927SAndroid Build Coastguard Worker    }
4224*61046927SAndroid Build Coastguard Worker }
4225*61046927SAndroid Build Coastguard Worker 
4226*61046927SAndroid Build Coastguard Worker /**
4227*61046927SAndroid Build Coastguard Worker  * This pass implements a simple constant rematerialization.
4228*61046927SAndroid Build Coastguard Worker  * As common subexpression elimination (CSE) might increase the live-ranges
4229*61046927SAndroid Build Coastguard Worker  * of loaded constants over large distances, this pass splits the live-ranges
4230*61046927SAndroid Build Coastguard Worker  * again by re-emitting constants in every basic block.
4231*61046927SAndroid Build Coastguard Worker  */
4232*61046927SAndroid Build Coastguard Worker void
rematerialize_constants(opt_ctx & ctx)4233*61046927SAndroid Build Coastguard Worker rematerialize_constants(opt_ctx& ctx)
4234*61046927SAndroid Build Coastguard Worker {
4235*61046927SAndroid Build Coastguard Worker    aco::monotonic_buffer_resource memory(1024);
4236*61046927SAndroid Build Coastguard Worker    aco::map<Temp, remat_entry> constants(memory);
4237*61046927SAndroid Build Coastguard Worker 
4238*61046927SAndroid Build Coastguard Worker    for (Block& block : ctx.program->blocks) {
4239*61046927SAndroid Build Coastguard Worker       if (block.logical_idom == -1)
4240*61046927SAndroid Build Coastguard Worker          continue;
4241*61046927SAndroid Build Coastguard Worker 
4242*61046927SAndroid Build Coastguard Worker       if (block.logical_idom == (int)block.index)
4243*61046927SAndroid Build Coastguard Worker          constants.clear();
4244*61046927SAndroid Build Coastguard Worker 
4245*61046927SAndroid Build Coastguard Worker       ctx.instructions.reserve(block.instructions.size());
4246*61046927SAndroid Build Coastguard Worker 
4247*61046927SAndroid Build Coastguard Worker       for (aco_ptr<Instruction>& instr : block.instructions) {
4248*61046927SAndroid Build Coastguard Worker          if (is_dead(ctx.uses, instr.get()))
4249*61046927SAndroid Build Coastguard Worker             continue;
4250*61046927SAndroid Build Coastguard Worker 
4251*61046927SAndroid Build Coastguard Worker          if (is_constant(instr.get())) {
4252*61046927SAndroid Build Coastguard Worker             Temp tmp = instr->definitions[0].getTemp();
4253*61046927SAndroid Build Coastguard Worker             constants[tmp] = {instr.get(), block.index};
4254*61046927SAndroid Build Coastguard Worker          } else if (!is_phi(instr)) {
4255*61046927SAndroid Build Coastguard Worker             remat_constants_instr(ctx, constants, instr.get(), block.index);
4256*61046927SAndroid Build Coastguard Worker          }
4257*61046927SAndroid Build Coastguard Worker 
4258*61046927SAndroid Build Coastguard Worker          ctx.instructions.emplace_back(instr.release());
4259*61046927SAndroid Build Coastguard Worker       }
4260*61046927SAndroid Build Coastguard Worker 
4261*61046927SAndroid Build Coastguard Worker       block.instructions = std::move(ctx.instructions);
4262*61046927SAndroid Build Coastguard Worker    }
4263*61046927SAndroid Build Coastguard Worker }
4264*61046927SAndroid Build Coastguard Worker 
4265*61046927SAndroid Build Coastguard Worker bool
to_uniform_bool_instr(opt_ctx & ctx,aco_ptr<Instruction> & instr)4266*61046927SAndroid Build Coastguard Worker to_uniform_bool_instr(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4267*61046927SAndroid Build Coastguard Worker {
4268*61046927SAndroid Build Coastguard Worker    /* Check every operand to make sure they are suitable. */
4269*61046927SAndroid Build Coastguard Worker    for (Operand& op : instr->operands) {
4270*61046927SAndroid Build Coastguard Worker       if (!op.isTemp())
4271*61046927SAndroid Build Coastguard Worker          return false;
4272*61046927SAndroid Build Coastguard Worker       if (!ctx.info[op.tempId()].is_uniform_bool() && !ctx.info[op.tempId()].is_uniform_bitwise())
4273*61046927SAndroid Build Coastguard Worker          return false;
4274*61046927SAndroid Build Coastguard Worker    }
4275*61046927SAndroid Build Coastguard Worker 
4276*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
4277*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b32:
4278*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_and_b32; break;
4279*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b32:
4280*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_or_b32; break;
4281*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b32:
4282*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_xor_b64: instr->opcode = aco_opcode::s_absdiff_i32; break;
4283*61046927SAndroid Build Coastguard Worker    default:
4284*61046927SAndroid Build Coastguard Worker       /* Don't transform other instructions. They are very unlikely to appear here. */
4285*61046927SAndroid Build Coastguard Worker       return false;
4286*61046927SAndroid Build Coastguard Worker    }
4287*61046927SAndroid Build Coastguard Worker 
4288*61046927SAndroid Build Coastguard Worker    for (Operand& op : instr->operands) {
4289*61046927SAndroid Build Coastguard Worker       ctx.uses[op.tempId()]--;
4290*61046927SAndroid Build Coastguard Worker 
4291*61046927SAndroid Build Coastguard Worker       if (ctx.info[op.tempId()].is_uniform_bool()) {
4292*61046927SAndroid Build Coastguard Worker          /* Just use the uniform boolean temp. */
4293*61046927SAndroid Build Coastguard Worker          op.setTemp(ctx.info[op.tempId()].temp);
4294*61046927SAndroid Build Coastguard Worker       } else if (ctx.info[op.tempId()].is_uniform_bitwise()) {
4295*61046927SAndroid Build Coastguard Worker          /* Use the SCC definition of the predecessor instruction.
4296*61046927SAndroid Build Coastguard Worker           * This allows the predecessor to get picked up by the same optimization (if it has no
4297*61046927SAndroid Build Coastguard Worker           * divergent users), and it also makes sure that the current instruction will keep working
4298*61046927SAndroid Build Coastguard Worker           * even if the predecessor won't be transformed.
4299*61046927SAndroid Build Coastguard Worker           */
4300*61046927SAndroid Build Coastguard Worker          Instruction* pred_instr = ctx.info[op.tempId()].instr;
4301*61046927SAndroid Build Coastguard Worker          assert(pred_instr->definitions.size() >= 2);
4302*61046927SAndroid Build Coastguard Worker          assert(pred_instr->definitions[1].isFixed() &&
4303*61046927SAndroid Build Coastguard Worker                 pred_instr->definitions[1].physReg() == scc);
4304*61046927SAndroid Build Coastguard Worker          op.setTemp(pred_instr->definitions[1].getTemp());
4305*61046927SAndroid Build Coastguard Worker       } else {
4306*61046927SAndroid Build Coastguard Worker          unreachable("Invalid operand on uniform bitwise instruction.");
4307*61046927SAndroid Build Coastguard Worker       }
4308*61046927SAndroid Build Coastguard Worker 
4309*61046927SAndroid Build Coastguard Worker       ctx.uses[op.tempId()]++;
4310*61046927SAndroid Build Coastguard Worker    }
4311*61046927SAndroid Build Coastguard Worker 
4312*61046927SAndroid Build Coastguard Worker    instr->definitions[0].setTemp(Temp(instr->definitions[0].tempId(), s1));
4313*61046927SAndroid Build Coastguard Worker    ctx.program->temp_rc[instr->definitions[0].tempId()] = s1;
4314*61046927SAndroid Build Coastguard Worker    assert(instr->operands[0].regClass() == s1);
4315*61046927SAndroid Build Coastguard Worker    assert(instr->operands[1].regClass() == s1);
4316*61046927SAndroid Build Coastguard Worker    return true;
4317*61046927SAndroid Build Coastguard Worker }
4318*61046927SAndroid Build Coastguard Worker 
4319*61046927SAndroid Build Coastguard Worker void
select_instruction(opt_ctx & ctx,aco_ptr<Instruction> & instr)4320*61046927SAndroid Build Coastguard Worker select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4321*61046927SAndroid Build Coastguard Worker {
4322*61046927SAndroid Build Coastguard Worker    const uint32_t threshold = 4;
4323*61046927SAndroid Build Coastguard Worker 
4324*61046927SAndroid Build Coastguard Worker    if (is_dead(ctx.uses, instr.get())) {
4325*61046927SAndroid Build Coastguard Worker       instr.reset();
4326*61046927SAndroid Build Coastguard Worker       return;
4327*61046927SAndroid Build Coastguard Worker    }
4328*61046927SAndroid Build Coastguard Worker 
4329*61046927SAndroid Build Coastguard Worker    /* convert split_vector into a copy or extract_vector if only one definition is ever used */
4330*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::p_split_vector) {
4331*61046927SAndroid Build Coastguard Worker       unsigned num_used = 0;
4332*61046927SAndroid Build Coastguard Worker       unsigned idx = 0;
4333*61046927SAndroid Build Coastguard Worker       unsigned split_offset = 0;
4334*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0, offset = 0; i < instr->definitions.size();
4335*61046927SAndroid Build Coastguard Worker            offset += instr->definitions[i++].bytes()) {
4336*61046927SAndroid Build Coastguard Worker          if (ctx.uses[instr->definitions[i].tempId()]) {
4337*61046927SAndroid Build Coastguard Worker             num_used++;
4338*61046927SAndroid Build Coastguard Worker             idx = i;
4339*61046927SAndroid Build Coastguard Worker             split_offset = offset;
4340*61046927SAndroid Build Coastguard Worker          }
4341*61046927SAndroid Build Coastguard Worker       }
4342*61046927SAndroid Build Coastguard Worker       bool done = false;
4343*61046927SAndroid Build Coastguard Worker       if (num_used == 1 && ctx.info[instr->operands[0].tempId()].is_vec() &&
4344*61046927SAndroid Build Coastguard Worker           ctx.uses[instr->operands[0].tempId()] == 1) {
4345*61046927SAndroid Build Coastguard Worker          Instruction* vec = ctx.info[instr->operands[0].tempId()].instr;
4346*61046927SAndroid Build Coastguard Worker 
4347*61046927SAndroid Build Coastguard Worker          unsigned off = 0;
4348*61046927SAndroid Build Coastguard Worker          Operand op;
4349*61046927SAndroid Build Coastguard Worker          for (Operand& vec_op : vec->operands) {
4350*61046927SAndroid Build Coastguard Worker             if (off == split_offset) {
4351*61046927SAndroid Build Coastguard Worker                op = vec_op;
4352*61046927SAndroid Build Coastguard Worker                break;
4353*61046927SAndroid Build Coastguard Worker             }
4354*61046927SAndroid Build Coastguard Worker             off += vec_op.bytes();
4355*61046927SAndroid Build Coastguard Worker          }
4356*61046927SAndroid Build Coastguard Worker          if (off != instr->operands[0].bytes() && op.bytes() == instr->definitions[idx].bytes()) {
4357*61046927SAndroid Build Coastguard Worker             ctx.uses[instr->operands[0].tempId()]--;
4358*61046927SAndroid Build Coastguard Worker             for (Operand& vec_op : vec->operands) {
4359*61046927SAndroid Build Coastguard Worker                if (vec_op.isTemp())
4360*61046927SAndroid Build Coastguard Worker                   ctx.uses[vec_op.tempId()]--;
4361*61046927SAndroid Build Coastguard Worker             }
4362*61046927SAndroid Build Coastguard Worker             if (op.isTemp())
4363*61046927SAndroid Build Coastguard Worker                ctx.uses[op.tempId()]++;
4364*61046927SAndroid Build Coastguard Worker 
4365*61046927SAndroid Build Coastguard Worker             aco_ptr<Instruction> copy{
4366*61046927SAndroid Build Coastguard Worker                create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1)};
4367*61046927SAndroid Build Coastguard Worker             copy->operands[0] = op;
4368*61046927SAndroid Build Coastguard Worker             copy->definitions[0] = instr->definitions[idx];
4369*61046927SAndroid Build Coastguard Worker             instr = std::move(copy);
4370*61046927SAndroid Build Coastguard Worker 
4371*61046927SAndroid Build Coastguard Worker             done = true;
4372*61046927SAndroid Build Coastguard Worker          }
4373*61046927SAndroid Build Coastguard Worker       }
4374*61046927SAndroid Build Coastguard Worker 
4375*61046927SAndroid Build Coastguard Worker       if (!done && num_used == 1 &&
4376*61046927SAndroid Build Coastguard Worker           instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 &&
4377*61046927SAndroid Build Coastguard Worker           split_offset % instr->definitions[idx].bytes() == 0) {
4378*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> extract{
4379*61046927SAndroid Build Coastguard Worker             create_instruction(aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
4380*61046927SAndroid Build Coastguard Worker          extract->operands[0] = instr->operands[0];
4381*61046927SAndroid Build Coastguard Worker          extract->operands[1] =
4382*61046927SAndroid Build Coastguard Worker             Operand::c32((uint32_t)split_offset / instr->definitions[idx].bytes());
4383*61046927SAndroid Build Coastguard Worker          extract->definitions[0] = instr->definitions[idx];
4384*61046927SAndroid Build Coastguard Worker          instr = std::move(extract);
4385*61046927SAndroid Build Coastguard Worker       }
4386*61046927SAndroid Build Coastguard Worker    }
4387*61046927SAndroid Build Coastguard Worker 
4388*61046927SAndroid Build Coastguard Worker    mad_info* mad_info = NULL;
4389*61046927SAndroid Build Coastguard Worker    if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) {
4390*61046927SAndroid Build Coastguard Worker       mad_info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].val];
4391*61046927SAndroid Build Coastguard Worker       /* re-check mad instructions */
4392*61046927SAndroid Build Coastguard Worker       if (ctx.uses[mad_info->mul_temp_id] && mad_info->add_instr) {
4393*61046927SAndroid Build Coastguard Worker          ctx.uses[mad_info->mul_temp_id]++;
4394*61046927SAndroid Build Coastguard Worker          if (instr->operands[0].isTemp())
4395*61046927SAndroid Build Coastguard Worker             ctx.uses[instr->operands[0].tempId()]--;
4396*61046927SAndroid Build Coastguard Worker          if (instr->operands[1].isTemp())
4397*61046927SAndroid Build Coastguard Worker             ctx.uses[instr->operands[1].tempId()]--;
4398*61046927SAndroid Build Coastguard Worker          instr.swap(mad_info->add_instr);
4399*61046927SAndroid Build Coastguard Worker          mad_info = NULL;
4400*61046927SAndroid Build Coastguard Worker       }
4401*61046927SAndroid Build Coastguard Worker       /* check literals */
4402*61046927SAndroid Build Coastguard Worker       else if (!instr->isDPP() && !instr->isVOP3P() && instr->opcode != aco_opcode::v_fma_f64 &&
4403*61046927SAndroid Build Coastguard Worker                instr->opcode != aco_opcode::v_mad_legacy_f32 &&
4404*61046927SAndroid Build Coastguard Worker                instr->opcode != aco_opcode::v_fma_legacy_f32) {
4405*61046927SAndroid Build Coastguard Worker          /* FMA can only take literals on GFX10+ */
4406*61046927SAndroid Build Coastguard Worker          if ((instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) &&
4407*61046927SAndroid Build Coastguard Worker              ctx.program->gfx_level < GFX10)
4408*61046927SAndroid Build Coastguard Worker             return;
4409*61046927SAndroid Build Coastguard Worker          /* There are no v_fmaak_legacy_f16/v_fmamk_legacy_f16 and on chips where VOP3 can take
4410*61046927SAndroid Build Coastguard Worker           * literals (GFX10+), these instructions don't exist.
4411*61046927SAndroid Build Coastguard Worker           */
4412*61046927SAndroid Build Coastguard Worker          if (instr->opcode == aco_opcode::v_fma_legacy_f16)
4413*61046927SAndroid Build Coastguard Worker             return;
4414*61046927SAndroid Build Coastguard Worker 
4415*61046927SAndroid Build Coastguard Worker          uint32_t literal_mask = 0;
4416*61046927SAndroid Build Coastguard Worker          uint32_t fp16_mask = 0;
4417*61046927SAndroid Build Coastguard Worker          uint32_t sgpr_mask = 0;
4418*61046927SAndroid Build Coastguard Worker          uint32_t vgpr_mask = 0;
4419*61046927SAndroid Build Coastguard Worker          uint32_t literal_uses = UINT32_MAX;
4420*61046927SAndroid Build Coastguard Worker          uint32_t literal_value = 0;
4421*61046927SAndroid Build Coastguard Worker 
4422*61046927SAndroid Build Coastguard Worker          /* Iterate in reverse to prefer v_madak/v_fmaak. */
4423*61046927SAndroid Build Coastguard Worker          for (int i = 2; i >= 0; i--) {
4424*61046927SAndroid Build Coastguard Worker             Operand& op = instr->operands[i];
4425*61046927SAndroid Build Coastguard Worker             if (!op.isTemp())
4426*61046927SAndroid Build Coastguard Worker                continue;
4427*61046927SAndroid Build Coastguard Worker             if (ctx.info[op.tempId()].is_literal(get_operand_size(instr, i))) {
4428*61046927SAndroid Build Coastguard Worker                uint32_t new_literal = ctx.info[op.tempId()].val;
4429*61046927SAndroid Build Coastguard Worker                float value = uif(new_literal);
4430*61046927SAndroid Build Coastguard Worker                uint16_t fp16_val = _mesa_float_to_half(value);
4431*61046927SAndroid Build Coastguard Worker                bool is_denorm = (fp16_val & 0x7fff) != 0 && (fp16_val & 0x7fff) <= 0x3ff;
4432*61046927SAndroid Build Coastguard Worker                if (_mesa_half_to_float(fp16_val) == value &&
4433*61046927SAndroid Build Coastguard Worker                    (!is_denorm || (ctx.fp_mode.denorm16_64 & fp_denorm_keep_in)))
4434*61046927SAndroid Build Coastguard Worker                   fp16_mask |= 1 << i;
4435*61046927SAndroid Build Coastguard Worker 
4436*61046927SAndroid Build Coastguard Worker                if (!literal_mask || literal_value == new_literal) {
4437*61046927SAndroid Build Coastguard Worker                   literal_value = new_literal;
4438*61046927SAndroid Build Coastguard Worker                   literal_uses = MIN2(literal_uses, ctx.uses[op.tempId()]);
4439*61046927SAndroid Build Coastguard Worker                   literal_mask |= 1 << i;
4440*61046927SAndroid Build Coastguard Worker                   continue;
4441*61046927SAndroid Build Coastguard Worker                }
4442*61046927SAndroid Build Coastguard Worker             }
4443*61046927SAndroid Build Coastguard Worker             sgpr_mask |= op.isOfType(RegType::sgpr) << i;
4444*61046927SAndroid Build Coastguard Worker             vgpr_mask |= op.isOfType(RegType::vgpr) << i;
4445*61046927SAndroid Build Coastguard Worker          }
4446*61046927SAndroid Build Coastguard Worker 
4447*61046927SAndroid Build Coastguard Worker          /* The constant bus limitations before GFX10 disallows SGPRs. */
4448*61046927SAndroid Build Coastguard Worker          if (sgpr_mask && ctx.program->gfx_level < GFX10)
4449*61046927SAndroid Build Coastguard Worker             literal_mask = 0;
4450*61046927SAndroid Build Coastguard Worker 
4451*61046927SAndroid Build Coastguard Worker          /* Encoding needs a vgpr. */
4452*61046927SAndroid Build Coastguard Worker          if (!vgpr_mask)
4453*61046927SAndroid Build Coastguard Worker             literal_mask = 0;
4454*61046927SAndroid Build Coastguard Worker 
4455*61046927SAndroid Build Coastguard Worker          /* v_madmk/v_fmamk needs a vgpr in the third source. */
4456*61046927SAndroid Build Coastguard Worker          if (!(literal_mask & 0b100) && !(vgpr_mask & 0b100))
4457*61046927SAndroid Build Coastguard Worker             literal_mask = 0;
4458*61046927SAndroid Build Coastguard Worker 
4459*61046927SAndroid Build Coastguard Worker          /* opsel with GFX11+ is the only modifier supported by fmamk/fmaak. */
4460*61046927SAndroid Build Coastguard Worker          if (instr->valu().abs || instr->valu().neg || instr->valu().omod || instr->valu().clamp ||
4461*61046927SAndroid Build Coastguard Worker              (instr->valu().opsel && ctx.program->gfx_level < GFX11))
4462*61046927SAndroid Build Coastguard Worker             literal_mask = 0;
4463*61046927SAndroid Build Coastguard Worker 
4464*61046927SAndroid Build Coastguard Worker          if (instr->valu().opsel & ~vgpr_mask)
4465*61046927SAndroid Build Coastguard Worker             literal_mask = 0;
4466*61046927SAndroid Build Coastguard Worker 
4467*61046927SAndroid Build Coastguard Worker          /* We can't use three unique fp16 literals */
4468*61046927SAndroid Build Coastguard Worker          if (fp16_mask == 0b111)
4469*61046927SAndroid Build Coastguard Worker             fp16_mask = 0b11;
4470*61046927SAndroid Build Coastguard Worker 
4471*61046927SAndroid Build Coastguard Worker          if ((instr->opcode == aco_opcode::v_fma_f32 ||
4472*61046927SAndroid Build Coastguard Worker               (instr->opcode == aco_opcode::v_mad_f32 && !instr->definitions[0].isPrecise())) &&
4473*61046927SAndroid Build Coastguard Worker              !instr->valu().omod && ctx.program->gfx_level >= GFX10 &&
4474*61046927SAndroid Build Coastguard Worker              util_bitcount(fp16_mask) > std::max<uint32_t>(util_bitcount(literal_mask), 1)) {
4475*61046927SAndroid Build Coastguard Worker             assert(ctx.program->dev.fused_mad_mix);
4476*61046927SAndroid Build Coastguard Worker             u_foreach_bit (i, fp16_mask)
4477*61046927SAndroid Build Coastguard Worker                ctx.uses[instr->operands[i].tempId()]--;
4478*61046927SAndroid Build Coastguard Worker             mad_info->fp16_mask = fp16_mask;
4479*61046927SAndroid Build Coastguard Worker             return;
4480*61046927SAndroid Build Coastguard Worker          }
4481*61046927SAndroid Build Coastguard Worker 
4482*61046927SAndroid Build Coastguard Worker          /* Limit the number of literals to apply to not increase the code
4483*61046927SAndroid Build Coastguard Worker           * size too much, but always apply literals for v_mad->v_madak
4484*61046927SAndroid Build Coastguard Worker           * because both instructions are 64-bit and this doesn't increase
4485*61046927SAndroid Build Coastguard Worker           * code size.
4486*61046927SAndroid Build Coastguard Worker           * TODO: try to apply the literals earlier to lower the number of
4487*61046927SAndroid Build Coastguard Worker           * uses below threshold
4488*61046927SAndroid Build Coastguard Worker           */
4489*61046927SAndroid Build Coastguard Worker          if (literal_mask && (literal_uses < threshold || (literal_mask & 0b100))) {
4490*61046927SAndroid Build Coastguard Worker             u_foreach_bit (i, literal_mask)
4491*61046927SAndroid Build Coastguard Worker                ctx.uses[instr->operands[i].tempId()]--;
4492*61046927SAndroid Build Coastguard Worker             mad_info->literal_mask = literal_mask;
4493*61046927SAndroid Build Coastguard Worker             return;
4494*61046927SAndroid Build Coastguard Worker          }
4495*61046927SAndroid Build Coastguard Worker       }
4496*61046927SAndroid Build Coastguard Worker    }
4497*61046927SAndroid Build Coastguard Worker 
4498*61046927SAndroid Build Coastguard Worker    /* Mark SCC needed, so the uniform boolean transformation won't swap the definitions
4499*61046927SAndroid Build Coastguard Worker     * when it isn't beneficial */
4500*61046927SAndroid Build Coastguard Worker    if (instr->isBranch() && instr->operands.size() && instr->operands[0].isTemp() &&
4501*61046927SAndroid Build Coastguard Worker        instr->operands[0].isFixed() && instr->operands[0].physReg() == scc) {
4502*61046927SAndroid Build Coastguard Worker       ctx.info[instr->operands[0].tempId()].set_scc_needed();
4503*61046927SAndroid Build Coastguard Worker       return;
4504*61046927SAndroid Build Coastguard Worker    } else if ((instr->opcode == aco_opcode::s_cselect_b64 ||
4505*61046927SAndroid Build Coastguard Worker                instr->opcode == aco_opcode::s_cselect_b32) &&
4506*61046927SAndroid Build Coastguard Worker               instr->operands[2].isTemp()) {
4507*61046927SAndroid Build Coastguard Worker       ctx.info[instr->operands[2].tempId()].set_scc_needed();
4508*61046927SAndroid Build Coastguard Worker    }
4509*61046927SAndroid Build Coastguard Worker 
4510*61046927SAndroid Build Coastguard Worker    /* check for literals */
4511*61046927SAndroid Build Coastguard Worker    if (!instr->isSALU() && !instr->isVALU())
4512*61046927SAndroid Build Coastguard Worker       return;
4513*61046927SAndroid Build Coastguard Worker 
4514*61046927SAndroid Build Coastguard Worker    /* Transform uniform bitwise boolean operations to 32-bit when there are no divergent uses. */
4515*61046927SAndroid Build Coastguard Worker    if (instr->definitions.size() && ctx.uses[instr->definitions[0].tempId()] == 0 &&
4516*61046927SAndroid Build Coastguard Worker        ctx.info[instr->definitions[0].tempId()].is_uniform_bitwise()) {
4517*61046927SAndroid Build Coastguard Worker       bool transform_done = to_uniform_bool_instr(ctx, instr);
4518*61046927SAndroid Build Coastguard Worker 
4519*61046927SAndroid Build Coastguard Worker       if (transform_done && !ctx.info[instr->definitions[1].tempId()].is_scc_needed()) {
4520*61046927SAndroid Build Coastguard Worker          /* Swap the two definition IDs in order to avoid overusing the SCC.
4521*61046927SAndroid Build Coastguard Worker           * This reduces extra moves generated by RA. */
4522*61046927SAndroid Build Coastguard Worker          uint32_t def0_id = instr->definitions[0].getTemp().id();
4523*61046927SAndroid Build Coastguard Worker          uint32_t def1_id = instr->definitions[1].getTemp().id();
4524*61046927SAndroid Build Coastguard Worker          instr->definitions[0].setTemp(Temp(def1_id, s1));
4525*61046927SAndroid Build Coastguard Worker          instr->definitions[1].setTemp(Temp(def0_id, s1));
4526*61046927SAndroid Build Coastguard Worker       }
4527*61046927SAndroid Build Coastguard Worker 
4528*61046927SAndroid Build Coastguard Worker       return;
4529*61046927SAndroid Build Coastguard Worker    }
4530*61046927SAndroid Build Coastguard Worker 
4531*61046927SAndroid Build Coastguard Worker    /* This optimization is done late in order to be able to apply otherwise
4532*61046927SAndroid Build Coastguard Worker     * unsafe optimizations such as the inverse comparison optimization.
4533*61046927SAndroid Build Coastguard Worker     */
4534*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) {
4535*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isTemp() && fixed_to_exec(instr->operands[1]) &&
4536*61046927SAndroid Build Coastguard Worker           ctx.uses[instr->operands[0].tempId()] == 1 &&
4537*61046927SAndroid Build Coastguard Worker           ctx.uses[instr->definitions[1].tempId()] == 0 &&
4538*61046927SAndroid Build Coastguard Worker           can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
4539*61046927SAndroid Build Coastguard Worker          ctx.uses[instr->operands[0].tempId()]--;
4540*61046927SAndroid Build Coastguard Worker          ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(
4541*61046927SAndroid Build Coastguard Worker             instr->definitions[0].getTemp());
4542*61046927SAndroid Build Coastguard Worker          instr.reset();
4543*61046927SAndroid Build Coastguard Worker          return;
4544*61046927SAndroid Build Coastguard Worker       }
4545*61046927SAndroid Build Coastguard Worker    }
4546*61046927SAndroid Build Coastguard Worker 
4547*61046927SAndroid Build Coastguard Worker    /* Combine DPP copies into VALU. This should be done after creating MAD/FMA. */
4548*61046927SAndroid Build Coastguard Worker    if (instr->isVALU() && !instr->isDPP()) {
4549*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < instr->operands.size(); i++) {
4550*61046927SAndroid Build Coastguard Worker          if (!instr->operands[i].isTemp())
4551*61046927SAndroid Build Coastguard Worker             continue;
4552*61046927SAndroid Build Coastguard Worker          ssa_info info = ctx.info[instr->operands[i].tempId()];
4553*61046927SAndroid Build Coastguard Worker 
4554*61046927SAndroid Build Coastguard Worker          if (!info.is_dpp() || info.instr->pass_flags != instr->pass_flags)
4555*61046927SAndroid Build Coastguard Worker             continue;
4556*61046927SAndroid Build Coastguard Worker 
4557*61046927SAndroid Build Coastguard Worker          /* We won't eliminate the DPP mov if the operand is used twice */
4558*61046927SAndroid Build Coastguard Worker          bool op_used_twice = false;
4559*61046927SAndroid Build Coastguard Worker          for (unsigned j = 0; j < instr->operands.size(); j++)
4560*61046927SAndroid Build Coastguard Worker             op_used_twice |= i != j && instr->operands[i] == instr->operands[j];
4561*61046927SAndroid Build Coastguard Worker          if (op_used_twice)
4562*61046927SAndroid Build Coastguard Worker             continue;
4563*61046927SAndroid Build Coastguard Worker 
4564*61046927SAndroid Build Coastguard Worker          if (i != 0) {
4565*61046927SAndroid Build Coastguard Worker             if (!can_swap_operands(instr, &instr->opcode, 0, i))
4566*61046927SAndroid Build Coastguard Worker                continue;
4567*61046927SAndroid Build Coastguard Worker             instr->valu().swapOperands(0, i);
4568*61046927SAndroid Build Coastguard Worker          }
4569*61046927SAndroid Build Coastguard Worker 
4570*61046927SAndroid Build Coastguard Worker          if (!can_use_DPP(ctx.program->gfx_level, instr, info.is_dpp8()))
4571*61046927SAndroid Build Coastguard Worker             continue;
4572*61046927SAndroid Build Coastguard Worker 
4573*61046927SAndroid Build Coastguard Worker          bool dpp8 = info.is_dpp8();
4574*61046927SAndroid Build Coastguard Worker          bool input_mods = can_use_input_modifiers(ctx.program->gfx_level, instr->opcode, 0) &&
4575*61046927SAndroid Build Coastguard Worker                            get_operand_size(instr, 0) == 32;
4576*61046927SAndroid Build Coastguard Worker          bool mov_uses_mods = info.instr->valu().neg[0] || info.instr->valu().abs[0];
4577*61046927SAndroid Build Coastguard Worker          if (((dpp8 && ctx.program->gfx_level < GFX11) || !input_mods) && mov_uses_mods)
4578*61046927SAndroid Build Coastguard Worker             continue;
4579*61046927SAndroid Build Coastguard Worker 
4580*61046927SAndroid Build Coastguard Worker          convert_to_DPP(ctx.program->gfx_level, instr, dpp8);
4581*61046927SAndroid Build Coastguard Worker 
4582*61046927SAndroid Build Coastguard Worker          if (dpp8) {
4583*61046927SAndroid Build Coastguard Worker             DPP8_instruction* dpp = &instr->dpp8();
4584*61046927SAndroid Build Coastguard Worker             dpp->lane_sel = info.instr->dpp8().lane_sel;
4585*61046927SAndroid Build Coastguard Worker             dpp->fetch_inactive = info.instr->dpp8().fetch_inactive;
4586*61046927SAndroid Build Coastguard Worker             if (mov_uses_mods)
4587*61046927SAndroid Build Coastguard Worker                instr->format = asVOP3(instr->format);
4588*61046927SAndroid Build Coastguard Worker          } else {
4589*61046927SAndroid Build Coastguard Worker             DPP16_instruction* dpp = &instr->dpp16();
4590*61046927SAndroid Build Coastguard Worker             dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
4591*61046927SAndroid Build Coastguard Worker             dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
4592*61046927SAndroid Build Coastguard Worker             dpp->fetch_inactive = info.instr->dpp16().fetch_inactive;
4593*61046927SAndroid Build Coastguard Worker          }
4594*61046927SAndroid Build Coastguard Worker 
4595*61046927SAndroid Build Coastguard Worker          instr->valu().neg[0] ^= info.instr->valu().neg[0] && !instr->valu().abs[0];
4596*61046927SAndroid Build Coastguard Worker          instr->valu().abs[0] |= info.instr->valu().abs[0];
4597*61046927SAndroid Build Coastguard Worker 
4598*61046927SAndroid Build Coastguard Worker          if (--ctx.uses[info.instr->definitions[0].tempId()])
4599*61046927SAndroid Build Coastguard Worker             ctx.uses[info.instr->operands[0].tempId()]++;
4600*61046927SAndroid Build Coastguard Worker          instr->operands[0].setTemp(info.instr->operands[0].getTemp());
4601*61046927SAndroid Build Coastguard Worker          break;
4602*61046927SAndroid Build Coastguard Worker       }
4603*61046927SAndroid Build Coastguard Worker    }
4604*61046927SAndroid Build Coastguard Worker 
4605*61046927SAndroid Build Coastguard Worker    /* Use v_fma_mix for f2f32/f2f16 if it has higher throughput.
4606*61046927SAndroid Build Coastguard Worker     * Do this late to not disturb other optimizations.
4607*61046927SAndroid Build Coastguard Worker     */
4608*61046927SAndroid Build Coastguard Worker    if ((instr->opcode == aco_opcode::v_cvt_f32_f16 || instr->opcode == aco_opcode::v_cvt_f16_f32) &&
4609*61046927SAndroid Build Coastguard Worker        ctx.program->gfx_level >= GFX11 && ctx.program->wave_size == 64 && !instr->valu().omod &&
4610*61046927SAndroid Build Coastguard Worker        !instr->isDPP()) {
4611*61046927SAndroid Build Coastguard Worker       bool is_f2f16 = instr->opcode == aco_opcode::v_cvt_f16_f32;
4612*61046927SAndroid Build Coastguard Worker       Instruction* fma = create_instruction(
4613*61046927SAndroid Build Coastguard Worker          is_f2f16 ? aco_opcode::v_fma_mixlo_f16 : aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1);
4614*61046927SAndroid Build Coastguard Worker       fma->definitions[0] = instr->definitions[0];
4615*61046927SAndroid Build Coastguard Worker       fma->operands[0] = instr->operands[0];
4616*61046927SAndroid Build Coastguard Worker       fma->valu().opsel_hi[0] = !is_f2f16;
4617*61046927SAndroid Build Coastguard Worker       fma->valu().opsel_lo[0] = instr->valu().opsel[0];
4618*61046927SAndroid Build Coastguard Worker       fma->valu().clamp = instr->valu().clamp;
4619*61046927SAndroid Build Coastguard Worker       fma->valu().abs[0] = instr->valu().abs[0];
4620*61046927SAndroid Build Coastguard Worker       fma->valu().neg[0] = instr->valu().neg[0];
4621*61046927SAndroid Build Coastguard Worker       fma->operands[1] = Operand::c32(fui(1.0f));
4622*61046927SAndroid Build Coastguard Worker       fma->operands[2] = Operand::zero();
4623*61046927SAndroid Build Coastguard Worker       fma->valu().neg[2] = true;
4624*61046927SAndroid Build Coastguard Worker       instr.reset(fma);
4625*61046927SAndroid Build Coastguard Worker       ctx.info[instr->definitions[0].tempId()].label = 0;
4626*61046927SAndroid Build Coastguard Worker    }
4627*61046927SAndroid Build Coastguard Worker 
4628*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA() || (instr->isVOP3() && ctx.program->gfx_level < GFX10) ||
4629*61046927SAndroid Build Coastguard Worker        (instr->isVOP3P() && ctx.program->gfx_level < GFX10))
4630*61046927SAndroid Build Coastguard Worker       return; /* some encodings can't ever take literals */
4631*61046927SAndroid Build Coastguard Worker 
4632*61046927SAndroid Build Coastguard Worker    /* we do not apply the literals yet as we don't know if it is profitable */
4633*61046927SAndroid Build Coastguard Worker    Operand current_literal(s1);
4634*61046927SAndroid Build Coastguard Worker 
4635*61046927SAndroid Build Coastguard Worker    unsigned literal_id = 0;
4636*61046927SAndroid Build Coastguard Worker    unsigned literal_uses = UINT32_MAX;
4637*61046927SAndroid Build Coastguard Worker    Operand literal(s1);
4638*61046927SAndroid Build Coastguard Worker    unsigned num_operands = 1;
4639*61046927SAndroid Build Coastguard Worker    if (instr->isSALU() || (ctx.program->gfx_level >= GFX10 &&
4640*61046927SAndroid Build Coastguard Worker                            (can_use_VOP3(ctx, instr) || instr->isVOP3P()) && !instr->isDPP()))
4641*61046927SAndroid Build Coastguard Worker       num_operands = instr->operands.size();
4642*61046927SAndroid Build Coastguard Worker    /* catch VOP2 with a 3rd SGPR operand (e.g. v_cndmask_b32, v_addc_co_u32) */
4643*61046927SAndroid Build Coastguard Worker    else if (instr->isVALU() && instr->operands.size() >= 3)
4644*61046927SAndroid Build Coastguard Worker       return;
4645*61046927SAndroid Build Coastguard Worker 
4646*61046927SAndroid Build Coastguard Worker    unsigned sgpr_ids[2] = {0, 0};
4647*61046927SAndroid Build Coastguard Worker    bool is_literal_sgpr = false;
4648*61046927SAndroid Build Coastguard Worker    uint32_t mask = 0;
4649*61046927SAndroid Build Coastguard Worker 
4650*61046927SAndroid Build Coastguard Worker    /* choose a literal to apply */
4651*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < num_operands; i++) {
4652*61046927SAndroid Build Coastguard Worker       Operand op = instr->operands[i];
4653*61046927SAndroid Build Coastguard Worker       unsigned bits = get_operand_size(instr, i);
4654*61046927SAndroid Build Coastguard Worker 
4655*61046927SAndroid Build Coastguard Worker       if (instr->isVALU() && op.isTemp() && op.getTemp().type() == RegType::sgpr &&
4656*61046927SAndroid Build Coastguard Worker           op.tempId() != sgpr_ids[0])
4657*61046927SAndroid Build Coastguard Worker          sgpr_ids[!!sgpr_ids[0]] = op.tempId();
4658*61046927SAndroid Build Coastguard Worker 
4659*61046927SAndroid Build Coastguard Worker       if (op.isLiteral()) {
4660*61046927SAndroid Build Coastguard Worker          current_literal = op;
4661*61046927SAndroid Build Coastguard Worker          continue;
4662*61046927SAndroid Build Coastguard Worker       } else if (!op.isTemp() || !ctx.info[op.tempId()].is_literal(bits)) {
4663*61046927SAndroid Build Coastguard Worker          continue;
4664*61046927SAndroid Build Coastguard Worker       }
4665*61046927SAndroid Build Coastguard Worker 
4666*61046927SAndroid Build Coastguard Worker       if (!alu_can_accept_constant(instr, i))
4667*61046927SAndroid Build Coastguard Worker          continue;
4668*61046927SAndroid Build Coastguard Worker 
4669*61046927SAndroid Build Coastguard Worker       if (ctx.uses[op.tempId()] < literal_uses) {
4670*61046927SAndroid Build Coastguard Worker          is_literal_sgpr = op.getTemp().type() == RegType::sgpr;
4671*61046927SAndroid Build Coastguard Worker          mask = 0;
4672*61046927SAndroid Build Coastguard Worker          literal = Operand::c32(ctx.info[op.tempId()].val);
4673*61046927SAndroid Build Coastguard Worker          literal_uses = ctx.uses[op.tempId()];
4674*61046927SAndroid Build Coastguard Worker          literal_id = op.tempId();
4675*61046927SAndroid Build Coastguard Worker       }
4676*61046927SAndroid Build Coastguard Worker 
4677*61046927SAndroid Build Coastguard Worker       mask |= (op.tempId() == literal_id) << i;
4678*61046927SAndroid Build Coastguard Worker    }
4679*61046927SAndroid Build Coastguard Worker 
4680*61046927SAndroid Build Coastguard Worker    /* don't go over the constant bus limit */
4681*61046927SAndroid Build Coastguard Worker    bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64_e64 ||
4682*61046927SAndroid Build Coastguard Worker                      instr->opcode == aco_opcode::v_lshlrev_b64 ||
4683*61046927SAndroid Build Coastguard Worker                      instr->opcode == aco_opcode::v_lshrrev_b64 ||
4684*61046927SAndroid Build Coastguard Worker                      instr->opcode == aco_opcode::v_ashrrev_i64;
4685*61046927SAndroid Build Coastguard Worker    unsigned const_bus_limit = instr->isVALU() ? 1 : UINT32_MAX;
4686*61046927SAndroid Build Coastguard Worker    if (ctx.program->gfx_level >= GFX10 && !is_shift64)
4687*61046927SAndroid Build Coastguard Worker       const_bus_limit = 2;
4688*61046927SAndroid Build Coastguard Worker 
4689*61046927SAndroid Build Coastguard Worker    unsigned num_sgprs = !!sgpr_ids[0] + !!sgpr_ids[1];
4690*61046927SAndroid Build Coastguard Worker    if (num_sgprs == const_bus_limit && !is_literal_sgpr)
4691*61046927SAndroid Build Coastguard Worker       return;
4692*61046927SAndroid Build Coastguard Worker 
4693*61046927SAndroid Build Coastguard Worker    if (literal_id && literal_uses < threshold &&
4694*61046927SAndroid Build Coastguard Worker        (current_literal.isUndefined() ||
4695*61046927SAndroid Build Coastguard Worker         (current_literal.size() == literal.size() &&
4696*61046927SAndroid Build Coastguard Worker          current_literal.constantValue() == literal.constantValue()))) {
4697*61046927SAndroid Build Coastguard Worker       /* mark the literal to be applied */
4698*61046927SAndroid Build Coastguard Worker       while (mask) {
4699*61046927SAndroid Build Coastguard Worker          unsigned i = u_bit_scan(&mask);
4700*61046927SAndroid Build Coastguard Worker          if (instr->operands[i].isTemp() && instr->operands[i].tempId() == literal_id)
4701*61046927SAndroid Build Coastguard Worker             ctx.uses[instr->operands[i].tempId()]--;
4702*61046927SAndroid Build Coastguard Worker       }
4703*61046927SAndroid Build Coastguard Worker    }
4704*61046927SAndroid Build Coastguard Worker }
4705*61046927SAndroid Build Coastguard Worker 
4706*61046927SAndroid Build Coastguard Worker static aco_opcode
sopk_opcode_for_sopc(aco_opcode opcode)4707*61046927SAndroid Build Coastguard Worker sopk_opcode_for_sopc(aco_opcode opcode)
4708*61046927SAndroid Build Coastguard Worker {
4709*61046927SAndroid Build Coastguard Worker #define CTOK(op)                                                                                   \
4710*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cmp_##op##_i32: return aco_opcode::s_cmpk_##op##_i32;                        \
4711*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cmp_##op##_u32: return aco_opcode::s_cmpk_##op##_u32;
4712*61046927SAndroid Build Coastguard Worker    switch (opcode) {
4713*61046927SAndroid Build Coastguard Worker       CTOK(eq)
4714*61046927SAndroid Build Coastguard Worker       CTOK(lg)
4715*61046927SAndroid Build Coastguard Worker       CTOK(gt)
4716*61046927SAndroid Build Coastguard Worker       CTOK(ge)
4717*61046927SAndroid Build Coastguard Worker       CTOK(lt)
4718*61046927SAndroid Build Coastguard Worker       CTOK(le)
4719*61046927SAndroid Build Coastguard Worker    default: return aco_opcode::num_opcodes;
4720*61046927SAndroid Build Coastguard Worker    }
4721*61046927SAndroid Build Coastguard Worker #undef CTOK
4722*61046927SAndroid Build Coastguard Worker }
4723*61046927SAndroid Build Coastguard Worker 
4724*61046927SAndroid Build Coastguard Worker static bool
sopc_is_signed(aco_opcode opcode)4725*61046927SAndroid Build Coastguard Worker sopc_is_signed(aco_opcode opcode)
4726*61046927SAndroid Build Coastguard Worker {
4727*61046927SAndroid Build Coastguard Worker #define SOPC(op)                                                                                   \
4728*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cmp_##op##_i32: return true;                                                 \
4729*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cmp_##op##_u32: return false;
4730*61046927SAndroid Build Coastguard Worker    switch (opcode) {
4731*61046927SAndroid Build Coastguard Worker       SOPC(eq)
4732*61046927SAndroid Build Coastguard Worker       SOPC(lg)
4733*61046927SAndroid Build Coastguard Worker       SOPC(gt)
4734*61046927SAndroid Build Coastguard Worker       SOPC(ge)
4735*61046927SAndroid Build Coastguard Worker       SOPC(lt)
4736*61046927SAndroid Build Coastguard Worker       SOPC(le)
4737*61046927SAndroid Build Coastguard Worker    default: unreachable("Not a valid SOPC instruction.");
4738*61046927SAndroid Build Coastguard Worker    }
4739*61046927SAndroid Build Coastguard Worker #undef SOPC
4740*61046927SAndroid Build Coastguard Worker }
4741*61046927SAndroid Build Coastguard Worker 
4742*61046927SAndroid Build Coastguard Worker static aco_opcode
sopc_32_swapped(aco_opcode opcode)4743*61046927SAndroid Build Coastguard Worker sopc_32_swapped(aco_opcode opcode)
4744*61046927SAndroid Build Coastguard Worker {
4745*61046927SAndroid Build Coastguard Worker #define SOPC(op1, op2)                                                                             \
4746*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cmp_##op1##_i32: return aco_opcode::s_cmp_##op2##_i32;                       \
4747*61046927SAndroid Build Coastguard Worker    case aco_opcode::s_cmp_##op1##_u32: return aco_opcode::s_cmp_##op2##_u32;
4748*61046927SAndroid Build Coastguard Worker    switch (opcode) {
4749*61046927SAndroid Build Coastguard Worker       SOPC(eq, eq)
4750*61046927SAndroid Build Coastguard Worker       SOPC(lg, lg)
4751*61046927SAndroid Build Coastguard Worker       SOPC(gt, lt)
4752*61046927SAndroid Build Coastguard Worker       SOPC(ge, le)
4753*61046927SAndroid Build Coastguard Worker       SOPC(lt, gt)
4754*61046927SAndroid Build Coastguard Worker       SOPC(le, ge)
4755*61046927SAndroid Build Coastguard Worker    default: return aco_opcode::num_opcodes;
4756*61046927SAndroid Build Coastguard Worker    }
4757*61046927SAndroid Build Coastguard Worker #undef SOPC
4758*61046927SAndroid Build Coastguard Worker }
4759*61046927SAndroid Build Coastguard Worker 
4760*61046927SAndroid Build Coastguard Worker static void
try_convert_sopc_to_sopk(aco_ptr<Instruction> & instr)4761*61046927SAndroid Build Coastguard Worker try_convert_sopc_to_sopk(aco_ptr<Instruction>& instr)
4762*61046927SAndroid Build Coastguard Worker {
4763*61046927SAndroid Build Coastguard Worker    if (sopk_opcode_for_sopc(instr->opcode) == aco_opcode::num_opcodes)
4764*61046927SAndroid Build Coastguard Worker       return;
4765*61046927SAndroid Build Coastguard Worker 
4766*61046927SAndroid Build Coastguard Worker    if (instr->operands[0].isLiteral()) {
4767*61046927SAndroid Build Coastguard Worker       std::swap(instr->operands[0], instr->operands[1]);
4768*61046927SAndroid Build Coastguard Worker       instr->opcode = sopc_32_swapped(instr->opcode);
4769*61046927SAndroid Build Coastguard Worker    }
4770*61046927SAndroid Build Coastguard Worker 
4771*61046927SAndroid Build Coastguard Worker    if (!instr->operands[1].isLiteral())
4772*61046927SAndroid Build Coastguard Worker       return;
4773*61046927SAndroid Build Coastguard Worker 
4774*61046927SAndroid Build Coastguard Worker    if (instr->operands[0].isFixed() && instr->operands[0].physReg() >= 128)
4775*61046927SAndroid Build Coastguard Worker       return;
4776*61046927SAndroid Build Coastguard Worker 
4777*61046927SAndroid Build Coastguard Worker    uint32_t value = instr->operands[1].constantValue();
4778*61046927SAndroid Build Coastguard Worker 
4779*61046927SAndroid Build Coastguard Worker    const uint32_t i16_mask = 0xffff8000u;
4780*61046927SAndroid Build Coastguard Worker 
4781*61046927SAndroid Build Coastguard Worker    bool value_is_i16 = (value & i16_mask) == 0 || (value & i16_mask) == i16_mask;
4782*61046927SAndroid Build Coastguard Worker    bool value_is_u16 = !(value & 0xffff0000u);
4783*61046927SAndroid Build Coastguard Worker 
4784*61046927SAndroid Build Coastguard Worker    if (!value_is_i16 && !value_is_u16)
4785*61046927SAndroid Build Coastguard Worker       return;
4786*61046927SAndroid Build Coastguard Worker 
4787*61046927SAndroid Build Coastguard Worker    if (!value_is_i16 && sopc_is_signed(instr->opcode)) {
4788*61046927SAndroid Build Coastguard Worker       if (instr->opcode == aco_opcode::s_cmp_lg_i32)
4789*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::s_cmp_lg_u32;
4790*61046927SAndroid Build Coastguard Worker       else if (instr->opcode == aco_opcode::s_cmp_eq_i32)
4791*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::s_cmp_eq_u32;
4792*61046927SAndroid Build Coastguard Worker       else
4793*61046927SAndroid Build Coastguard Worker          return;
4794*61046927SAndroid Build Coastguard Worker    } else if (!value_is_u16 && !sopc_is_signed(instr->opcode)) {
4795*61046927SAndroid Build Coastguard Worker       if (instr->opcode == aco_opcode::s_cmp_lg_u32)
4796*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::s_cmp_lg_i32;
4797*61046927SAndroid Build Coastguard Worker       else if (instr->opcode == aco_opcode::s_cmp_eq_u32)
4798*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::s_cmp_eq_i32;
4799*61046927SAndroid Build Coastguard Worker       else
4800*61046927SAndroid Build Coastguard Worker          return;
4801*61046927SAndroid Build Coastguard Worker    }
4802*61046927SAndroid Build Coastguard Worker 
4803*61046927SAndroid Build Coastguard Worker    instr->format = Format::SOPK;
4804*61046927SAndroid Build Coastguard Worker    SALU_instruction* instr_sopk = &instr->salu();
4805*61046927SAndroid Build Coastguard Worker 
4806*61046927SAndroid Build Coastguard Worker    instr_sopk->imm = instr_sopk->operands[1].constantValue() & 0xffff;
4807*61046927SAndroid Build Coastguard Worker    instr_sopk->opcode = sopk_opcode_for_sopc(instr_sopk->opcode);
4808*61046927SAndroid Build Coastguard Worker    instr_sopk->operands.pop_back();
4809*61046927SAndroid Build Coastguard Worker }
4810*61046927SAndroid Build Coastguard Worker 
4811*61046927SAndroid Build Coastguard Worker static void
opt_fma_mix_acc(opt_ctx & ctx,aco_ptr<Instruction> & instr)4812*61046927SAndroid Build Coastguard Worker opt_fma_mix_acc(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4813*61046927SAndroid Build Coastguard Worker {
4814*61046927SAndroid Build Coastguard Worker    /* fma_mix is only dual issued on gfx11 if dst and acc type match */
4815*61046927SAndroid Build Coastguard Worker    bool f2f16 = instr->opcode == aco_opcode::v_fma_mixlo_f16;
4816*61046927SAndroid Build Coastguard Worker 
4817*61046927SAndroid Build Coastguard Worker    if (instr->valu().opsel_hi[2] == f2f16 || instr->isDPP())
4818*61046927SAndroid Build Coastguard Worker       return;
4819*61046927SAndroid Build Coastguard Worker 
4820*61046927SAndroid Build Coastguard Worker    bool is_add = false;
4821*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
4822*61046927SAndroid Build Coastguard Worker       uint32_t one = instr->valu().opsel_hi[i] ? 0x3800 : 0x3f800000;
4823*61046927SAndroid Build Coastguard Worker       is_add = instr->operands[i].constantEquals(one) && !instr->valu().neg[i] &&
4824*61046927SAndroid Build Coastguard Worker                !instr->valu().opsel_lo[i];
4825*61046927SAndroid Build Coastguard Worker       if (is_add) {
4826*61046927SAndroid Build Coastguard Worker          instr->valu().swapOperands(0, i);
4827*61046927SAndroid Build Coastguard Worker          break;
4828*61046927SAndroid Build Coastguard Worker       }
4829*61046927SAndroid Build Coastguard Worker    }
4830*61046927SAndroid Build Coastguard Worker 
4831*61046927SAndroid Build Coastguard Worker    if (is_add && instr->valu().opsel_hi[1] == f2f16) {
4832*61046927SAndroid Build Coastguard Worker       instr->valu().swapOperands(1, 2);
4833*61046927SAndroid Build Coastguard Worker       return;
4834*61046927SAndroid Build Coastguard Worker    }
4835*61046927SAndroid Build Coastguard Worker 
4836*61046927SAndroid Build Coastguard Worker    unsigned literal_count = instr->operands[0].isLiteral() + instr->operands[1].isLiteral() +
4837*61046927SAndroid Build Coastguard Worker                             instr->operands[2].isLiteral();
4838*61046927SAndroid Build Coastguard Worker 
4839*61046927SAndroid Build Coastguard Worker    if (!f2f16 || literal_count > 1)
4840*61046927SAndroid Build Coastguard Worker       return;
4841*61046927SAndroid Build Coastguard Worker 
4842*61046927SAndroid Build Coastguard Worker    /* try to convert constant operand to fp16 */
4843*61046927SAndroid Build Coastguard Worker    for (unsigned i = 2 - is_add; i < 3; i++) {
4844*61046927SAndroid Build Coastguard Worker       if (!instr->operands[i].isConstant())
4845*61046927SAndroid Build Coastguard Worker          continue;
4846*61046927SAndroid Build Coastguard Worker 
4847*61046927SAndroid Build Coastguard Worker       float value = uif(instr->operands[i].constantValue());
4848*61046927SAndroid Build Coastguard Worker       uint16_t fp16_val = _mesa_float_to_half(value);
4849*61046927SAndroid Build Coastguard Worker       bool is_denorm = (fp16_val & 0x7fff) != 0 && (fp16_val & 0x7fff) <= 0x3ff;
4850*61046927SAndroid Build Coastguard Worker 
4851*61046927SAndroid Build Coastguard Worker       if (_mesa_half_to_float(fp16_val) != value ||
4852*61046927SAndroid Build Coastguard Worker           (is_denorm && !(ctx.fp_mode.denorm16_64 & fp_denorm_keep_in)))
4853*61046927SAndroid Build Coastguard Worker          continue;
4854*61046927SAndroid Build Coastguard Worker 
4855*61046927SAndroid Build Coastguard Worker       instr->valu().swapOperands(i, 2);
4856*61046927SAndroid Build Coastguard Worker 
4857*61046927SAndroid Build Coastguard Worker       Operand op16 = Operand::c16(fp16_val);
4858*61046927SAndroid Build Coastguard Worker       assert(!op16.isLiteral() || instr->operands[2].isLiteral());
4859*61046927SAndroid Build Coastguard Worker 
4860*61046927SAndroid Build Coastguard Worker       instr->operands[2] = op16;
4861*61046927SAndroid Build Coastguard Worker       instr->valu().opsel_lo[2] = false;
4862*61046927SAndroid Build Coastguard Worker       instr->valu().opsel_hi[2] = true;
4863*61046927SAndroid Build Coastguard Worker       return;
4864*61046927SAndroid Build Coastguard Worker    }
4865*61046927SAndroid Build Coastguard Worker }
4866*61046927SAndroid Build Coastguard Worker 
4867*61046927SAndroid Build Coastguard Worker void
apply_literals(opt_ctx & ctx,aco_ptr<Instruction> & instr)4868*61046927SAndroid Build Coastguard Worker apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
4869*61046927SAndroid Build Coastguard Worker {
4870*61046927SAndroid Build Coastguard Worker    /* Cleanup Dead Instructions */
4871*61046927SAndroid Build Coastguard Worker    if (!instr)
4872*61046927SAndroid Build Coastguard Worker       return;
4873*61046927SAndroid Build Coastguard Worker 
4874*61046927SAndroid Build Coastguard Worker    /* apply literals on MAD */
4875*61046927SAndroid Build Coastguard Worker    if (!instr->definitions.empty() && ctx.info[instr->definitions[0].tempId()].is_mad()) {
4876*61046927SAndroid Build Coastguard Worker       mad_info* info = &ctx.mad_infos[ctx.info[instr->definitions[0].tempId()].val];
4877*61046927SAndroid Build Coastguard Worker       const bool madak = (info->literal_mask & 0b100);
4878*61046927SAndroid Build Coastguard Worker       bool has_dead_literal = false;
4879*61046927SAndroid Build Coastguard Worker       u_foreach_bit (i, info->literal_mask | info->fp16_mask)
4880*61046927SAndroid Build Coastguard Worker          has_dead_literal |= ctx.uses[instr->operands[i].tempId()] == 0;
4881*61046927SAndroid Build Coastguard Worker 
4882*61046927SAndroid Build Coastguard Worker       if (has_dead_literal && info->fp16_mask) {
4883*61046927SAndroid Build Coastguard Worker          instr->format = Format::VOP3P;
4884*61046927SAndroid Build Coastguard Worker          instr->opcode = aco_opcode::v_fma_mix_f32;
4885*61046927SAndroid Build Coastguard Worker 
4886*61046927SAndroid Build Coastguard Worker          uint32_t literal = 0;
4887*61046927SAndroid Build Coastguard Worker          bool second = false;
4888*61046927SAndroid Build Coastguard Worker          u_foreach_bit (i, info->fp16_mask) {
4889*61046927SAndroid Build Coastguard Worker             float value = uif(ctx.info[instr->operands[i].tempId()].val);
4890*61046927SAndroid Build Coastguard Worker             literal |= _mesa_float_to_half(value) << (second * 16);
4891*61046927SAndroid Build Coastguard Worker             instr->valu().opsel_lo[i] = second;
4892*61046927SAndroid Build Coastguard Worker             instr->valu().opsel_hi[i] = true;
4893*61046927SAndroid Build Coastguard Worker             second = true;
4894*61046927SAndroid Build Coastguard Worker          }
4895*61046927SAndroid Build Coastguard Worker 
4896*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < 3; i++) {
4897*61046927SAndroid Build Coastguard Worker             if (info->fp16_mask & (1 << i))
4898*61046927SAndroid Build Coastguard Worker                instr->operands[i] = Operand::literal32(literal);
4899*61046927SAndroid Build Coastguard Worker          }
4900*61046927SAndroid Build Coastguard Worker 
4901*61046927SAndroid Build Coastguard Worker          ctx.instructions.emplace_back(std::move(instr));
4902*61046927SAndroid Build Coastguard Worker          return;
4903*61046927SAndroid Build Coastguard Worker       }
4904*61046927SAndroid Build Coastguard Worker 
4905*61046927SAndroid Build Coastguard Worker       if (has_dead_literal || madak) {
4906*61046927SAndroid Build Coastguard Worker          aco_opcode new_op = madak ? aco_opcode::v_madak_f32 : aco_opcode::v_madmk_f32;
4907*61046927SAndroid Build Coastguard Worker          if (instr->opcode == aco_opcode::v_fma_f32)
4908*61046927SAndroid Build Coastguard Worker             new_op = madak ? aco_opcode::v_fmaak_f32 : aco_opcode::v_fmamk_f32;
4909*61046927SAndroid Build Coastguard Worker          else if (instr->opcode == aco_opcode::v_mad_f16 ||
4910*61046927SAndroid Build Coastguard Worker                   instr->opcode == aco_opcode::v_mad_legacy_f16)
4911*61046927SAndroid Build Coastguard Worker             new_op = madak ? aco_opcode::v_madak_f16 : aco_opcode::v_madmk_f16;
4912*61046927SAndroid Build Coastguard Worker          else if (instr->opcode == aco_opcode::v_fma_f16)
4913*61046927SAndroid Build Coastguard Worker             new_op = madak ? aco_opcode::v_fmaak_f16 : aco_opcode::v_fmamk_f16;
4914*61046927SAndroid Build Coastguard Worker 
4915*61046927SAndroid Build Coastguard Worker          uint32_t literal = ctx.info[instr->operands[ffs(info->literal_mask) - 1].tempId()].val;
4916*61046927SAndroid Build Coastguard Worker          instr->format = Format::VOP2;
4917*61046927SAndroid Build Coastguard Worker          instr->opcode = new_op;
4918*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < 3; i++) {
4919*61046927SAndroid Build Coastguard Worker             if (info->literal_mask & (1 << i))
4920*61046927SAndroid Build Coastguard Worker                instr->operands[i] = Operand::literal32(literal);
4921*61046927SAndroid Build Coastguard Worker          }
4922*61046927SAndroid Build Coastguard Worker          if (madak) { /* add literal -> madak */
4923*61046927SAndroid Build Coastguard Worker             if (!instr->operands[1].isOfType(RegType::vgpr))
4924*61046927SAndroid Build Coastguard Worker                instr->valu().swapOperands(0, 1);
4925*61046927SAndroid Build Coastguard Worker          } else { /* mul literal -> madmk */
4926*61046927SAndroid Build Coastguard Worker             if (!(info->literal_mask & 0b10))
4927*61046927SAndroid Build Coastguard Worker                instr->valu().swapOperands(0, 1);
4928*61046927SAndroid Build Coastguard Worker             instr->valu().swapOperands(1, 2);
4929*61046927SAndroid Build Coastguard Worker          }
4930*61046927SAndroid Build Coastguard Worker          ctx.instructions.emplace_back(std::move(instr));
4931*61046927SAndroid Build Coastguard Worker          return;
4932*61046927SAndroid Build Coastguard Worker       }
4933*61046927SAndroid Build Coastguard Worker    }
4934*61046927SAndroid Build Coastguard Worker 
4935*61046927SAndroid Build Coastguard Worker    /* apply literals on other SALU/VALU */
4936*61046927SAndroid Build Coastguard Worker    if (instr->isSALU() || instr->isVALU()) {
4937*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < instr->operands.size(); i++) {
4938*61046927SAndroid Build Coastguard Worker          Operand op = instr->operands[i];
4939*61046927SAndroid Build Coastguard Worker          unsigned bits = get_operand_size(instr, i);
4940*61046927SAndroid Build Coastguard Worker          if (op.isTemp() && ctx.info[op.tempId()].is_literal(bits) && ctx.uses[op.tempId()] == 0) {
4941*61046927SAndroid Build Coastguard Worker             Operand literal = Operand::literal32(ctx.info[op.tempId()].val);
4942*61046927SAndroid Build Coastguard Worker             instr->format = withoutDPP(instr->format);
4943*61046927SAndroid Build Coastguard Worker             if (instr->isVALU() && i > 0 && instr->format != Format::VOP3P)
4944*61046927SAndroid Build Coastguard Worker                instr->format = asVOP3(instr->format);
4945*61046927SAndroid Build Coastguard Worker             instr->operands[i] = literal;
4946*61046927SAndroid Build Coastguard Worker          }
4947*61046927SAndroid Build Coastguard Worker       }
4948*61046927SAndroid Build Coastguard Worker    }
4949*61046927SAndroid Build Coastguard Worker 
4950*61046927SAndroid Build Coastguard Worker    if (instr->isSOPC() && ctx.program->gfx_level < GFX12)
4951*61046927SAndroid Build Coastguard Worker       try_convert_sopc_to_sopk(instr);
4952*61046927SAndroid Build Coastguard Worker 
4953*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || instr->opcode == aco_opcode::v_fma_mix_f32)
4954*61046927SAndroid Build Coastguard Worker       opt_fma_mix_acc(ctx, instr);
4955*61046927SAndroid Build Coastguard Worker 
4956*61046927SAndroid Build Coastguard Worker    ctx.instructions.emplace_back(std::move(instr));
4957*61046927SAndroid Build Coastguard Worker }
4958*61046927SAndroid Build Coastguard Worker 
4959*61046927SAndroid Build Coastguard Worker } /* end namespace */
4960*61046927SAndroid Build Coastguard Worker 
4961*61046927SAndroid Build Coastguard Worker void
optimize(Program * program)4962*61046927SAndroid Build Coastguard Worker optimize(Program* program)
4963*61046927SAndroid Build Coastguard Worker {
4964*61046927SAndroid Build Coastguard Worker    opt_ctx ctx;
4965*61046927SAndroid Build Coastguard Worker    ctx.program = program;
4966*61046927SAndroid Build Coastguard Worker    ctx.info = std::vector<ssa_info>(program->peekAllocationId());
4967*61046927SAndroid Build Coastguard Worker 
4968*61046927SAndroid Build Coastguard Worker    /* 1. Bottom-Up DAG pass (forward) to label all ssa-defs */
4969*61046927SAndroid Build Coastguard Worker    for (Block& block : program->blocks) {
4970*61046927SAndroid Build Coastguard Worker       ctx.fp_mode = block.fp_mode;
4971*61046927SAndroid Build Coastguard Worker       for (aco_ptr<Instruction>& instr : block.instructions)
4972*61046927SAndroid Build Coastguard Worker          label_instruction(ctx, instr);
4973*61046927SAndroid Build Coastguard Worker    }
4974*61046927SAndroid Build Coastguard Worker 
4975*61046927SAndroid Build Coastguard Worker    ctx.uses = dead_code_analysis(program);
4976*61046927SAndroid Build Coastguard Worker 
4977*61046927SAndroid Build Coastguard Worker    /* 2. Rematerialize constants in every block. */
4978*61046927SAndroid Build Coastguard Worker    rematerialize_constants(ctx);
4979*61046927SAndroid Build Coastguard Worker 
4980*61046927SAndroid Build Coastguard Worker    /* 3. Combine v_mad, omod, clamp and propagate sgpr on VALU instructions */
4981*61046927SAndroid Build Coastguard Worker    for (Block& block : program->blocks) {
4982*61046927SAndroid Build Coastguard Worker       ctx.fp_mode = block.fp_mode;
4983*61046927SAndroid Build Coastguard Worker       for (aco_ptr<Instruction>& instr : block.instructions)
4984*61046927SAndroid Build Coastguard Worker          combine_instruction(ctx, instr);
4985*61046927SAndroid Build Coastguard Worker    }
4986*61046927SAndroid Build Coastguard Worker 
4987*61046927SAndroid Build Coastguard Worker    /* 4. Top-Down DAG pass (backward) to select instructions (includes DCE) */
4988*61046927SAndroid Build Coastguard Worker    for (auto block_rit = program->blocks.rbegin(); block_rit != program->blocks.rend();
4989*61046927SAndroid Build Coastguard Worker         ++block_rit) {
4990*61046927SAndroid Build Coastguard Worker       Block* block = &(*block_rit);
4991*61046927SAndroid Build Coastguard Worker       ctx.fp_mode = block->fp_mode;
4992*61046927SAndroid Build Coastguard Worker       for (auto instr_rit = block->instructions.rbegin(); instr_rit != block->instructions.rend();
4993*61046927SAndroid Build Coastguard Worker            ++instr_rit)
4994*61046927SAndroid Build Coastguard Worker          select_instruction(ctx, *instr_rit);
4995*61046927SAndroid Build Coastguard Worker    }
4996*61046927SAndroid Build Coastguard Worker 
4997*61046927SAndroid Build Coastguard Worker    /* 5. Add literals to instructions */
4998*61046927SAndroid Build Coastguard Worker    for (Block& block : program->blocks) {
4999*61046927SAndroid Build Coastguard Worker       ctx.instructions.reserve(block.instructions.size());
5000*61046927SAndroid Build Coastguard Worker       ctx.fp_mode = block.fp_mode;
5001*61046927SAndroid Build Coastguard Worker       for (aco_ptr<Instruction>& instr : block.instructions)
5002*61046927SAndroid Build Coastguard Worker          apply_literals(ctx, instr);
5003*61046927SAndroid Build Coastguard Worker       block.instructions = std::move(ctx.instructions);
5004*61046927SAndroid Build Coastguard Worker    }
5005*61046927SAndroid Build Coastguard Worker }
5006*61046927SAndroid Build Coastguard Worker 
5007*61046927SAndroid Build Coastguard Worker } // namespace aco
5008