xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak_nir.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2022 Collabora, Ltd.
3*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker  */
5*61046927SAndroid Build Coastguard Worker 
6*61046927SAndroid Build Coastguard Worker #include "nak_private.h"
7*61046927SAndroid Build Coastguard Worker #include "nir_builder.h"
8*61046927SAndroid Build Coastguard Worker #include "nir_control_flow.h"
9*61046927SAndroid Build Coastguard Worker #include "nir_xfb_info.h"
10*61046927SAndroid Build Coastguard Worker 
11*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
12*61046927SAndroid Build Coastguard Worker 
13*61046927SAndroid Build Coastguard Worker #define OPT(nir, pass, ...) ({                           \
14*61046927SAndroid Build Coastguard Worker    bool this_progress = false;                           \
15*61046927SAndroid Build Coastguard Worker    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);    \
16*61046927SAndroid Build Coastguard Worker    if (this_progress)                                    \
17*61046927SAndroid Build Coastguard Worker       progress = true;                                   \
18*61046927SAndroid Build Coastguard Worker    this_progress;                                        \
19*61046927SAndroid Build Coastguard Worker })
20*61046927SAndroid Build Coastguard Worker 
21*61046927SAndroid Build Coastguard Worker #define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
22*61046927SAndroid Build Coastguard Worker 
23*61046927SAndroid Build Coastguard Worker bool
nak_nir_workgroup_has_one_subgroup(const nir_shader * nir)24*61046927SAndroid Build Coastguard Worker nak_nir_workgroup_has_one_subgroup(const nir_shader *nir)
25*61046927SAndroid Build Coastguard Worker {
26*61046927SAndroid Build Coastguard Worker    switch (nir->info.stage) {
27*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_VERTEX:
28*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL:
29*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_GEOMETRY:
30*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT:
31*61046927SAndroid Build Coastguard Worker       unreachable("Shader stage does not have workgroups");
32*61046927SAndroid Build Coastguard Worker       break;
33*61046927SAndroid Build Coastguard Worker 
34*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_CTRL:
35*61046927SAndroid Build Coastguard Worker       /* Tessellation only ever has one subgroup per workgroup.  The Vulkan
36*61046927SAndroid Build Coastguard Worker        * limit on the number of tessellation invocations is 32 to allow for
37*61046927SAndroid Build Coastguard Worker        * this.
38*61046927SAndroid Build Coastguard Worker        */
39*61046927SAndroid Build Coastguard Worker       return true;
40*61046927SAndroid Build Coastguard Worker 
41*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_COMPUTE:
42*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_KERNEL: {
43*61046927SAndroid Build Coastguard Worker       if (nir->info.workgroup_size_variable)
44*61046927SAndroid Build Coastguard Worker          return false;
45*61046927SAndroid Build Coastguard Worker 
46*61046927SAndroid Build Coastguard Worker       uint16_t wg_sz = nir->info.workgroup_size[0] *
47*61046927SAndroid Build Coastguard Worker                        nir->info.workgroup_size[1] *
48*61046927SAndroid Build Coastguard Worker                        nir->info.workgroup_size[2];
49*61046927SAndroid Build Coastguard Worker 
50*61046927SAndroid Build Coastguard Worker       return wg_sz <= NAK_SUBGROUP_SIZE;
51*61046927SAndroid Build Coastguard Worker    }
52*61046927SAndroid Build Coastguard Worker 
53*61046927SAndroid Build Coastguard Worker    default:
54*61046927SAndroid Build Coastguard Worker       unreachable("Unknown shader stage");
55*61046927SAndroid Build Coastguard Worker    }
56*61046927SAndroid Build Coastguard Worker }
57*61046927SAndroid Build Coastguard Worker 
58*61046927SAndroid Build Coastguard Worker static uint8_t
vectorize_filter_cb(const nir_instr * instr,const void * _data)59*61046927SAndroid Build Coastguard Worker vectorize_filter_cb(const nir_instr *instr, const void *_data)
60*61046927SAndroid Build Coastguard Worker {
61*61046927SAndroid Build Coastguard Worker    if (instr->type != nir_instr_type_alu)
62*61046927SAndroid Build Coastguard Worker       return 0;
63*61046927SAndroid Build Coastguard Worker 
64*61046927SAndroid Build Coastguard Worker    const nir_alu_instr *alu = nir_instr_as_alu(instr);
65*61046927SAndroid Build Coastguard Worker 
66*61046927SAndroid Build Coastguard Worker    const unsigned bit_size = nir_alu_instr_is_comparison(alu)
67*61046927SAndroid Build Coastguard Worker                              ? alu->src[0].src.ssa->bit_size
68*61046927SAndroid Build Coastguard Worker                              : alu->def.bit_size;
69*61046927SAndroid Build Coastguard Worker 
70*61046927SAndroid Build Coastguard Worker    switch (alu->op) {
71*61046927SAndroid Build Coastguard Worker    case nir_op_fadd:
72*61046927SAndroid Build Coastguard Worker    case nir_op_fsub:
73*61046927SAndroid Build Coastguard Worker    case nir_op_fabs:
74*61046927SAndroid Build Coastguard Worker    case nir_op_fneg:
75*61046927SAndroid Build Coastguard Worker    case nir_op_feq:
76*61046927SAndroid Build Coastguard Worker    case nir_op_fge:
77*61046927SAndroid Build Coastguard Worker    case nir_op_flt:
78*61046927SAndroid Build Coastguard Worker    case nir_op_fneu:
79*61046927SAndroid Build Coastguard Worker    case nir_op_fmul:
80*61046927SAndroid Build Coastguard Worker    case nir_op_ffma:
81*61046927SAndroid Build Coastguard Worker    case nir_op_fsign:
82*61046927SAndroid Build Coastguard Worker    case nir_op_fsat:
83*61046927SAndroid Build Coastguard Worker    case nir_op_fmax:
84*61046927SAndroid Build Coastguard Worker    case nir_op_fmin:
85*61046927SAndroid Build Coastguard Worker       return bit_size == 16 ? 2 : 1;
86*61046927SAndroid Build Coastguard Worker    default:
87*61046927SAndroid Build Coastguard Worker       return 1;
88*61046927SAndroid Build Coastguard Worker    }
89*61046927SAndroid Build Coastguard Worker }
90*61046927SAndroid Build Coastguard Worker 
91*61046927SAndroid Build Coastguard Worker static void
optimize_nir(nir_shader * nir,const struct nak_compiler * nak,bool allow_copies)92*61046927SAndroid Build Coastguard Worker optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies)
93*61046927SAndroid Build Coastguard Worker {
94*61046927SAndroid Build Coastguard Worker    bool progress;
95*61046927SAndroid Build Coastguard Worker 
96*61046927SAndroid Build Coastguard Worker    unsigned lower_flrp =
97*61046927SAndroid Build Coastguard Worker       (nir->options->lower_flrp16 ? 16 : 0) |
98*61046927SAndroid Build Coastguard Worker       (nir->options->lower_flrp32 ? 32 : 0) |
99*61046927SAndroid Build Coastguard Worker       (nir->options->lower_flrp64 ? 64 : 0);
100*61046927SAndroid Build Coastguard Worker 
101*61046927SAndroid Build Coastguard Worker    do {
102*61046927SAndroid Build Coastguard Worker       progress = false;
103*61046927SAndroid Build Coastguard Worker 
104*61046927SAndroid Build Coastguard Worker       /* This pass is causing problems with types used by OpenCL :
105*61046927SAndroid Build Coastguard Worker        *    https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13955
106*61046927SAndroid Build Coastguard Worker        *
107*61046927SAndroid Build Coastguard Worker        * Running with it disabled made no difference in the resulting assembly
108*61046927SAndroid Build Coastguard Worker        * code.
109*61046927SAndroid Build Coastguard Worker        */
110*61046927SAndroid Build Coastguard Worker       if (nir->info.stage != MESA_SHADER_KERNEL)
111*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_split_array_vars, nir_var_function_temp);
112*61046927SAndroid Build Coastguard Worker 
113*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_shrink_vec_array_vars, nir_var_function_temp);
114*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_deref);
115*61046927SAndroid Build Coastguard Worker       if (OPT(nir, nir_opt_memcpy))
116*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_split_var_copies);
117*61046927SAndroid Build Coastguard Worker 
118*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_vars_to_ssa);
119*61046927SAndroid Build Coastguard Worker 
120*61046927SAndroid Build Coastguard Worker       if (allow_copies) {
121*61046927SAndroid Build Coastguard Worker          /* Only run this pass in the first call to brw_nir_optimize.  Later
122*61046927SAndroid Build Coastguard Worker           * calls assume that we've lowered away any copy_deref instructions
123*61046927SAndroid Build Coastguard Worker           * and we don't want to introduce any more.
124*61046927SAndroid Build Coastguard Worker           */
125*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_find_array_copies);
126*61046927SAndroid Build Coastguard Worker       }
127*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_copy_prop_vars);
128*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_dead_write_vars);
129*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_combine_stores, nir_var_all);
130*61046927SAndroid Build Coastguard Worker 
131*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_alu_width, vectorize_filter_cb, NULL);
132*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_vectorize, vectorize_filter_cb, NULL);
133*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_phis_to_scalar, false);
134*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_frexp);
135*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_copy_prop);
136*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_dce);
137*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_cse);
138*61046927SAndroid Build Coastguard Worker 
139*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_peephole_select, 0, false, false);
140*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_intrinsics);
141*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_idiv_const, 32);
142*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_algebraic);
143*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_constant_convert_alu_types);
144*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_constant_folding);
145*61046927SAndroid Build Coastguard Worker 
146*61046927SAndroid Build Coastguard Worker       if (lower_flrp != 0) {
147*61046927SAndroid Build Coastguard Worker          if (OPT(nir, nir_lower_flrp, lower_flrp, false /* always_precise */))
148*61046927SAndroid Build Coastguard Worker             OPT(nir, nir_opt_constant_folding);
149*61046927SAndroid Build Coastguard Worker          /* Nothing should rematerialize any flrps */
150*61046927SAndroid Build Coastguard Worker          lower_flrp = 0;
151*61046927SAndroid Build Coastguard Worker       }
152*61046927SAndroid Build Coastguard Worker 
153*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_dead_cf);
154*61046927SAndroid Build Coastguard Worker       if (OPT(nir, nir_opt_loop)) {
155*61046927SAndroid Build Coastguard Worker          /* If nir_opt_loop makes progress, then we need to clean things up
156*61046927SAndroid Build Coastguard Worker           * if we want any hope of nir_opt_if or nir_opt_loop_unroll to make
157*61046927SAndroid Build Coastguard Worker           * progress.
158*61046927SAndroid Build Coastguard Worker           */
159*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_copy_prop);
160*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_dce);
161*61046927SAndroid Build Coastguard Worker       }
162*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
163*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_conditional_discard);
164*61046927SAndroid Build Coastguard Worker       if (nir->options->max_unroll_iterations != 0) {
165*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_loop_unroll);
166*61046927SAndroid Build Coastguard Worker       }
167*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_remove_phis);
168*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_gcm, false);
169*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_undef);
170*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_pack);
171*61046927SAndroid Build Coastguard Worker    } while (progress);
172*61046927SAndroid Build Coastguard Worker 
173*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
174*61046927SAndroid Build Coastguard Worker }
175*61046927SAndroid Build Coastguard Worker 
176*61046927SAndroid Build Coastguard Worker void
nak_optimize_nir(nir_shader * nir,const struct nak_compiler * nak)177*61046927SAndroid Build Coastguard Worker nak_optimize_nir(nir_shader *nir, const struct nak_compiler *nak)
178*61046927SAndroid Build Coastguard Worker {
179*61046927SAndroid Build Coastguard Worker    optimize_nir(nir, nak, false);
180*61046927SAndroid Build Coastguard Worker }
181*61046927SAndroid Build Coastguard Worker 
182*61046927SAndroid Build Coastguard Worker static unsigned
lower_bit_size_cb(const nir_instr * instr,void * data)183*61046927SAndroid Build Coastguard Worker lower_bit_size_cb(const nir_instr *instr, void *data)
184*61046927SAndroid Build Coastguard Worker {
185*61046927SAndroid Build Coastguard Worker    const struct nak_compiler *nak = data;
186*61046927SAndroid Build Coastguard Worker 
187*61046927SAndroid Build Coastguard Worker    switch (instr->type) {
188*61046927SAndroid Build Coastguard Worker    case nir_instr_type_alu: {
189*61046927SAndroid Build Coastguard Worker       nir_alu_instr *alu = nir_instr_as_alu(instr);
190*61046927SAndroid Build Coastguard Worker       if (nir_op_infos[alu->op].is_conversion)
191*61046927SAndroid Build Coastguard Worker          return 0;
192*61046927SAndroid Build Coastguard Worker 
193*61046927SAndroid Build Coastguard Worker       const unsigned bit_size = nir_alu_instr_is_comparison(alu)
194*61046927SAndroid Build Coastguard Worker                                 ? alu->src[0].src.ssa->bit_size
195*61046927SAndroid Build Coastguard Worker                                 : alu->def.bit_size;
196*61046927SAndroid Build Coastguard Worker 
197*61046927SAndroid Build Coastguard Worker       switch (alu->op) {
198*61046927SAndroid Build Coastguard Worker       case nir_op_bit_count:
199*61046927SAndroid Build Coastguard Worker       case nir_op_ufind_msb:
200*61046927SAndroid Build Coastguard Worker       case nir_op_ifind_msb:
201*61046927SAndroid Build Coastguard Worker       case nir_op_find_lsb:
202*61046927SAndroid Build Coastguard Worker          /* These are handled specially because the destination is always
203*61046927SAndroid Build Coastguard Worker           * 32-bit and so the bit size of the instruction is given by the
204*61046927SAndroid Build Coastguard Worker           * source.
205*61046927SAndroid Build Coastguard Worker           */
206*61046927SAndroid Build Coastguard Worker          return alu->src[0].src.ssa->bit_size == 32 ? 0 : 32;
207*61046927SAndroid Build Coastguard Worker 
208*61046927SAndroid Build Coastguard Worker       case nir_op_fabs:
209*61046927SAndroid Build Coastguard Worker       case nir_op_fadd:
210*61046927SAndroid Build Coastguard Worker       case nir_op_fneg:
211*61046927SAndroid Build Coastguard Worker       case nir_op_feq:
212*61046927SAndroid Build Coastguard Worker       case nir_op_fge:
213*61046927SAndroid Build Coastguard Worker       case nir_op_flt:
214*61046927SAndroid Build Coastguard Worker       case nir_op_fneu:
215*61046927SAndroid Build Coastguard Worker       case nir_op_fmul:
216*61046927SAndroid Build Coastguard Worker       case nir_op_ffma:
217*61046927SAndroid Build Coastguard Worker       case nir_op_ffmaz:
218*61046927SAndroid Build Coastguard Worker       case nir_op_fsign:
219*61046927SAndroid Build Coastguard Worker       case nir_op_fsat:
220*61046927SAndroid Build Coastguard Worker       case nir_op_fceil:
221*61046927SAndroid Build Coastguard Worker       case nir_op_ffloor:
222*61046927SAndroid Build Coastguard Worker       case nir_op_fround_even:
223*61046927SAndroid Build Coastguard Worker       case nir_op_ftrunc:
224*61046927SAndroid Build Coastguard Worker          if (bit_size == 16  && nak->sm >= 70)
225*61046927SAndroid Build Coastguard Worker             return 0;
226*61046927SAndroid Build Coastguard Worker          break;
227*61046927SAndroid Build Coastguard Worker 
228*61046927SAndroid Build Coastguard Worker       case nir_op_fmax:
229*61046927SAndroid Build Coastguard Worker       case nir_op_fmin:
230*61046927SAndroid Build Coastguard Worker          if (bit_size == 16 && nak->sm >= 80)
231*61046927SAndroid Build Coastguard Worker             return 0;
232*61046927SAndroid Build Coastguard Worker          break;
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker       default:
235*61046927SAndroid Build Coastguard Worker          break;
236*61046927SAndroid Build Coastguard Worker       }
237*61046927SAndroid Build Coastguard Worker 
238*61046927SAndroid Build Coastguard Worker       if (bit_size >= 32)
239*61046927SAndroid Build Coastguard Worker          return 0;
240*61046927SAndroid Build Coastguard Worker 
241*61046927SAndroid Build Coastguard Worker       if (bit_size & (8 | 16))
242*61046927SAndroid Build Coastguard Worker          return 32;
243*61046927SAndroid Build Coastguard Worker 
244*61046927SAndroid Build Coastguard Worker       return 0;
245*61046927SAndroid Build Coastguard Worker    }
246*61046927SAndroid Build Coastguard Worker 
247*61046927SAndroid Build Coastguard Worker    case nir_instr_type_intrinsic: {
248*61046927SAndroid Build Coastguard Worker       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
249*61046927SAndroid Build Coastguard Worker       switch (intrin->intrinsic) {
250*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_vote_ieq:
251*61046927SAndroid Build Coastguard Worker          if (intrin->src[0].ssa->bit_size != 1 &&
252*61046927SAndroid Build Coastguard Worker              intrin->src[0].ssa->bit_size < 32)
253*61046927SAndroid Build Coastguard Worker             return 32;
254*61046927SAndroid Build Coastguard Worker          return 0;
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_vote_feq:
257*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_read_invocation:
258*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_read_first_invocation:
259*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_shuffle:
260*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_shuffle_xor:
261*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_shuffle_up:
262*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_shuffle_down:
263*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_quad_broadcast:
264*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_quad_swap_horizontal:
265*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_quad_swap_vertical:
266*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_quad_swap_diagonal:
267*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_reduce:
268*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_inclusive_scan:
269*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_exclusive_scan:
270*61046927SAndroid Build Coastguard Worker          if (intrin->src[0].ssa->bit_size < 32)
271*61046927SAndroid Build Coastguard Worker             return 32;
272*61046927SAndroid Build Coastguard Worker          return 0;
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker       default:
275*61046927SAndroid Build Coastguard Worker          return 0;
276*61046927SAndroid Build Coastguard Worker       }
277*61046927SAndroid Build Coastguard Worker    }
278*61046927SAndroid Build Coastguard Worker 
279*61046927SAndroid Build Coastguard Worker    case nir_instr_type_phi: {
280*61046927SAndroid Build Coastguard Worker       nir_phi_instr *phi = nir_instr_as_phi(instr);
281*61046927SAndroid Build Coastguard Worker       if (phi->def.bit_size < 32 && phi->def.bit_size != 1)
282*61046927SAndroid Build Coastguard Worker          return 32;
283*61046927SAndroid Build Coastguard Worker       return 0;
284*61046927SAndroid Build Coastguard Worker    }
285*61046927SAndroid Build Coastguard Worker 
286*61046927SAndroid Build Coastguard Worker    default:
287*61046927SAndroid Build Coastguard Worker       return 0;
288*61046927SAndroid Build Coastguard Worker    }
289*61046927SAndroid Build Coastguard Worker }
290*61046927SAndroid Build Coastguard Worker 
291*61046927SAndroid Build Coastguard Worker void
nak_preprocess_nir(nir_shader * nir,const struct nak_compiler * nak)292*61046927SAndroid Build Coastguard Worker nak_preprocess_nir(nir_shader *nir, const struct nak_compiler *nak)
293*61046927SAndroid Build Coastguard Worker {
294*61046927SAndroid Build Coastguard Worker    UNUSED bool progress = false;
295*61046927SAndroid Build Coastguard Worker 
296*61046927SAndroid Build Coastguard Worker    nir_validate_ssa_dominance(nir, "before nak_preprocess_nir");
297*61046927SAndroid Build Coastguard Worker 
298*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
299*61046927SAndroid Build Coastguard Worker       nir_lower_io_to_temporaries(nir, nir_shader_get_entrypoint(nir),
300*61046927SAndroid Build Coastguard Worker                                   true /* outputs */, false /* inputs */);
301*61046927SAndroid Build Coastguard Worker    }
302*61046927SAndroid Build Coastguard Worker 
303*61046927SAndroid Build Coastguard Worker    const nir_lower_tex_options tex_options = {
304*61046927SAndroid Build Coastguard Worker       .lower_txd_3d = true,
305*61046927SAndroid Build Coastguard Worker       .lower_txd_cube_map = true,
306*61046927SAndroid Build Coastguard Worker       .lower_txd_clamp = true,
307*61046927SAndroid Build Coastguard Worker       .lower_txd_shadow = true,
308*61046927SAndroid Build Coastguard Worker       .lower_txp = ~0,
309*61046927SAndroid Build Coastguard Worker       /* TODO: More lowering */
310*61046927SAndroid Build Coastguard Worker    };
311*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_tex, &tex_options);
312*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_normalize_cubemap_coords);
313*61046927SAndroid Build Coastguard Worker 
314*61046927SAndroid Build Coastguard Worker    nir_lower_image_options image_options = {
315*61046927SAndroid Build Coastguard Worker       .lower_cube_size = true,
316*61046927SAndroid Build Coastguard Worker    };
317*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_image, &image_options);
318*61046927SAndroid Build Coastguard Worker 
319*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_global_vars_to_local);
320*61046927SAndroid Build Coastguard Worker 
321*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_split_var_copies);
322*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_split_struct_vars, nir_var_function_temp);
323*61046927SAndroid Build Coastguard Worker 
324*61046927SAndroid Build Coastguard Worker    /* Optimize but allow copies because we haven't lowered them yet */
325*61046927SAndroid Build Coastguard Worker    optimize_nir(nir, nak, true /* allow_copies */);
326*61046927SAndroid Build Coastguard Worker 
327*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_load_const_to_scalar);
328*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_var_copies);
329*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_system_values);
330*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_compute_system_values, NULL);
331*61046927SAndroid Build Coastguard Worker 
332*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT)
333*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_terminate_to_demote);
334*61046927SAndroid Build Coastguard Worker }
335*61046927SAndroid Build Coastguard Worker 
336*61046927SAndroid Build Coastguard Worker uint16_t
nak_varying_attr_addr(gl_varying_slot slot)337*61046927SAndroid Build Coastguard Worker nak_varying_attr_addr(gl_varying_slot slot)
338*61046927SAndroid Build Coastguard Worker {
339*61046927SAndroid Build Coastguard Worker    if (slot >= VARYING_SLOT_PATCH0) {
340*61046927SAndroid Build Coastguard Worker       return NAK_ATTR_PATCH_START + (slot - VARYING_SLOT_PATCH0) * 0x10;
341*61046927SAndroid Build Coastguard Worker    } else if (slot >= VARYING_SLOT_VAR0) {
342*61046927SAndroid Build Coastguard Worker       return NAK_ATTR_GENERIC_START + (slot - VARYING_SLOT_VAR0) * 0x10;
343*61046927SAndroid Build Coastguard Worker    } else {
344*61046927SAndroid Build Coastguard Worker       switch (slot) {
345*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_TESS_LEVEL_OUTER: return NAK_ATTR_TESS_LOD;
346*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_TESS_LEVEL_INNER: return NAK_ATTR_TESS_INTERRIOR;
347*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_PRIMITIVE_ID:     return NAK_ATTR_PRIMITIVE_ID;
348*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_LAYER:            return NAK_ATTR_RT_ARRAY_INDEX;
349*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_VIEWPORT:         return NAK_ATTR_VIEWPORT_INDEX;
350*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_PSIZ:             return NAK_ATTR_POINT_SIZE;
351*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_POS:              return NAK_ATTR_POSITION;
352*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_CLIP_DIST0:       return NAK_ATTR_CLIP_CULL_DIST_0;
353*61046927SAndroid Build Coastguard Worker       case VARYING_SLOT_CLIP_DIST1:       return NAK_ATTR_CLIP_CULL_DIST_4;
354*61046927SAndroid Build Coastguard Worker       default: unreachable("Invalid varying slot");
355*61046927SAndroid Build Coastguard Worker       }
356*61046927SAndroid Build Coastguard Worker    }
357*61046927SAndroid Build Coastguard Worker }
358*61046927SAndroid Build Coastguard Worker 
359*61046927SAndroid Build Coastguard Worker static uint16_t
nak_fs_out_addr(gl_frag_result slot,uint32_t blend_idx)360*61046927SAndroid Build Coastguard Worker nak_fs_out_addr(gl_frag_result slot, uint32_t blend_idx)
361*61046927SAndroid Build Coastguard Worker {
362*61046927SAndroid Build Coastguard Worker    switch (slot) {
363*61046927SAndroid Build Coastguard Worker    case FRAG_RESULT_DEPTH:
364*61046927SAndroid Build Coastguard Worker       assert(blend_idx == 0);
365*61046927SAndroid Build Coastguard Worker       return NAK_FS_OUT_DEPTH;
366*61046927SAndroid Build Coastguard Worker 
367*61046927SAndroid Build Coastguard Worker    case FRAG_RESULT_STENCIL:
368*61046927SAndroid Build Coastguard Worker       unreachable("EXT_shader_stencil_export not supported");
369*61046927SAndroid Build Coastguard Worker 
370*61046927SAndroid Build Coastguard Worker    case FRAG_RESULT_COLOR:
371*61046927SAndroid Build Coastguard Worker       unreachable("Vulkan alway uses explicit locations");
372*61046927SAndroid Build Coastguard Worker 
373*61046927SAndroid Build Coastguard Worker    case FRAG_RESULT_SAMPLE_MASK:
374*61046927SAndroid Build Coastguard Worker       assert(blend_idx == 0);
375*61046927SAndroid Build Coastguard Worker       return NAK_FS_OUT_SAMPLE_MASK;
376*61046927SAndroid Build Coastguard Worker 
377*61046927SAndroid Build Coastguard Worker    default:
378*61046927SAndroid Build Coastguard Worker       assert(blend_idx < 2);
379*61046927SAndroid Build Coastguard Worker       return NAK_FS_OUT_COLOR((slot - FRAG_RESULT_DATA0) + blend_idx);
380*61046927SAndroid Build Coastguard Worker    }
381*61046927SAndroid Build Coastguard Worker }
382*61046927SAndroid Build Coastguard Worker 
383*61046927SAndroid Build Coastguard Worker uint16_t
nak_sysval_attr_addr(gl_system_value sysval)384*61046927SAndroid Build Coastguard Worker nak_sysval_attr_addr(gl_system_value sysval)
385*61046927SAndroid Build Coastguard Worker {
386*61046927SAndroid Build Coastguard Worker    switch (sysval) {
387*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_PRIMITIVE_ID:  return NAK_ATTR_PRIMITIVE_ID;
388*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_FRAG_COORD:    return NAK_ATTR_POSITION;
389*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_POINT_COORD:   return NAK_ATTR_POINT_SPRITE;
390*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_TESS_COORD:    return NAK_ATTR_TESS_COORD;
391*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_INSTANCE_ID:   return NAK_ATTR_INSTANCE_ID;
392*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_VERTEX_ID:     return NAK_ATTR_VERTEX_ID;
393*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_FRONT_FACE:    return NAK_ATTR_FRONT_FACE;
394*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_LAYER_ID:      return NAK_ATTR_RT_ARRAY_INDEX;
395*61046927SAndroid Build Coastguard Worker    default: unreachable("Invalid system value");
396*61046927SAndroid Build Coastguard Worker    }
397*61046927SAndroid Build Coastguard Worker }
398*61046927SAndroid Build Coastguard Worker 
399*61046927SAndroid Build Coastguard Worker static uint8_t
nak_sysval_sysval_idx(gl_system_value sysval)400*61046927SAndroid Build Coastguard Worker nak_sysval_sysval_idx(gl_system_value sysval)
401*61046927SAndroid Build Coastguard Worker {
402*61046927SAndroid Build Coastguard Worker    switch (sysval) {
403*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_SUBGROUP_INVOCATION:    return NAK_SV_LANE_ID;
404*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_VERTICES_IN:            return NAK_SV_VERTEX_COUNT;
405*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_INVOCATION_ID:          return NAK_SV_INVOCATION_ID;
406*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_HELPER_INVOCATION:      return NAK_SV_THREAD_KILL;
407*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_LOCAL_INVOCATION_ID:    return NAK_SV_TID;
408*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_WORKGROUP_ID:           return NAK_SV_CTAID;
409*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_SUBGROUP_EQ_MASK:       return NAK_SV_LANEMASK_EQ;
410*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_SUBGROUP_LT_MASK:       return NAK_SV_LANEMASK_LT;
411*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_SUBGROUP_LE_MASK:       return NAK_SV_LANEMASK_LE;
412*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_SUBGROUP_GT_MASK:       return NAK_SV_LANEMASK_GT;
413*61046927SAndroid Build Coastguard Worker    case SYSTEM_VALUE_SUBGROUP_GE_MASK:       return NAK_SV_LANEMASK_GE;
414*61046927SAndroid Build Coastguard Worker    default: unreachable("Invalid system value");
415*61046927SAndroid Build Coastguard Worker    }
416*61046927SAndroid Build Coastguard Worker }
417*61046927SAndroid Build Coastguard Worker 
418*61046927SAndroid Build Coastguard Worker static bool
nak_nir_lower_system_value_intrin(nir_builder * b,nir_intrinsic_instr * intrin,void * data)419*61046927SAndroid Build Coastguard Worker nak_nir_lower_system_value_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
420*61046927SAndroid Build Coastguard Worker                                   void *data)
421*61046927SAndroid Build Coastguard Worker {
422*61046927SAndroid Build Coastguard Worker    const struct nak_compiler *nak = data;
423*61046927SAndroid Build Coastguard Worker 
424*61046927SAndroid Build Coastguard Worker    b->cursor = nir_before_instr(&intrin->instr);
425*61046927SAndroid Build Coastguard Worker 
426*61046927SAndroid Build Coastguard Worker    nir_def *val;
427*61046927SAndroid Build Coastguard Worker    switch (intrin->intrinsic) {
428*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_primitive_id:
429*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_instance_id:
430*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_vertex_id: {
431*61046927SAndroid Build Coastguard Worker       assert(b->shader->info.stage != MESA_SHADER_VERTEX ||
432*61046927SAndroid Build Coastguard Worker              b->shader->info.stage != MESA_SHADER_TESS_CTRL ||
433*61046927SAndroid Build Coastguard Worker              b->shader->info.stage == MESA_SHADER_TESS_EVAL ||
434*61046927SAndroid Build Coastguard Worker              b->shader->info.stage == MESA_SHADER_GEOMETRY);
435*61046927SAndroid Build Coastguard Worker       const gl_system_value sysval =
436*61046927SAndroid Build Coastguard Worker          nir_system_value_from_intrinsic(intrin->intrinsic);
437*61046927SAndroid Build Coastguard Worker       const uint32_t addr = nak_sysval_attr_addr(sysval);
438*61046927SAndroid Build Coastguard Worker       val = nir_ald_nv(b, 1, nir_imm_int(b, 0), nir_imm_int(b, 0),
439*61046927SAndroid Build Coastguard Worker                        .base = addr, .flags = 0,
440*61046927SAndroid Build Coastguard Worker                        .range_base = addr, .range = 4,
441*61046927SAndroid Build Coastguard Worker                        .access = ACCESS_CAN_REORDER);
442*61046927SAndroid Build Coastguard Worker       break;
443*61046927SAndroid Build Coastguard Worker    }
444*61046927SAndroid Build Coastguard Worker 
445*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_patch_vertices_in: {
446*61046927SAndroid Build Coastguard Worker       val = nir_load_sysval_nv(b, 32, .base = NAK_SV_VERTEX_COUNT,
447*61046927SAndroid Build Coastguard Worker                                .access = ACCESS_CAN_REORDER);
448*61046927SAndroid Build Coastguard Worker       val = nir_extract_u8(b, val, nir_imm_int(b, 1));
449*61046927SAndroid Build Coastguard Worker       break;
450*61046927SAndroid Build Coastguard Worker    }
451*61046927SAndroid Build Coastguard Worker 
452*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_eq_mask:
453*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_lt_mask:
454*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_le_mask:
455*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_gt_mask:
456*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_ge_mask: {
457*61046927SAndroid Build Coastguard Worker       const gl_system_value sysval =
458*61046927SAndroid Build Coastguard Worker          nir_system_value_from_intrinsic(intrin->intrinsic);
459*61046927SAndroid Build Coastguard Worker       const uint32_t idx = nak_sysval_sysval_idx(sysval);
460*61046927SAndroid Build Coastguard Worker       val = nir_load_sysval_nv(b, 32, .base = idx,
461*61046927SAndroid Build Coastguard Worker                                .access = ACCESS_CAN_REORDER);
462*61046927SAndroid Build Coastguard Worker 
463*61046927SAndroid Build Coastguard Worker       /* Pad with 0 because all invocations above 31 are off */
464*61046927SAndroid Build Coastguard Worker       if (intrin->def.bit_size == 64) {
465*61046927SAndroid Build Coastguard Worker          val = nir_u2u32(b, val);
466*61046927SAndroid Build Coastguard Worker       } else {
467*61046927SAndroid Build Coastguard Worker          assert(intrin->def.bit_size == 32);
468*61046927SAndroid Build Coastguard Worker          val = nir_pad_vector_imm_int(b, val, 0, intrin->def.num_components);
469*61046927SAndroid Build Coastguard Worker       }
470*61046927SAndroid Build Coastguard Worker       break;
471*61046927SAndroid Build Coastguard Worker    }
472*61046927SAndroid Build Coastguard Worker 
473*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_invocation:
474*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_helper_invocation:
475*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_invocation_id:
476*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_workgroup_id: {
477*61046927SAndroid Build Coastguard Worker       const gl_system_value sysval =
478*61046927SAndroid Build Coastguard Worker          nir_system_value_from_intrinsic(intrin->intrinsic);
479*61046927SAndroid Build Coastguard Worker       const uint32_t idx = nak_sysval_sysval_idx(sysval);
480*61046927SAndroid Build Coastguard Worker       nir_def *comps[3];
481*61046927SAndroid Build Coastguard Worker       assert(intrin->def.num_components <= 3);
482*61046927SAndroid Build Coastguard Worker       for (unsigned c = 0; c < intrin->def.num_components; c++) {
483*61046927SAndroid Build Coastguard Worker          comps[c] = nir_load_sysval_nv(b, 32, .base = idx + c,
484*61046927SAndroid Build Coastguard Worker                                        .access = ACCESS_CAN_REORDER);
485*61046927SAndroid Build Coastguard Worker       }
486*61046927SAndroid Build Coastguard Worker       val = nir_vec(b, comps, intrin->def.num_components);
487*61046927SAndroid Build Coastguard Worker       break;
488*61046927SAndroid Build Coastguard Worker    }
489*61046927SAndroid Build Coastguard Worker 
490*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_local_invocation_id: {
491*61046927SAndroid Build Coastguard Worker       nir_def *x = nir_load_sysval_nv(b, 32, .base = NAK_SV_TID_X,
492*61046927SAndroid Build Coastguard Worker                                       .access = ACCESS_CAN_REORDER);
493*61046927SAndroid Build Coastguard Worker       nir_def *y = nir_load_sysval_nv(b, 32, .base = NAK_SV_TID_Y,
494*61046927SAndroid Build Coastguard Worker                                       .access = ACCESS_CAN_REORDER);
495*61046927SAndroid Build Coastguard Worker       nir_def *z = nir_load_sysval_nv(b, 32, .base = NAK_SV_TID_Z,
496*61046927SAndroid Build Coastguard Worker                                       .access = ACCESS_CAN_REORDER);
497*61046927SAndroid Build Coastguard Worker 
498*61046927SAndroid Build Coastguard Worker       if (b->shader->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
499*61046927SAndroid Build Coastguard Worker          nir_def *x_lo = nir_iand_imm(b, x, 0x1);
500*61046927SAndroid Build Coastguard Worker          nir_def *y_lo = nir_ushr_imm(b, nir_iand_imm(b, x, 0x2), 1);
501*61046927SAndroid Build Coastguard Worker          nir_def *x_hi = nir_ushr_imm(b, nir_iand_imm(b, x, ~0x3), 1);
502*61046927SAndroid Build Coastguard Worker          nir_def *y_hi = nir_ishl_imm(b, y, 1);
503*61046927SAndroid Build Coastguard Worker 
504*61046927SAndroid Build Coastguard Worker          x = nir_ior(b, x_lo, x_hi);
505*61046927SAndroid Build Coastguard Worker          y = nir_ior(b, y_lo, y_hi);
506*61046927SAndroid Build Coastguard Worker       }
507*61046927SAndroid Build Coastguard Worker 
508*61046927SAndroid Build Coastguard Worker       val = nir_vec3(b, x, y, z);
509*61046927SAndroid Build Coastguard Worker       break;
510*61046927SAndroid Build Coastguard Worker    }
511*61046927SAndroid Build Coastguard Worker 
512*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_num_subgroups: {
513*61046927SAndroid Build Coastguard Worker       assert(!b->shader->info.workgroup_size_variable);
514*61046927SAndroid Build Coastguard Worker       uint16_t wg_size = b->shader->info.workgroup_size[0] *
515*61046927SAndroid Build Coastguard Worker                          b->shader->info.workgroup_size[1] *
516*61046927SAndroid Build Coastguard Worker                          b->shader->info.workgroup_size[2];
517*61046927SAndroid Build Coastguard Worker       val = nir_imm_int(b, DIV_ROUND_UP(wg_size, 32));
518*61046927SAndroid Build Coastguard Worker       break;
519*61046927SAndroid Build Coastguard Worker    }
520*61046927SAndroid Build Coastguard Worker 
521*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_id:
522*61046927SAndroid Build Coastguard Worker       if (nak_nir_workgroup_has_one_subgroup(b->shader)) {
523*61046927SAndroid Build Coastguard Worker          val = nir_imm_int(b, 0);
524*61046927SAndroid Build Coastguard Worker       } else {
525*61046927SAndroid Build Coastguard Worker          assert(!b->shader->info.workgroup_size_variable);
526*61046927SAndroid Build Coastguard Worker          nir_def *tid_x = nir_load_sysval_nv(b, 32, .base = NAK_SV_TID_X,
527*61046927SAndroid Build Coastguard Worker                                              .access = ACCESS_CAN_REORDER);
528*61046927SAndroid Build Coastguard Worker          nir_def *tid_y = nir_load_sysval_nv(b, 32, .base = NAK_SV_TID_Y,
529*61046927SAndroid Build Coastguard Worker                                              .access = ACCESS_CAN_REORDER);
530*61046927SAndroid Build Coastguard Worker          nir_def *tid_z = nir_load_sysval_nv(b, 32, .base = NAK_SV_TID_Z,
531*61046927SAndroid Build Coastguard Worker                                              .access = ACCESS_CAN_REORDER);
532*61046927SAndroid Build Coastguard Worker 
533*61046927SAndroid Build Coastguard Worker          const uint16_t *wg_size = b->shader->info.workgroup_size;
534*61046927SAndroid Build Coastguard Worker          nir_def *tid =
535*61046927SAndroid Build Coastguard Worker             nir_iadd(b, tid_x,
536*61046927SAndroid Build Coastguard Worker             nir_iadd(b, nir_imul_imm(b, tid_y, wg_size[0]),
537*61046927SAndroid Build Coastguard Worker                         nir_imul_imm(b, tid_z, wg_size[0] * wg_size[1])));
538*61046927SAndroid Build Coastguard Worker 
539*61046927SAndroid Build Coastguard Worker          val = nir_udiv_imm(b, tid, 32);
540*61046927SAndroid Build Coastguard Worker       }
541*61046927SAndroid Build Coastguard Worker       break;
542*61046927SAndroid Build Coastguard Worker 
543*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_is_helper_invocation: {
544*61046927SAndroid Build Coastguard Worker       /* Unlike load_helper_invocation, this one isn't re-orderable */
545*61046927SAndroid Build Coastguard Worker       val = nir_load_sysval_nv(b, 32, .base = NAK_SV_THREAD_KILL);
546*61046927SAndroid Build Coastguard Worker       break;
547*61046927SAndroid Build Coastguard Worker    }
548*61046927SAndroid Build Coastguard Worker 
549*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_shader_clock: {
550*61046927SAndroid Build Coastguard Worker       /* The CS2R opcode can load 64 bits worth of sysval data at a time but
551*61046927SAndroid Build Coastguard Worker        * it's not actually atomic.  In order to get correct shader clocks, we
552*61046927SAndroid Build Coastguard Worker        * need to do a loop where we do
553*61046927SAndroid Build Coastguard Worker        *
554*61046927SAndroid Build Coastguard Worker        *    CS2R SV_CLOCK_HI
555*61046927SAndroid Build Coastguard Worker        *    CS2R SV_CLOCK_LO
556*61046927SAndroid Build Coastguard Worker        *    CS2R SV_CLOCK_HI
557*61046927SAndroid Build Coastguard Worker        *    CS2R SV_CLOCK_LO
558*61046927SAndroid Build Coastguard Worker        *    CS2R SV_CLOCK_HI
559*61046927SAndroid Build Coastguard Worker        *    ...
560*61046927SAndroid Build Coastguard Worker        *
561*61046927SAndroid Build Coastguard Worker        * The moment two high values are the same, we take the low value
562*61046927SAndroid Build Coastguard Worker        * between them and that gives us our clock.
563*61046927SAndroid Build Coastguard Worker        *
564*61046927SAndroid Build Coastguard Worker        * In order to make sure we don't run into any weird races, we also need
565*61046927SAndroid Build Coastguard Worker        * to insert a barrier after every load to ensure the one load completes
566*61046927SAndroid Build Coastguard Worker        * before we kick off the next load.  Otherwise, if one load happens to
567*61046927SAndroid Build Coastguard Worker        * be faster than the other (they are variable latency, after all) we're
568*61046927SAndroid Build Coastguard Worker        * still guaranteed that the loads happen in the order we want.
569*61046927SAndroid Build Coastguard Worker        */
570*61046927SAndroid Build Coastguard Worker       nir_variable *clock =
571*61046927SAndroid Build Coastguard Worker          nir_local_variable_create(b->impl, glsl_uvec2_type(), NULL);
572*61046927SAndroid Build Coastguard Worker 
573*61046927SAndroid Build Coastguard Worker       nir_def *clock_hi = nir_load_sysval_nv(b, 32, .base = NAK_SV_CLOCK_HI);
574*61046927SAndroid Build Coastguard Worker       nir_ssa_bar_nv(b, clock_hi);
575*61046927SAndroid Build Coastguard Worker 
576*61046927SAndroid Build Coastguard Worker       nir_store_var(b, clock, nir_vec2(b, nir_imm_int(b, 0), clock_hi), 0x3);
577*61046927SAndroid Build Coastguard Worker 
578*61046927SAndroid Build Coastguard Worker       nir_push_loop(b);
579*61046927SAndroid Build Coastguard Worker       {
580*61046927SAndroid Build Coastguard Worker          nir_def *last_clock = nir_load_var(b, clock);
581*61046927SAndroid Build Coastguard Worker 
582*61046927SAndroid Build Coastguard Worker          nir_def *clock_lo = nir_load_sysval_nv(b, 32, .base = NAK_SV_CLOCK_LO);
583*61046927SAndroid Build Coastguard Worker          nir_ssa_bar_nv(b, clock_lo);
584*61046927SAndroid Build Coastguard Worker 
585*61046927SAndroid Build Coastguard Worker          clock_hi = nir_load_sysval_nv(b, 32, .base = NAK_SV_CLOCK + 1);
586*61046927SAndroid Build Coastguard Worker          nir_ssa_bar_nv(b, clock_hi);
587*61046927SAndroid Build Coastguard Worker 
588*61046927SAndroid Build Coastguard Worker          nir_store_var(b, clock, nir_vec2(b, clock_lo, clock_hi), 0x3);
589*61046927SAndroid Build Coastguard Worker 
590*61046927SAndroid Build Coastguard Worker          nir_break_if(b, nir_ieq(b, clock_hi, nir_channel(b, last_clock, 1)));
591*61046927SAndroid Build Coastguard Worker       }
592*61046927SAndroid Build Coastguard Worker       nir_pop_loop(b, NULL);
593*61046927SAndroid Build Coastguard Worker 
594*61046927SAndroid Build Coastguard Worker       val = nir_load_var(b, clock);
595*61046927SAndroid Build Coastguard Worker       if (intrin->def.bit_size == 64)
596*61046927SAndroid Build Coastguard Worker          val = nir_pack_64_2x32(b, val);
597*61046927SAndroid Build Coastguard Worker       break;
598*61046927SAndroid Build Coastguard Worker    }
599*61046927SAndroid Build Coastguard Worker 
600*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_warps_per_sm_nv:
601*61046927SAndroid Build Coastguard Worker       val = nir_imm_int(b, nak->warps_per_sm);
602*61046927SAndroid Build Coastguard Worker       break;
603*61046927SAndroid Build Coastguard Worker 
604*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_sm_count_nv:
605*61046927SAndroid Build Coastguard Worker       val = nir_load_sysval_nv(b, 32, .base = NAK_SV_VIRTCFG);
606*61046927SAndroid Build Coastguard Worker       val = nir_ubitfield_extract_imm(b, val, 20, 9);
607*61046927SAndroid Build Coastguard Worker       break;
608*61046927SAndroid Build Coastguard Worker 
609*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_warp_id_nv:
610*61046927SAndroid Build Coastguard Worker       val = nir_load_sysval_nv(b, 32, .base = NAK_SV_VIRTID);
611*61046927SAndroid Build Coastguard Worker       val = nir_ubitfield_extract_imm(b, val, 8, 7);
612*61046927SAndroid Build Coastguard Worker       break;
613*61046927SAndroid Build Coastguard Worker 
614*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_sm_id_nv:
615*61046927SAndroid Build Coastguard Worker       val = nir_load_sysval_nv(b, 32, .base = NAK_SV_VIRTID);
616*61046927SAndroid Build Coastguard Worker       val = nir_ubitfield_extract_imm(b, val, 20, 9);
617*61046927SAndroid Build Coastguard Worker       break;
618*61046927SAndroid Build Coastguard Worker 
619*61046927SAndroid Build Coastguard Worker    default:
620*61046927SAndroid Build Coastguard Worker       return false;
621*61046927SAndroid Build Coastguard Worker    }
622*61046927SAndroid Build Coastguard Worker 
623*61046927SAndroid Build Coastguard Worker    if (intrin->def.bit_size == 1)
624*61046927SAndroid Build Coastguard Worker       val = nir_i2b(b, val);
625*61046927SAndroid Build Coastguard Worker 
626*61046927SAndroid Build Coastguard Worker    nir_def_rewrite_uses(&intrin->def, val);
627*61046927SAndroid Build Coastguard Worker 
628*61046927SAndroid Build Coastguard Worker    return true;
629*61046927SAndroid Build Coastguard Worker }
630*61046927SAndroid Build Coastguard Worker 
631*61046927SAndroid Build Coastguard Worker static bool
nak_nir_lower_system_values(nir_shader * nir,const struct nak_compiler * nak)632*61046927SAndroid Build Coastguard Worker nak_nir_lower_system_values(nir_shader *nir, const struct nak_compiler *nak)
633*61046927SAndroid Build Coastguard Worker {
634*61046927SAndroid Build Coastguard Worker    return nir_shader_intrinsics_pass(nir, nak_nir_lower_system_value_intrin,
635*61046927SAndroid Build Coastguard Worker                                      nir_metadata_none,
636*61046927SAndroid Build Coastguard Worker                                      (void *)nak);
637*61046927SAndroid Build Coastguard Worker }
638*61046927SAndroid Build Coastguard Worker 
639*61046927SAndroid Build Coastguard Worker struct nak_xfb_info
nak_xfb_from_nir(const struct nir_xfb_info * nir_xfb)640*61046927SAndroid Build Coastguard Worker nak_xfb_from_nir(const struct nir_xfb_info *nir_xfb)
641*61046927SAndroid Build Coastguard Worker {
642*61046927SAndroid Build Coastguard Worker    if (nir_xfb == NULL)
643*61046927SAndroid Build Coastguard Worker       return (struct nak_xfb_info) { };
644*61046927SAndroid Build Coastguard Worker 
645*61046927SAndroid Build Coastguard Worker    struct nak_xfb_info nak_xfb = { };
646*61046927SAndroid Build Coastguard Worker 
647*61046927SAndroid Build Coastguard Worker    u_foreach_bit(b, nir_xfb->buffers_written) {
648*61046927SAndroid Build Coastguard Worker       nak_xfb.stride[b] = nir_xfb->buffers[b].stride;
649*61046927SAndroid Build Coastguard Worker       nak_xfb.stream[b] = nir_xfb->buffer_to_stream[b];
650*61046927SAndroid Build Coastguard Worker    }
651*61046927SAndroid Build Coastguard Worker    memset(nak_xfb.attr_index, 0xff, sizeof(nak_xfb.attr_index)); /* = skip */
652*61046927SAndroid Build Coastguard Worker 
653*61046927SAndroid Build Coastguard Worker    for (unsigned o = 0; o < nir_xfb->output_count; o++) {
654*61046927SAndroid Build Coastguard Worker       const nir_xfb_output_info *out = &nir_xfb->outputs[o];
655*61046927SAndroid Build Coastguard Worker       const uint8_t b = out->buffer;
656*61046927SAndroid Build Coastguard Worker       assert(nir_xfb->buffers_written & BITFIELD_BIT(b));
657*61046927SAndroid Build Coastguard Worker 
658*61046927SAndroid Build Coastguard Worker       const uint16_t attr_addr = nak_varying_attr_addr(out->location);
659*61046927SAndroid Build Coastguard Worker       assert(attr_addr % 4 == 0);
660*61046927SAndroid Build Coastguard Worker       const uint16_t attr_idx = attr_addr / 4;
661*61046927SAndroid Build Coastguard Worker 
662*61046927SAndroid Build Coastguard Worker       assert(out->offset % 4 == 0);
663*61046927SAndroid Build Coastguard Worker       uint8_t out_idx = out->offset / 4;
664*61046927SAndroid Build Coastguard Worker 
665*61046927SAndroid Build Coastguard Worker       u_foreach_bit(c, out->component_mask)
666*61046927SAndroid Build Coastguard Worker          nak_xfb.attr_index[b][out_idx++] = attr_idx + c;
667*61046927SAndroid Build Coastguard Worker 
668*61046927SAndroid Build Coastguard Worker       nak_xfb.attr_count[b] = MAX2(nak_xfb.attr_count[b], out_idx);
669*61046927SAndroid Build Coastguard Worker    }
670*61046927SAndroid Build Coastguard Worker 
671*61046927SAndroid Build Coastguard Worker    return nak_xfb;
672*61046927SAndroid Build Coastguard Worker }
673*61046927SAndroid Build Coastguard Worker 
674*61046927SAndroid Build Coastguard Worker static bool
lower_fs_output_intrin(nir_builder * b,nir_intrinsic_instr * intrin,void * _data)675*61046927SAndroid Build Coastguard Worker lower_fs_output_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *_data)
676*61046927SAndroid Build Coastguard Worker {
677*61046927SAndroid Build Coastguard Worker    if (intrin->intrinsic != nir_intrinsic_store_output)
678*61046927SAndroid Build Coastguard Worker       return false;
679*61046927SAndroid Build Coastguard Worker 
680*61046927SAndroid Build Coastguard Worker    b->cursor = nir_before_instr(&intrin->instr);
681*61046927SAndroid Build Coastguard Worker 
682*61046927SAndroid Build Coastguard Worker    const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
683*61046927SAndroid Build Coastguard Worker    uint16_t addr = nak_fs_out_addr(sem.location, sem.dual_source_blend_index) +
684*61046927SAndroid Build Coastguard Worker                    nir_src_as_uint(intrin->src[1]) * 16 +
685*61046927SAndroid Build Coastguard Worker                    nir_intrinsic_component(intrin) * 4;
686*61046927SAndroid Build Coastguard Worker 
687*61046927SAndroid Build Coastguard Worker    nir_def *data = intrin->src[0].ssa;
688*61046927SAndroid Build Coastguard Worker 
689*61046927SAndroid Build Coastguard Worker    /* The fs_out_nv intrinsic is always scalar */
690*61046927SAndroid Build Coastguard Worker    u_foreach_bit(c, nir_intrinsic_write_mask(intrin)) {
691*61046927SAndroid Build Coastguard Worker       if (nir_scalar_is_undef(nir_scalar_resolved(data, c)))
692*61046927SAndroid Build Coastguard Worker          continue;
693*61046927SAndroid Build Coastguard Worker 
694*61046927SAndroid Build Coastguard Worker       nir_fs_out_nv(b, nir_channel(b, data, c), .base = addr + c * 4);
695*61046927SAndroid Build Coastguard Worker    }
696*61046927SAndroid Build Coastguard Worker 
697*61046927SAndroid Build Coastguard Worker    nir_instr_remove(&intrin->instr);
698*61046927SAndroid Build Coastguard Worker 
699*61046927SAndroid Build Coastguard Worker    return true;
700*61046927SAndroid Build Coastguard Worker }
701*61046927SAndroid Build Coastguard Worker 
702*61046927SAndroid Build Coastguard Worker static bool
nak_nir_lower_fs_outputs(nir_shader * nir)703*61046927SAndroid Build Coastguard Worker nak_nir_lower_fs_outputs(nir_shader *nir)
704*61046927SAndroid Build Coastguard Worker {
705*61046927SAndroid Build Coastguard Worker    if (nir->info.outputs_written == 0)
706*61046927SAndroid Build Coastguard Worker       return false;
707*61046927SAndroid Build Coastguard Worker 
708*61046927SAndroid Build Coastguard Worker    bool progress = nir_shader_intrinsics_pass(nir, lower_fs_output_intrin,
709*61046927SAndroid Build Coastguard Worker                                               nir_metadata_control_flow,
710*61046927SAndroid Build Coastguard Worker                                               NULL);
711*61046927SAndroid Build Coastguard Worker 
712*61046927SAndroid Build Coastguard Worker    if (progress) {
713*61046927SAndroid Build Coastguard Worker       /* We need a copy_fs_outputs_nv intrinsic so NAK knows where to place
714*61046927SAndroid Build Coastguard Worker        * the final copy.  This needs to be in the last block, after all
715*61046927SAndroid Build Coastguard Worker        * store_output intrinsics.
716*61046927SAndroid Build Coastguard Worker        */
717*61046927SAndroid Build Coastguard Worker       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
718*61046927SAndroid Build Coastguard Worker       nir_builder b = nir_builder_at(nir_after_impl(impl));
719*61046927SAndroid Build Coastguard Worker       nir_copy_fs_outputs_nv(&b);
720*61046927SAndroid Build Coastguard Worker    }
721*61046927SAndroid Build Coastguard Worker 
722*61046927SAndroid Build Coastguard Worker    return progress;
723*61046927SAndroid Build Coastguard Worker }
724*61046927SAndroid Build Coastguard Worker 
725*61046927SAndroid Build Coastguard Worker static bool
nak_nir_remove_barrier_intrin(nir_builder * b,nir_intrinsic_instr * barrier,UNUSED void * _data)726*61046927SAndroid Build Coastguard Worker nak_nir_remove_barrier_intrin(nir_builder *b, nir_intrinsic_instr *barrier,
727*61046927SAndroid Build Coastguard Worker                               UNUSED void *_data)
728*61046927SAndroid Build Coastguard Worker {
729*61046927SAndroid Build Coastguard Worker    if (barrier->intrinsic != nir_intrinsic_barrier)
730*61046927SAndroid Build Coastguard Worker       return false;
731*61046927SAndroid Build Coastguard Worker 
732*61046927SAndroid Build Coastguard Worker    mesa_scope exec_scope = nir_intrinsic_execution_scope(barrier);
733*61046927SAndroid Build Coastguard Worker    assert(exec_scope <= SCOPE_WORKGROUP &&
734*61046927SAndroid Build Coastguard Worker           "Control barrier with scope > WORKGROUP");
735*61046927SAndroid Build Coastguard Worker 
736*61046927SAndroid Build Coastguard Worker    if (exec_scope == SCOPE_WORKGROUP &&
737*61046927SAndroid Build Coastguard Worker        nak_nir_workgroup_has_one_subgroup(b->shader))
738*61046927SAndroid Build Coastguard Worker       exec_scope = SCOPE_SUBGROUP;
739*61046927SAndroid Build Coastguard Worker 
740*61046927SAndroid Build Coastguard Worker    /* Because we're guaranteeing maximal convergence via warp barriers,
741*61046927SAndroid Build Coastguard Worker     * subgroup barriers do nothing.
742*61046927SAndroid Build Coastguard Worker     */
743*61046927SAndroid Build Coastguard Worker    if (exec_scope <= SCOPE_SUBGROUP)
744*61046927SAndroid Build Coastguard Worker       exec_scope = SCOPE_NONE;
745*61046927SAndroid Build Coastguard Worker 
746*61046927SAndroid Build Coastguard Worker    const nir_variable_mode mem_modes = nir_intrinsic_memory_modes(barrier);
747*61046927SAndroid Build Coastguard Worker    if (exec_scope == SCOPE_NONE && mem_modes == 0) {
748*61046927SAndroid Build Coastguard Worker       nir_instr_remove(&barrier->instr);
749*61046927SAndroid Build Coastguard Worker       return true;
750*61046927SAndroid Build Coastguard Worker    }
751*61046927SAndroid Build Coastguard Worker 
752*61046927SAndroid Build Coastguard Worker    /* In this case, we're leaving the barrier there */
753*61046927SAndroid Build Coastguard Worker    b->shader->info.uses_control_barrier = true;
754*61046927SAndroid Build Coastguard Worker 
755*61046927SAndroid Build Coastguard Worker    bool progress = false;
756*61046927SAndroid Build Coastguard Worker    if (exec_scope != nir_intrinsic_execution_scope(barrier)) {
757*61046927SAndroid Build Coastguard Worker       nir_intrinsic_set_execution_scope(barrier, exec_scope);
758*61046927SAndroid Build Coastguard Worker       progress = true;
759*61046927SAndroid Build Coastguard Worker    }
760*61046927SAndroid Build Coastguard Worker 
761*61046927SAndroid Build Coastguard Worker    return progress;
762*61046927SAndroid Build Coastguard Worker }
763*61046927SAndroid Build Coastguard Worker 
764*61046927SAndroid Build Coastguard Worker static bool
nak_nir_remove_barriers(nir_shader * nir)765*61046927SAndroid Build Coastguard Worker nak_nir_remove_barriers(nir_shader *nir)
766*61046927SAndroid Build Coastguard Worker {
767*61046927SAndroid Build Coastguard Worker    /* We'll set this back to true if we leave any barriers in place */
768*61046927SAndroid Build Coastguard Worker    nir->info.uses_control_barrier = false;
769*61046927SAndroid Build Coastguard Worker 
770*61046927SAndroid Build Coastguard Worker    return nir_shader_intrinsics_pass(nir, nak_nir_remove_barrier_intrin,
771*61046927SAndroid Build Coastguard Worker                                      nir_metadata_control_flow,
772*61046927SAndroid Build Coastguard Worker                                      NULL);
773*61046927SAndroid Build Coastguard Worker }
774*61046927SAndroid Build Coastguard Worker 
775*61046927SAndroid Build Coastguard Worker static bool
nak_mem_vectorize_cb(unsigned align_mul,unsigned align_offset,unsigned bit_size,unsigned num_components,nir_intrinsic_instr * low,nir_intrinsic_instr * high,void * cb_data)776*61046927SAndroid Build Coastguard Worker nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
777*61046927SAndroid Build Coastguard Worker                      unsigned bit_size, unsigned num_components,
778*61046927SAndroid Build Coastguard Worker                      nir_intrinsic_instr *low, nir_intrinsic_instr *high,
779*61046927SAndroid Build Coastguard Worker                      void *cb_data)
780*61046927SAndroid Build Coastguard Worker {
781*61046927SAndroid Build Coastguard Worker    /*
782*61046927SAndroid Build Coastguard Worker     * Since we legalize these later with nir_lower_mem_access_bit_sizes,
783*61046927SAndroid Build Coastguard Worker     * we can optimistically combine anything that might be profitable
784*61046927SAndroid Build Coastguard Worker     */
785*61046927SAndroid Build Coastguard Worker    assert(util_is_power_of_two_nonzero(align_mul));
786*61046927SAndroid Build Coastguard Worker 
787*61046927SAndroid Build Coastguard Worker    unsigned max_bytes = 128u / 8u;
788*61046927SAndroid Build Coastguard Worker    if (low->intrinsic == nir_intrinsic_ldc_nv ||
789*61046927SAndroid Build Coastguard Worker        low->intrinsic == nir_intrinsic_ldcx_nv)
790*61046927SAndroid Build Coastguard Worker       max_bytes = 64u / 8u;
791*61046927SAndroid Build Coastguard Worker 
792*61046927SAndroid Build Coastguard Worker    align_mul = MIN2(align_mul, max_bytes);
793*61046927SAndroid Build Coastguard Worker    align_offset = align_offset % align_mul;
794*61046927SAndroid Build Coastguard Worker    return align_offset + num_components * (bit_size / 8) <= align_mul;
795*61046927SAndroid Build Coastguard Worker }
796*61046927SAndroid Build Coastguard Worker 
797*61046927SAndroid Build Coastguard Worker static nir_mem_access_size_align
nak_mem_access_size_align(nir_intrinsic_op intrin,uint8_t bytes,uint8_t bit_size,uint32_t align_mul,uint32_t align_offset,bool offset_is_const,const void * cb_data)798*61046927SAndroid Build Coastguard Worker nak_mem_access_size_align(nir_intrinsic_op intrin,
799*61046927SAndroid Build Coastguard Worker                           uint8_t bytes, uint8_t bit_size,
800*61046927SAndroid Build Coastguard Worker                           uint32_t align_mul, uint32_t align_offset,
801*61046927SAndroid Build Coastguard Worker                           bool offset_is_const, const void *cb_data)
802*61046927SAndroid Build Coastguard Worker {
803*61046927SAndroid Build Coastguard Worker    const uint32_t align = nir_combined_align(align_mul, align_offset);
804*61046927SAndroid Build Coastguard Worker    assert(util_is_power_of_two_nonzero(align));
805*61046927SAndroid Build Coastguard Worker 
806*61046927SAndroid Build Coastguard Worker    unsigned bytes_pow2;
807*61046927SAndroid Build Coastguard Worker    if (nir_intrinsic_infos[intrin].has_dest) {
808*61046927SAndroid Build Coastguard Worker       /* Reads can over-fetch a bit if the alignment is okay. */
809*61046927SAndroid Build Coastguard Worker       bytes_pow2 = util_next_power_of_two(bytes);
810*61046927SAndroid Build Coastguard Worker    } else {
811*61046927SAndroid Build Coastguard Worker       bytes_pow2 = 1 << (util_last_bit(bytes) - 1);
812*61046927SAndroid Build Coastguard Worker    }
813*61046927SAndroid Build Coastguard Worker 
814*61046927SAndroid Build Coastguard Worker    unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
815*61046927SAndroid Build Coastguard Worker    assert(util_is_power_of_two_nonzero(chunk_bytes));
816*61046927SAndroid Build Coastguard Worker    if (intrin == nir_intrinsic_ldc_nv ||
817*61046927SAndroid Build Coastguard Worker        intrin == nir_intrinsic_ldcx_nv)
818*61046927SAndroid Build Coastguard Worker       chunk_bytes = MIN2(chunk_bytes, 8);
819*61046927SAndroid Build Coastguard Worker 
820*61046927SAndroid Build Coastguard Worker    if ((intrin == nir_intrinsic_ldc_nv ||
821*61046927SAndroid Build Coastguard Worker         intrin == nir_intrinsic_ldcx_nv) && align < 4) {
822*61046927SAndroid Build Coastguard Worker       /* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
823*61046927SAndroid Build Coastguard Worker        * In particular, this applies to ldc.u16 which means we either have to
824*61046927SAndroid Build Coastguard Worker        * fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get
825*61046927SAndroid Build Coastguard Worker        * the 16bit value out.  Fortunately, nir_lower_mem_access_bit_sizes()
826*61046927SAndroid Build Coastguard Worker        * can handle over-alignment for reads.
827*61046927SAndroid Build Coastguard Worker        */
828*61046927SAndroid Build Coastguard Worker       if (align == 2 || offset_is_const) {
829*61046927SAndroid Build Coastguard Worker          return (nir_mem_access_size_align) {
830*61046927SAndroid Build Coastguard Worker             .bit_size = 32,
831*61046927SAndroid Build Coastguard Worker             .num_components = 1,
832*61046927SAndroid Build Coastguard Worker             .align = 4,
833*61046927SAndroid Build Coastguard Worker          };
834*61046927SAndroid Build Coastguard Worker       } else {
835*61046927SAndroid Build Coastguard Worker          assert(align == 1);
836*61046927SAndroid Build Coastguard Worker          return (nir_mem_access_size_align) {
837*61046927SAndroid Build Coastguard Worker             .bit_size = 8,
838*61046927SAndroid Build Coastguard Worker             .num_components = 1,
839*61046927SAndroid Build Coastguard Worker             .align = 1,
840*61046927SAndroid Build Coastguard Worker          };
841*61046927SAndroid Build Coastguard Worker       }
842*61046927SAndroid Build Coastguard Worker    } else if (chunk_bytes < 4) {
843*61046927SAndroid Build Coastguard Worker       return (nir_mem_access_size_align) {
844*61046927SAndroid Build Coastguard Worker          .bit_size = chunk_bytes * 8,
845*61046927SAndroid Build Coastguard Worker          .num_components = 1,
846*61046927SAndroid Build Coastguard Worker          .align = chunk_bytes,
847*61046927SAndroid Build Coastguard Worker       };
848*61046927SAndroid Build Coastguard Worker    } else {
849*61046927SAndroid Build Coastguard Worker       return (nir_mem_access_size_align) {
850*61046927SAndroid Build Coastguard Worker          .bit_size = 32,
851*61046927SAndroid Build Coastguard Worker          .num_components = chunk_bytes / 4,
852*61046927SAndroid Build Coastguard Worker          .align = chunk_bytes,
853*61046927SAndroid Build Coastguard Worker       };
854*61046927SAndroid Build Coastguard Worker    }
855*61046927SAndroid Build Coastguard Worker }
856*61046927SAndroid Build Coastguard Worker 
857*61046927SAndroid Build Coastguard Worker static bool
nir_shader_has_local_variables(const nir_shader * nir)858*61046927SAndroid Build Coastguard Worker nir_shader_has_local_variables(const nir_shader *nir)
859*61046927SAndroid Build Coastguard Worker {
860*61046927SAndroid Build Coastguard Worker    nir_foreach_function(func, nir) {
861*61046927SAndroid Build Coastguard Worker       if (func->impl && !exec_list_is_empty(&func->impl->locals))
862*61046927SAndroid Build Coastguard Worker          return true;
863*61046927SAndroid Build Coastguard Worker    }
864*61046927SAndroid Build Coastguard Worker 
865*61046927SAndroid Build Coastguard Worker    return false;
866*61046927SAndroid Build Coastguard Worker }
867*61046927SAndroid Build Coastguard Worker 
868*61046927SAndroid Build Coastguard Worker static int
type_size_vec4(const struct glsl_type * type,bool bindless)869*61046927SAndroid Build Coastguard Worker type_size_vec4(const struct glsl_type *type, bool bindless)
870*61046927SAndroid Build Coastguard Worker {
871*61046927SAndroid Build Coastguard Worker    return glsl_count_vec4_slots(type, false, bindless);
872*61046927SAndroid Build Coastguard Worker }
873*61046927SAndroid Build Coastguard Worker 
874*61046927SAndroid Build Coastguard Worker void
nak_postprocess_nir(nir_shader * nir,const struct nak_compiler * nak,nir_variable_mode robust2_modes,const struct nak_fs_key * fs_key)875*61046927SAndroid Build Coastguard Worker nak_postprocess_nir(nir_shader *nir,
876*61046927SAndroid Build Coastguard Worker                     const struct nak_compiler *nak,
877*61046927SAndroid Build Coastguard Worker                     nir_variable_mode robust2_modes,
878*61046927SAndroid Build Coastguard Worker                     const struct nak_fs_key *fs_key)
879*61046927SAndroid Build Coastguard Worker {
880*61046927SAndroid Build Coastguard Worker    UNUSED bool progress = false;
881*61046927SAndroid Build Coastguard Worker 
882*61046927SAndroid Build Coastguard Worker    nak_optimize_nir(nir, nak);
883*61046927SAndroid Build Coastguard Worker 
884*61046927SAndroid Build Coastguard Worker    const nir_lower_subgroups_options subgroups_options = {
885*61046927SAndroid Build Coastguard Worker       .subgroup_size = NAK_SUBGROUP_SIZE,
886*61046927SAndroid Build Coastguard Worker       .ballot_bit_size = 32,
887*61046927SAndroid Build Coastguard Worker       .ballot_components = 1,
888*61046927SAndroid Build Coastguard Worker       .lower_to_scalar = true,
889*61046927SAndroid Build Coastguard Worker       .lower_vote_eq = true,
890*61046927SAndroid Build Coastguard Worker       .lower_first_invocation_to_ballot = true,
891*61046927SAndroid Build Coastguard Worker       .lower_read_first_invocation = true,
892*61046927SAndroid Build Coastguard Worker       .lower_elect = true,
893*61046927SAndroid Build Coastguard Worker       .lower_inverse_ballot = true,
894*61046927SAndroid Build Coastguard Worker       .lower_rotate_to_shuffle = true
895*61046927SAndroid Build Coastguard Worker    };
896*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_subgroups, &subgroups_options);
897*61046927SAndroid Build Coastguard Worker    OPT(nir, nak_nir_lower_scan_reduce);
898*61046927SAndroid Build Coastguard Worker 
899*61046927SAndroid Build Coastguard Worker    if (nir_shader_has_local_variables(nir)) {
900*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
901*61046927SAndroid Build Coastguard Worker           glsl_get_natural_size_align_bytes);
902*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_explicit_io, nir_var_function_temp,
903*61046927SAndroid Build Coastguard Worker           nir_address_format_32bit_offset);
904*61046927SAndroid Build Coastguard Worker       nak_optimize_nir(nir, nak);
905*61046927SAndroid Build Coastguard Worker    }
906*61046927SAndroid Build Coastguard Worker 
907*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_opt_shrink_vectors, true);
908*61046927SAndroid Build Coastguard Worker 
909*61046927SAndroid Build Coastguard Worker    nir_load_store_vectorize_options vectorize_opts = {};
910*61046927SAndroid Build Coastguard Worker    vectorize_opts.modes = nir_var_mem_global |
911*61046927SAndroid Build Coastguard Worker                           nir_var_mem_ssbo |
912*61046927SAndroid Build Coastguard Worker                           nir_var_mem_shared |
913*61046927SAndroid Build Coastguard Worker                           nir_var_shader_temp;
914*61046927SAndroid Build Coastguard Worker    vectorize_opts.callback = nak_mem_vectorize_cb;
915*61046927SAndroid Build Coastguard Worker    vectorize_opts.robust_modes = robust2_modes;
916*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_opt_load_store_vectorize, &vectorize_opts);
917*61046927SAndroid Build Coastguard Worker 
918*61046927SAndroid Build Coastguard Worker    nir_lower_mem_access_bit_sizes_options mem_bit_size_options = {
919*61046927SAndroid Build Coastguard Worker       .modes = nir_var_mem_constant | nir_var_mem_ubo | nir_var_mem_generic,
920*61046927SAndroid Build Coastguard Worker       .callback = nak_mem_access_size_align,
921*61046927SAndroid Build Coastguard Worker    };
922*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_mem_access_bit_sizes, &mem_bit_size_options);
923*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_bit_size, lower_bit_size_cb, (void *)nak);
924*61046927SAndroid Build Coastguard Worker 
925*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_opt_combine_barriers, NULL, NULL);
926*61046927SAndroid Build Coastguard Worker 
927*61046927SAndroid Build Coastguard Worker    nak_optimize_nir(nir, nak);
928*61046927SAndroid Build Coastguard Worker 
929*61046927SAndroid Build Coastguard Worker    OPT(nir, nak_nir_lower_tex, nak);
930*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_idiv, NULL);
931*61046927SAndroid Build Coastguard Worker 
932*61046927SAndroid Build Coastguard Worker    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
933*61046927SAndroid Build Coastguard Worker 
934*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_indirect_derefs, 0, UINT32_MAX);
935*61046927SAndroid Build Coastguard Worker 
936*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
937*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_tess_coord_z,
938*61046927SAndroid Build Coastguard Worker           nir->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES);
939*61046927SAndroid Build Coastguard Worker    }
940*61046927SAndroid Build Coastguard Worker 
941*61046927SAndroid Build Coastguard Worker    /* We need to do this before nak_nir_lower_system_values() because it
942*61046927SAndroid Build Coastguard Worker     * relies on the workgroup size being the actual HW workgroup size in
943*61046927SAndroid Build Coastguard Worker     * nir_intrinsic_load_subgroup_id.
944*61046927SAndroid Build Coastguard Worker     */
945*61046927SAndroid Build Coastguard Worker    if (gl_shader_stage_uses_workgroup(nir->info.stage) &&
946*61046927SAndroid Build Coastguard Worker        nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
947*61046927SAndroid Build Coastguard Worker       assert(nir->info.workgroup_size[0] % 2 == 0);
948*61046927SAndroid Build Coastguard Worker       assert(nir->info.workgroup_size[1] % 2 == 0);
949*61046927SAndroid Build Coastguard Worker       nir->info.workgroup_size[0] *= 2;
950*61046927SAndroid Build Coastguard Worker       nir->info.workgroup_size[1] /= 2;
951*61046927SAndroid Build Coastguard Worker    }
952*61046927SAndroid Build Coastguard Worker 
953*61046927SAndroid Build Coastguard Worker    OPT(nir, nak_nir_lower_system_values, nak);
954*61046927SAndroid Build Coastguard Worker 
955*61046927SAndroid Build Coastguard Worker    switch (nir->info.stage) {
956*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_VERTEX:
957*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_CTRL:
958*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL:
959*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_GEOMETRY:
960*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
961*61046927SAndroid Build Coastguard Worker           type_size_vec4, nir_lower_io_lower_64bit_to_32_new);
962*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_constant_folding);
963*61046927SAndroid Build Coastguard Worker       OPT(nir, nak_nir_lower_vtg_io, nak);
964*61046927SAndroid Build Coastguard Worker       if (nir->info.stage == MESA_SHADER_GEOMETRY)
965*61046927SAndroid Build Coastguard Worker          OPT(nir, nak_nir_lower_gs_intrinsics);
966*61046927SAndroid Build Coastguard Worker       break;
967*61046927SAndroid Build Coastguard Worker 
968*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT:
969*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_indirect_derefs,
970*61046927SAndroid Build Coastguard Worker           nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
971*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
972*61046927SAndroid Build Coastguard Worker           type_size_vec4, nir_lower_io_lower_64bit_to_32_new);
973*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_constant_folding);
974*61046927SAndroid Build Coastguard Worker       OPT(nir, nak_nir_lower_fs_inputs, nak, fs_key);
975*61046927SAndroid Build Coastguard Worker       OPT(nir, nak_nir_lower_fs_outputs);
976*61046927SAndroid Build Coastguard Worker       break;
977*61046927SAndroid Build Coastguard Worker 
978*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_COMPUTE:
979*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_KERNEL:
980*61046927SAndroid Build Coastguard Worker       break;
981*61046927SAndroid Build Coastguard Worker 
982*61046927SAndroid Build Coastguard Worker    default:
983*61046927SAndroid Build Coastguard Worker       unreachable("Unsupported shader stage");
984*61046927SAndroid Build Coastguard Worker    }
985*61046927SAndroid Build Coastguard Worker 
986*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_doubles, NULL, nak->nir_options.lower_doubles_options);
987*61046927SAndroid Build Coastguard Worker    OPT(nir, nir_lower_int64);
988*61046927SAndroid Build Coastguard Worker 
989*61046927SAndroid Build Coastguard Worker    nak_optimize_nir(nir, nak);
990*61046927SAndroid Build Coastguard Worker 
991*61046927SAndroid Build Coastguard Worker    do {
992*61046927SAndroid Build Coastguard Worker       progress = false;
993*61046927SAndroid Build Coastguard Worker       OPT(nir, nir_opt_algebraic_late);
994*61046927SAndroid Build Coastguard Worker       OPT(nir, nak_nir_lower_algebraic_late, nak);
995*61046927SAndroid Build Coastguard Worker 
996*61046927SAndroid Build Coastguard Worker       /* If we're lowering fp64 sat but not min/max, the sat lowering may have
997*61046927SAndroid Build Coastguard Worker        * been undone by nir_opt_algebraic.  Lower sat again just to be sure.
998*61046927SAndroid Build Coastguard Worker        */
999*61046927SAndroid Build Coastguard Worker       if ((nak->nir_options.lower_doubles_options & nir_lower_dsat) &&
1000*61046927SAndroid Build Coastguard Worker           !(nak->nir_options.lower_doubles_options & nir_lower_dminmax))
1001*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_lower_doubles, NULL, nir_lower_dsat);
1002*61046927SAndroid Build Coastguard Worker 
1003*61046927SAndroid Build Coastguard Worker       if (progress) {
1004*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_constant_folding);
1005*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_copy_prop);
1006*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_dce);
1007*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_cse);
1008*61046927SAndroid Build Coastguard Worker       }
1009*61046927SAndroid Build Coastguard Worker    } while (progress);
1010*61046927SAndroid Build Coastguard Worker 
1011*61046927SAndroid Build Coastguard Worker    if (nak->sm < 70)
1012*61046927SAndroid Build Coastguard Worker       OPT(nir, nak_nir_split_64bit_conversions);
1013*61046927SAndroid Build Coastguard Worker 
1014*61046927SAndroid Build Coastguard Worker    nir_convert_to_lcssa(nir, true, true);
1015*61046927SAndroid Build Coastguard Worker    nir_divergence_analysis(nir);
1016*61046927SAndroid Build Coastguard Worker 
1017*61046927SAndroid Build Coastguard Worker    if (nak->sm >= 75) {
1018*61046927SAndroid Build Coastguard Worker       if (OPT(nir, nak_nir_lower_non_uniform_ldcx)) {
1019*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_copy_prop);
1020*61046927SAndroid Build Coastguard Worker          OPT(nir, nir_opt_dce);
1021*61046927SAndroid Build Coastguard Worker          nir_divergence_analysis(nir);
1022*61046927SAndroid Build Coastguard Worker       }
1023*61046927SAndroid Build Coastguard Worker    }
1024*61046927SAndroid Build Coastguard Worker 
1025*61046927SAndroid Build Coastguard Worker    OPT(nir, nak_nir_remove_barriers);
1026*61046927SAndroid Build Coastguard Worker 
1027*61046927SAndroid Build Coastguard Worker    if (nak->sm >= 70) {
1028*61046927SAndroid Build Coastguard Worker       if (nak_should_print_nir()) {
1029*61046927SAndroid Build Coastguard Worker          fprintf(stderr, "Structured NIR for %s shader:\n",
1030*61046927SAndroid Build Coastguard Worker                  _mesa_shader_stage_to_string(nir->info.stage));
1031*61046927SAndroid Build Coastguard Worker          nir_print_shader(nir, stderr);
1032*61046927SAndroid Build Coastguard Worker       }
1033*61046927SAndroid Build Coastguard Worker       OPT(nir, nak_nir_lower_cf);
1034*61046927SAndroid Build Coastguard Worker    }
1035*61046927SAndroid Build Coastguard Worker 
1036*61046927SAndroid Build Coastguard Worker    /* Re-index blocks and compact SSA defs because we'll use them to index
1037*61046927SAndroid Build Coastguard Worker     * arrays
1038*61046927SAndroid Build Coastguard Worker     */
1039*61046927SAndroid Build Coastguard Worker    nir_foreach_function(func, nir) {
1040*61046927SAndroid Build Coastguard Worker       if (func->impl) {
1041*61046927SAndroid Build Coastguard Worker          nir_index_blocks(func->impl);
1042*61046927SAndroid Build Coastguard Worker          nir_index_ssa_defs(func->impl);
1043*61046927SAndroid Build Coastguard Worker       }
1044*61046927SAndroid Build Coastguard Worker    }
1045*61046927SAndroid Build Coastguard Worker 
1046*61046927SAndroid Build Coastguard Worker    if (nak_should_print_nir()) {
1047*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "NIR for %s shader:\n",
1048*61046927SAndroid Build Coastguard Worker               _mesa_shader_stage_to_string(nir->info.stage));
1049*61046927SAndroid Build Coastguard Worker       nir_print_shader(nir, stderr);
1050*61046927SAndroid Build Coastguard Worker    }
1051*61046927SAndroid Build Coastguard Worker }
1052*61046927SAndroid Build Coastguard Worker 
1053*61046927SAndroid Build Coastguard Worker static bool
scalar_is_imm_int(nir_scalar x,unsigned bits)1054*61046927SAndroid Build Coastguard Worker scalar_is_imm_int(nir_scalar x, unsigned bits)
1055*61046927SAndroid Build Coastguard Worker {
1056*61046927SAndroid Build Coastguard Worker    if (!nir_scalar_is_const(x))
1057*61046927SAndroid Build Coastguard Worker       return false;
1058*61046927SAndroid Build Coastguard Worker 
1059*61046927SAndroid Build Coastguard Worker    int64_t imm = nir_scalar_as_int(x);
1060*61046927SAndroid Build Coastguard Worker    return u_intN_min(bits) <= imm && imm <= u_intN_max(bits);
1061*61046927SAndroid Build Coastguard Worker }
1062*61046927SAndroid Build Coastguard Worker 
1063*61046927SAndroid Build Coastguard Worker struct nak_io_addr_offset
nak_get_io_addr_offset(nir_def * addr,uint8_t imm_bits)1064*61046927SAndroid Build Coastguard Worker nak_get_io_addr_offset(nir_def *addr, uint8_t imm_bits)
1065*61046927SAndroid Build Coastguard Worker {
1066*61046927SAndroid Build Coastguard Worker    nir_scalar addr_s = {
1067*61046927SAndroid Build Coastguard Worker       .def = addr,
1068*61046927SAndroid Build Coastguard Worker       .comp = 0,
1069*61046927SAndroid Build Coastguard Worker    };
1070*61046927SAndroid Build Coastguard Worker    if (scalar_is_imm_int(addr_s, imm_bits)) {
1071*61046927SAndroid Build Coastguard Worker       /* Base is a dumb name for this.  It should be offset */
1072*61046927SAndroid Build Coastguard Worker       return (struct nak_io_addr_offset) {
1073*61046927SAndroid Build Coastguard Worker          .offset = nir_scalar_as_int(addr_s),
1074*61046927SAndroid Build Coastguard Worker       };
1075*61046927SAndroid Build Coastguard Worker    }
1076*61046927SAndroid Build Coastguard Worker 
1077*61046927SAndroid Build Coastguard Worker    addr_s = nir_scalar_chase_movs(addr_s);
1078*61046927SAndroid Build Coastguard Worker    if (!nir_scalar_is_alu(addr_s) ||
1079*61046927SAndroid Build Coastguard Worker        nir_scalar_alu_op(addr_s) != nir_op_iadd) {
1080*61046927SAndroid Build Coastguard Worker       return (struct nak_io_addr_offset) {
1081*61046927SAndroid Build Coastguard Worker          .base = addr_s,
1082*61046927SAndroid Build Coastguard Worker       };
1083*61046927SAndroid Build Coastguard Worker    }
1084*61046927SAndroid Build Coastguard Worker 
1085*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < 2; i++) {
1086*61046927SAndroid Build Coastguard Worker       nir_scalar off_s = nir_scalar_chase_alu_src(addr_s, i);
1087*61046927SAndroid Build Coastguard Worker       off_s = nir_scalar_chase_movs(off_s);
1088*61046927SAndroid Build Coastguard Worker       if (scalar_is_imm_int(off_s, imm_bits)) {
1089*61046927SAndroid Build Coastguard Worker          return (struct nak_io_addr_offset) {
1090*61046927SAndroid Build Coastguard Worker             .base = nir_scalar_chase_alu_src(addr_s, 1 - i),
1091*61046927SAndroid Build Coastguard Worker             .offset = nir_scalar_as_int(off_s),
1092*61046927SAndroid Build Coastguard Worker          };
1093*61046927SAndroid Build Coastguard Worker       }
1094*61046927SAndroid Build Coastguard Worker    }
1095*61046927SAndroid Build Coastguard Worker 
1096*61046927SAndroid Build Coastguard Worker    return (struct nak_io_addr_offset) {
1097*61046927SAndroid Build Coastguard Worker       .base = addr_s,
1098*61046927SAndroid Build Coastguard Worker    };
1099*61046927SAndroid Build Coastguard Worker }
1100