xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/agx_compile.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright 2021 Alyssa Rosenzweig
3*61046927SAndroid Build Coastguard Worker  * Copyright 2020 Collabora Ltd.
4*61046927SAndroid Build Coastguard Worker  * Copyright 2016 Broadcom
5*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
6*61046927SAndroid Build Coastguard Worker  */
7*61046927SAndroid Build Coastguard Worker 
8*61046927SAndroid Build Coastguard Worker #include "agx_compile.h"
9*61046927SAndroid Build Coastguard Worker #include "asahi/layout/layout.h"
10*61046927SAndroid Build Coastguard Worker #include "compiler/nir/nir_builder.h"
11*61046927SAndroid Build Coastguard Worker #include "util/bitset.h"
12*61046927SAndroid Build Coastguard Worker #include "util/glheader.h"
13*61046927SAndroid Build Coastguard Worker #include "util/list.h"
14*61046927SAndroid Build Coastguard Worker #include "util/macros.h"
15*61046927SAndroid Build Coastguard Worker #include "util/u_debug.h"
16*61046927SAndroid Build Coastguard Worker #include "util/u_dynarray.h"
17*61046927SAndroid Build Coastguard Worker #include "agx_builder.h"
18*61046927SAndroid Build Coastguard Worker #include "agx_compiler.h"
19*61046927SAndroid Build Coastguard Worker #include "agx_debug.h"
20*61046927SAndroid Build Coastguard Worker #include "agx_nir.h"
21*61046927SAndroid Build Coastguard Worker #include "glsl_types.h"
22*61046927SAndroid Build Coastguard Worker #include "nir.h"
23*61046927SAndroid Build Coastguard Worker #include "nir_builtin_builder.h"
24*61046927SAndroid Build Coastguard Worker #include "nir_intrinsics.h"
25*61046927SAndroid Build Coastguard Worker #include "nir_intrinsics_indices.h"
26*61046927SAndroid Build Coastguard Worker #include "shader_enums.h"
27*61046927SAndroid Build Coastguard Worker 
28*61046927SAndroid Build Coastguard Worker /* Alignment for shader programs. I'm not sure what the optimal value is. */
29*61046927SAndroid Build Coastguard Worker #define AGX_CODE_ALIGN 0x100
30*61046927SAndroid Build Coastguard Worker 
31*61046927SAndroid Build Coastguard Worker /* clang-format off */
32*61046927SAndroid Build Coastguard Worker static const struct debug_named_value agx_debug_options[] = {
33*61046927SAndroid Build Coastguard Worker    {"shaders",   AGX_DBG_SHADERS,	"Dump shaders in NIR and AIR"},
34*61046927SAndroid Build Coastguard Worker    {"shaderdb",  AGX_DBG_SHADERDB,	"Print statistics"},
35*61046927SAndroid Build Coastguard Worker    {"verbose",   AGX_DBG_VERBOSE,	"Disassemble verbosely"},
36*61046927SAndroid Build Coastguard Worker    {"internal",  AGX_DBG_INTERNAL,	"Dump even internal shaders"},
37*61046927SAndroid Build Coastguard Worker    {"novalidate",AGX_DBG_NOVALIDATE,"Skip IR validation in debug builds"},
38*61046927SAndroid Build Coastguard Worker    {"noopt",     AGX_DBG_NOOPT,     "Disable backend optimizations"},
39*61046927SAndroid Build Coastguard Worker    {"wait",      AGX_DBG_WAIT,      "Wait after all async instructions"},
40*61046927SAndroid Build Coastguard Worker    {"nopreamble",AGX_DBG_NOPREAMBLE,"Do not use shader preambles"},
41*61046927SAndroid Build Coastguard Worker    {"demand",    AGX_DBG_DEMAND,    "Bound tightly to register demand"},
42*61046927SAndroid Build Coastguard Worker    {"nosched",   AGX_DBG_NOSCHED,   "Do not schedule the shader"},
43*61046927SAndroid Build Coastguard Worker    {"spill",     AGX_DBG_SPILL,     "Spill (almost) everything"},
44*61046927SAndroid Build Coastguard Worker    {"nopromote", AGX_DBG_NOPROMOTE, "Do not promote constants to uniforms"},
45*61046927SAndroid Build Coastguard Worker    DEBUG_NAMED_VALUE_END
46*61046927SAndroid Build Coastguard Worker };
47*61046927SAndroid Build Coastguard Worker /* clang-format on */
48*61046927SAndroid Build Coastguard Worker 
49*61046927SAndroid Build Coastguard Worker DEBUG_GET_ONCE_FLAGS_OPTION(agx_compiler_debug, "AGX_MESA_DEBUG",
50*61046927SAndroid Build Coastguard Worker                             agx_debug_options, 0)
51*61046927SAndroid Build Coastguard Worker 
52*61046927SAndroid Build Coastguard Worker int agx_compiler_debug = 0;
53*61046927SAndroid Build Coastguard Worker 
/*
 * Returns the AGX_DBG_* flag mask produced by the "AGX_MESA_DEBUG" option
 * accessor.
 */
uint64_t
agx_get_compiler_debug(void)
{
   const uint64_t flags = debug_get_option_agx_compiler_debug();

   return flags;
}
59*61046927SAndroid Build Coastguard Worker 
60*61046927SAndroid Build Coastguard Worker static agx_index
agx_cached_preload(agx_context * ctx,unsigned base,enum agx_size size)61*61046927SAndroid Build Coastguard Worker agx_cached_preload(agx_context *ctx, unsigned base, enum agx_size size)
62*61046927SAndroid Build Coastguard Worker {
63*61046927SAndroid Build Coastguard Worker    if (agx_is_null(ctx->preloaded[base])) {
64*61046927SAndroid Build Coastguard Worker       agx_block *block = agx_start_block(ctx);
65*61046927SAndroid Build Coastguard Worker       agx_builder b = agx_init_builder(ctx, agx_before_block(block));
66*61046927SAndroid Build Coastguard Worker       ctx->preloaded[base] = agx_preload(&b, agx_register(base, size));
67*61046927SAndroid Build Coastguard Worker    }
68*61046927SAndroid Build Coastguard Worker 
69*61046927SAndroid Build Coastguard Worker    return ctx->preloaded[base];
70*61046927SAndroid Build Coastguard Worker }
71*61046927SAndroid Build Coastguard Worker 
72*61046927SAndroid Build Coastguard Worker static agx_index
agx_vertex_id(agx_builder * b)73*61046927SAndroid Build Coastguard Worker agx_vertex_id(agx_builder *b)
74*61046927SAndroid Build Coastguard Worker {
75*61046927SAndroid Build Coastguard Worker    return agx_cached_preload(b->shader, 10, AGX_SIZE_32);
76*61046927SAndroid Build Coastguard Worker }
77*61046927SAndroid Build Coastguard Worker 
78*61046927SAndroid Build Coastguard Worker static agx_index
agx_instance_id(agx_builder * b)79*61046927SAndroid Build Coastguard Worker agx_instance_id(agx_builder *b)
80*61046927SAndroid Build Coastguard Worker {
81*61046927SAndroid Build Coastguard Worker    return agx_cached_preload(b->shader, 12, AGX_SIZE_32);
82*61046927SAndroid Build Coastguard Worker }
83*61046927SAndroid Build Coastguard Worker 
84*61046927SAndroid Build Coastguard Worker #define VARYING_NUM_COMPONENTS (VARYING_SLOT_MAX * 4)
85*61046927SAndroid Build Coastguard Worker 
86*61046927SAndroid Build Coastguard Worker struct coefficient_info {
87*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(smooth, VARYING_NUM_COMPONENTS);
88*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(flat, VARYING_NUM_COMPONENTS);
89*61046927SAndroid Build Coastguard Worker    BITSET_DECLARE(noperspective, VARYING_NUM_COMPONENTS);
90*61046927SAndroid Build Coastguard Worker };
91*61046927SAndroid Build Coastguard Worker 
92*61046927SAndroid Build Coastguard Worker static BITSET_WORD *
bitset_for_interp(struct coefficient_info * info,enum glsl_interp_mode mode)93*61046927SAndroid Build Coastguard Worker bitset_for_interp(struct coefficient_info *info, enum glsl_interp_mode mode)
94*61046927SAndroid Build Coastguard Worker {
95*61046927SAndroid Build Coastguard Worker    /* clang-format off */
96*61046927SAndroid Build Coastguard Worker    switch (mode) {
97*61046927SAndroid Build Coastguard Worker    case INTERP_MODE_NONE:
98*61046927SAndroid Build Coastguard Worker    case INTERP_MODE_SMOOTH:         return info->smooth;
99*61046927SAndroid Build Coastguard Worker    case INTERP_MODE_NOPERSPECTIVE:  return info->noperspective;
100*61046927SAndroid Build Coastguard Worker    case INTERP_MODE_FLAT:           return info->flat;
101*61046927SAndroid Build Coastguard Worker    default:                         unreachable("invalid interp mode");
102*61046927SAndroid Build Coastguard Worker    }
103*61046927SAndroid Build Coastguard Worker    /* clang-format on */
104*61046927SAndroid Build Coastguard Worker }
105*61046927SAndroid Build Coastguard Worker 
106*61046927SAndroid Build Coastguard Worker static bool
gather_cf(nir_builder * b,nir_intrinsic_instr * intr,void * data)107*61046927SAndroid Build Coastguard Worker gather_cf(nir_builder *b, nir_intrinsic_instr *intr, void *data)
108*61046927SAndroid Build Coastguard Worker {
109*61046927SAndroid Build Coastguard Worker    /* First handle frag coord loads */
110*61046927SAndroid Build Coastguard Worker    struct coefficient_info *info = data;
111*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic == nir_intrinsic_load_frag_coord_zw) {
112*61046927SAndroid Build Coastguard Worker       BITSET_SET(info->noperspective,
113*61046927SAndroid Build Coastguard Worker                  VARYING_SLOT_POS + nir_intrinsic_component(intr));
114*61046927SAndroid Build Coastguard Worker       return false;
115*61046927SAndroid Build Coastguard Worker    }
116*61046927SAndroid Build Coastguard Worker 
117*61046927SAndroid Build Coastguard Worker    /* Look for input loads and grab the instruction with the interp mode */
118*61046927SAndroid Build Coastguard Worker    nir_intrinsic_instr *bary;
119*61046927SAndroid Build Coastguard Worker    unsigned nr = 1;
120*61046927SAndroid Build Coastguard Worker 
121*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic == nir_intrinsic_load_coefficients_agx) {
122*61046927SAndroid Build Coastguard Worker       bary = intr;
123*61046927SAndroid Build Coastguard Worker       /* Always load a scalar */
124*61046927SAndroid Build Coastguard Worker    } else if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
125*61046927SAndroid Build Coastguard Worker       bary = nir_src_as_intrinsic(intr->src[0]);
126*61046927SAndroid Build Coastguard Worker       nr = intr->num_components;
127*61046927SAndroid Build Coastguard Worker 
128*61046927SAndroid Build Coastguard Worker       /* Perspective interpolation internally reads W */
129*61046927SAndroid Build Coastguard Worker       if (nir_intrinsic_interp_mode(bary) != INTERP_MODE_NOPERSPECTIVE)
130*61046927SAndroid Build Coastguard Worker          BITSET_SET(info->noperspective, VARYING_SLOT_POS + 3);
131*61046927SAndroid Build Coastguard Worker    } else {
132*61046927SAndroid Build Coastguard Worker       return false;
133*61046927SAndroid Build Coastguard Worker    }
134*61046927SAndroid Build Coastguard Worker 
135*61046927SAndroid Build Coastguard Worker    BITSET_WORD *set = bitset_for_interp(data, nir_intrinsic_interp_mode(bary));
136*61046927SAndroid Build Coastguard Worker    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
137*61046927SAndroid Build Coastguard Worker    nir_src *offset = nir_get_io_offset_src(intr);
138*61046927SAndroid Build Coastguard Worker 
139*61046927SAndroid Build Coastguard Worker    /* Mark the exact range for direct loads to minimize CF registers, but mark a
140*61046927SAndroid Build Coastguard Worker     * conservative bounding range for indirect array access.
141*61046927SAndroid Build Coastguard Worker     */
142*61046927SAndroid Build Coastguard Worker    if (nir_src_is_const(*offset)) {
143*61046927SAndroid Build Coastguard Worker       unsigned location = sem.location + nir_src_as_uint(*offset);
144*61046927SAndroid Build Coastguard Worker       unsigned start_comp = (location * 4) + nir_intrinsic_component(intr);
145*61046927SAndroid Build Coastguard Worker 
146*61046927SAndroid Build Coastguard Worker       BITSET_SET_RANGE(set, start_comp, start_comp + nr - 1);
147*61046927SAndroid Build Coastguard Worker    } else {
148*61046927SAndroid Build Coastguard Worker       unsigned start_comp = (sem.location * 4) + nir_intrinsic_component(intr);
149*61046927SAndroid Build Coastguard Worker       bool compact = sem.location == VARYING_SLOT_CLIP_DIST0 ||
150*61046927SAndroid Build Coastguard Worker                      sem.location == VARYING_SLOT_CLIP_DIST1;
151*61046927SAndroid Build Coastguard Worker       unsigned stride = compact ? 1 : 4;
152*61046927SAndroid Build Coastguard Worker 
153*61046927SAndroid Build Coastguard Worker       /* For now we have to assign CF for the whole vec4 to make indirect
154*61046927SAndroid Build Coastguard Worker        * indexiing work. This could be optimized later.
155*61046927SAndroid Build Coastguard Worker        */
156*61046927SAndroid Build Coastguard Worker       nr = stride;
157*61046927SAndroid Build Coastguard Worker 
158*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < sem.num_slots; ++i) {
159*61046927SAndroid Build Coastguard Worker          BITSET_SET_RANGE(set, start_comp + (i * stride),
160*61046927SAndroid Build Coastguard Worker                           start_comp + (i * stride) + nr - 1);
161*61046927SAndroid Build Coastguard Worker       }
162*61046927SAndroid Build Coastguard Worker    }
163*61046927SAndroid Build Coastguard Worker 
164*61046927SAndroid Build Coastguard Worker    return false;
165*61046927SAndroid Build Coastguard Worker }
166*61046927SAndroid Build Coastguard Worker 
167*61046927SAndroid Build Coastguard Worker /*
168*61046927SAndroid Build Coastguard Worker  * We assign all coefficient registers up front to ensure we have a consistent
169*61046927SAndroid Build Coastguard Worker  * layout required for indirects to work.
170*61046927SAndroid Build Coastguard Worker  */
171*61046927SAndroid Build Coastguard Worker static void
assign_coefficient_regs(nir_shader * nir,struct agx_varyings_fs * var)172*61046927SAndroid Build Coastguard Worker assign_coefficient_regs(nir_shader *nir, struct agx_varyings_fs *var)
173*61046927SAndroid Build Coastguard Worker {
174*61046927SAndroid Build Coastguard Worker    struct coefficient_info info = {0};
175*61046927SAndroid Build Coastguard Worker    nir_shader_intrinsics_pass(nir, gather_cf, nir_metadata_all, &info);
176*61046927SAndroid Build Coastguard Worker 
177*61046927SAndroid Build Coastguard Worker    /* W */
178*61046927SAndroid Build Coastguard Worker    if (BITSET_TEST(info.noperspective, VARYING_SLOT_POS + 3)) {
179*61046927SAndroid Build Coastguard Worker       var->bindings[var->nr_bindings++] = (struct agx_cf_binding){
180*61046927SAndroid Build Coastguard Worker          .cf_base = var->nr_cf++,
181*61046927SAndroid Build Coastguard Worker          .slot = VARYING_SLOT_POS,
182*61046927SAndroid Build Coastguard Worker          .offset = 3,
183*61046927SAndroid Build Coastguard Worker          .count = 1,
184*61046927SAndroid Build Coastguard Worker          .smooth = true,
185*61046927SAndroid Build Coastguard Worker       };
186*61046927SAndroid Build Coastguard Worker    }
187*61046927SAndroid Build Coastguard Worker 
188*61046927SAndroid Build Coastguard Worker    /* Z */
189*61046927SAndroid Build Coastguard Worker    if (BITSET_TEST(info.noperspective, VARYING_SLOT_POS + 2)) {
190*61046927SAndroid Build Coastguard Worker       var->bindings[var->nr_bindings++] = (struct agx_cf_binding){
191*61046927SAndroid Build Coastguard Worker          .cf_base = var->nr_cf++,
192*61046927SAndroid Build Coastguard Worker          .slot = VARYING_SLOT_POS,
193*61046927SAndroid Build Coastguard Worker          .offset = 2,
194*61046927SAndroid Build Coastguard Worker          .count = 1,
195*61046927SAndroid Build Coastguard Worker          .smooth = true,
196*61046927SAndroid Build Coastguard Worker       };
197*61046927SAndroid Build Coastguard Worker 
198*61046927SAndroid Build Coastguard Worker       var->reads_z = true;
199*61046927SAndroid Build Coastguard Worker    }
200*61046927SAndroid Build Coastguard Worker 
201*61046927SAndroid Build Coastguard Worker    static_assert(VARYING_SLOT_POS == 0, "special and handled first");
202*61046927SAndroid Build Coastguard Worker 
203*61046927SAndroid Build Coastguard Worker    for (unsigned i = VARYING_SLOT_POS + 1; i < VARYING_SLOT_MAX; ++i) {
204*61046927SAndroid Build Coastguard Worker       bool smooth = BITSET_TEST_RANGE(info.smooth, i * 4, (i * 4) + 3);
205*61046927SAndroid Build Coastguard Worker       bool flat = BITSET_TEST_RANGE(info.flat, i * 4, (i * 4) + 3);
206*61046927SAndroid Build Coastguard Worker       bool noperspective =
207*61046927SAndroid Build Coastguard Worker          BITSET_TEST_RANGE(info.noperspective, i * 4, (i * 4) + 3);
208*61046927SAndroid Build Coastguard Worker 
209*61046927SAndroid Build Coastguard Worker       if (!(smooth || flat || noperspective))
210*61046927SAndroid Build Coastguard Worker          continue;
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker       /* From the GLSL 4.60 spec ("Input Layout Qualifiers"):
213*61046927SAndroid Build Coastguard Worker        *
214*61046927SAndroid Build Coastguard Worker        *    when location aliasing, the aliases sharing the location must have
215*61046927SAndroid Build Coastguard Worker        *    the same underlying numerical type and bit width (floating-point or
216*61046927SAndroid Build Coastguard Worker        *    integer, 32-bit versus 64-bit, etc.) and the same auxiliary storage
217*61046927SAndroid Build Coastguard Worker        *    and interpolation qualification.
218*61046927SAndroid Build Coastguard Worker        *
219*61046927SAndroid Build Coastguard Worker        * SPIR-V should obey this as well although the spec text is muddier.
220*61046927SAndroid Build Coastguard Worker        */
221*61046927SAndroid Build Coastguard Worker       assert((smooth + flat + noperspective) == 1 &&
222*61046927SAndroid Build Coastguard Worker              "slots must have consistent interpolation");
223*61046927SAndroid Build Coastguard Worker 
224*61046927SAndroid Build Coastguard Worker       BITSET_WORD *set = smooth ? info.smooth
225*61046927SAndroid Build Coastguard Worker                          : flat ? info.flat
226*61046927SAndroid Build Coastguard Worker                                 : info.noperspective;
227*61046927SAndroid Build Coastguard Worker 
228*61046927SAndroid Build Coastguard Worker       /* Find the start offset */
229*61046927SAndroid Build Coastguard Worker       unsigned offset = 0;
230*61046927SAndroid Build Coastguard Worker       for (offset = 0; offset < 4 && !BITSET_TEST(set, (i * 4) + offset);
231*61046927SAndroid Build Coastguard Worker            ++offset)
232*61046927SAndroid Build Coastguard Worker          ;
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker       /* Find the end offset. TODO: Do we ever need to split into two bindings
235*61046927SAndroid Build Coastguard Worker        * to handle e.g. x_zw read masks?
236*61046927SAndroid Build Coastguard Worker        */
237*61046927SAndroid Build Coastguard Worker       unsigned count = 0;
238*61046927SAndroid Build Coastguard Worker       for (unsigned c = offset; c < 4; ++c) {
239*61046927SAndroid Build Coastguard Worker          if (BITSET_TEST(set, (i * 4) + c))
240*61046927SAndroid Build Coastguard Worker             count = c - offset + 1;
241*61046927SAndroid Build Coastguard Worker       }
242*61046927SAndroid Build Coastguard Worker       assert(count >= 1 && (count + offset) <= 4);
243*61046927SAndroid Build Coastguard Worker 
244*61046927SAndroid Build Coastguard Worker       var->bindings[var->nr_bindings++] = (struct agx_cf_binding){
245*61046927SAndroid Build Coastguard Worker          .cf_base = var->nr_cf,
246*61046927SAndroid Build Coastguard Worker          .slot = i,
247*61046927SAndroid Build Coastguard Worker          .offset = offset,
248*61046927SAndroid Build Coastguard Worker          .count = count,
249*61046927SAndroid Build Coastguard Worker          .smooth = !flat,
250*61046927SAndroid Build Coastguard Worker          .perspective = smooth,
251*61046927SAndroid Build Coastguard Worker       };
252*61046927SAndroid Build Coastguard Worker 
253*61046927SAndroid Build Coastguard Worker       var->nr_cf += count;
254*61046927SAndroid Build Coastguard Worker    }
255*61046927SAndroid Build Coastguard Worker }
256*61046927SAndroid Build Coastguard Worker 
257*61046927SAndroid Build Coastguard Worker static agx_index
agx_get_cf(agx_context * ctx,gl_varying_slot slot,unsigned offset)258*61046927SAndroid Build Coastguard Worker agx_get_cf(agx_context *ctx, gl_varying_slot slot, unsigned offset)
259*61046927SAndroid Build Coastguard Worker {
260*61046927SAndroid Build Coastguard Worker    struct agx_varyings_fs *varyings = &ctx->out->varyings.fs;
261*61046927SAndroid Build Coastguard Worker 
262*61046927SAndroid Build Coastguard Worker    /* We already have an appropriate binding, find it */
263*61046927SAndroid Build Coastguard Worker    for (unsigned b = 0; b < varyings->nr_bindings; ++b) {
264*61046927SAndroid Build Coastguard Worker       if (varyings->bindings[b].slot == slot &&
265*61046927SAndroid Build Coastguard Worker           (slot != VARYING_SLOT_POS ||
266*61046927SAndroid Build Coastguard Worker            offset == varyings->bindings[b].offset)) {
267*61046927SAndroid Build Coastguard Worker 
268*61046927SAndroid Build Coastguard Worker          signed cf_offset = offset - varyings->bindings[b].offset;
269*61046927SAndroid Build Coastguard Worker          assert(cf_offset >= 0);
270*61046927SAndroid Build Coastguard Worker 
271*61046927SAndroid Build Coastguard Worker          return agx_immediate(varyings->bindings[b].cf_base + cf_offset);
272*61046927SAndroid Build Coastguard Worker       }
273*61046927SAndroid Build Coastguard Worker    }
274*61046927SAndroid Build Coastguard Worker 
275*61046927SAndroid Build Coastguard Worker    unreachable("all coefficient registers preassigned");
276*61046927SAndroid Build Coastguard Worker }
277*61046927SAndroid Build Coastguard Worker 
278*61046927SAndroid Build Coastguard Worker /* Builds a 64-bit hash table key for an index */
279*61046927SAndroid Build Coastguard Worker static uint64_t
agx_index_to_key(agx_index idx)280*61046927SAndroid Build Coastguard Worker agx_index_to_key(agx_index idx)
281*61046927SAndroid Build Coastguard Worker {
282*61046927SAndroid Build Coastguard Worker    STATIC_ASSERT(sizeof(idx) <= sizeof(uint64_t));
283*61046927SAndroid Build Coastguard Worker 
284*61046927SAndroid Build Coastguard Worker    uint64_t key = 0;
285*61046927SAndroid Build Coastguard Worker    memcpy(&key, &idx, sizeof(idx));
286*61046927SAndroid Build Coastguard Worker    return key;
287*61046927SAndroid Build Coastguard Worker }
288*61046927SAndroid Build Coastguard Worker 
289*61046927SAndroid Build Coastguard Worker /*
290*61046927SAndroid Build Coastguard Worker  * Extract a single channel out of a vector source. We split vectors with
291*61046927SAndroid Build Coastguard Worker  * p_split so we can use the split components directly, without emitting a
292*61046927SAndroid Build Coastguard Worker  * machine instruction. This has advantages of RA, as the split can usually be
293*61046927SAndroid Build Coastguard Worker  * optimized away.
294*61046927SAndroid Build Coastguard Worker  */
295*61046927SAndroid Build Coastguard Worker static agx_index
agx_emit_extract(agx_builder * b,agx_index vec,unsigned channel)296*61046927SAndroid Build Coastguard Worker agx_emit_extract(agx_builder *b, agx_index vec, unsigned channel)
297*61046927SAndroid Build Coastguard Worker {
298*61046927SAndroid Build Coastguard Worker    agx_index *components = _mesa_hash_table_u64_search(b->shader->allocated_vec,
299*61046927SAndroid Build Coastguard Worker                                                        agx_index_to_key(vec));
300*61046927SAndroid Build Coastguard Worker 
301*61046927SAndroid Build Coastguard Worker    assert(components != NULL && "missing agx_emit_collect_to");
302*61046927SAndroid Build Coastguard Worker 
303*61046927SAndroid Build Coastguard Worker    return components[channel];
304*61046927SAndroid Build Coastguard Worker }
305*61046927SAndroid Build Coastguard Worker 
306*61046927SAndroid Build Coastguard Worker static agx_index
agx_extract_nir_src(agx_builder * b,nir_src src,unsigned channel)307*61046927SAndroid Build Coastguard Worker agx_extract_nir_src(agx_builder *b, nir_src src, unsigned channel)
308*61046927SAndroid Build Coastguard Worker {
309*61046927SAndroid Build Coastguard Worker    agx_index idx = agx_src_index(&src);
310*61046927SAndroid Build Coastguard Worker 
311*61046927SAndroid Build Coastguard Worker    /* We only deal with scalars, extract a single scalar if needed */
312*61046927SAndroid Build Coastguard Worker    if (nir_src_num_components(src) > 1)
313*61046927SAndroid Build Coastguard Worker       return agx_emit_extract(b, idx, channel);
314*61046927SAndroid Build Coastguard Worker    else
315*61046927SAndroid Build Coastguard Worker       return idx;
316*61046927SAndroid Build Coastguard Worker }
317*61046927SAndroid Build Coastguard Worker 
318*61046927SAndroid Build Coastguard Worker static void
agx_cache_collect(agx_builder * b,agx_index dst,unsigned nr_srcs,agx_index * srcs)319*61046927SAndroid Build Coastguard Worker agx_cache_collect(agx_builder *b, agx_index dst, unsigned nr_srcs,
320*61046927SAndroid Build Coastguard Worker                   agx_index *srcs)
321*61046927SAndroid Build Coastguard Worker {
322*61046927SAndroid Build Coastguard Worker    /* Lifetime of a hash table entry has to be at least as long as the table */
323*61046927SAndroid Build Coastguard Worker    agx_index *channels = ralloc_array(b->shader, agx_index, nr_srcs);
324*61046927SAndroid Build Coastguard Worker 
325*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < nr_srcs; ++i)
326*61046927SAndroid Build Coastguard Worker       channels[i] = srcs[i];
327*61046927SAndroid Build Coastguard Worker 
328*61046927SAndroid Build Coastguard Worker    _mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst),
329*61046927SAndroid Build Coastguard Worker                                channels);
330*61046927SAndroid Build Coastguard Worker }
331*61046927SAndroid Build Coastguard Worker 
332*61046927SAndroid Build Coastguard Worker /*
333*61046927SAndroid Build Coastguard Worker  * Combine multiple scalars into a vector destination. This corresponds to
334*61046927SAndroid Build Coastguard Worker  * collect, lowered to moves (a shuffle in general) after register allocation.
335*61046927SAndroid Build Coastguard Worker  *
336*61046927SAndroid Build Coastguard Worker  * To optimize vector extractions, we record the individual channels
337*61046927SAndroid Build Coastguard Worker  */
338*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_collect_to(agx_builder * b,agx_index dst,unsigned nr_srcs,agx_index * srcs)339*61046927SAndroid Build Coastguard Worker agx_emit_collect_to(agx_builder *b, agx_index dst, unsigned nr_srcs,
340*61046927SAndroid Build Coastguard Worker                     agx_index *srcs)
341*61046927SAndroid Build Coastguard Worker {
342*61046927SAndroid Build Coastguard Worker    agx_cache_collect(b, dst, nr_srcs, srcs);
343*61046927SAndroid Build Coastguard Worker 
344*61046927SAndroid Build Coastguard Worker    if (nr_srcs == 1)
345*61046927SAndroid Build Coastguard Worker       return agx_mov_to(b, dst, srcs[0]);
346*61046927SAndroid Build Coastguard Worker 
347*61046927SAndroid Build Coastguard Worker    agx_instr *I = agx_collect_to(b, dst, nr_srcs);
348*61046927SAndroid Build Coastguard Worker 
349*61046927SAndroid Build Coastguard Worker    agx_foreach_src(I, s)
350*61046927SAndroid Build Coastguard Worker       I->src[s] = srcs[s];
351*61046927SAndroid Build Coastguard Worker 
352*61046927SAndroid Build Coastguard Worker    return I;
353*61046927SAndroid Build Coastguard Worker }
354*61046927SAndroid Build Coastguard Worker 
355*61046927SAndroid Build Coastguard Worker static agx_index
agx_emit_collect(agx_builder * b,unsigned nr_srcs,agx_index * srcs)356*61046927SAndroid Build Coastguard Worker agx_emit_collect(agx_builder *b, unsigned nr_srcs, agx_index *srcs)
357*61046927SAndroid Build Coastguard Worker {
358*61046927SAndroid Build Coastguard Worker    agx_index dst = agx_vec_temp(b->shader, srcs[0].size, nr_srcs);
359*61046927SAndroid Build Coastguard Worker    agx_emit_collect_to(b, dst, nr_srcs, srcs);
360*61046927SAndroid Build Coastguard Worker    return dst;
361*61046927SAndroid Build Coastguard Worker }
362*61046927SAndroid Build Coastguard Worker 
363*61046927SAndroid Build Coastguard Worker static agx_index
agx_vec2(agx_builder * b,agx_index s0,agx_index s1)364*61046927SAndroid Build Coastguard Worker agx_vec2(agx_builder *b, agx_index s0, agx_index s1)
365*61046927SAndroid Build Coastguard Worker {
366*61046927SAndroid Build Coastguard Worker    return agx_emit_collect(b, 2, (agx_index[]){s0, s1});
367*61046927SAndroid Build Coastguard Worker }
368*61046927SAndroid Build Coastguard Worker 
369*61046927SAndroid Build Coastguard Worker static agx_index
agx_recollect_vector(agx_builder * b,nir_src vec)370*61046927SAndroid Build Coastguard Worker agx_recollect_vector(agx_builder *b, nir_src vec)
371*61046927SAndroid Build Coastguard Worker {
372*61046927SAndroid Build Coastguard Worker    agx_index comps[4];
373*61046927SAndroid Build Coastguard Worker    unsigned nr = nir_src_num_components(vec);
374*61046927SAndroid Build Coastguard Worker 
375*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < nr; ++i)
376*61046927SAndroid Build Coastguard Worker       comps[i] = agx_extract_nir_src(b, vec, i);
377*61046927SAndroid Build Coastguard Worker 
378*61046927SAndroid Build Coastguard Worker    return agx_emit_collect(b, nr, comps);
379*61046927SAndroid Build Coastguard Worker }
380*61046927SAndroid Build Coastguard Worker 
381*61046927SAndroid Build Coastguard Worker /*
382*61046927SAndroid Build Coastguard Worker  * Extract the lower or upper N-bits from a (2*N)-bit quantity. We use a split
383*61046927SAndroid Build Coastguard Worker  * without null destinations to let us CSE (and coalesce) the splits when both x
384*61046927SAndroid Build Coastguard Worker  * and y are split.
385*61046927SAndroid Build Coastguard Worker  */
386*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_subdivide_to(agx_builder * b,agx_index dst,agx_index s0,unsigned comp)387*61046927SAndroid Build Coastguard Worker agx_subdivide_to(agx_builder *b, agx_index dst, agx_index s0, unsigned comp)
388*61046927SAndroid Build Coastguard Worker {
389*61046927SAndroid Build Coastguard Worker    assert((s0.size == (dst.size + 1)) && "only 2x subdivide handled");
390*61046927SAndroid Build Coastguard Worker    assert((comp == 0 || comp == 1) && "too many components");
391*61046927SAndroid Build Coastguard Worker 
392*61046927SAndroid Build Coastguard Worker    /* Handle immediates specially so we don't have to constant fold splits. */
393*61046927SAndroid Build Coastguard Worker    if (s0.type == AGX_INDEX_IMMEDIATE) {
394*61046927SAndroid Build Coastguard Worker       unsigned bits = 16 * agx_size_align_16(dst.size);
395*61046927SAndroid Build Coastguard Worker       return agx_mov_imm_to(b, dst, (s0.value >> bits) & BITFIELD64_MASK(bits));
396*61046927SAndroid Build Coastguard Worker    }
397*61046927SAndroid Build Coastguard Worker 
398*61046927SAndroid Build Coastguard Worker    agx_instr *split = agx_split(b, 2, s0);
399*61046927SAndroid Build Coastguard Worker    split->dest[comp] = dst;
400*61046927SAndroid Build Coastguard Worker    split->dest[1 - comp] = agx_temp(b->shader, dst.size);
401*61046927SAndroid Build Coastguard Worker    return split;
402*61046927SAndroid Build Coastguard Worker }
403*61046927SAndroid Build Coastguard Worker 
404*61046927SAndroid Build Coastguard Worker void
agx_block_add_successor(agx_block * block,agx_block * successor)405*61046927SAndroid Build Coastguard Worker agx_block_add_successor(agx_block *block, agx_block *successor)
406*61046927SAndroid Build Coastguard Worker {
407*61046927SAndroid Build Coastguard Worker    assert(block != NULL && successor != NULL);
408*61046927SAndroid Build Coastguard Worker 
409*61046927SAndroid Build Coastguard Worker    /* Cull impossible edges */
410*61046927SAndroid Build Coastguard Worker    if (block->unconditional_jumps)
411*61046927SAndroid Build Coastguard Worker       return;
412*61046927SAndroid Build Coastguard Worker 
413*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
414*61046927SAndroid Build Coastguard Worker       if (block->successors[i]) {
415*61046927SAndroid Build Coastguard Worker          if (block->successors[i] == successor)
416*61046927SAndroid Build Coastguard Worker             return;
417*61046927SAndroid Build Coastguard Worker          else
418*61046927SAndroid Build Coastguard Worker             continue;
419*61046927SAndroid Build Coastguard Worker       }
420*61046927SAndroid Build Coastguard Worker 
421*61046927SAndroid Build Coastguard Worker       block->successors[i] = successor;
422*61046927SAndroid Build Coastguard Worker       util_dynarray_append(&successor->predecessors, agx_block *, block);
423*61046927SAndroid Build Coastguard Worker       return;
424*61046927SAndroid Build Coastguard Worker    }
425*61046927SAndroid Build Coastguard Worker 
426*61046927SAndroid Build Coastguard Worker    unreachable("Too many successors");
427*61046927SAndroid Build Coastguard Worker }
428*61046927SAndroid Build Coastguard Worker 
429*61046927SAndroid Build Coastguard Worker /*
430*61046927SAndroid Build Coastguard Worker  * Splits an n-component vector (vec) into n scalar destinations (dests) using a
431*61046927SAndroid Build Coastguard Worker  * split pseudo-instruction.
432*61046927SAndroid Build Coastguard Worker  *
433*61046927SAndroid Build Coastguard Worker  * Pre-condition: dests is filled with agx_null().
434*61046927SAndroid Build Coastguard Worker  */
435*61046927SAndroid Build Coastguard Worker static void
agx_emit_split(agx_builder * b,agx_index * dests,agx_index vec,unsigned n)436*61046927SAndroid Build Coastguard Worker agx_emit_split(agx_builder *b, agx_index *dests, agx_index vec, unsigned n)
437*61046927SAndroid Build Coastguard Worker {
438*61046927SAndroid Build Coastguard Worker    agx_instr *I = agx_split(b, n, vec);
439*61046927SAndroid Build Coastguard Worker 
440*61046927SAndroid Build Coastguard Worker    agx_foreach_dest(I, d) {
441*61046927SAndroid Build Coastguard Worker       dests[d] = agx_temp(b->shader, vec.size);
442*61046927SAndroid Build Coastguard Worker       I->dest[d] = dests[d];
443*61046927SAndroid Build Coastguard Worker    }
444*61046927SAndroid Build Coastguard Worker }
445*61046927SAndroid Build Coastguard Worker 
446*61046927SAndroid Build Coastguard Worker static void
agx_emit_cached_split(agx_builder * b,agx_index vec,unsigned n)447*61046927SAndroid Build Coastguard Worker agx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n)
448*61046927SAndroid Build Coastguard Worker {
449*61046927SAndroid Build Coastguard Worker    agx_index dests[4] = {agx_null(), agx_null(), agx_null(), agx_null()};
450*61046927SAndroid Build Coastguard Worker    agx_emit_split(b, dests, vec, n);
451*61046927SAndroid Build Coastguard Worker    agx_cache_collect(b, vec, n, dests);
452*61046927SAndroid Build Coastguard Worker }
453*61046927SAndroid Build Coastguard Worker 
454*61046927SAndroid Build Coastguard Worker static void
agx_emit_load_const(agx_builder * b,nir_load_const_instr * instr)455*61046927SAndroid Build Coastguard Worker agx_emit_load_const(agx_builder *b, nir_load_const_instr *instr)
456*61046927SAndroid Build Coastguard Worker {
457*61046927SAndroid Build Coastguard Worker    /* Ensure we've been scalarized and bit size lowered */
458*61046927SAndroid Build Coastguard Worker    unsigned bit_size = instr->def.bit_size;
459*61046927SAndroid Build Coastguard Worker    assert(instr->def.num_components == 1);
460*61046927SAndroid Build Coastguard Worker 
461*61046927SAndroid Build Coastguard Worker    /* Emit move, later passes can inline/push if useful */
462*61046927SAndroid Build Coastguard Worker    agx_mov_imm_to(b, agx_def_index(&instr->def),
463*61046927SAndroid Build Coastguard Worker                   nir_const_value_as_uint(instr->value[0], bit_size));
464*61046927SAndroid Build Coastguard Worker }
465*61046927SAndroid Build Coastguard Worker 
466*61046927SAndroid Build Coastguard Worker /*
467*61046927SAndroid Build Coastguard Worker  * Implement mul_high of 32-bit sources by doing a 32x32->64-bit multiply and
468*61046927SAndroid Build Coastguard Worker  * extracting only the high word.
469*61046927SAndroid Build Coastguard Worker  */
470*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_mul_high_to(agx_builder * b,agx_index dst,agx_index P,agx_index Q,bool is_signed)471*61046927SAndroid Build Coastguard Worker agx_mul_high_to(agx_builder *b, agx_index dst, agx_index P, agx_index Q,
472*61046927SAndroid Build Coastguard Worker                 bool is_signed)
473*61046927SAndroid Build Coastguard Worker {
474*61046927SAndroid Build Coastguard Worker    assert(P.size == Q.size && "source sizes must match");
475*61046927SAndroid Build Coastguard Worker    assert(P.size == dst.size && "dest size must match");
476*61046927SAndroid Build Coastguard Worker    assert(P.size != AGX_SIZE_64 && "64x64 multiply should have been lowered");
477*61046927SAndroid Build Coastguard Worker 
478*61046927SAndroid Build Coastguard Worker    static_assert(AGX_SIZE_64 == (AGX_SIZE_32 + 1), "enum wrong");
479*61046927SAndroid Build Coastguard Worker    static_assert(AGX_SIZE_32 == (AGX_SIZE_16 + 1), "enum wrong");
480*61046927SAndroid Build Coastguard Worker 
481*61046927SAndroid Build Coastguard Worker    if (!is_signed) {
482*61046927SAndroid Build Coastguard Worker       P = agx_abs(P);
483*61046927SAndroid Build Coastguard Worker       Q = agx_abs(Q);
484*61046927SAndroid Build Coastguard Worker    }
485*61046927SAndroid Build Coastguard Worker 
486*61046927SAndroid Build Coastguard Worker    agx_index product = agx_temp(b->shader, P.size + 1);
487*61046927SAndroid Build Coastguard Worker    agx_imad_to(b, product, P, Q, agx_zero(), 0);
488*61046927SAndroid Build Coastguard Worker 
489*61046927SAndroid Build Coastguard Worker    return agx_subdivide_to(b, dst, product, 1);
490*61046927SAndroid Build Coastguard Worker }
491*61046927SAndroid Build Coastguard Worker 
492*61046927SAndroid Build Coastguard Worker static enum agx_format
agx_format_for_pipe(enum pipe_format format)493*61046927SAndroid Build Coastguard Worker agx_format_for_pipe(enum pipe_format format)
494*61046927SAndroid Build Coastguard Worker {
495*61046927SAndroid Build Coastguard Worker #define CASE(x)                                                                \
496*61046927SAndroid Build Coastguard Worker    if (format == (enum pipe_format)AIL_ISA_FORMAT_##x)                         \
497*61046927SAndroid Build Coastguard Worker       return AGX_FORMAT_##x;
498*61046927SAndroid Build Coastguard Worker 
499*61046927SAndroid Build Coastguard Worker    CASE(I8);
500*61046927SAndroid Build Coastguard Worker    CASE(I16);
501*61046927SAndroid Build Coastguard Worker    CASE(I32);
502*61046927SAndroid Build Coastguard Worker    CASE(F16);
503*61046927SAndroid Build Coastguard Worker    CASE(U8NORM);
504*61046927SAndroid Build Coastguard Worker    CASE(S8NORM);
505*61046927SAndroid Build Coastguard Worker    CASE(U16NORM);
506*61046927SAndroid Build Coastguard Worker    CASE(S16NORM);
507*61046927SAndroid Build Coastguard Worker    CASE(RGB10A2);
508*61046927SAndroid Build Coastguard Worker    CASE(SRGBA8);
509*61046927SAndroid Build Coastguard Worker    CASE(RG11B10F);
510*61046927SAndroid Build Coastguard Worker    CASE(RGB9E5);
511*61046927SAndroid Build Coastguard Worker 
512*61046927SAndroid Build Coastguard Worker #undef CASE
513*61046927SAndroid Build Coastguard Worker    unreachable("Invalid format");
514*61046927SAndroid Build Coastguard Worker }
515*61046927SAndroid Build Coastguard Worker 
516*61046927SAndroid Build Coastguard Worker static agx_index
cf_for_intrinsic(agx_builder * b,nir_intrinsic_instr * intr)517*61046927SAndroid Build Coastguard Worker cf_for_intrinsic(agx_builder *b, nir_intrinsic_instr *intr)
518*61046927SAndroid Build Coastguard Worker {
519*61046927SAndroid Build Coastguard Worker    /* Determine the base location, taking into account a constant offset */
520*61046927SAndroid Build Coastguard Worker    unsigned location = nir_intrinsic_io_semantics(intr).location;
521*61046927SAndroid Build Coastguard Worker    bool compact = location == VARYING_SLOT_CLIP_DIST0 ||
522*61046927SAndroid Build Coastguard Worker                   location == VARYING_SLOT_CLIP_DIST1;
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker    nir_src *offset = nir_get_io_offset_src(intr);
525*61046927SAndroid Build Coastguard Worker    if (nir_src_is_const(*offset)) {
526*61046927SAndroid Build Coastguard Worker       /* XXX: NIR is broken and uses constant offsets in slots but dynamic
527*61046927SAndroid Build Coastguard Worker        * offsets in scalars for compact varyings. This needs to be fixed
528*61046927SAndroid Build Coastguard Worker        * upstream.
529*61046927SAndroid Build Coastguard Worker        */
530*61046927SAndroid Build Coastguard Worker       location += nir_src_as_uint(*offset);
531*61046927SAndroid Build Coastguard Worker    }
532*61046927SAndroid Build Coastguard Worker 
533*61046927SAndroid Build Coastguard Worker    agx_index I = agx_get_cf(b->shader, location, nir_intrinsic_component(intr));
534*61046927SAndroid Build Coastguard Worker 
535*61046927SAndroid Build Coastguard Worker    /* If we have a non-constant offset, we add it to the CF. Offsets are in
536*61046927SAndroid Build Coastguard Worker     * vec4 slots (unless we're compact) but the CF is in components, so we need
537*61046927SAndroid Build Coastguard Worker     * to shift the offset by 2 before adding.
538*61046927SAndroid Build Coastguard Worker     */
539*61046927SAndroid Build Coastguard Worker    if (!nir_src_is_const(*offset)) {
540*61046927SAndroid Build Coastguard Worker       I = agx_iadd(b, I, agx_src_index(offset), compact ? 0 : 2);
541*61046927SAndroid Build Coastguard Worker    }
542*61046927SAndroid Build Coastguard Worker 
543*61046927SAndroid Build Coastguard Worker    return I;
544*61046927SAndroid Build Coastguard Worker }
545*61046927SAndroid Build Coastguard Worker 
546*61046927SAndroid Build Coastguard Worker static enum agx_interpolation
agx_interp_for_bary(nir_intrinsic_instr * bary,agx_index * sample_index)547*61046927SAndroid Build Coastguard Worker agx_interp_for_bary(nir_intrinsic_instr *bary, agx_index *sample_index)
548*61046927SAndroid Build Coastguard Worker {
549*61046927SAndroid Build Coastguard Worker    switch (bary->intrinsic) {
550*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_pixel:
551*61046927SAndroid Build Coastguard Worker       return AGX_INTERPOLATION_CENTER;
552*61046927SAndroid Build Coastguard Worker 
553*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_centroid:
554*61046927SAndroid Build Coastguard Worker       return AGX_INTERPOLATION_CENTROID;
555*61046927SAndroid Build Coastguard Worker 
556*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_at_sample:
557*61046927SAndroid Build Coastguard Worker       *sample_index = agx_src_index(&bary->src[0]);
558*61046927SAndroid Build Coastguard Worker       return AGX_INTERPOLATION_SAMPLE;
559*61046927SAndroid Build Coastguard Worker 
560*61046927SAndroid Build Coastguard Worker    default:
561*61046927SAndroid Build Coastguard Worker       unreachable("should have been lowered");
562*61046927SAndroid Build Coastguard Worker    }
563*61046927SAndroid Build Coastguard Worker }
564*61046927SAndroid Build Coastguard Worker 
565*61046927SAndroid Build Coastguard Worker static void
agx_emit_load_vary(agx_builder * b,agx_index dest,nir_intrinsic_instr * instr)566*61046927SAndroid Build Coastguard Worker agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
567*61046927SAndroid Build Coastguard Worker {
568*61046927SAndroid Build Coastguard Worker    ASSERTED unsigned components = instr->num_components;
569*61046927SAndroid Build Coastguard Worker    nir_intrinsic_instr *bary = nir_src_as_intrinsic(instr->src[0]);
570*61046927SAndroid Build Coastguard Worker 
571*61046927SAndroid Build Coastguard Worker    assert(components >= 1 && components <= 4);
572*61046927SAndroid Build Coastguard Worker 
573*61046927SAndroid Build Coastguard Worker    agx_index sample_index = agx_zero();
574*61046927SAndroid Build Coastguard Worker    enum agx_interpolation interp = agx_interp_for_bary(bary, &sample_index);
575*61046927SAndroid Build Coastguard Worker 
576*61046927SAndroid Build Coastguard Worker    bool perspective =
577*61046927SAndroid Build Coastguard Worker       nir_intrinsic_interp_mode(bary) != INTERP_MODE_NOPERSPECTIVE;
578*61046927SAndroid Build Coastguard Worker 
579*61046927SAndroid Build Coastguard Worker    agx_index I = cf_for_intrinsic(b, instr);
580*61046927SAndroid Build Coastguard Worker 
581*61046927SAndroid Build Coastguard Worker    /* For perspective interpolation, we project (multiply by 1/W) */
582*61046927SAndroid Build Coastguard Worker    if (perspective) {
583*61046927SAndroid Build Coastguard Worker       agx_index J = agx_get_cf(b->shader, VARYING_SLOT_POS, 3);
584*61046927SAndroid Build Coastguard Worker       agx_iterproj_to(b, dest, I, J, sample_index, components, interp);
585*61046927SAndroid Build Coastguard Worker    } else {
586*61046927SAndroid Build Coastguard Worker       agx_iter_to(b, dest, I, sample_index, components, interp);
587*61046927SAndroid Build Coastguard Worker    }
588*61046927SAndroid Build Coastguard Worker 
589*61046927SAndroid Build Coastguard Worker    agx_emit_cached_split(b, dest, components);
590*61046927SAndroid Build Coastguard Worker }
591*61046927SAndroid Build Coastguard Worker 
592*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_local_store_pixel(agx_builder * b,nir_intrinsic_instr * instr)593*61046927SAndroid Build Coastguard Worker agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr)
594*61046927SAndroid Build Coastguard Worker {
595*61046927SAndroid Build Coastguard Worker    bool explicit = nir_intrinsic_explicit_coord(instr);
596*61046927SAndroid Build Coastguard Worker 
597*61046927SAndroid Build Coastguard Worker    /* TODO: Reverse-engineer interactions with MRT */
598*61046927SAndroid Build Coastguard Worker    if (b->shader->stage == MESA_SHADER_FRAGMENT) {
599*61046927SAndroid Build Coastguard Worker       if (b->shader->key->fs.ignore_tib_dependencies) {
600*61046927SAndroid Build Coastguard Worker          assert(b->shader->nir->info.internal && "only for clear shaders");
601*61046927SAndroid Build Coastguard Worker       } else if (b->shader->did_writeout) {
602*61046927SAndroid Build Coastguard Worker          agx_wait_pix(b, 0x0004);
603*61046927SAndroid Build Coastguard Worker       } else {
604*61046927SAndroid Build Coastguard Worker          agx_wait_pix(b, 0x000C);
605*61046927SAndroid Build Coastguard Worker       }
606*61046927SAndroid Build Coastguard Worker    }
607*61046927SAndroid Build Coastguard Worker 
608*61046927SAndroid Build Coastguard Worker    /* Compact the registers according to the mask */
609*61046927SAndroid Build Coastguard Worker    agx_index compacted[4] = {agx_null()};
610*61046927SAndroid Build Coastguard Worker 
611*61046927SAndroid Build Coastguard Worker    unsigned compact_count = 0;
612*61046927SAndroid Build Coastguard Worker    u_foreach_bit(i, nir_intrinsic_write_mask(instr)) {
613*61046927SAndroid Build Coastguard Worker       compacted[compact_count++] = agx_extract_nir_src(b, instr->src[0], i);
614*61046927SAndroid Build Coastguard Worker    }
615*61046927SAndroid Build Coastguard Worker 
616*61046927SAndroid Build Coastguard Worker    agx_index collected = agx_emit_collect(b, compact_count, compacted);
617*61046927SAndroid Build Coastguard Worker    agx_index coords = explicit ? agx_src_index(&instr->src[2]) : agx_null();
618*61046927SAndroid Build Coastguard Worker 
619*61046927SAndroid Build Coastguard Worker    b->shader->did_writeout = true;
620*61046927SAndroid Build Coastguard Worker    b->shader->out->tag_write_disable = false;
621*61046927SAndroid Build Coastguard Worker    return agx_st_tile(b, collected, agx_src_index(&instr->src[1]), coords,
622*61046927SAndroid Build Coastguard Worker                       agx_format_for_pipe(nir_intrinsic_format(instr)),
623*61046927SAndroid Build Coastguard Worker                       nir_intrinsic_write_mask(instr),
624*61046927SAndroid Build Coastguard Worker                       nir_intrinsic_base(instr), explicit);
625*61046927SAndroid Build Coastguard Worker }
626*61046927SAndroid Build Coastguard Worker 
627*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_store_zs(agx_builder * b,nir_intrinsic_instr * instr)628*61046927SAndroid Build Coastguard Worker agx_emit_store_zs(agx_builder *b, nir_intrinsic_instr *instr)
629*61046927SAndroid Build Coastguard Worker {
630*61046927SAndroid Build Coastguard Worker    unsigned base = nir_intrinsic_base(instr);
631*61046927SAndroid Build Coastguard Worker    bool write_z = base & 1;
632*61046927SAndroid Build Coastguard Worker    bool write_s = base & 2;
633*61046927SAndroid Build Coastguard Worker 
634*61046927SAndroid Build Coastguard Worker    /* TODO: Handle better */
635*61046927SAndroid Build Coastguard Worker    assert(!b->shader->key->fs.ignore_tib_dependencies && "not used");
636*61046927SAndroid Build Coastguard Worker    agx_wait_pix(b, 0x0001);
637*61046927SAndroid Build Coastguard Worker 
638*61046927SAndroid Build Coastguard Worker    agx_index z = agx_src_index(&instr->src[1]);
639*61046927SAndroid Build Coastguard Worker    agx_index s = agx_src_index(&instr->src[2]);
640*61046927SAndroid Build Coastguard Worker 
641*61046927SAndroid Build Coastguard Worker    assert(!write_z || z.size == AGX_SIZE_32);
642*61046927SAndroid Build Coastguard Worker    assert(!write_s || s.size == AGX_SIZE_16);
643*61046927SAndroid Build Coastguard Worker 
644*61046927SAndroid Build Coastguard Worker    if (write_z && write_s) {
645*61046927SAndroid Build Coastguard Worker       agx_index u2u32 = agx_temp(b->shader, AGX_SIZE_32);
646*61046927SAndroid Build Coastguard Worker       agx_mov_to(b, u2u32, s);
647*61046927SAndroid Build Coastguard Worker       s = u2u32;
648*61046927SAndroid Build Coastguard Worker    }
649*61046927SAndroid Build Coastguard Worker 
650*61046927SAndroid Build Coastguard Worker    agx_index zs = (write_z && write_s) ? agx_vec2(b, z, s) : write_z ? z : s;
651*61046927SAndroid Build Coastguard Worker 
652*61046927SAndroid Build Coastguard Worker    /* Not necessarily a sample mask but overlapping hw mechanism... Should
653*61046927SAndroid Build Coastguard Worker     * maybe rename this flag to something more general.
654*61046927SAndroid Build Coastguard Worker     */
655*61046927SAndroid Build Coastguard Worker    b->shader->out->writes_sample_mask = true;
656*61046927SAndroid Build Coastguard Worker 
657*61046927SAndroid Build Coastguard Worker    return agx_zs_emit(b, agx_src_index(&instr->src[0]), zs, base);
658*61046927SAndroid Build Coastguard Worker }
659*61046927SAndroid Build Coastguard Worker 
660*61046927SAndroid Build Coastguard Worker static void
agx_emit_local_load_pixel(agx_builder * b,agx_index dest,nir_intrinsic_instr * instr)661*61046927SAndroid Build Coastguard Worker agx_emit_local_load_pixel(agx_builder *b, agx_index dest,
662*61046927SAndroid Build Coastguard Worker                           nir_intrinsic_instr *instr)
663*61046927SAndroid Build Coastguard Worker {
664*61046927SAndroid Build Coastguard Worker    /* TODO: Reverse-engineer interactions with MRT */
665*61046927SAndroid Build Coastguard Worker    assert(!b->shader->key->fs.ignore_tib_dependencies && "invalid usage");
666*61046927SAndroid Build Coastguard Worker    agx_wait_pix(b, 0x0008);
667*61046927SAndroid Build Coastguard Worker    b->shader->did_writeout = true;
668*61046927SAndroid Build Coastguard Worker 
669*61046927SAndroid Build Coastguard Worker    unsigned nr_comps = instr->def.num_components;
670*61046927SAndroid Build Coastguard Worker    agx_ld_tile_to(b, dest, agx_src_index(&instr->src[0]), agx_null(),
671*61046927SAndroid Build Coastguard Worker                   agx_format_for_pipe(nir_intrinsic_format(instr)),
672*61046927SAndroid Build Coastguard Worker                   BITFIELD_MASK(nr_comps), nir_intrinsic_base(instr), false);
673*61046927SAndroid Build Coastguard Worker    agx_emit_cached_split(b, dest, nr_comps);
674*61046927SAndroid Build Coastguard Worker }
675*61046927SAndroid Build Coastguard Worker 
676*61046927SAndroid Build Coastguard Worker static void
agx_emit_load(agx_builder * b,agx_index dest,nir_intrinsic_instr * instr)677*61046927SAndroid Build Coastguard Worker agx_emit_load(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
678*61046927SAndroid Build Coastguard Worker {
679*61046927SAndroid Build Coastguard Worker    agx_index addr = agx_src_index(&instr->src[0]);
680*61046927SAndroid Build Coastguard Worker    agx_index offset = agx_src_index(&instr->src[1]);
681*61046927SAndroid Build Coastguard Worker    enum agx_format fmt = agx_format_for_pipe(nir_intrinsic_format(instr));
682*61046927SAndroid Build Coastguard Worker    unsigned shift = nir_intrinsic_base(instr);
683*61046927SAndroid Build Coastguard Worker 
684*61046927SAndroid Build Coastguard Worker    /* Zero-extend offset if we're not sign-extending */
685*61046927SAndroid Build Coastguard Worker    if (!nir_intrinsic_sign_extend(instr))
686*61046927SAndroid Build Coastguard Worker       offset = agx_abs(offset);
687*61046927SAndroid Build Coastguard Worker 
688*61046927SAndroid Build Coastguard Worker    agx_device_load_to(b, dest, addr, offset, fmt,
689*61046927SAndroid Build Coastguard Worker                       BITFIELD_MASK(instr->def.num_components), shift);
690*61046927SAndroid Build Coastguard Worker    agx_emit_cached_split(b, dest, instr->def.num_components);
691*61046927SAndroid Build Coastguard Worker }
692*61046927SAndroid Build Coastguard Worker 
693*61046927SAndroid Build Coastguard Worker static void
agx_emit_store(agx_builder * b,nir_intrinsic_instr * instr)694*61046927SAndroid Build Coastguard Worker agx_emit_store(agx_builder *b, nir_intrinsic_instr *instr)
695*61046927SAndroid Build Coastguard Worker {
696*61046927SAndroid Build Coastguard Worker    agx_index addr = agx_src_index(&instr->src[1]);
697*61046927SAndroid Build Coastguard Worker    agx_index offset = agx_src_index(&instr->src[2]);
698*61046927SAndroid Build Coastguard Worker    enum agx_format fmt = agx_format_for_pipe(nir_intrinsic_format(instr));
699*61046927SAndroid Build Coastguard Worker    unsigned shift = nir_intrinsic_base(instr);
700*61046927SAndroid Build Coastguard Worker 
701*61046927SAndroid Build Coastguard Worker    /* Zero-extend offset if we're not sign-extending */
702*61046927SAndroid Build Coastguard Worker    if (!nir_intrinsic_sign_extend(instr))
703*61046927SAndroid Build Coastguard Worker       offset = agx_abs(offset);
704*61046927SAndroid Build Coastguard Worker 
705*61046927SAndroid Build Coastguard Worker    agx_device_store(b, agx_recollect_vector(b, instr->src[0]), addr, offset,
706*61046927SAndroid Build Coastguard Worker                     fmt, BITFIELD_MASK(nir_src_num_components(instr->src[0])),
707*61046927SAndroid Build Coastguard Worker                     shift);
708*61046927SAndroid Build Coastguard Worker }
709*61046927SAndroid Build Coastguard Worker 
710*61046927SAndroid Build Coastguard Worker /* Preambles write directly to uniform registers, so move from uniform to GPR */
711*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_load_preamble(agx_builder * b,agx_index dst,nir_intrinsic_instr * instr)712*61046927SAndroid Build Coastguard Worker agx_emit_load_preamble(agx_builder *b, agx_index dst,
713*61046927SAndroid Build Coastguard Worker                        nir_intrinsic_instr *instr)
714*61046927SAndroid Build Coastguard Worker {
715*61046927SAndroid Build Coastguard Worker    agx_index srcs[4] = {agx_null()};
716*61046927SAndroid Build Coastguard Worker    unsigned dim = instr->def.num_components;
717*61046927SAndroid Build Coastguard Worker    assert(dim <= ARRAY_SIZE(srcs) && "shouldn't see larger vectors");
718*61046927SAndroid Build Coastguard Worker 
719*61046927SAndroid Build Coastguard Worker    unsigned base = nir_intrinsic_base(instr);
720*61046927SAndroid Build Coastguard Worker    unsigned stride = agx_size_align_16(dst.size);
721*61046927SAndroid Build Coastguard Worker 
722*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < dim; ++i)
723*61046927SAndroid Build Coastguard Worker       srcs[i] = agx_uniform(base + i * stride, dst.size);
724*61046927SAndroid Build Coastguard Worker 
725*61046927SAndroid Build Coastguard Worker    return agx_emit_collect_to(b, dst, dim, srcs);
726*61046927SAndroid Build Coastguard Worker }
727*61046927SAndroid Build Coastguard Worker 
728*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_store_preamble(agx_builder * b,nir_intrinsic_instr * instr)729*61046927SAndroid Build Coastguard Worker agx_emit_store_preamble(agx_builder *b, nir_intrinsic_instr *instr)
730*61046927SAndroid Build Coastguard Worker {
731*61046927SAndroid Build Coastguard Worker    agx_index vec = agx_src_index(&instr->src[0]);
732*61046927SAndroid Build Coastguard Worker    unsigned base = nir_intrinsic_base(instr);
733*61046927SAndroid Build Coastguard Worker    unsigned stride = agx_size_align_16(vec.size);
734*61046927SAndroid Build Coastguard Worker    unsigned nr = nir_src_num_components(instr->src[0]);
735*61046927SAndroid Build Coastguard Worker 
736*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < nr; i += (4 / stride)) {
737*61046927SAndroid Build Coastguard Worker       agx_index data[4] = {0};
738*61046927SAndroid Build Coastguard Worker       unsigned count = MIN2(4 / stride, nr - i);
739*61046927SAndroid Build Coastguard Worker 
740*61046927SAndroid Build Coastguard Worker       for (unsigned c = 0; c < count; ++c) {
741*61046927SAndroid Build Coastguard Worker          data[c] = agx_extract_nir_src(b, instr->src[0], i + c);
742*61046927SAndroid Build Coastguard Worker       }
743*61046927SAndroid Build Coastguard Worker 
744*61046927SAndroid Build Coastguard Worker       agx_uniform_store(b, agx_emit_collect(b, count, data),
745*61046927SAndroid Build Coastguard Worker                         agx_immediate(base + i * stride), BITFIELD_MASK(count));
746*61046927SAndroid Build Coastguard Worker    }
747*61046927SAndroid Build Coastguard Worker 
748*61046927SAndroid Build Coastguard Worker    return NULL;
749*61046927SAndroid Build Coastguard Worker }
750*61046927SAndroid Build Coastguard Worker 
751*61046927SAndroid Build Coastguard Worker static enum agx_dim
agx_tex_dim(enum glsl_sampler_dim dim,bool array)752*61046927SAndroid Build Coastguard Worker agx_tex_dim(enum glsl_sampler_dim dim, bool array)
753*61046927SAndroid Build Coastguard Worker {
754*61046927SAndroid Build Coastguard Worker    switch (dim) {
755*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_1D:
756*61046927SAndroid Build Coastguard Worker       return array ? AGX_DIM_1D_ARRAY : AGX_DIM_1D;
757*61046927SAndroid Build Coastguard Worker 
758*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_2D:
759*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_RECT:
760*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_EXTERNAL:
761*61046927SAndroid Build Coastguard Worker       return array ? AGX_DIM_2D_ARRAY : AGX_DIM_2D;
762*61046927SAndroid Build Coastguard Worker 
763*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_MS:
764*61046927SAndroid Build Coastguard Worker       return array ? AGX_DIM_2D_MS_ARRAY : AGX_DIM_2D_MS;
765*61046927SAndroid Build Coastguard Worker 
766*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_3D:
767*61046927SAndroid Build Coastguard Worker       assert(!array && "3D arrays unsupported");
768*61046927SAndroid Build Coastguard Worker       return AGX_DIM_3D;
769*61046927SAndroid Build Coastguard Worker 
770*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_CUBE:
771*61046927SAndroid Build Coastguard Worker       return array ? AGX_DIM_CUBE_ARRAY : AGX_DIM_CUBE;
772*61046927SAndroid Build Coastguard Worker 
773*61046927SAndroid Build Coastguard Worker    case GLSL_SAMPLER_DIM_BUF:
774*61046927SAndroid Build Coastguard Worker       unreachable("Buffer textures should have been lowered");
775*61046927SAndroid Build Coastguard Worker 
776*61046927SAndroid Build Coastguard Worker    default:
777*61046927SAndroid Build Coastguard Worker       unreachable("Invalid sampler dim\n");
778*61046927SAndroid Build Coastguard Worker    }
779*61046927SAndroid Build Coastguard Worker }
780*61046927SAndroid Build Coastguard Worker 
781*61046927SAndroid Build Coastguard Worker /*
782*61046927SAndroid Build Coastguard Worker  * In the hardware, bindless texture sources are specified as a 64-bit uniform
783*61046927SAndroid Build Coastguard Worker  * base address summed with a 32-bit register index. In NIR, we model this as a
784*61046927SAndroid Build Coastguard Worker  * vec2, where the first source is the (constant) uniform register number and
785*61046927SAndroid Build Coastguard Worker  * the second source is the (dynamic) byte offset.
786*61046927SAndroid Build Coastguard Worker  */
787*61046927SAndroid Build Coastguard Worker static agx_index
agx_translate_bindless_handle(agx_builder * b,nir_src * handle,agx_index * base)788*61046927SAndroid Build Coastguard Worker agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
789*61046927SAndroid Build Coastguard Worker {
790*61046927SAndroid Build Coastguard Worker    nir_scalar base_scalar = nir_scalar_resolved(handle->ssa, 0);
791*61046927SAndroid Build Coastguard Worker    assert(nir_scalar_is_const(base_scalar) && "base must be constant");
792*61046927SAndroid Build Coastguard Worker 
793*61046927SAndroid Build Coastguard Worker    unsigned base_uint = nir_scalar_as_uint(base_scalar);
794*61046927SAndroid Build Coastguard Worker    *base = agx_uniform(base_uint, AGX_SIZE_64);
795*61046927SAndroid Build Coastguard Worker 
796*61046927SAndroid Build Coastguard Worker    return agx_emit_extract(b, agx_src_index(handle), 1);
797*61046927SAndroid Build Coastguard Worker }
798*61046927SAndroid Build Coastguard Worker 
799*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_block_image_store(agx_builder * b,nir_intrinsic_instr * instr)800*61046927SAndroid Build Coastguard Worker agx_emit_block_image_store(agx_builder *b, nir_intrinsic_instr *instr)
801*61046927SAndroid Build Coastguard Worker {
802*61046927SAndroid Build Coastguard Worker    agx_index offset = agx_src_index(&instr->src[1]);
803*61046927SAndroid Build Coastguard Worker    agx_index coords = agx_src_index(&instr->src[2]);
804*61046927SAndroid Build Coastguard Worker    enum agx_format format = agx_format_for_pipe(nir_intrinsic_format(instr));
805*61046927SAndroid Build Coastguard Worker 
806*61046927SAndroid Build Coastguard Worker    bool ms = nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS;
807*61046927SAndroid Build Coastguard Worker    bool array = nir_intrinsic_image_array(instr);
808*61046927SAndroid Build Coastguard Worker    enum agx_dim dim = agx_tex_dim(nir_intrinsic_image_dim(instr), array);
809*61046927SAndroid Build Coastguard Worker    bool explicit = nir_intrinsic_explicit_coord(instr);
810*61046927SAndroid Build Coastguard Worker 
811*61046927SAndroid Build Coastguard Worker    /* 32-bit source physically, 16-bit in NIR, top half ignored but needed
812*61046927SAndroid Build Coastguard Worker     * logically to ensure alignment.
813*61046927SAndroid Build Coastguard Worker     */
814*61046927SAndroid Build Coastguard Worker    offset = agx_vec2(b, offset, agx_undef(AGX_SIZE_16));
815*61046927SAndroid Build Coastguard Worker    offset.channels_m1--;
816*61046927SAndroid Build Coastguard Worker    offset.size = AGX_SIZE_32;
817*61046927SAndroid Build Coastguard Worker 
818*61046927SAndroid Build Coastguard Worker    /* Modified coordinate descriptor */
819*61046927SAndroid Build Coastguard Worker    if (!explicit) {
820*61046927SAndroid Build Coastguard Worker       if (array) {
821*61046927SAndroid Build Coastguard Worker          agx_index layer = coords;
822*61046927SAndroid Build Coastguard Worker          coords = agx_temp(b->shader, AGX_SIZE_32);
823*61046927SAndroid Build Coastguard Worker          agx_emit_collect_to(b, coords, 2,
824*61046927SAndroid Build Coastguard Worker                              (agx_index[]){
825*61046927SAndroid Build Coastguard Worker                                 ms ? agx_mov_imm(b, 16, 0) : layer,
826*61046927SAndroid Build Coastguard Worker                                 ms ? layer : agx_undef(AGX_SIZE_16),
827*61046927SAndroid Build Coastguard Worker                              });
828*61046927SAndroid Build Coastguard Worker       } else {
829*61046927SAndroid Build Coastguard Worker          coords = agx_null();
830*61046927SAndroid Build Coastguard Worker       }
831*61046927SAndroid Build Coastguard Worker    }
832*61046927SAndroid Build Coastguard Worker 
833*61046927SAndroid Build Coastguard Worker    agx_index base, index;
834*61046927SAndroid Build Coastguard Worker    if (instr->intrinsic == nir_intrinsic_bindless_image_store_block_agx) {
835*61046927SAndroid Build Coastguard Worker       index = agx_translate_bindless_handle(b, &instr->src[0], &base);
836*61046927SAndroid Build Coastguard Worker 
837*61046927SAndroid Build Coastguard Worker       assert(base.size == AGX_SIZE_64);
838*61046927SAndroid Build Coastguard Worker       assert(index.size == AGX_SIZE_32);
839*61046927SAndroid Build Coastguard Worker    } else {
840*61046927SAndroid Build Coastguard Worker       base = agx_zero();
841*61046927SAndroid Build Coastguard Worker       index = agx_src_index(&instr->src[0]);
842*61046927SAndroid Build Coastguard Worker 
843*61046927SAndroid Build Coastguard Worker       assert(index.size == AGX_SIZE_16);
844*61046927SAndroid Build Coastguard Worker    }
845*61046927SAndroid Build Coastguard Worker 
846*61046927SAndroid Build Coastguard Worker    // XXX: how does this possibly work
847*61046927SAndroid Build Coastguard Worker    if (format == AGX_FORMAT_F16)
848*61046927SAndroid Build Coastguard Worker       format = AGX_FORMAT_I16;
849*61046927SAndroid Build Coastguard Worker 
850*61046927SAndroid Build Coastguard Worker    return agx_block_image_store(b, base, index, offset, coords, format, dim,
851*61046927SAndroid Build Coastguard Worker                                 explicit);
852*61046927SAndroid Build Coastguard Worker }
853*61046927SAndroid Build Coastguard Worker 
854*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_load_compute_dimension(agx_builder * b,agx_index dst,nir_intrinsic_instr * instr,enum agx_sr base)855*61046927SAndroid Build Coastguard Worker agx_load_compute_dimension(agx_builder *b, agx_index dst,
856*61046927SAndroid Build Coastguard Worker                            nir_intrinsic_instr *instr, enum agx_sr base)
857*61046927SAndroid Build Coastguard Worker {
858*61046927SAndroid Build Coastguard Worker    unsigned dim = instr->def.num_components;
859*61046927SAndroid Build Coastguard Worker    unsigned size = instr->def.bit_size;
860*61046927SAndroid Build Coastguard Worker    assert(size == 16 || size == 32);
861*61046927SAndroid Build Coastguard Worker 
862*61046927SAndroid Build Coastguard Worker    agx_index srcs[] = {
863*61046927SAndroid Build Coastguard Worker       agx_get_sr(b, size, base + 0),
864*61046927SAndroid Build Coastguard Worker       agx_get_sr(b, size, base + 1),
865*61046927SAndroid Build Coastguard Worker       agx_get_sr(b, size, base + 2),
866*61046927SAndroid Build Coastguard Worker    };
867*61046927SAndroid Build Coastguard Worker 
868*61046927SAndroid Build Coastguard Worker    return agx_emit_collect_to(b, dst, dim, srcs);
869*61046927SAndroid Build Coastguard Worker }
870*61046927SAndroid Build Coastguard Worker 
871*61046927SAndroid Build Coastguard Worker static enum agx_atomic_opc
translate_atomic_opcode(nir_atomic_op op)872*61046927SAndroid Build Coastguard Worker translate_atomic_opcode(nir_atomic_op op)
873*61046927SAndroid Build Coastguard Worker {
874*61046927SAndroid Build Coastguard Worker    /* clang-format off */
875*61046927SAndroid Build Coastguard Worker    switch (op) {
876*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_iadd:    return AGX_ATOMIC_OPC_ADD;
877*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_imin:    return AGX_ATOMIC_OPC_IMIN;
878*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_umin:    return AGX_ATOMIC_OPC_UMIN;
879*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_imax:    return AGX_ATOMIC_OPC_IMAX;
880*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_umax:    return AGX_ATOMIC_OPC_UMAX;
881*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_iand:    return AGX_ATOMIC_OPC_AND;
882*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_ior:     return AGX_ATOMIC_OPC_OR;
883*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_ixor:    return AGX_ATOMIC_OPC_XOR;
884*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_xchg:    return AGX_ATOMIC_OPC_XCHG;
885*61046927SAndroid Build Coastguard Worker    case nir_atomic_op_cmpxchg: return AGX_ATOMIC_OPC_CMPXCHG;
886*61046927SAndroid Build Coastguard Worker    default: unreachable("unknown atomic opcode");
887*61046927SAndroid Build Coastguard Worker    }
888*61046927SAndroid Build Coastguard Worker    /* clang-format on */
889*61046927SAndroid Build Coastguard Worker }
890*61046927SAndroid Build Coastguard Worker 
891*61046927SAndroid Build Coastguard Worker /*
892*61046927SAndroid Build Coastguard Worker  * The "base" of a local load/store/atomic can be zero but no other immediates.
893*61046927SAndroid Build Coastguard Worker  * This would be a little silly to handle when inlining immediates, so we
894*61046927SAndroid Build Coastguard Worker  * instead exclude these ops from immediate inlining and just handle 0 specially
895*61046927SAndroid Build Coastguard Worker  * when translating.
896*61046927SAndroid Build Coastguard Worker  */
897*61046927SAndroid Build Coastguard Worker static agx_index
agx_local_base(nir_src src)898*61046927SAndroid Build Coastguard Worker agx_local_base(nir_src src)
899*61046927SAndroid Build Coastguard Worker {
900*61046927SAndroid Build Coastguard Worker    if (nir_src_is_const(src) && nir_src_as_uint(src) == 0)
901*61046927SAndroid Build Coastguard Worker       return agx_zero();
902*61046927SAndroid Build Coastguard Worker    else
903*61046927SAndroid Build Coastguard Worker       return agx_src_index(&src);
904*61046927SAndroid Build Coastguard Worker }
905*61046927SAndroid Build Coastguard Worker 
906*61046927SAndroid Build Coastguard Worker static void
agx_emit_atomic(agx_builder * b,agx_index dst,nir_intrinsic_instr * instr,bool local)907*61046927SAndroid Build Coastguard Worker agx_emit_atomic(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr,
908*61046927SAndroid Build Coastguard Worker                 bool local)
909*61046927SAndroid Build Coastguard Worker {
910*61046927SAndroid Build Coastguard Worker    enum agx_atomic_opc op =
911*61046927SAndroid Build Coastguard Worker       translate_atomic_opcode(nir_intrinsic_atomic_op(instr));
912*61046927SAndroid Build Coastguard Worker    agx_index base =
913*61046927SAndroid Build Coastguard Worker       local ? agx_local_base(instr->src[0]) : agx_src_index(&instr->src[0]);
914*61046927SAndroid Build Coastguard Worker    agx_index value = agx_src_index(&instr->src[local ? 1 : 2]);
915*61046927SAndroid Build Coastguard Worker    agx_index index = local ? agx_zero() : agx_src_index(&instr->src[1]);
916*61046927SAndroid Build Coastguard Worker 
917*61046927SAndroid Build Coastguard Worker    /* cmpxchg (only) takes 2 sources, passed in consecutive registers */
918*61046927SAndroid Build Coastguard Worker    if (op == AGX_ATOMIC_OPC_CMPXCHG) {
919*61046927SAndroid Build Coastguard Worker       agx_index value2 = agx_src_index(&instr->src[local ? 2 : 3]);
920*61046927SAndroid Build Coastguard Worker       value = agx_vec2(b, value2, value);
921*61046927SAndroid Build Coastguard Worker    }
922*61046927SAndroid Build Coastguard Worker 
923*61046927SAndroid Build Coastguard Worker    if (local) {
924*61046927SAndroid Build Coastguard Worker       assert(base.size == AGX_SIZE_16);
925*61046927SAndroid Build Coastguard Worker       agx_local_atomic_to(b, dst, value, base, index, op);
926*61046927SAndroid Build Coastguard Worker    } else {
927*61046927SAndroid Build Coastguard Worker       assert(base.size == AGX_SIZE_64);
928*61046927SAndroid Build Coastguard Worker       agx_atomic_to(b, dst, value, base, index, op);
929*61046927SAndroid Build Coastguard Worker    }
930*61046927SAndroid Build Coastguard Worker }
931*61046927SAndroid Build Coastguard Worker 
932*61046927SAndroid Build Coastguard Worker static enum agx_format
format_for_bitsize(unsigned bitsize)933*61046927SAndroid Build Coastguard Worker format_for_bitsize(unsigned bitsize)
934*61046927SAndroid Build Coastguard Worker {
935*61046927SAndroid Build Coastguard Worker    switch (bitsize) {
936*61046927SAndroid Build Coastguard Worker    case 8:
937*61046927SAndroid Build Coastguard Worker       return AGX_FORMAT_I8;
938*61046927SAndroid Build Coastguard Worker    case 16:
939*61046927SAndroid Build Coastguard Worker       return AGX_FORMAT_I16;
940*61046927SAndroid Build Coastguard Worker    case 32:
941*61046927SAndroid Build Coastguard Worker       return AGX_FORMAT_I32;
942*61046927SAndroid Build Coastguard Worker    default:
943*61046927SAndroid Build Coastguard Worker       unreachable("should've been lowered");
944*61046927SAndroid Build Coastguard Worker    }
945*61046927SAndroid Build Coastguard Worker }
946*61046927SAndroid Build Coastguard Worker 
947*61046927SAndroid Build Coastguard Worker static void
agx_emit_local_load(agx_builder * b,agx_index dst,nir_intrinsic_instr * instr)948*61046927SAndroid Build Coastguard Worker agx_emit_local_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr)
949*61046927SAndroid Build Coastguard Worker {
950*61046927SAndroid Build Coastguard Worker    agx_index base = agx_local_base(instr->src[0]);
951*61046927SAndroid Build Coastguard Worker    agx_index index = agx_zero(); /* TODO: optimize address arithmetic */
952*61046927SAndroid Build Coastguard Worker    assert(base.size == AGX_SIZE_16);
953*61046927SAndroid Build Coastguard Worker 
954*61046927SAndroid Build Coastguard Worker    enum agx_format format = format_for_bitsize(instr->def.bit_size);
955*61046927SAndroid Build Coastguard Worker    unsigned nr = instr->def.num_components;
956*61046927SAndroid Build Coastguard Worker    unsigned mask = BITFIELD_MASK(nr);
957*61046927SAndroid Build Coastguard Worker 
958*61046927SAndroid Build Coastguard Worker    agx_local_load_to(b, dst, base, index, format, mask);
959*61046927SAndroid Build Coastguard Worker    agx_emit_cached_split(b, dst, nr);
960*61046927SAndroid Build Coastguard Worker }
961*61046927SAndroid Build Coastguard Worker 
962*61046927SAndroid Build Coastguard Worker static void
agx_emit_local_store(agx_builder * b,nir_intrinsic_instr * instr)963*61046927SAndroid Build Coastguard Worker agx_emit_local_store(agx_builder *b, nir_intrinsic_instr *instr)
964*61046927SAndroid Build Coastguard Worker {
965*61046927SAndroid Build Coastguard Worker    agx_index value = agx_src_index(&instr->src[0]);
966*61046927SAndroid Build Coastguard Worker    agx_index base = agx_local_base(instr->src[1]);
967*61046927SAndroid Build Coastguard Worker    agx_index index = agx_zero(); /* TODO: optimize address arithmetic */
968*61046927SAndroid Build Coastguard Worker    assert(base.size == AGX_SIZE_16);
969*61046927SAndroid Build Coastguard Worker 
970*61046927SAndroid Build Coastguard Worker    enum agx_format format = format_for_bitsize(nir_src_bit_size(instr->src[0]));
971*61046927SAndroid Build Coastguard Worker    unsigned mask = BITFIELD_MASK(
972*61046927SAndroid Build Coastguard Worker       nir_src_num_components(instr->src[0])); /* XXX: there's a write mask */
973*61046927SAndroid Build Coastguard Worker 
974*61046927SAndroid Build Coastguard Worker    agx_local_store(b, value, base, index, format, mask);
975*61046927SAndroid Build Coastguard Worker }
976*61046927SAndroid Build Coastguard Worker 
977*61046927SAndroid Build Coastguard Worker static void
agx_emit_load_scratch(agx_builder * b,agx_index dst,nir_intrinsic_instr * instr)978*61046927SAndroid Build Coastguard Worker agx_emit_load_scratch(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr)
979*61046927SAndroid Build Coastguard Worker {
980*61046927SAndroid Build Coastguard Worker    agx_index offset = agx_src_index(&instr->src[0]);
981*61046927SAndroid Build Coastguard Worker    enum agx_format format = format_for_bitsize(instr->def.bit_size);
982*61046927SAndroid Build Coastguard Worker    unsigned nr = instr->def.num_components;
983*61046927SAndroid Build Coastguard Worker    unsigned mask = BITFIELD_MASK(nr);
984*61046927SAndroid Build Coastguard Worker 
985*61046927SAndroid Build Coastguard Worker    agx_stack_load_to(b, dst, offset, format, mask);
986*61046927SAndroid Build Coastguard Worker    agx_emit_cached_split(b, dst, nr);
987*61046927SAndroid Build Coastguard Worker    b->shader->any_scratch = true;
988*61046927SAndroid Build Coastguard Worker }
989*61046927SAndroid Build Coastguard Worker 
990*61046927SAndroid Build Coastguard Worker static void
agx_emit_store_scratch(agx_builder * b,nir_intrinsic_instr * instr)991*61046927SAndroid Build Coastguard Worker agx_emit_store_scratch(agx_builder *b, nir_intrinsic_instr *instr)
992*61046927SAndroid Build Coastguard Worker {
993*61046927SAndroid Build Coastguard Worker    agx_index value = agx_recollect_vector(b, instr->src[0]);
994*61046927SAndroid Build Coastguard Worker    agx_index offset = agx_src_index(&instr->src[1]);
995*61046927SAndroid Build Coastguard Worker    enum agx_format format = format_for_bitsize(nir_src_bit_size(instr->src[0]));
996*61046927SAndroid Build Coastguard Worker    unsigned mask = BITFIELD_MASK(nir_src_num_components(instr->src[0]));
997*61046927SAndroid Build Coastguard Worker 
998*61046927SAndroid Build Coastguard Worker    agx_stack_store(b, value, offset, format, mask);
999*61046927SAndroid Build Coastguard Worker    b->shader->any_scratch = true;
1000*61046927SAndroid Build Coastguard Worker }
1001*61046927SAndroid Build Coastguard Worker 
1002*61046927SAndroid Build Coastguard Worker static unsigned
agx_expand_tex_to(agx_builder * b,nir_def * def,agx_index src,bool masked)1003*61046927SAndroid Build Coastguard Worker agx_expand_tex_to(agx_builder *b, nir_def *def, agx_index src, bool masked)
1004*61046927SAndroid Build Coastguard Worker {
1005*61046927SAndroid Build Coastguard Worker    unsigned nr_channels = def->num_components;
1006*61046927SAndroid Build Coastguard Worker    nir_component_mask_t mask = nir_def_components_read(def);
1007*61046927SAndroid Build Coastguard Worker 
1008*61046927SAndroid Build Coastguard Worker    if (!masked)
1009*61046927SAndroid Build Coastguard Worker       mask = (nir_component_mask_t)BITFIELD_MASK(nr_channels);
1010*61046927SAndroid Build Coastguard Worker 
1011*61046927SAndroid Build Coastguard Worker    agx_index packed_channels[4] = {agx_null()};
1012*61046927SAndroid Build Coastguard Worker    agx_index unpacked_channels[4] = {agx_null()};
1013*61046927SAndroid Build Coastguard Worker 
1014*61046927SAndroid Build Coastguard Worker    /* Hardware writes the masked components contiguously, expand out for NIR */
1015*61046927SAndroid Build Coastguard Worker    agx_emit_split(b, packed_channels, src, 4 /* XXX: why not nr_channels */);
1016*61046927SAndroid Build Coastguard Worker 
1017*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < nr_channels; ++i) {
1018*61046927SAndroid Build Coastguard Worker       unpacked_channels[i] =
1019*61046927SAndroid Build Coastguard Worker          (mask & BITFIELD_BIT(i))
1020*61046927SAndroid Build Coastguard Worker             ? packed_channels[util_bitcount(mask & BITFIELD_MASK(i))]
1021*61046927SAndroid Build Coastguard Worker             : agx_undef(src.size);
1022*61046927SAndroid Build Coastguard Worker    }
1023*61046927SAndroid Build Coastguard Worker 
1024*61046927SAndroid Build Coastguard Worker    agx_emit_collect_to(b, agx_def_index(def), nr_channels, unpacked_channels);
1025*61046927SAndroid Build Coastguard Worker    return mask;
1026*61046927SAndroid Build Coastguard Worker }
1027*61046927SAndroid Build Coastguard Worker 
1028*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_image_load(agx_builder * b,agx_index dst,nir_intrinsic_instr * intr)1029*61046927SAndroid Build Coastguard Worker agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
1030*61046927SAndroid Build Coastguard Worker {
1031*61046927SAndroid Build Coastguard Worker    agx_index ms_index = agx_src_index(&intr->src[2]);
1032*61046927SAndroid Build Coastguard Worker    agx_index lod = agx_src_index(&intr->src[3]);
1033*61046927SAndroid Build Coastguard Worker    enum agx_lod_mode lod_mode = AGX_LOD_MODE_LOD_MIN;
1034*61046927SAndroid Build Coastguard Worker 
1035*61046927SAndroid Build Coastguard Worker    agx_index bindless = agx_immediate(0), texture;
1036*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic == nir_intrinsic_bindless_image_load)
1037*61046927SAndroid Build Coastguard Worker       texture = agx_translate_bindless_handle(b, &intr->src[0], &bindless);
1038*61046927SAndroid Build Coastguard Worker    else if (nir_src_is_const(intr->src[0]) &&
1039*61046927SAndroid Build Coastguard Worker             nir_src_as_uint(intr->src[0]) < 0x100)
1040*61046927SAndroid Build Coastguard Worker       texture = agx_immediate(nir_src_as_uint(intr->src[0]));
1041*61046927SAndroid Build Coastguard Worker    else
1042*61046927SAndroid Build Coastguard Worker       texture = agx_src_index(&intr->src[0]);
1043*61046927SAndroid Build Coastguard Worker 
1044*61046927SAndroid Build Coastguard Worker    assert(nir_src_num_components(intr->src[1]) == 4);
1045*61046927SAndroid Build Coastguard Worker    agx_index coord[4] = {
1046*61046927SAndroid Build Coastguard Worker       agx_extract_nir_src(b, intr->src[1], 0),
1047*61046927SAndroid Build Coastguard Worker       agx_extract_nir_src(b, intr->src[1], 1),
1048*61046927SAndroid Build Coastguard Worker       agx_extract_nir_src(b, intr->src[1], 2),
1049*61046927SAndroid Build Coastguard Worker       agx_extract_nir_src(b, intr->src[1], 3),
1050*61046927SAndroid Build Coastguard Worker    };
1051*61046927SAndroid Build Coastguard Worker 
1052*61046927SAndroid Build Coastguard Worker    /* Get the image dimension. Cubes are lowered to 2D, since they are logically
1053*61046927SAndroid Build Coastguard Worker     * equivalent for imageLoad, but out-of-bounds behaviour for cubes on G13
1054*61046927SAndroid Build Coastguard Worker     * is wrong according to Piglit's arb_shader_image_load_store-invalid.
1055*61046927SAndroid Build Coastguard Worker     *
1056*61046927SAndroid Build Coastguard Worker     * This requires a matching transform in the driver.
1057*61046927SAndroid Build Coastguard Worker     */
1058*61046927SAndroid Build Coastguard Worker    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
1059*61046927SAndroid Build Coastguard Worker    bool is_array = nir_intrinsic_image_array(intr);
1060*61046927SAndroid Build Coastguard Worker 
1061*61046927SAndroid Build Coastguard Worker    if (dim == GLSL_SAMPLER_DIM_CUBE) {
1062*61046927SAndroid Build Coastguard Worker       dim = GLSL_SAMPLER_DIM_2D;
1063*61046927SAndroid Build Coastguard Worker       is_array = true;
1064*61046927SAndroid Build Coastguard Worker    }
1065*61046927SAndroid Build Coastguard Worker 
1066*61046927SAndroid Build Coastguard Worker    bool is_ms = dim == GLSL_SAMPLER_DIM_MS;
1067*61046927SAndroid Build Coastguard Worker    unsigned coord_comps = glsl_get_sampler_dim_coordinate_components(dim);
1068*61046927SAndroid Build Coastguard Worker    if (is_array && is_ms) {
1069*61046927SAndroid Build Coastguard Worker       agx_index layer = agx_temp(b->shader, AGX_SIZE_16);
1070*61046927SAndroid Build Coastguard Worker       agx_subdivide_to(b, layer, coord[coord_comps], 0);
1071*61046927SAndroid Build Coastguard Worker 
1072*61046927SAndroid Build Coastguard Worker       assert(ms_index.size == AGX_SIZE_16);
1073*61046927SAndroid Build Coastguard Worker       agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
1074*61046927SAndroid Build Coastguard Worker       agx_emit_collect_to(b, tmp, 2, (agx_index[]){ms_index, layer});
1075*61046927SAndroid Build Coastguard Worker       coord[coord_comps++] = tmp;
1076*61046927SAndroid Build Coastguard Worker    } else if (is_ms) {
1077*61046927SAndroid Build Coastguard Worker       agx_index tmp = agx_temp(b->shader, AGX_SIZE_32);
1078*61046927SAndroid Build Coastguard Worker       agx_mov_to(b, tmp, ms_index);
1079*61046927SAndroid Build Coastguard Worker       coord[coord_comps++] = tmp;
1080*61046927SAndroid Build Coastguard Worker    } else if (is_array) {
1081*61046927SAndroid Build Coastguard Worker       coord_comps++;
1082*61046927SAndroid Build Coastguard Worker    }
1083*61046927SAndroid Build Coastguard Worker 
1084*61046927SAndroid Build Coastguard Worker    /* Multisampled images do not support mipmapping */
1085*61046927SAndroid Build Coastguard Worker    if (is_ms) {
1086*61046927SAndroid Build Coastguard Worker       lod_mode = AGX_LOD_MODE_AUTO_LOD;
1087*61046927SAndroid Build Coastguard Worker       lod = agx_zero();
1088*61046927SAndroid Build Coastguard Worker    }
1089*61046927SAndroid Build Coastguard Worker 
1090*61046927SAndroid Build Coastguard Worker    agx_index coords = agx_emit_collect(b, coord_comps, coord);
1091*61046927SAndroid Build Coastguard Worker    agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
1092*61046927SAndroid Build Coastguard Worker 
1093*61046927SAndroid Build Coastguard Worker    agx_instr *I = agx_image_load_to(
1094*61046927SAndroid Build Coastguard Worker       b, tmp, coords, lod, bindless, texture, agx_immediate(0), agx_null(),
1095*61046927SAndroid Build Coastguard Worker       agx_tex_dim(dim, is_array), lod_mode, 0, false);
1096*61046927SAndroid Build Coastguard Worker    I->mask = agx_expand_tex_to(b, &intr->def, tmp, true);
1097*61046927SAndroid Build Coastguard Worker 
1098*61046927SAndroid Build Coastguard Worker    b->shader->out->uses_txf = true;
1099*61046927SAndroid Build Coastguard Worker    return NULL;
1100*61046927SAndroid Build Coastguard Worker }
1101*61046927SAndroid Build Coastguard Worker 
1102*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_export(agx_builder * b,unsigned base,nir_src src)1103*61046927SAndroid Build Coastguard Worker agx_emit_export(agx_builder *b, unsigned base, nir_src src)
1104*61046927SAndroid Build Coastguard Worker {
1105*61046927SAndroid Build Coastguard Worker    agx_builder b_ = *b;
1106*61046927SAndroid Build Coastguard Worker    agx_cursor after_cursor = agx_after_block(agx_exit_block(b->shader));
1107*61046927SAndroid Build Coastguard Worker    b_.cursor = after_cursor;
1108*61046927SAndroid Build Coastguard Worker 
1109*61046927SAndroid Build Coastguard Worker    for (unsigned c = 0; c < nir_src_num_components(src); ++c) {
1110*61046927SAndroid Build Coastguard Worker       agx_index chan = agx_extract_nir_src(b, src, c);
1111*61046927SAndroid Build Coastguard Worker       unsigned stride = agx_size_align_16(chan.size);
1112*61046927SAndroid Build Coastguard Worker 
1113*61046927SAndroid Build Coastguard Worker       agx_export(&b_, chan, base + (c * stride));
1114*61046927SAndroid Build Coastguard Worker    }
1115*61046927SAndroid Build Coastguard Worker 
1116*61046927SAndroid Build Coastguard Worker    if (agx_cursors_equal(b->cursor, after_cursor)) {
1117*61046927SAndroid Build Coastguard Worker       b->cursor = agx_after_block_logical(b->cursor.block);
1118*61046927SAndroid Build Coastguard Worker    }
1119*61046927SAndroid Build Coastguard Worker 
1120*61046927SAndroid Build Coastguard Worker    return NULL;
1121*61046927SAndroid Build Coastguard Worker }
1122*61046927SAndroid Build Coastguard Worker 
1123*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_load_exported_to(agx_builder * b,agx_index dst,unsigned base,unsigned nr)1124*61046927SAndroid Build Coastguard Worker agx_load_exported_to(agx_builder *b, agx_index dst, unsigned base, unsigned nr)
1125*61046927SAndroid Build Coastguard Worker {
1126*61046927SAndroid Build Coastguard Worker    agx_index chans[4] = {0};
1127*61046927SAndroid Build Coastguard Worker    unsigned stride = agx_size_align_16(dst.size);
1128*61046927SAndroid Build Coastguard Worker 
1129*61046927SAndroid Build Coastguard Worker    for (unsigned c = 0; c < nr; ++c) {
1130*61046927SAndroid Build Coastguard Worker       chans[c] = agx_cached_preload(b->shader, base + c * stride, dst.size);
1131*61046927SAndroid Build Coastguard Worker    }
1132*61046927SAndroid Build Coastguard Worker 
1133*61046927SAndroid Build Coastguard Worker    return agx_emit_collect_to(b, dst, nr, chans);
1134*61046927SAndroid Build Coastguard Worker }
1135*61046927SAndroid Build Coastguard Worker 
1136*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_image_store(agx_builder * b,nir_intrinsic_instr * instr)1137*61046927SAndroid Build Coastguard Worker agx_emit_image_store(agx_builder *b, nir_intrinsic_instr *instr)
1138*61046927SAndroid Build Coastguard Worker {
1139*61046927SAndroid Build Coastguard Worker    /* See remarks in agx_emit_image_load */
1140*61046927SAndroid Build Coastguard Worker    enum glsl_sampler_dim glsl_dim = nir_intrinsic_image_dim(instr);
1141*61046927SAndroid Build Coastguard Worker    bool is_array = nir_intrinsic_image_array(instr);
1142*61046927SAndroid Build Coastguard Worker 
1143*61046927SAndroid Build Coastguard Worker    if (glsl_dim == GLSL_SAMPLER_DIM_CUBE) {
1144*61046927SAndroid Build Coastguard Worker       glsl_dim = GLSL_SAMPLER_DIM_2D;
1145*61046927SAndroid Build Coastguard Worker       is_array = true;
1146*61046927SAndroid Build Coastguard Worker    }
1147*61046927SAndroid Build Coastguard Worker 
1148*61046927SAndroid Build Coastguard Worker    enum agx_dim dim = agx_tex_dim(glsl_dim, is_array);
1149*61046927SAndroid Build Coastguard Worker    assert(glsl_dim != GLSL_SAMPLER_DIM_MS && "needs to be lowered");
1150*61046927SAndroid Build Coastguard Worker 
1151*61046927SAndroid Build Coastguard Worker    agx_index base, index;
1152*61046927SAndroid Build Coastguard Worker    if (instr->intrinsic == nir_intrinsic_bindless_image_store) {
1153*61046927SAndroid Build Coastguard Worker       index = agx_translate_bindless_handle(b, &instr->src[0], &base);
1154*61046927SAndroid Build Coastguard Worker 
1155*61046927SAndroid Build Coastguard Worker       assert(base.size == AGX_SIZE_64);
1156*61046927SAndroid Build Coastguard Worker       assert(index.size == AGX_SIZE_32);
1157*61046927SAndroid Build Coastguard Worker    } else {
1158*61046927SAndroid Build Coastguard Worker       base = agx_zero();
1159*61046927SAndroid Build Coastguard Worker       index = agx_src_index(&instr->src[0]);
1160*61046927SAndroid Build Coastguard Worker 
1161*61046927SAndroid Build Coastguard Worker       assert(index.size == AGX_SIZE_16);
1162*61046927SAndroid Build Coastguard Worker    }
1163*61046927SAndroid Build Coastguard Worker 
1164*61046927SAndroid Build Coastguard Worker    agx_index coords4 = agx_src_index(&instr->src[1]);
1165*61046927SAndroid Build Coastguard Worker    agx_index lod = agx_src_index(&instr->src[4]);
1166*61046927SAndroid Build Coastguard Worker    assert(lod.size == AGX_SIZE_16);
1167*61046927SAndroid Build Coastguard Worker 
1168*61046927SAndroid Build Coastguard Worker    int coord_components = glsl_get_sampler_dim_coordinate_components(glsl_dim);
1169*61046927SAndroid Build Coastguard Worker    if (is_array)
1170*61046927SAndroid Build Coastguard Worker       coord_components++;
1171*61046927SAndroid Build Coastguard Worker 
1172*61046927SAndroid Build Coastguard Worker    agx_index coord_comps[4] = {};
1173*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < coord_components; ++i)
1174*61046927SAndroid Build Coastguard Worker       coord_comps[i] = agx_emit_extract(b, coords4, i);
1175*61046927SAndroid Build Coastguard Worker 
1176*61046927SAndroid Build Coastguard Worker    agx_index coords = agx_emit_collect(b, coord_components, coord_comps);
1177*61046927SAndroid Build Coastguard Worker    agx_index data = agx_src_index(&instr->src[3]);
1178*61046927SAndroid Build Coastguard Worker 
1179*61046927SAndroid Build Coastguard Worker    /* If the image format has less than 4 components, nir_opt_shrink_stores can
1180*61046927SAndroid Build Coastguard Worker     * shrink the store. But the IR still expects 4 components: pad with undef.
1181*61046927SAndroid Build Coastguard Worker     */
1182*61046927SAndroid Build Coastguard Worker    if (nir_src_num_components(instr->src[3]) < 4) {
1183*61046927SAndroid Build Coastguard Worker       agx_index chan[4] = {agx_null()};
1184*61046927SAndroid Build Coastguard Worker 
1185*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < 4; ++i) {
1186*61046927SAndroid Build Coastguard Worker          if (i < nir_src_num_components(instr->src[3]))
1187*61046927SAndroid Build Coastguard Worker             chan[i] = agx_extract_nir_src(b, instr->src[3], i);
1188*61046927SAndroid Build Coastguard Worker          else
1189*61046927SAndroid Build Coastguard Worker             chan[i] = agx_undef(data.size);
1190*61046927SAndroid Build Coastguard Worker       }
1191*61046927SAndroid Build Coastguard Worker 
1192*61046927SAndroid Build Coastguard Worker       data = agx_emit_collect(b, 4, chan);
1193*61046927SAndroid Build Coastguard Worker    }
1194*61046927SAndroid Build Coastguard Worker 
1195*61046927SAndroid Build Coastguard Worker    /* Image stores act like tilebuffer stores when used for tib spilling */
1196*61046927SAndroid Build Coastguard Worker    b->shader->out->tag_write_disable = false;
1197*61046927SAndroid Build Coastguard Worker 
1198*61046927SAndroid Build Coastguard Worker    return agx_image_write(b, data, coords, lod, base, index, dim);
1199*61046927SAndroid Build Coastguard Worker }
1200*61046927SAndroid Build Coastguard Worker 
1201*61046927SAndroid Build Coastguard Worker static enum agx_simd_op
translate_simd_op(nir_op op)1202*61046927SAndroid Build Coastguard Worker translate_simd_op(nir_op op)
1203*61046927SAndroid Build Coastguard Worker {
1204*61046927SAndroid Build Coastguard Worker #define CASE(agx_, nir_)                                                       \
1205*61046927SAndroid Build Coastguard Worker    case nir_op_##nir_:                                                         \
1206*61046927SAndroid Build Coastguard Worker       return AGX_SIMD_OP_##agx_;
1207*61046927SAndroid Build Coastguard Worker 
1208*61046927SAndroid Build Coastguard Worker    switch (op) {
1209*61046927SAndroid Build Coastguard Worker       CASE(AND, iand)
1210*61046927SAndroid Build Coastguard Worker       CASE(FADD, fadd)
1211*61046927SAndroid Build Coastguard Worker       CASE(OR, ior)
1212*61046927SAndroid Build Coastguard Worker       CASE(FMUL, fmul)
1213*61046927SAndroid Build Coastguard Worker       CASE(XOR, ixor)
1214*61046927SAndroid Build Coastguard Worker       CASE(FMIN, fmin)
1215*61046927SAndroid Build Coastguard Worker       CASE(FMAX, fmax)
1216*61046927SAndroid Build Coastguard Worker       CASE(IADD, iadd)
1217*61046927SAndroid Build Coastguard Worker       CASE(SMIN, imin)
1218*61046927SAndroid Build Coastguard Worker       CASE(SMAX, imax)
1219*61046927SAndroid Build Coastguard Worker       CASE(UMIN, umin)
1220*61046927SAndroid Build Coastguard Worker       CASE(UMAX, umax)
1221*61046927SAndroid Build Coastguard Worker    default:
1222*61046927SAndroid Build Coastguard Worker       unreachable("unknown simd op");
1223*61046927SAndroid Build Coastguard Worker    }
1224*61046927SAndroid Build Coastguard Worker #undef CASE
1225*61046927SAndroid Build Coastguard Worker }
1226*61046927SAndroid Build Coastguard Worker 
1227*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_intrinsic(agx_builder * b,nir_intrinsic_instr * instr)1228*61046927SAndroid Build Coastguard Worker agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
1229*61046927SAndroid Build Coastguard Worker {
1230*61046927SAndroid Build Coastguard Worker    agx_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest
1231*61046927SAndroid Build Coastguard Worker                       ? agx_def_index(&instr->def)
1232*61046927SAndroid Build Coastguard Worker                       : agx_null();
1233*61046927SAndroid Build Coastguard Worker    gl_shader_stage stage = b->shader->stage;
1234*61046927SAndroid Build Coastguard Worker 
1235*61046927SAndroid Build Coastguard Worker    switch (instr->intrinsic) {
1236*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_pixel:
1237*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_centroid:
1238*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_at_sample:
1239*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_at_offset:
1240*61046927SAndroid Build Coastguard Worker       /* handled later via load_vary */
1241*61046927SAndroid Build Coastguard Worker       return NULL;
1242*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_interpolated_input:
1243*61046927SAndroid Build Coastguard Worker       assert(stage == MESA_SHADER_FRAGMENT);
1244*61046927SAndroid Build Coastguard Worker       agx_emit_load_vary(b, dst, instr);
1245*61046927SAndroid Build Coastguard Worker       return NULL;
1246*61046927SAndroid Build Coastguard Worker 
1247*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_coefficients_agx:
1248*61046927SAndroid Build Coastguard Worker       assert(stage == MESA_SHADER_FRAGMENT);
1249*61046927SAndroid Build Coastguard Worker       agx_ldcf_to(b, dst, cf_for_intrinsic(b, instr), 1);
1250*61046927SAndroid Build Coastguard Worker       agx_emit_cached_split(b, dst, 3);
1251*61046927SAndroid Build Coastguard Worker       return NULL;
1252*61046927SAndroid Build Coastguard Worker 
1253*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_agx:
1254*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_constant_agx:
1255*61046927SAndroid Build Coastguard Worker       agx_emit_load(b, dst, instr);
1256*61046927SAndroid Build Coastguard Worker       return NULL;
1257*61046927SAndroid Build Coastguard Worker 
1258*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_uvs_agx:
1259*61046927SAndroid Build Coastguard Worker       assert(stage == MESA_SHADER_VERTEX);
1260*61046927SAndroid Build Coastguard Worker       return agx_st_vary(b, agx_src_index(&instr->src[1]),
1261*61046927SAndroid Build Coastguard Worker                          agx_src_index(&instr->src[0]));
1262*61046927SAndroid Build Coastguard Worker 
1263*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_agx:
1264*61046927SAndroid Build Coastguard Worker       agx_emit_store(b, instr);
1265*61046927SAndroid Build Coastguard Worker       return NULL;
1266*61046927SAndroid Build Coastguard Worker 
1267*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_shared:
1268*61046927SAndroid Build Coastguard Worker       agx_emit_local_store(b, instr);
1269*61046927SAndroid Build Coastguard Worker       return NULL;
1270*61046927SAndroid Build Coastguard Worker 
1271*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_shared:
1272*61046927SAndroid Build Coastguard Worker       agx_emit_local_load(b, dst, instr);
1273*61046927SAndroid Build Coastguard Worker       return NULL;
1274*61046927SAndroid Build Coastguard Worker 
1275*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_global_atomic_agx:
1276*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_global_atomic_swap_agx:
1277*61046927SAndroid Build Coastguard Worker       agx_emit_atomic(b, dst, instr, false);
1278*61046927SAndroid Build Coastguard Worker       return NULL;
1279*61046927SAndroid Build Coastguard Worker 
1280*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_shared_atomic:
1281*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_shared_atomic_swap:
1282*61046927SAndroid Build Coastguard Worker       agx_emit_atomic(b, dst, instr, true);
1283*61046927SAndroid Build Coastguard Worker       return NULL;
1284*61046927SAndroid Build Coastguard Worker 
1285*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_zs_agx:
1286*61046927SAndroid Build Coastguard Worker       assert(stage == MESA_SHADER_FRAGMENT);
1287*61046927SAndroid Build Coastguard Worker       return agx_emit_store_zs(b, instr);
1288*61046927SAndroid Build Coastguard Worker 
1289*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_local_pixel_agx:
1290*61046927SAndroid Build Coastguard Worker       return agx_emit_local_store_pixel(b, instr);
1291*61046927SAndroid Build Coastguard Worker 
1292*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_local_pixel_agx:
1293*61046927SAndroid Build Coastguard Worker       assert(stage == MESA_SHADER_FRAGMENT);
1294*61046927SAndroid Build Coastguard Worker       agx_emit_local_load_pixel(b, dst, instr);
1295*61046927SAndroid Build Coastguard Worker       return NULL;
1296*61046927SAndroid Build Coastguard Worker 
1297*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_pixel_coord:
1298*61046927SAndroid Build Coastguard Worker       return agx_emit_collect_to(
1299*61046927SAndroid Build Coastguard Worker          b, dst, 2,
1300*61046927SAndroid Build Coastguard Worker          (agx_index[2]){
1301*61046927SAndroid Build Coastguard Worker             agx_get_sr(b, 16, AGX_SR_THREAD_POSITION_IN_GRID_X),
1302*61046927SAndroid Build Coastguard Worker             agx_get_sr(b, 16, AGX_SR_THREAD_POSITION_IN_GRID_Y),
1303*61046927SAndroid Build Coastguard Worker          });
1304*61046927SAndroid Build Coastguard Worker 
1305*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_frag_coord_zw: {
1306*61046927SAndroid Build Coastguard Worker       agx_index cf = agx_get_cf(b->shader, VARYING_SLOT_POS,
1307*61046927SAndroid Build Coastguard Worker                                 nir_intrinsic_component(instr));
1308*61046927SAndroid Build Coastguard Worker 
1309*61046927SAndroid Build Coastguard Worker       return agx_iter_to(b, dst, cf, agx_zero(), 1, AGX_INTERPOLATION_CENTER);
1310*61046927SAndroid Build Coastguard Worker    }
1311*61046927SAndroid Build Coastguard Worker 
1312*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_sample_mask_agx: {
1313*61046927SAndroid Build Coastguard Worker       assert(stage == MESA_SHADER_FRAGMENT);
1314*61046927SAndroid Build Coastguard Worker       b->shader->out->writes_sample_mask = true;
1315*61046927SAndroid Build Coastguard Worker 
1316*61046927SAndroid Build Coastguard Worker       /* We need to wait_pix before running Z/S tests, but we don't need to
1317*61046927SAndroid Build Coastguard Worker        * wait_pix before merely discarding. Omit the wait_pix when the affected
1318*61046927SAndroid Build Coastguard Worker        * samples are unconditionally killed.
1319*61046927SAndroid Build Coastguard Worker        */
1320*61046927SAndroid Build Coastguard Worker       bool no_tests =
1321*61046927SAndroid Build Coastguard Worker          nir_src_is_const(instr->src[1]) && nir_src_as_uint(instr->src[1]) == 0;
1322*61046927SAndroid Build Coastguard Worker 
1323*61046927SAndroid Build Coastguard Worker       if (!no_tests)
1324*61046927SAndroid Build Coastguard Worker          agx_wait_pix(b, 0x0001);
1325*61046927SAndroid Build Coastguard Worker 
1326*61046927SAndroid Build Coastguard Worker       return agx_sample_mask(b, agx_src_index(&instr->src[0]),
1327*61046927SAndroid Build Coastguard Worker                              agx_src_index(&instr->src[1]));
1328*61046927SAndroid Build Coastguard Worker    }
1329*61046927SAndroid Build Coastguard Worker 
1330*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_back_face_agx:
1331*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_BACKFACING);
1332*61046927SAndroid Build Coastguard Worker 
1333*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_samples_log2_agx:
1334*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_SAMPLES_LOG2);
1335*61046927SAndroid Build Coastguard Worker 
1336*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_sample_mask_in:
1337*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_INPUT_SAMPLE_MASK);
1338*61046927SAndroid Build Coastguard Worker 
1339*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_sample_mask:
1340*61046927SAndroid Build Coastguard Worker       return agx_get_sr_coverage_to(b, dst, AGX_SR_COVERAGE_MASK);
1341*61046927SAndroid Build Coastguard Worker 
1342*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_helper_invocation:
1343*61046927SAndroid Build Coastguard Worker       /* Compare special register to zero. We could lower this in NIR (letting
1344*61046927SAndroid Build Coastguard Worker        * us fold in an inot) but meh?
1345*61046927SAndroid Build Coastguard Worker        */
1346*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst,
1347*61046927SAndroid Build Coastguard Worker                          agx_get_sr_coverage(b, 32, AGX_SR_IS_ACTIVE_THREAD),
1348*61046927SAndroid Build Coastguard Worker                          agx_zero(), AGX_ICOND_UEQ, false);
1349*61046927SAndroid Build Coastguard Worker 
1350*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_vertex_id:
1351*61046927SAndroid Build Coastguard Worker       /* We don't assert the HW stage since we use this same ABI with SW VS */
1352*61046927SAndroid Build Coastguard Worker       return agx_mov_to(b, dst, agx_abs(agx_vertex_id(b)));
1353*61046927SAndroid Build Coastguard Worker 
1354*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_instance_id:
1355*61046927SAndroid Build Coastguard Worker       return agx_mov_to(b, dst, agx_abs(agx_instance_id(b)));
1356*61046927SAndroid Build Coastguard Worker 
1357*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_preamble:
1358*61046927SAndroid Build Coastguard Worker       return agx_emit_load_preamble(b, dst, instr);
1359*61046927SAndroid Build Coastguard Worker 
1360*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_preamble:
1361*61046927SAndroid Build Coastguard Worker       return agx_emit_store_preamble(b, instr);
1362*61046927SAndroid Build Coastguard Worker 
1363*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_load:
1364*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_bindless_image_load:
1365*61046927SAndroid Build Coastguard Worker       return agx_emit_image_load(b, dst, instr);
1366*61046927SAndroid Build Coastguard Worker 
1367*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_store:
1368*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_bindless_image_store:
1369*61046927SAndroid Build Coastguard Worker       return agx_emit_image_store(b, instr);
1370*61046927SAndroid Build Coastguard Worker 
1371*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_store_block_agx:
1372*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_bindless_image_store_block_agx:
1373*61046927SAndroid Build Coastguard Worker       return agx_emit_block_image_store(b, instr);
1374*61046927SAndroid Build Coastguard Worker 
1375*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_workgroup_id:
1376*61046927SAndroid Build Coastguard Worker       return agx_load_compute_dimension(b, dst, instr,
1377*61046927SAndroid Build Coastguard Worker                                         AGX_SR_THREADGROUP_POSITION_IN_GRID_X);
1378*61046927SAndroid Build Coastguard Worker 
1379*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_workgroup_size:
1380*61046927SAndroid Build Coastguard Worker       return agx_load_compute_dimension(b, dst, instr,
1381*61046927SAndroid Build Coastguard Worker                                         AGX_SR_THREADS_PER_THREADGROUP_X);
1382*61046927SAndroid Build Coastguard Worker 
1383*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_global_invocation_id:
1384*61046927SAndroid Build Coastguard Worker       return agx_load_compute_dimension(b, dst, instr,
1385*61046927SAndroid Build Coastguard Worker                                         AGX_SR_THREAD_POSITION_IN_GRID_X);
1386*61046927SAndroid Build Coastguard Worker 
1387*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_local_invocation_id:
1388*61046927SAndroid Build Coastguard Worker       return agx_load_compute_dimension(
1389*61046927SAndroid Build Coastguard Worker          b, dst, instr, AGX_SR_THREAD_POSITION_IN_THREADGROUP_X);
1390*61046927SAndroid Build Coastguard Worker 
1391*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_local_invocation_index:
1392*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_THREAD_INDEX_IN_THREADGROUP);
1393*61046927SAndroid Build Coastguard Worker 
1394*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_layer_id:
1395*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_THREADGROUP_POSITION_IN_GRID_Z);
1396*61046927SAndroid Build Coastguard Worker 
1397*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_barrier: {
1398*61046927SAndroid Build Coastguard Worker       assert(!b->shader->is_preamble && "invalid");
1399*61046927SAndroid Build Coastguard Worker 
1400*61046927SAndroid Build Coastguard Worker       bool needs_image_barriers = false;
1401*61046927SAndroid Build Coastguard Worker 
1402*61046927SAndroid Build Coastguard Worker       if (nir_intrinsic_memory_scope(instr) != SCOPE_NONE) {
1403*61046927SAndroid Build Coastguard Worker          nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
1404*61046927SAndroid Build Coastguard Worker 
1405*61046927SAndroid Build Coastguard Worker          if (modes & (nir_var_mem_global | nir_var_image)) {
1406*61046927SAndroid Build Coastguard Worker             agx_memory_barrier(b);
1407*61046927SAndroid Build Coastguard Worker 
1408*61046927SAndroid Build Coastguard Worker             /* Pull out all the big hammers to make cross-workgroup memory
1409*61046927SAndroid Build Coastguard Worker              * barriers work. Found experimentally, seems to work on G13G at
1410*61046927SAndroid Build Coastguard Worker              * least.
1411*61046927SAndroid Build Coastguard Worker              *
1412*61046927SAndroid Build Coastguard Worker              * TODO: check on other models, we may need more barriers for G13D.
1413*61046927SAndroid Build Coastguard Worker              */
1414*61046927SAndroid Build Coastguard Worker             if (nir_intrinsic_memory_scope(instr) >= SCOPE_QUEUE_FAMILY) {
1415*61046927SAndroid Build Coastguard Worker                agx_memory_barrier_2(b);
1416*61046927SAndroid Build Coastguard Worker                agx_unknown_barrier_1(b);
1417*61046927SAndroid Build Coastguard Worker             }
1418*61046927SAndroid Build Coastguard Worker          }
1419*61046927SAndroid Build Coastguard Worker 
1420*61046927SAndroid Build Coastguard Worker          if (modes & nir_var_image) {
1421*61046927SAndroid Build Coastguard Worker             agx_image_barrier_1(b);
1422*61046927SAndroid Build Coastguard Worker             agx_image_barrier_2(b);
1423*61046927SAndroid Build Coastguard Worker             needs_image_barriers = true;
1424*61046927SAndroid Build Coastguard Worker          }
1425*61046927SAndroid Build Coastguard Worker       }
1426*61046927SAndroid Build Coastguard Worker 
1427*61046927SAndroid Build Coastguard Worker       /* Nothing to do for subgroup barriers */
1428*61046927SAndroid Build Coastguard Worker       if (nir_intrinsic_execution_scope(instr) >= SCOPE_WORKGROUP) {
1429*61046927SAndroid Build Coastguard Worker          assert(gl_shader_stage_is_compute(b->shader->nir->info.stage));
1430*61046927SAndroid Build Coastguard Worker 
1431*61046927SAndroid Build Coastguard Worker          agx_threadgroup_barrier(b);
1432*61046927SAndroid Build Coastguard Worker       }
1433*61046927SAndroid Build Coastguard Worker 
1434*61046927SAndroid Build Coastguard Worker       if (needs_image_barriers) {
1435*61046927SAndroid Build Coastguard Worker          agx_image_barrier_3(b);
1436*61046927SAndroid Build Coastguard Worker          agx_image_barrier_4(b);
1437*61046927SAndroid Build Coastguard Worker       }
1438*61046927SAndroid Build Coastguard Worker 
1439*61046927SAndroid Build Coastguard Worker       return NULL;
1440*61046927SAndroid Build Coastguard Worker    }
1441*61046927SAndroid Build Coastguard Worker 
1442*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_fence_pbe_to_tex_agx: {
1443*61046927SAndroid Build Coastguard Worker       agx_image_barrier_1(b);
1444*61046927SAndroid Build Coastguard Worker       agx_image_barrier_2(b);
1445*61046927SAndroid Build Coastguard Worker       agx_image_barrier_3(b);
1446*61046927SAndroid Build Coastguard Worker       agx_image_barrier_4(b);
1447*61046927SAndroid Build Coastguard Worker       return NULL;
1448*61046927SAndroid Build Coastguard Worker    }
1449*61046927SAndroid Build Coastguard Worker 
1450*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_fence_mem_to_tex_agx: {
1451*61046927SAndroid Build Coastguard Worker       /* Flush out the atomic to main memory... Found experimentally... */
1452*61046927SAndroid Build Coastguard Worker       agx_memory_barrier(b);
1453*61046927SAndroid Build Coastguard Worker       agx_memory_barrier_2(b);
1454*61046927SAndroid Build Coastguard Worker 
1455*61046927SAndroid Build Coastguard Worker       /* TODO: Which ones do we actually need? */
1456*61046927SAndroid Build Coastguard Worker       agx_image_barrier_1(b);
1457*61046927SAndroid Build Coastguard Worker       agx_image_barrier_2(b);
1458*61046927SAndroid Build Coastguard Worker       agx_image_barrier_3(b);
1459*61046927SAndroid Build Coastguard Worker       agx_image_barrier_4(b);
1460*61046927SAndroid Build Coastguard Worker 
1461*61046927SAndroid Build Coastguard Worker       /* Flush out the texture cache */
1462*61046927SAndroid Build Coastguard Worker       agx_flush_memory_to_texture(b);
1463*61046927SAndroid Build Coastguard Worker       return NULL;
1464*61046927SAndroid Build Coastguard Worker    }
1465*61046927SAndroid Build Coastguard Worker 
1466*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_fence_pbe_to_tex_pixel_agx: {
1467*61046927SAndroid Build Coastguard Worker       agx_image_barrier_1(b);
1468*61046927SAndroid Build Coastguard Worker       agx_image_barrier_2(b);
1469*61046927SAndroid Build Coastguard Worker       agx_flush_memory_to_texture(b);
1470*61046927SAndroid Build Coastguard Worker       agx_image_barrier_3(b);
1471*61046927SAndroid Build Coastguard Worker       return NULL;
1472*61046927SAndroid Build Coastguard Worker    }
1473*61046927SAndroid Build Coastguard Worker 
1474*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_fence_helper_exit_agx: {
1475*61046927SAndroid Build Coastguard Worker       assert(b->shader->key->is_helper);
1476*61046927SAndroid Build Coastguard Worker       agx_memory_barrier(b);
1477*61046927SAndroid Build Coastguard Worker       agx_unknown_barrier_1(b);
1478*61046927SAndroid Build Coastguard Worker       agx_memory_barrier_2(b);
1479*61046927SAndroid Build Coastguard Worker       agx_unknown_barrier_2(b);
1480*61046927SAndroid Build Coastguard Worker       agx_memory_barrier_3(b);
1481*61046927SAndroid Build Coastguard Worker       return NULL;
1482*61046927SAndroid Build Coastguard Worker    }
1483*61046927SAndroid Build Coastguard Worker 
1484*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_begin_invocation_interlock: {
1485*61046927SAndroid Build Coastguard Worker       if (!b->shader->did_writeout &&
1486*61046927SAndroid Build Coastguard Worker           !b->shader->key->fs.ignore_tib_dependencies)
1487*61046927SAndroid Build Coastguard Worker          agx_wait_pix(b, 0x000C);
1488*61046927SAndroid Build Coastguard Worker 
1489*61046927SAndroid Build Coastguard Worker       b->shader->did_writeout = true;
1490*61046927SAndroid Build Coastguard Worker       return NULL;
1491*61046927SAndroid Build Coastguard Worker    }
1492*61046927SAndroid Build Coastguard Worker 
1493*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ddx:
1494*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ddx_coarse:
1495*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ddx_fine:
1496*61046927SAndroid Build Coastguard Worker       return agx_dfdx_to(b, dst, agx_src_index(&instr->src[0]));
1497*61046927SAndroid Build Coastguard Worker 
1498*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ddy:
1499*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ddy_coarse:
1500*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ddy_fine:
1501*61046927SAndroid Build Coastguard Worker       return agx_dfdy_to(b, dst, agx_src_index(&instr->src[0]));
1502*61046927SAndroid Build Coastguard Worker 
1503*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_invocation:
1504*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_THREAD_INDEX_IN_SUBGROUP);
1505*61046927SAndroid Build Coastguard Worker 
1506*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_subgroup_id:
1507*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_SUBGROUP_INDEX_IN_THREADGROUP);
1508*61046927SAndroid Build Coastguard Worker 
1509*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_active_subgroup_invocation_agx:
1510*61046927SAndroid Build Coastguard Worker       return agx_get_sr_coverage_to(b, dst,
1511*61046927SAndroid Build Coastguard Worker                                     AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP);
1512*61046927SAndroid Build Coastguard Worker 
1513*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_active_subgroup_count_agx:
1514*61046927SAndroid Build Coastguard Worker       return agx_get_sr_coverage_to(b, dst,
1515*61046927SAndroid Build Coastguard Worker                                     AGX_SR_TOTAL_ACTIVE_THREADS_IN_SUBGROUP);
1516*61046927SAndroid Build Coastguard Worker 
1517*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_reduce: {
1518*61046927SAndroid Build Coastguard Worker       assert((instr->def.bit_size == 1 || instr->def.bit_size == 16 ||
1519*61046927SAndroid Build Coastguard Worker               instr->def.bit_size == 32) &&
1520*61046927SAndroid Build Coastguard Worker              "should've been lowered");
1521*61046927SAndroid Build Coastguard Worker 
1522*61046927SAndroid Build Coastguard Worker       unsigned cluster_size = nir_intrinsic_cluster_size(instr);
1523*61046927SAndroid Build Coastguard Worker       assert(cluster_size == 0 || cluster_size == 4 || cluster_size >= 32);
1524*61046927SAndroid Build Coastguard Worker 
1525*61046927SAndroid Build Coastguard Worker       enum agx_simd_op op =
1526*61046927SAndroid Build Coastguard Worker          translate_simd_op(nir_intrinsic_reduction_op(instr));
1527*61046927SAndroid Build Coastguard Worker 
1528*61046927SAndroid Build Coastguard Worker       agx_index src0 = agx_src_index(&instr->src[0]);
1529*61046927SAndroid Build Coastguard Worker 
1530*61046927SAndroid Build Coastguard Worker       if (cluster_size == 4)
1531*61046927SAndroid Build Coastguard Worker          return agx_quad_reduce_to(b, dst, src0, op);
1532*61046927SAndroid Build Coastguard Worker       else
1533*61046927SAndroid Build Coastguard Worker          return agx_simd_reduce_to(b, dst, src0, op);
1534*61046927SAndroid Build Coastguard Worker    }
1535*61046927SAndroid Build Coastguard Worker 
1536*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_exclusive_scan: {
1537*61046927SAndroid Build Coastguard Worker       assert((instr->def.bit_size == 1 || instr->def.bit_size == 16 ||
1538*61046927SAndroid Build Coastguard Worker               instr->def.bit_size == 32) &&
1539*61046927SAndroid Build Coastguard Worker              "should've been lowered");
1540*61046927SAndroid Build Coastguard Worker 
1541*61046927SAndroid Build Coastguard Worker       return agx_simd_prefix_to(
1542*61046927SAndroid Build Coastguard Worker          b, dst, agx_src_index(&instr->src[0]),
1543*61046927SAndroid Build Coastguard Worker          translate_simd_op(nir_intrinsic_reduction_op(instr)));
1544*61046927SAndroid Build Coastguard Worker    }
1545*61046927SAndroid Build Coastguard Worker 
1546*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_read_invocation: {
1547*61046927SAndroid Build Coastguard Worker       /* TODO: Check if we're actually inside divergent control flow */
1548*61046927SAndroid Build Coastguard Worker       b->shader->any_quad_divergent_shuffle |= b->shader->any_cf;
1549*61046927SAndroid Build Coastguard Worker 
1550*61046927SAndroid Build Coastguard Worker       /* Lane ID guaranteed to be uniform */
1551*61046927SAndroid Build Coastguard Worker       return agx_shuffle_to(b, dst, agx_src_index(&instr->src[0]),
1552*61046927SAndroid Build Coastguard Worker                             agx_src_index(&instr->src[1]));
1553*61046927SAndroid Build Coastguard Worker    }
1554*61046927SAndroid Build Coastguard Worker 
1555*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_quad_broadcast: {
1556*61046927SAndroid Build Coastguard Worker       /* TODO: Check if we're actually inside divergent control flow */
1557*61046927SAndroid Build Coastguard Worker       b->shader->any_quad_divergent_shuffle |= b->shader->any_cf;
1558*61046927SAndroid Build Coastguard Worker 
1559*61046927SAndroid Build Coastguard Worker       /* Lane ID guaranteed to be uniform */
1560*61046927SAndroid Build Coastguard Worker       return agx_quad_shuffle_to(b, dst, agx_src_index(&instr->src[0]),
1561*61046927SAndroid Build Coastguard Worker                                  agx_src_index(&instr->src[1]));
1562*61046927SAndroid Build Coastguard Worker    }
1563*61046927SAndroid Build Coastguard Worker 
1564*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_quad_swap_horizontal: {
1565*61046927SAndroid Build Coastguard Worker       return agx_quad_shuffle_xor_to(b, dst, agx_src_index(&instr->src[0]),
1566*61046927SAndroid Build Coastguard Worker                                      agx_immediate(1));
1567*61046927SAndroid Build Coastguard Worker    }
1568*61046927SAndroid Build Coastguard Worker 
1569*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_quad_swap_vertical: {
1570*61046927SAndroid Build Coastguard Worker       return agx_quad_shuffle_xor_to(b, dst, agx_src_index(&instr->src[0]),
1571*61046927SAndroid Build Coastguard Worker                                      agx_immediate(2));
1572*61046927SAndroid Build Coastguard Worker    }
1573*61046927SAndroid Build Coastguard Worker 
1574*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_quad_swap_diagonal: {
1575*61046927SAndroid Build Coastguard Worker       return agx_quad_shuffle_xor_to(b, dst, agx_src_index(&instr->src[0]),
1576*61046927SAndroid Build Coastguard Worker                                      agx_immediate(3));
1577*61046927SAndroid Build Coastguard Worker    }
1578*61046927SAndroid Build Coastguard Worker 
1579*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ballot: {
1580*61046927SAndroid Build Coastguard Worker       return agx_ballot_to(b, dst, agx_src_index(&instr->src[0]));
1581*61046927SAndroid Build Coastguard Worker    }
1582*61046927SAndroid Build Coastguard Worker 
1583*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_quad_ballot_agx: {
1584*61046927SAndroid Build Coastguard Worker       return agx_quad_ballot_to(b, dst, agx_src_index(&instr->src[0]));
1585*61046927SAndroid Build Coastguard Worker    }
1586*61046927SAndroid Build Coastguard Worker 
1587*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_doorbell_agx: {
1588*61046927SAndroid Build Coastguard Worker       return agx_doorbell(b, nir_src_as_uint(instr->src[0]));
1589*61046927SAndroid Build Coastguard Worker    }
1590*61046927SAndroid Build Coastguard Worker 
1591*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_stack_map_agx: {
1592*61046927SAndroid Build Coastguard Worker       return agx_stack_map(b, agx_src_index(&instr->src[1]),
1593*61046927SAndroid Build Coastguard Worker                            nir_src_as_uint(instr->src[0]));
1594*61046927SAndroid Build Coastguard Worker    }
1595*61046927SAndroid Build Coastguard Worker 
1596*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_stack_unmap_agx: {
1597*61046927SAndroid Build Coastguard Worker       return agx_stack_unmap_to(b, dst, nir_src_as_uint(instr->src[0]));
1598*61046927SAndroid Build Coastguard Worker    }
1599*61046927SAndroid Build Coastguard Worker 
1600*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_scratch:
1601*61046927SAndroid Build Coastguard Worker       agx_emit_load_scratch(b, dst, instr);
1602*61046927SAndroid Build Coastguard Worker       return NULL;
1603*61046927SAndroid Build Coastguard Worker 
1604*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_scratch:
1605*61046927SAndroid Build Coastguard Worker       agx_emit_store_scratch(b, instr);
1606*61046927SAndroid Build Coastguard Worker       return NULL;
1607*61046927SAndroid Build Coastguard Worker 
1608*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_core_id_agx:
1609*61046927SAndroid Build Coastguard Worker       return agx_get_sr_to(b, dst, AGX_SR_CORE_ID);
1610*61046927SAndroid Build Coastguard Worker 
1611*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_helper_op_id_agx:
1612*61046927SAndroid Build Coastguard Worker       assert(b->shader->key->is_helper);
1613*61046927SAndroid Build Coastguard Worker       return agx_get_sr_barrier_to(b, dst, AGX_SR_HELPER_OP);
1614*61046927SAndroid Build Coastguard Worker 
1615*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_helper_arg_lo_agx:
1616*61046927SAndroid Build Coastguard Worker       assert(b->shader->key->is_helper);
1617*61046927SAndroid Build Coastguard Worker       return agx_get_sr_barrier_to(b, dst, AGX_SR_HELPER_ARG_L);
1618*61046927SAndroid Build Coastguard Worker 
1619*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_helper_arg_hi_agx:
1620*61046927SAndroid Build Coastguard Worker       assert(b->shader->key->is_helper);
1621*61046927SAndroid Build Coastguard Worker       return agx_get_sr_barrier_to(b, dst, AGX_SR_HELPER_ARG_H);
1622*61046927SAndroid Build Coastguard Worker 
1623*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_exported_agx:
1624*61046927SAndroid Build Coastguard Worker       return agx_load_exported_to(b, dst, nir_intrinsic_base(instr),
1625*61046927SAndroid Build Coastguard Worker                                   instr->def.num_components);
1626*61046927SAndroid Build Coastguard Worker 
1627*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_export_agx:
1628*61046927SAndroid Build Coastguard Worker       return agx_emit_export(b, nir_intrinsic_base(instr), instr->src[0]);
1629*61046927SAndroid Build Coastguard Worker 
1630*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_barycentric_sample:
1631*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_sample_id:
1632*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_sample_pos:
1633*61046927SAndroid Build Coastguard Worker       unreachable("Sample shading should have been lowered");
1634*61046927SAndroid Build Coastguard Worker 
1635*61046927SAndroid Build Coastguard Worker    default:
1636*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "Unhandled intrinsic %s\n",
1637*61046927SAndroid Build Coastguard Worker               nir_intrinsic_infos[instr->intrinsic].name);
1638*61046927SAndroid Build Coastguard Worker       unreachable("Unhandled intrinsic");
1639*61046927SAndroid Build Coastguard Worker    }
1640*61046927SAndroid Build Coastguard Worker }
1641*61046927SAndroid Build Coastguard Worker 
1642*61046927SAndroid Build Coastguard Worker static agx_index
agx_alu_src_index(agx_builder * b,nir_alu_src src)1643*61046927SAndroid Build Coastguard Worker agx_alu_src_index(agx_builder *b, nir_alu_src src)
1644*61046927SAndroid Build Coastguard Worker {
1645*61046927SAndroid Build Coastguard Worker    /* Check well-formedness of the input NIR */
1646*61046927SAndroid Build Coastguard Worker    ASSERTED unsigned bitsize = nir_src_bit_size(src.src);
1647*61046927SAndroid Build Coastguard Worker    unsigned comps = nir_src_num_components(src.src);
1648*61046927SAndroid Build Coastguard Worker    unsigned channel = src.swizzle[0];
1649*61046927SAndroid Build Coastguard Worker 
1650*61046927SAndroid Build Coastguard Worker    assert(bitsize == 1 || bitsize == 8 || bitsize == 16 || bitsize == 32 ||
1651*61046927SAndroid Build Coastguard Worker           bitsize == 64);
1652*61046927SAndroid Build Coastguard Worker    assert(channel < comps);
1653*61046927SAndroid Build Coastguard Worker 
1654*61046927SAndroid Build Coastguard Worker    return agx_extract_nir_src(b, src.src, channel);
1655*61046927SAndroid Build Coastguard Worker }
1656*61046927SAndroid Build Coastguard Worker 
1657*61046927SAndroid Build Coastguard Worker /*
1658*61046927SAndroid Build Coastguard Worker  * Emit an instruction translating (s0 * s1) + (s2 << s3). Assuming s3 is
1659*61046927SAndroid Build Coastguard Worker  * constant, this is an imad instruction. If s1 == 1, then this is optimized to
1660*61046927SAndroid Build Coastguard Worker  * an iadd instruction, which is faster.
1661*61046927SAndroid Build Coastguard Worker  */
1662*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_imadshl_agx(agx_builder * b,nir_alu_instr * alu,agx_index dst,agx_index s0,agx_index s1,agx_index s2,agx_index s3)1663*61046927SAndroid Build Coastguard Worker agx_emit_imadshl_agx(agx_builder *b, nir_alu_instr *alu, agx_index dst,
1664*61046927SAndroid Build Coastguard Worker                      agx_index s0, agx_index s1, agx_index s2, agx_index s3)
1665*61046927SAndroid Build Coastguard Worker {
1666*61046927SAndroid Build Coastguard Worker    /* If the shift is not constant, use a variable shift. This should never
1667*61046927SAndroid Build Coastguard Worker     * happen in practice but we don't want to constrain the NIR.
1668*61046927SAndroid Build Coastguard Worker     */
1669*61046927SAndroid Build Coastguard Worker    unsigned shift;
1670*61046927SAndroid Build Coastguard Worker    if (!nir_src_is_const(alu->src[3].src)) {
1671*61046927SAndroid Build Coastguard Worker       s2 = agx_bfi(b, agx_immediate(0), s2, s3, 0);
1672*61046927SAndroid Build Coastguard Worker       shift = 0;
1673*61046927SAndroid Build Coastguard Worker    } else {
1674*61046927SAndroid Build Coastguard Worker       shift = nir_alu_src_as_uint(alu->src[3]);
1675*61046927SAndroid Build Coastguard Worker    }
1676*61046927SAndroid Build Coastguard Worker 
1677*61046927SAndroid Build Coastguard Worker    assert(shift <= 4 && "domain restriction on the input NIR");
1678*61046927SAndroid Build Coastguard Worker 
1679*61046927SAndroid Build Coastguard Worker    /* Emit iadd if possible, else imad */
1680*61046927SAndroid Build Coastguard Worker    if (nir_src_is_const(alu->src[1].src) &&
1681*61046927SAndroid Build Coastguard Worker        nir_alu_src_as_uint(alu->src[1]) == 1) {
1682*61046927SAndroid Build Coastguard Worker 
1683*61046927SAndroid Build Coastguard Worker       return agx_iadd_to(b, dst, s0, s2, shift);
1684*61046927SAndroid Build Coastguard Worker    } else {
1685*61046927SAndroid Build Coastguard Worker       return agx_imad_to(b, dst, s0, s1, s2, shift);
1686*61046927SAndroid Build Coastguard Worker    }
1687*61046927SAndroid Build Coastguard Worker }
1688*61046927SAndroid Build Coastguard Worker 
1689*61046927SAndroid Build Coastguard Worker static bool
is_conversion_to_8bit(nir_op op)1690*61046927SAndroid Build Coastguard Worker is_conversion_to_8bit(nir_op op)
1691*61046927SAndroid Build Coastguard Worker {
1692*61046927SAndroid Build Coastguard Worker    switch (op) {
1693*61046927SAndroid Build Coastguard Worker    case nir_op_i2i8:
1694*61046927SAndroid Build Coastguard Worker    case nir_op_u2u8:
1695*61046927SAndroid Build Coastguard Worker    case nir_op_f2i8:
1696*61046927SAndroid Build Coastguard Worker    case nir_op_f2u8:
1697*61046927SAndroid Build Coastguard Worker    case nir_op_b2i8:
1698*61046927SAndroid Build Coastguard Worker       return true;
1699*61046927SAndroid Build Coastguard Worker    default:
1700*61046927SAndroid Build Coastguard Worker       return false;
1701*61046927SAndroid Build Coastguard Worker    }
1702*61046927SAndroid Build Coastguard Worker }
1703*61046927SAndroid Build Coastguard Worker 
1704*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_fminmax_to(agx_builder * b,agx_index dst,agx_index s0,agx_index s1,nir_alu_instr * alu)1705*61046927SAndroid Build Coastguard Worker agx_fminmax_to(agx_builder *b, agx_index dst, agx_index s0, agx_index s1,
1706*61046927SAndroid Build Coastguard Worker                nir_alu_instr *alu)
1707*61046927SAndroid Build Coastguard Worker {
1708*61046927SAndroid Build Coastguard Worker    /* The hardware gtn/ltn modes are unfortunately incorrect for signed zeros */
1709*61046927SAndroid Build Coastguard Worker    assert(!nir_alu_instr_is_signed_zero_preserve(alu) &&
1710*61046927SAndroid Build Coastguard Worker           "should've been lowered");
1711*61046927SAndroid Build Coastguard Worker 
1712*61046927SAndroid Build Coastguard Worker    bool fmax = alu->op == nir_op_fmax;
1713*61046927SAndroid Build Coastguard Worker    enum agx_fcond fcond = fmax ? AGX_FCOND_GTN : AGX_FCOND_LTN;
1714*61046927SAndroid Build Coastguard Worker 
1715*61046927SAndroid Build Coastguard Worker    /* Calculate min/max with the appropriate hardware instruction */
1716*61046927SAndroid Build Coastguard Worker    agx_index tmp = agx_fcmpsel(b, s0, s1, s0, s1, fcond);
1717*61046927SAndroid Build Coastguard Worker 
1718*61046927SAndroid Build Coastguard Worker    /* G13 flushes fp32 denorms and preserves fp16 denorms. Since cmpsel
1719*61046927SAndroid Build Coastguard Worker     * preserves denorms, we need to canonicalize for fp32. Canonicalizing fp16
1720*61046927SAndroid Build Coastguard Worker     * would be harmless but wastes an instruction.
1721*61046927SAndroid Build Coastguard Worker     */
1722*61046927SAndroid Build Coastguard Worker    if (alu->def.bit_size == 32)
1723*61046927SAndroid Build Coastguard Worker       return agx_fadd_to(b, dst, tmp, agx_negzero());
1724*61046927SAndroid Build Coastguard Worker    else
1725*61046927SAndroid Build Coastguard Worker       return agx_mov_to(b, dst, tmp);
1726*61046927SAndroid Build Coastguard Worker }
1727*61046927SAndroid Build Coastguard Worker 
1728*61046927SAndroid Build Coastguard Worker static agx_instr *
agx_emit_alu(agx_builder * b,nir_alu_instr * instr)1729*61046927SAndroid Build Coastguard Worker agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
1730*61046927SAndroid Build Coastguard Worker {
1731*61046927SAndroid Build Coastguard Worker    unsigned srcs = nir_op_infos[instr->op].num_inputs;
1732*61046927SAndroid Build Coastguard Worker    unsigned sz = instr->def.bit_size;
1733*61046927SAndroid Build Coastguard Worker    unsigned src_sz = srcs ? nir_src_bit_size(instr->src[0].src) : 0;
1734*61046927SAndroid Build Coastguard Worker    ASSERTED unsigned comps = instr->def.num_components;
1735*61046927SAndroid Build Coastguard Worker 
1736*61046927SAndroid Build Coastguard Worker    assert(comps == 1 || nir_op_is_vec_or_mov(instr->op));
1737*61046927SAndroid Build Coastguard Worker    assert(sz == 1 ||
1738*61046927SAndroid Build Coastguard Worker           ((nir_op_is_vec_or_mov(instr->op) ||
1739*61046927SAndroid Build Coastguard Worker             is_conversion_to_8bit(instr->op) || instr->op == nir_op_bcsel) &&
1740*61046927SAndroid Build Coastguard Worker            sz == 8) ||
1741*61046927SAndroid Build Coastguard Worker           sz == 16 || sz == 32 || sz == 64);
1742*61046927SAndroid Build Coastguard Worker 
1743*61046927SAndroid Build Coastguard Worker    agx_index dst = agx_def_index(&instr->def);
1744*61046927SAndroid Build Coastguard Worker    agx_index s0 = srcs > 0 ? agx_alu_src_index(b, instr->src[0]) : agx_null();
1745*61046927SAndroid Build Coastguard Worker    agx_index s1 = srcs > 1 ? agx_alu_src_index(b, instr->src[1]) : agx_null();
1746*61046927SAndroid Build Coastguard Worker    agx_index s2 = srcs > 2 ? agx_alu_src_index(b, instr->src[2]) : agx_null();
1747*61046927SAndroid Build Coastguard Worker    agx_index s3 = srcs > 3 ? agx_alu_src_index(b, instr->src[3]) : agx_null();
1748*61046927SAndroid Build Coastguard Worker 
1749*61046927SAndroid Build Coastguard Worker    agx_index i0 = agx_immediate(0);
1750*61046927SAndroid Build Coastguard Worker    agx_index i1 = agx_immediate(1);
1751*61046927SAndroid Build Coastguard Worker 
1752*61046927SAndroid Build Coastguard Worker #define UNOP(nop, aop)                                                         \
1753*61046927SAndroid Build Coastguard Worker    case nir_op_##nop:                                                          \
1754*61046927SAndroid Build Coastguard Worker       return agx_##aop##_to(b, dst, s0);
1755*61046927SAndroid Build Coastguard Worker #define BINOP(nop, aop)                                                        \
1756*61046927SAndroid Build Coastguard Worker    case nir_op_##nop:                                                          \
1757*61046927SAndroid Build Coastguard Worker       return agx_##aop##_to(b, dst, s0, s1);
1758*61046927SAndroid Build Coastguard Worker #define TRIOP(nop, aop)                                                        \
1759*61046927SAndroid Build Coastguard Worker    case nir_op_##nop:                                                          \
1760*61046927SAndroid Build Coastguard Worker       return agx_##aop##_to(b, dst, s0, s1, s2);
1761*61046927SAndroid Build Coastguard Worker 
1762*61046927SAndroid Build Coastguard Worker    switch (instr->op) {
1763*61046927SAndroid Build Coastguard Worker       BINOP(fadd, fadd);
1764*61046927SAndroid Build Coastguard Worker       BINOP(fmul, fmul);
1765*61046927SAndroid Build Coastguard Worker       TRIOP(ffma, fma);
1766*61046927SAndroid Build Coastguard Worker 
1767*61046927SAndroid Build Coastguard Worker       UNOP(f2f16, fmov);
1768*61046927SAndroid Build Coastguard Worker       UNOP(f2f16_rtne, fmov);
1769*61046927SAndroid Build Coastguard Worker       UNOP(f2f32, fmov);
1770*61046927SAndroid Build Coastguard Worker       UNOP(fround_even, roundeven);
1771*61046927SAndroid Build Coastguard Worker       UNOP(ftrunc, trunc);
1772*61046927SAndroid Build Coastguard Worker       UNOP(ffloor, floor);
1773*61046927SAndroid Build Coastguard Worker       UNOP(fceil, ceil);
1774*61046927SAndroid Build Coastguard Worker       UNOP(frcp, rcp);
1775*61046927SAndroid Build Coastguard Worker       UNOP(frsq, rsqrt);
1776*61046927SAndroid Build Coastguard Worker       UNOP(flog2, log2);
1777*61046927SAndroid Build Coastguard Worker       UNOP(fexp2, exp2);
1778*61046927SAndroid Build Coastguard Worker 
1779*61046927SAndroid Build Coastguard Worker       UNOP(mov, mov);
1780*61046927SAndroid Build Coastguard Worker       UNOP(u2u32, mov);
1781*61046927SAndroid Build Coastguard Worker       UNOP(bitfield_reverse, bitrev);
1782*61046927SAndroid Build Coastguard Worker       UNOP(bit_count, popcount);
1783*61046927SAndroid Build Coastguard Worker       UNOP(ufind_msb, ffs);
1784*61046927SAndroid Build Coastguard Worker       BINOP(iand, and);
1785*61046927SAndroid Build Coastguard Worker       BINOP(ior, or);
1786*61046927SAndroid Build Coastguard Worker       BINOP(ixor, xor);
1787*61046927SAndroid Build Coastguard Worker       BINOP(interleave_agx, intl);
1788*61046927SAndroid Build Coastguard Worker 
1789*61046927SAndroid Build Coastguard Worker    case nir_op_feq:
1790*61046927SAndroid Build Coastguard Worker       return agx_fcmp_to(b, dst, s0, s1, AGX_FCOND_EQ, false);
1791*61046927SAndroid Build Coastguard Worker    case nir_op_flt:
1792*61046927SAndroid Build Coastguard Worker       return agx_fcmp_to(b, dst, s0, s1, AGX_FCOND_LT, false);
1793*61046927SAndroid Build Coastguard Worker    case nir_op_fge:
1794*61046927SAndroid Build Coastguard Worker       return agx_fcmp_to(b, dst, s0, s1, AGX_FCOND_GE, false);
1795*61046927SAndroid Build Coastguard Worker    case nir_op_fneu:
1796*61046927SAndroid Build Coastguard Worker       return agx_fcmp_to(b, dst, s0, s1, AGX_FCOND_EQ, true);
1797*61046927SAndroid Build Coastguard Worker 
1798*61046927SAndroid Build Coastguard Worker    case nir_op_ieq:
1799*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, s1, AGX_ICOND_UEQ, false);
1800*61046927SAndroid Build Coastguard Worker    case nir_op_ine:
1801*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, s1, AGX_ICOND_UEQ, true);
1802*61046927SAndroid Build Coastguard Worker    case nir_op_ilt:
1803*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, s1, AGX_ICOND_SLT, false);
1804*61046927SAndroid Build Coastguard Worker    case nir_op_ige:
1805*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, s1, AGX_ICOND_SLT, true);
1806*61046927SAndroid Build Coastguard Worker    case nir_op_ult:
1807*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, s1, AGX_ICOND_ULT, false);
1808*61046927SAndroid Build Coastguard Worker    case nir_op_uge:
1809*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, s1, AGX_ICOND_ULT, true);
1810*61046927SAndroid Build Coastguard Worker 
1811*61046927SAndroid Build Coastguard Worker    case nir_op_inot:
1812*61046927SAndroid Build Coastguard Worker       if (sz == 1)
1813*61046927SAndroid Build Coastguard Worker          return agx_xor_to(b, dst, s0, i1);
1814*61046927SAndroid Build Coastguard Worker       else
1815*61046927SAndroid Build Coastguard Worker          return agx_not_to(b, dst, s0);
1816*61046927SAndroid Build Coastguard Worker 
1817*61046927SAndroid Build Coastguard Worker    case nir_op_b2b1:
1818*61046927SAndroid Build Coastguard Worker       return agx_icmp_to(b, dst, s0, i0, AGX_ICOND_UEQ, true);
1819*61046927SAndroid Build Coastguard Worker 
1820*61046927SAndroid Build Coastguard Worker    case nir_op_fsqrt:
1821*61046927SAndroid Build Coastguard Worker       return agx_fmul_to(b, dst, s0, agx_srsqrt(b, s0));
1822*61046927SAndroid Build Coastguard Worker    case nir_op_fabs:
1823*61046927SAndroid Build Coastguard Worker       return agx_fmov_to(b, dst, agx_abs(s0));
1824*61046927SAndroid Build Coastguard Worker    case nir_op_fneg:
1825*61046927SAndroid Build Coastguard Worker       return agx_fmov_to(b, dst, agx_neg(s0));
1826*61046927SAndroid Build Coastguard Worker 
1827*61046927SAndroid Build Coastguard Worker    case nir_op_fmin:
1828*61046927SAndroid Build Coastguard Worker    case nir_op_fmax:
1829*61046927SAndroid Build Coastguard Worker       return agx_fminmax_to(b, dst, s0, s1, instr);
1830*61046927SAndroid Build Coastguard Worker 
1831*61046927SAndroid Build Coastguard Worker    case nir_op_imin:
1832*61046927SAndroid Build Coastguard Worker       return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_SLT);
1833*61046927SAndroid Build Coastguard Worker    case nir_op_imax:
1834*61046927SAndroid Build Coastguard Worker       return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_SGT);
1835*61046927SAndroid Build Coastguard Worker    case nir_op_umin:
1836*61046927SAndroid Build Coastguard Worker       return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_ULT);
1837*61046927SAndroid Build Coastguard Worker    case nir_op_umax:
1838*61046927SAndroid Build Coastguard Worker       return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_UGT);
1839*61046927SAndroid Build Coastguard Worker 
1840*61046927SAndroid Build Coastguard Worker    case nir_op_iadd:
1841*61046927SAndroid Build Coastguard Worker       return agx_iadd_to(b, dst, s0, s1, 0);
1842*61046927SAndroid Build Coastguard Worker    case nir_op_imadshl_agx:
1843*61046927SAndroid Build Coastguard Worker       return agx_emit_imadshl_agx(b, instr, dst, s0, s1, s2, s3);
1844*61046927SAndroid Build Coastguard Worker    case nir_op_imsubshl_agx:
1845*61046927SAndroid Build Coastguard Worker       return agx_emit_imadshl_agx(b, instr, dst, s0, s1, agx_neg(s2), s3);
1846*61046927SAndroid Build Coastguard Worker    case nir_op_isub:
1847*61046927SAndroid Build Coastguard Worker       return agx_iadd_to(b, dst, s0, agx_neg(s1), 0);
1848*61046927SAndroid Build Coastguard Worker    case nir_op_ineg:
1849*61046927SAndroid Build Coastguard Worker       return agx_iadd_to(b, dst, i0, agx_neg(s0), 0);
1850*61046927SAndroid Build Coastguard Worker    case nir_op_imul:
1851*61046927SAndroid Build Coastguard Worker       return agx_imad_to(b, dst, s0, s1, i0, 0);
1852*61046927SAndroid Build Coastguard Worker    case nir_op_umul_2x32_64:
1853*61046927SAndroid Build Coastguard Worker       return agx_imad_to(b, dst, agx_abs(s0), agx_abs(s1), i0, 0);
1854*61046927SAndroid Build Coastguard Worker    case nir_op_imul_2x32_64:
1855*61046927SAndroid Build Coastguard Worker       return agx_imad_to(b, dst, s0, s1, i0, 0);
1856*61046927SAndroid Build Coastguard Worker    case nir_op_umul_high:
1857*61046927SAndroid Build Coastguard Worker       return agx_mul_high_to(b, dst, s0, s1, false);
1858*61046927SAndroid Build Coastguard Worker    case nir_op_imul_high:
1859*61046927SAndroid Build Coastguard Worker       return agx_mul_high_to(b, dst, s0, s1, true);
1860*61046927SAndroid Build Coastguard Worker 
1861*61046927SAndroid Build Coastguard Worker    case nir_op_ishl:
1862*61046927SAndroid Build Coastguard Worker       return agx_bfi_to(b, dst, i0, s0, s1, 0);
1863*61046927SAndroid Build Coastguard Worker    case nir_op_ushr:
1864*61046927SAndroid Build Coastguard Worker       return agx_ushr_to(b, dst, s0, s1);
1865*61046927SAndroid Build Coastguard Worker    case nir_op_ishr:
1866*61046927SAndroid Build Coastguard Worker       return agx_asr_to(b, dst, s0, s1);
1867*61046927SAndroid Build Coastguard Worker 
1868*61046927SAndroid Build Coastguard Worker    case nir_op_extr_agx:
1869*61046927SAndroid Build Coastguard Worker       return agx_extr_to(b, dst, s0, s1, s2,
1870*61046927SAndroid Build Coastguard Worker                          nir_alu_src_as_uint(instr->src[3]));
1871*61046927SAndroid Build Coastguard Worker 
1872*61046927SAndroid Build Coastguard Worker    case nir_op_ubitfield_extract: {
1873*61046927SAndroid Build Coastguard Worker       unsigned m = nir_alu_src_as_uint(instr->src[2]);
1874*61046927SAndroid Build Coastguard Worker       assert(m != 0 && "should've been optimized");
1875*61046927SAndroid Build Coastguard Worker 
1876*61046927SAndroid Build Coastguard Worker       /* Disable masking if the whole thing is used */
1877*61046927SAndroid Build Coastguard Worker       if (m >= 32)
1878*61046927SAndroid Build Coastguard Worker          m = 0;
1879*61046927SAndroid Build Coastguard Worker 
1880*61046927SAndroid Build Coastguard Worker       return agx_bfeil_to(b, dst, i0, s0, s1, m);
1881*61046927SAndroid Build Coastguard Worker    }
1882*61046927SAndroid Build Coastguard Worker 
1883*61046927SAndroid Build Coastguard Worker    case nir_op_bcsel:
1884*61046927SAndroid Build Coastguard Worker       return agx_icmpsel_to(b, dst, s0, i0, s2, s1, AGX_ICOND_UEQ);
1885*61046927SAndroid Build Coastguard Worker 
1886*61046927SAndroid Build Coastguard Worker    case nir_op_i2i32: {
1887*61046927SAndroid Build Coastguard Worker       if (src_sz == 8) {
1888*61046927SAndroid Build Coastguard Worker          /* Sign extend in software, NIR likes 8-bit conversions */
1889*61046927SAndroid Build Coastguard Worker          agx_index ishl16 = agx_bfi(b, i0, s0, agx_immediate(8), 0);
1890*61046927SAndroid Build Coastguard Worker          return agx_asr_to(b, dst, ishl16, agx_immediate(8));
1891*61046927SAndroid Build Coastguard Worker       } else {
1892*61046927SAndroid Build Coastguard Worker          assert(s0.size == AGX_SIZE_16 && "other conversions lowered");
1893*61046927SAndroid Build Coastguard Worker          return agx_iadd_to(b, dst, s0, i0, 0);
1894*61046927SAndroid Build Coastguard Worker       }
1895*61046927SAndroid Build Coastguard Worker    }
1896*61046927SAndroid Build Coastguard Worker 
1897*61046927SAndroid Build Coastguard Worker    case nir_op_i2i16: {
1898*61046927SAndroid Build Coastguard Worker       if (src_sz == 8) {
1899*61046927SAndroid Build Coastguard Worker          /* Sign extend in software, NIR likes 8-bit conversions */
1900*61046927SAndroid Build Coastguard Worker          agx_index ishl16 = agx_bfi(b, i0, s0, agx_immediate(8), 0);
1901*61046927SAndroid Build Coastguard Worker          return agx_asr_to(b, dst, ishl16, agx_immediate(8));
1902*61046927SAndroid Build Coastguard Worker       } else {
1903*61046927SAndroid Build Coastguard Worker          assert(s0.size == AGX_SIZE_32 && "other conversions lowered");
1904*61046927SAndroid Build Coastguard Worker          return agx_subdivide_to(b, dst, s0, 0);
1905*61046927SAndroid Build Coastguard Worker       }
1906*61046927SAndroid Build Coastguard Worker    }
1907*61046927SAndroid Build Coastguard Worker 
1908*61046927SAndroid Build Coastguard Worker    case nir_op_u2u16: {
1909*61046927SAndroid Build Coastguard Worker       if (s0.size == AGX_SIZE_32)
1910*61046927SAndroid Build Coastguard Worker          return agx_subdivide_to(b, dst, s0, 0);
1911*61046927SAndroid Build Coastguard Worker       else
1912*61046927SAndroid Build Coastguard Worker          return agx_mov_to(b, dst, s0);
1913*61046927SAndroid Build Coastguard Worker    }
1914*61046927SAndroid Build Coastguard Worker 
1915*61046927SAndroid Build Coastguard Worker    /* It will be put into a 16-bit register, but zero out the garbage. We could
1916*61046927SAndroid Build Coastguard Worker     * optimize this in the future but it ensures correctness for u2u16(u2u8(x))
1917*61046927SAndroid Build Coastguard Worker     * sequences.
1918*61046927SAndroid Build Coastguard Worker     */
1919*61046927SAndroid Build Coastguard Worker    case nir_op_u2u8:
1920*61046927SAndroid Build Coastguard Worker    case nir_op_i2i8:
1921*61046927SAndroid Build Coastguard Worker       return agx_and_to(b, dst, s0, agx_immediate(0xFF));
1922*61046927SAndroid Build Coastguard Worker 
1923*61046927SAndroid Build Coastguard Worker    case nir_op_iadd_sat: {
1924*61046927SAndroid Build Coastguard Worker       agx_instr *I = agx_iadd_to(b, dst, s0, s1, 0);
1925*61046927SAndroid Build Coastguard Worker       I->saturate = true;
1926*61046927SAndroid Build Coastguard Worker       return I;
1927*61046927SAndroid Build Coastguard Worker    }
1928*61046927SAndroid Build Coastguard Worker 
1929*61046927SAndroid Build Coastguard Worker    case nir_op_isub_sat: {
1930*61046927SAndroid Build Coastguard Worker       agx_instr *I = agx_iadd_to(b, dst, s0, agx_neg(s1), 0);
1931*61046927SAndroid Build Coastguard Worker       I->saturate = true;
1932*61046927SAndroid Build Coastguard Worker       return I;
1933*61046927SAndroid Build Coastguard Worker    }
1934*61046927SAndroid Build Coastguard Worker 
1935*61046927SAndroid Build Coastguard Worker    case nir_op_uadd_sat: {
1936*61046927SAndroid Build Coastguard Worker       agx_instr *I = agx_iadd_to(b, dst, agx_abs(s0), agx_abs(s1), 0);
1937*61046927SAndroid Build Coastguard Worker       I->saturate = true;
1938*61046927SAndroid Build Coastguard Worker       return I;
1939*61046927SAndroid Build Coastguard Worker    }
1940*61046927SAndroid Build Coastguard Worker 
1941*61046927SAndroid Build Coastguard Worker    case nir_op_usub_sat: {
1942*61046927SAndroid Build Coastguard Worker       agx_instr *I = agx_iadd_to(b, dst, agx_abs(s0), agx_neg(agx_abs(s1)), 0);
1943*61046927SAndroid Build Coastguard Worker       I->saturate = true;
1944*61046927SAndroid Build Coastguard Worker       return I;
1945*61046927SAndroid Build Coastguard Worker    }
1946*61046927SAndroid Build Coastguard Worker 
1947*61046927SAndroid Build Coastguard Worker    case nir_op_fsat: {
1948*61046927SAndroid Build Coastguard Worker       agx_instr *I = agx_fadd_to(b, dst, s0, agx_negzero());
1949*61046927SAndroid Build Coastguard Worker       I->saturate = true;
1950*61046927SAndroid Build Coastguard Worker       return I;
1951*61046927SAndroid Build Coastguard Worker    }
1952*61046927SAndroid Build Coastguard Worker 
1953*61046927SAndroid Build Coastguard Worker    case nir_op_fsin_agx: {
1954*61046927SAndroid Build Coastguard Worker       agx_index fixup = agx_sin_pt_1(b, s0);
1955*61046927SAndroid Build Coastguard Worker       agx_index sinc = agx_sin_pt_2(b, fixup);
1956*61046927SAndroid Build Coastguard Worker       return agx_fmul_to(b, dst, sinc, fixup);
1957*61046927SAndroid Build Coastguard Worker    }
1958*61046927SAndroid Build Coastguard Worker 
1959*61046927SAndroid Build Coastguard Worker    case nir_op_f2i16:
1960*61046927SAndroid Build Coastguard Worker       return agx_convert_to(b, dst, agx_immediate(AGX_CONVERT_F_TO_S16), s0,
1961*61046927SAndroid Build Coastguard Worker                             AGX_ROUND_RTZ);
1962*61046927SAndroid Build Coastguard Worker 
1963*61046927SAndroid Build Coastguard Worker    case nir_op_f2i32:
1964*61046927SAndroid Build Coastguard Worker       return agx_convert_to(b, dst, agx_immediate(AGX_CONVERT_F_TO_S32), s0,
1965*61046927SAndroid Build Coastguard Worker                             AGX_ROUND_RTZ);
1966*61046927SAndroid Build Coastguard Worker 
1967*61046927SAndroid Build Coastguard Worker    case nir_op_f2u16:
1968*61046927SAndroid Build Coastguard Worker       return agx_convert_to(b, dst, agx_immediate(AGX_CONVERT_F_TO_U16), s0,
1969*61046927SAndroid Build Coastguard Worker                             AGX_ROUND_RTZ);
1970*61046927SAndroid Build Coastguard Worker 
1971*61046927SAndroid Build Coastguard Worker    case nir_op_f2u32:
1972*61046927SAndroid Build Coastguard Worker       return agx_convert_to(b, dst, agx_immediate(AGX_CONVERT_F_TO_U32), s0,
1973*61046927SAndroid Build Coastguard Worker                             AGX_ROUND_RTZ);
1974*61046927SAndroid Build Coastguard Worker 
1975*61046927SAndroid Build Coastguard Worker    case nir_op_u2f16:
1976*61046927SAndroid Build Coastguard Worker    case nir_op_u2f32: {
1977*61046927SAndroid Build Coastguard Worker       if (src_sz == 64)
1978*61046927SAndroid Build Coastguard Worker          unreachable("64-bit conversions unimplemented");
1979*61046927SAndroid Build Coastguard Worker 
1980*61046927SAndroid Build Coastguard Worker       enum agx_convert mode = (src_sz == 32)   ? AGX_CONVERT_U32_TO_F
1981*61046927SAndroid Build Coastguard Worker                               : (src_sz == 16) ? AGX_CONVERT_U16_TO_F
1982*61046927SAndroid Build Coastguard Worker                                                : AGX_CONVERT_U8_TO_F;
1983*61046927SAndroid Build Coastguard Worker 
1984*61046927SAndroid Build Coastguard Worker       return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE);
1985*61046927SAndroid Build Coastguard Worker    }
1986*61046927SAndroid Build Coastguard Worker 
1987*61046927SAndroid Build Coastguard Worker    case nir_op_i2f16:
1988*61046927SAndroid Build Coastguard Worker    case nir_op_i2f32: {
1989*61046927SAndroid Build Coastguard Worker       if (src_sz == 64)
1990*61046927SAndroid Build Coastguard Worker          unreachable("64-bit conversions unimplemented");
1991*61046927SAndroid Build Coastguard Worker 
1992*61046927SAndroid Build Coastguard Worker       enum agx_convert mode = (src_sz == 32)   ? AGX_CONVERT_S32_TO_F
1993*61046927SAndroid Build Coastguard Worker                               : (src_sz == 16) ? AGX_CONVERT_S16_TO_F
1994*61046927SAndroid Build Coastguard Worker                                                : AGX_CONVERT_S8_TO_F;
1995*61046927SAndroid Build Coastguard Worker 
1996*61046927SAndroid Build Coastguard Worker       return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE);
1997*61046927SAndroid Build Coastguard Worker    }
1998*61046927SAndroid Build Coastguard Worker 
1999*61046927SAndroid Build Coastguard Worker    case nir_op_pack_32_2x16_split:
2000*61046927SAndroid Build Coastguard Worker    case nir_op_pack_64_2x32_split: {
2001*61046927SAndroid Build Coastguard Worker       agx_index idx[] = {s0, s1};
2002*61046927SAndroid Build Coastguard Worker       return agx_emit_collect_to(b, dst, 2, idx);
2003*61046927SAndroid Build Coastguard Worker    }
2004*61046927SAndroid Build Coastguard Worker 
2005*61046927SAndroid Build Coastguard Worker    case nir_op_unpack_64_2x32_split_x:
2006*61046927SAndroid Build Coastguard Worker    case nir_op_unpack_32_2x16_split_x:
2007*61046927SAndroid Build Coastguard Worker       return agx_subdivide_to(b, dst, s0, 0);
2008*61046927SAndroid Build Coastguard Worker 
2009*61046927SAndroid Build Coastguard Worker    case nir_op_unpack_64_2x32_split_y:
2010*61046927SAndroid Build Coastguard Worker    case nir_op_unpack_32_2x16_split_y:
2011*61046927SAndroid Build Coastguard Worker       return agx_subdivide_to(b, dst, s0, 1);
2012*61046927SAndroid Build Coastguard Worker 
2013*61046927SAndroid Build Coastguard Worker    case nir_op_vec2:
2014*61046927SAndroid Build Coastguard Worker    case nir_op_vec3:
2015*61046927SAndroid Build Coastguard Worker    case nir_op_vec4: {
2016*61046927SAndroid Build Coastguard Worker       agx_index idx[] = {s0, s1, s2, s3};
2017*61046927SAndroid Build Coastguard Worker       return agx_emit_collect_to(b, dst, srcs, idx);
2018*61046927SAndroid Build Coastguard Worker    }
2019*61046927SAndroid Build Coastguard Worker 
2020*61046927SAndroid Build Coastguard Worker    case nir_op_vec8:
2021*61046927SAndroid Build Coastguard Worker    case nir_op_vec16:
2022*61046927SAndroid Build Coastguard Worker       unreachable("should've been lowered");
2023*61046927SAndroid Build Coastguard Worker 
2024*61046927SAndroid Build Coastguard Worker    default:
2025*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
2026*61046927SAndroid Build Coastguard Worker       unreachable("Unhandled ALU instruction");
2027*61046927SAndroid Build Coastguard Worker    }
2028*61046927SAndroid Build Coastguard Worker }
2029*61046927SAndroid Build Coastguard Worker 
2030*61046927SAndroid Build Coastguard Worker static enum agx_lod_mode
agx_lod_mode_for_nir(nir_texop op,bool biased,bool min_lod,bool lod_is_zero)2031*61046927SAndroid Build Coastguard Worker agx_lod_mode_for_nir(nir_texop op, bool biased, bool min_lod, bool lod_is_zero)
2032*61046927SAndroid Build Coastguard Worker {
2033*61046927SAndroid Build Coastguard Worker    switch (op) {
2034*61046927SAndroid Build Coastguard Worker    case nir_texop_tex:
2035*61046927SAndroid Build Coastguard Worker    case nir_texop_tg4:
2036*61046927SAndroid Build Coastguard Worker       /* We could support this for tex, but it's never actually seen because tex
2037*61046927SAndroid Build Coastguard Worker        * is always turned into txb to implement sampler LOD bias in Vulkan.
2038*61046927SAndroid Build Coastguard Worker        */
2039*61046927SAndroid Build Coastguard Worker       assert(!min_lod && "unimplemented");
2040*61046927SAndroid Build Coastguard Worker 
2041*61046927SAndroid Build Coastguard Worker       return AGX_LOD_MODE_AUTO_LOD;
2042*61046927SAndroid Build Coastguard Worker    case nir_texop_txb:
2043*61046927SAndroid Build Coastguard Worker       return min_lod ? AGX_LOD_MODE_AUTO_LOD_BIAS_MIN
2044*61046927SAndroid Build Coastguard Worker                      : AGX_LOD_MODE_AUTO_LOD_BIAS;
2045*61046927SAndroid Build Coastguard Worker    case nir_texop_lod:
2046*61046927SAndroid Build Coastguard Worker       assert(!min_lod);
2047*61046927SAndroid Build Coastguard Worker       return biased ? AGX_LOD_MODE_AUTO_LOD_BIAS : AGX_LOD_MODE_AUTO_LOD;
2048*61046927SAndroid Build Coastguard Worker    case nir_texop_txd:
2049*61046927SAndroid Build Coastguard Worker       return min_lod ? AGX_LOD_MODE_LOD_GRAD_MIN : AGX_LOD_MODE_LOD_GRAD;
2050*61046927SAndroid Build Coastguard Worker    case nir_texop_txl:
2051*61046927SAndroid Build Coastguard Worker       assert(!min_lod);
2052*61046927SAndroid Build Coastguard Worker       return AGX_LOD_MODE_LOD_MIN;
2053*61046927SAndroid Build Coastguard Worker    case nir_texop_txf:
2054*61046927SAndroid Build Coastguard Worker       assert(!min_lod);
2055*61046927SAndroid Build Coastguard Worker       return lod_is_zero ? AGX_LOD_MODE_AUTO_LOD : AGX_LOD_MODE_LOD_MIN;
2056*61046927SAndroid Build Coastguard Worker    case nir_texop_txf_ms:
2057*61046927SAndroid Build Coastguard Worker       assert(!min_lod);
2058*61046927SAndroid Build Coastguard Worker       assert(lod_is_zero && "no mipmapping");
2059*61046927SAndroid Build Coastguard Worker       return AGX_LOD_MODE_AUTO_LOD;
2060*61046927SAndroid Build Coastguard Worker    default:
2061*61046927SAndroid Build Coastguard Worker       unreachable("Unhandled texture op");
2062*61046927SAndroid Build Coastguard Worker    }
2063*61046927SAndroid Build Coastguard Worker }
2064*61046927SAndroid Build Coastguard Worker 
2065*61046927SAndroid Build Coastguard Worker static enum agx_gather
agx_gather_for_nir(nir_tex_instr * tex)2066*61046927SAndroid Build Coastguard Worker agx_gather_for_nir(nir_tex_instr *tex)
2067*61046927SAndroid Build Coastguard Worker {
2068*61046927SAndroid Build Coastguard Worker    if (tex->op == nir_texop_tg4) {
2069*61046927SAndroid Build Coastguard Worker       enum agx_gather components[] = {
2070*61046927SAndroid Build Coastguard Worker          AGX_GATHER_R,
2071*61046927SAndroid Build Coastguard Worker          AGX_GATHER_G,
2072*61046927SAndroid Build Coastguard Worker          AGX_GATHER_B,
2073*61046927SAndroid Build Coastguard Worker          AGX_GATHER_A,
2074*61046927SAndroid Build Coastguard Worker       };
2075*61046927SAndroid Build Coastguard Worker 
2076*61046927SAndroid Build Coastguard Worker       assert(tex->component < ARRAY_SIZE(components));
2077*61046927SAndroid Build Coastguard Worker       return components[tex->component];
2078*61046927SAndroid Build Coastguard Worker    } else {
2079*61046927SAndroid Build Coastguard Worker       return AGX_GATHER_NONE;
2080*61046927SAndroid Build Coastguard Worker    }
2081*61046927SAndroid Build Coastguard Worker }
2082*61046927SAndroid Build Coastguard Worker 
2083*61046927SAndroid Build Coastguard Worker static void
agx_emit_tex(agx_builder * b,nir_tex_instr * instr)2084*61046927SAndroid Build Coastguard Worker agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
2085*61046927SAndroid Build Coastguard Worker {
2086*61046927SAndroid Build Coastguard Worker    agx_index coords = agx_null(), bindless = agx_immediate(0),
2087*61046927SAndroid Build Coastguard Worker              texture = agx_immediate(instr->texture_index),
2088*61046927SAndroid Build Coastguard Worker              sampler = agx_immediate(0), lod = agx_immediate(0),
2089*61046927SAndroid Build Coastguard Worker              compare = agx_null(), packed_offset = agx_null(),
2090*61046927SAndroid Build Coastguard Worker              min_lod = agx_null();
2091*61046927SAndroid Build Coastguard Worker 
2092*61046927SAndroid Build Coastguard Worker    bool lod_is_zero = true;
2093*61046927SAndroid Build Coastguard Worker 
2094*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->num_srcs; ++i) {
2095*61046927SAndroid Build Coastguard Worker       agx_index index = agx_src_index(&instr->src[i].src);
2096*61046927SAndroid Build Coastguard Worker 
2097*61046927SAndroid Build Coastguard Worker       switch (instr->src[i].src_type) {
2098*61046927SAndroid Build Coastguard Worker       case nir_tex_src_backend1:
2099*61046927SAndroid Build Coastguard Worker          coords = index;
2100*61046927SAndroid Build Coastguard Worker          break;
2101*61046927SAndroid Build Coastguard Worker 
2102*61046927SAndroid Build Coastguard Worker       case nir_tex_src_backend2:
2103*61046927SAndroid Build Coastguard Worker          packed_offset = index;
2104*61046927SAndroid Build Coastguard Worker          break;
2105*61046927SAndroid Build Coastguard Worker 
2106*61046927SAndroid Build Coastguard Worker       case nir_tex_src_lod:
2107*61046927SAndroid Build Coastguard Worker       case nir_tex_src_bias:
2108*61046927SAndroid Build Coastguard Worker          lod = index;
2109*61046927SAndroid Build Coastguard Worker          lod_is_zero = nir_src_is_const(instr->src[i].src) &&
2110*61046927SAndroid Build Coastguard Worker                        nir_src_as_uint(instr->src[i].src) == 0;
2111*61046927SAndroid Build Coastguard Worker          break;
2112*61046927SAndroid Build Coastguard Worker 
2113*61046927SAndroid Build Coastguard Worker       case nir_tex_src_min_lod:
2114*61046927SAndroid Build Coastguard Worker          assert(index.size == AGX_SIZE_16);
2115*61046927SAndroid Build Coastguard Worker          min_lod = index;
2116*61046927SAndroid Build Coastguard Worker          break;
2117*61046927SAndroid Build Coastguard Worker 
2118*61046927SAndroid Build Coastguard Worker       case nir_tex_src_comparator:
2119*61046927SAndroid Build Coastguard Worker          assert(index.size == AGX_SIZE_32);
2120*61046927SAndroid Build Coastguard Worker          compare = index;
2121*61046927SAndroid Build Coastguard Worker          break;
2122*61046927SAndroid Build Coastguard Worker 
2123*61046927SAndroid Build Coastguard Worker       case nir_tex_src_texture_offset:
2124*61046927SAndroid Build Coastguard Worker          texture = index;
2125*61046927SAndroid Build Coastguard Worker          break;
2126*61046927SAndroid Build Coastguard Worker       case nir_tex_src_sampler_handle:
2127*61046927SAndroid Build Coastguard Worker          sampler = index;
2128*61046927SAndroid Build Coastguard Worker          break;
2129*61046927SAndroid Build Coastguard Worker 
2130*61046927SAndroid Build Coastguard Worker       case nir_tex_src_texture_handle:
2131*61046927SAndroid Build Coastguard Worker          texture =
2132*61046927SAndroid Build Coastguard Worker             agx_translate_bindless_handle(b, &instr->src[i].src, &bindless);
2133*61046927SAndroid Build Coastguard Worker          break;
2134*61046927SAndroid Build Coastguard Worker 
2135*61046927SAndroid Build Coastguard Worker       case nir_tex_src_ddx: {
2136*61046927SAndroid Build Coastguard Worker          int y_idx = nir_tex_instr_src_index(instr, nir_tex_src_ddy);
2137*61046927SAndroid Build Coastguard Worker          assert(y_idx >= 0 && "we only handle gradients");
2138*61046927SAndroid Build Coastguard Worker 
2139*61046927SAndroid Build Coastguard Worker          int min_idx = nir_tex_instr_src_index(instr, nir_tex_src_min_lod);
2140*61046927SAndroid Build Coastguard Worker          bool has_min = min_idx >= 0;
2141*61046927SAndroid Build Coastguard Worker          agx_index min;
2142*61046927SAndroid Build Coastguard Worker 
2143*61046927SAndroid Build Coastguard Worker          unsigned n = nir_tex_instr_src_size(instr, y_idx);
2144*61046927SAndroid Build Coastguard Worker          assert((n == 2 || n == 3) && "other sizes not supported");
2145*61046927SAndroid Build Coastguard Worker 
2146*61046927SAndroid Build Coastguard Worker          agx_index index2 = agx_src_index(&instr->src[y_idx].src);
2147*61046927SAndroid Build Coastguard Worker 
2148*61046927SAndroid Build Coastguard Worker          if (has_min) {
2149*61046927SAndroid Build Coastguard Worker             min = agx_src_index(&instr->src[min_idx].src);
2150*61046927SAndroid Build Coastguard Worker 
2151*61046927SAndroid Build Coastguard Worker             /* Undef extend to 32-bit since our IR is iffy */
2152*61046927SAndroid Build Coastguard Worker             min = agx_vec2(b, min, agx_undef(AGX_SIZE_16));
2153*61046927SAndroid Build Coastguard Worker             min.channels_m1--;
2154*61046927SAndroid Build Coastguard Worker             min.size = AGX_SIZE_32;
2155*61046927SAndroid Build Coastguard Worker          }
2156*61046927SAndroid Build Coastguard Worker 
2157*61046927SAndroid Build Coastguard Worker          /* We explicitly don't cache about the split cache for this */
2158*61046927SAndroid Build Coastguard Worker          unsigned chans = (2 * n) + (has_min ? 1 : 0);
2159*61046927SAndroid Build Coastguard Worker          lod = agx_vec_temp(b->shader, AGX_SIZE_32, chans);
2160*61046927SAndroid Build Coastguard Worker          agx_instr *I = agx_collect_to(b, lod, chans);
2161*61046927SAndroid Build Coastguard Worker 
2162*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < n; ++i) {
2163*61046927SAndroid Build Coastguard Worker             I->src[(2 * i) + 0] = agx_emit_extract(b, index, i);
2164*61046927SAndroid Build Coastguard Worker             I->src[(2 * i) + 1] = agx_emit_extract(b, index2, i);
2165*61046927SAndroid Build Coastguard Worker          }
2166*61046927SAndroid Build Coastguard Worker 
2167*61046927SAndroid Build Coastguard Worker          if (has_min)
2168*61046927SAndroid Build Coastguard Worker             I->src[2 * n] = min;
2169*61046927SAndroid Build Coastguard Worker 
2170*61046927SAndroid Build Coastguard Worker          break;
2171*61046927SAndroid Build Coastguard Worker       }
2172*61046927SAndroid Build Coastguard Worker 
2173*61046927SAndroid Build Coastguard Worker       case nir_tex_src_ddy:
2174*61046927SAndroid Build Coastguard Worker          /* handled above */
2175*61046927SAndroid Build Coastguard Worker          break;
2176*61046927SAndroid Build Coastguard Worker 
2177*61046927SAndroid Build Coastguard Worker       default:
2178*61046927SAndroid Build Coastguard Worker          unreachable("Unexpected texture source");
2179*61046927SAndroid Build Coastguard Worker       }
2180*61046927SAndroid Build Coastguard Worker    }
2181*61046927SAndroid Build Coastguard Worker 
2182*61046927SAndroid Build Coastguard Worker    enum agx_lod_mode lod_mode = agx_lod_mode_for_nir(
2183*61046927SAndroid Build Coastguard Worker       instr->op, nir_tex_instr_src_index(instr, nir_tex_src_bias) >= 0,
2184*61046927SAndroid Build Coastguard Worker       nir_tex_instr_src_index(instr, nir_tex_src_min_lod) >= 0, lod_is_zero);
2185*61046927SAndroid Build Coastguard Worker 
2186*61046927SAndroid Build Coastguard Worker    if (lod_mode == AGX_LOD_MODE_AUTO_LOD) {
2187*61046927SAndroid Build Coastguard Worker       /* Ignored logically but asserted 0 */
2188*61046927SAndroid Build Coastguard Worker       lod = agx_immediate(0);
2189*61046927SAndroid Build Coastguard Worker    } else if (lod_mode == AGX_LOD_MODE_AUTO_LOD_BIAS_MIN) {
2190*61046927SAndroid Build Coastguard Worker       /* Combine min with lod */
2191*61046927SAndroid Build Coastguard Worker       lod = agx_vec2(b, lod, min_lod);
2192*61046927SAndroid Build Coastguard Worker    }
2193*61046927SAndroid Build Coastguard Worker 
2194*61046927SAndroid Build Coastguard Worker    agx_index dst = agx_def_index(&instr->def);
2195*61046927SAndroid Build Coastguard Worker 
2196*61046927SAndroid Build Coastguard Worker    /* Pack shadow reference value (compare) and packed offset together */
2197*61046927SAndroid Build Coastguard Worker    agx_index compare_offset = agx_null();
2198*61046927SAndroid Build Coastguard Worker 
2199*61046927SAndroid Build Coastguard Worker    if (!agx_is_null(compare) && !agx_is_null(packed_offset))
2200*61046927SAndroid Build Coastguard Worker       compare_offset = agx_vec2(b, compare, packed_offset);
2201*61046927SAndroid Build Coastguard Worker    else if (!agx_is_null(packed_offset))
2202*61046927SAndroid Build Coastguard Worker       compare_offset = packed_offset;
2203*61046927SAndroid Build Coastguard Worker    else if (!agx_is_null(compare))
2204*61046927SAndroid Build Coastguard Worker       compare_offset = compare;
2205*61046927SAndroid Build Coastguard Worker 
2206*61046927SAndroid Build Coastguard Worker    agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
2207*61046927SAndroid Build Coastguard Worker    agx_instr *I = agx_texture_sample_to(
2208*61046927SAndroid Build Coastguard Worker       b, tmp, coords, lod, bindless, texture, sampler, compare_offset,
2209*61046927SAndroid Build Coastguard Worker       agx_tex_dim(instr->sampler_dim, instr->is_array), lod_mode, 0,
2210*61046927SAndroid Build Coastguard Worker       !agx_is_null(packed_offset), !agx_is_null(compare),
2211*61046927SAndroid Build Coastguard Worker       instr->op == nir_texop_lod, agx_gather_for_nir(instr));
2212*61046927SAndroid Build Coastguard Worker 
2213*61046927SAndroid Build Coastguard Worker    if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) {
2214*61046927SAndroid Build Coastguard Worker       I->op = AGX_OPCODE_TEXTURE_LOAD;
2215*61046927SAndroid Build Coastguard Worker       b->shader->out->uses_txf = true;
2216*61046927SAndroid Build Coastguard Worker    }
2217*61046927SAndroid Build Coastguard Worker 
2218*61046927SAndroid Build Coastguard Worker    /* Destination masking doesn't seem to work properly for gathers (because
2219*61046927SAndroid Build Coastguard Worker     * it's mostly pointless), but it does show up in the lowering of
2220*61046927SAndroid Build Coastguard Worker     * textureGatherOffsets. Don't try to mask the destination for gathers.
2221*61046927SAndroid Build Coastguard Worker     */
2222*61046927SAndroid Build Coastguard Worker    bool masked = (instr->op != nir_texop_tg4);
2223*61046927SAndroid Build Coastguard Worker    I->mask = agx_expand_tex_to(b, &instr->def, tmp, masked);
2224*61046927SAndroid Build Coastguard Worker }
2225*61046927SAndroid Build Coastguard Worker 
2226*61046927SAndroid Build Coastguard Worker /*
2227*61046927SAndroid Build Coastguard Worker  * Determine if a NIR loop (CF list) uses a continue jump, including within
2228*61046927SAndroid Build Coastguard Worker  * if-else statements but not including nested loops.
2229*61046927SAndroid Build Coastguard Worker  */
2230*61046927SAndroid Build Coastguard Worker static bool
cf_list_uses_continue(struct exec_list * list)2231*61046927SAndroid Build Coastguard Worker cf_list_uses_continue(struct exec_list *list)
2232*61046927SAndroid Build Coastguard Worker {
2233*61046927SAndroid Build Coastguard Worker    foreach_list_typed(nir_cf_node, node, node, list) {
2234*61046927SAndroid Build Coastguard Worker       if (node->type == nir_cf_node_block) {
2235*61046927SAndroid Build Coastguard Worker          nir_block *block = nir_cf_node_as_block(node);
2236*61046927SAndroid Build Coastguard Worker 
2237*61046927SAndroid Build Coastguard Worker          nir_foreach_instr(instr, block) {
2238*61046927SAndroid Build Coastguard Worker             if (instr->type == nir_instr_type_jump &&
2239*61046927SAndroid Build Coastguard Worker                 nir_instr_as_jump(instr)->type == nir_jump_continue)
2240*61046927SAndroid Build Coastguard Worker                return true;
2241*61046927SAndroid Build Coastguard Worker          }
2242*61046927SAndroid Build Coastguard Worker       } else if (node->type == nir_cf_node_if) {
2243*61046927SAndroid Build Coastguard Worker          nir_if *nif = nir_cf_node_as_if(node);
2244*61046927SAndroid Build Coastguard Worker 
2245*61046927SAndroid Build Coastguard Worker          if (cf_list_uses_continue(&nif->then_list) ||
2246*61046927SAndroid Build Coastguard Worker              cf_list_uses_continue(&nif->else_list))
2247*61046927SAndroid Build Coastguard Worker             return true;
2248*61046927SAndroid Build Coastguard Worker       } else {
2249*61046927SAndroid Build Coastguard Worker          assert(node->type == nir_cf_node_loop && "don't care about nesting");
2250*61046927SAndroid Build Coastguard Worker       }
2251*61046927SAndroid Build Coastguard Worker    }
2252*61046927SAndroid Build Coastguard Worker 
2253*61046927SAndroid Build Coastguard Worker    return false;
2254*61046927SAndroid Build Coastguard Worker }
2255*61046927SAndroid Build Coastguard Worker 
2256*61046927SAndroid Build Coastguard Worker static bool
loop_uses_continue(nir_loop * loop)2257*61046927SAndroid Build Coastguard Worker loop_uses_continue(nir_loop *loop)
2258*61046927SAndroid Build Coastguard Worker {
2259*61046927SAndroid Build Coastguard Worker    return cf_list_uses_continue(&loop->body);
2260*61046927SAndroid Build Coastguard Worker }
2261*61046927SAndroid Build Coastguard Worker 
2262*61046927SAndroid Build Coastguard Worker /*
2263*61046927SAndroid Build Coastguard Worker  * NIR loops are treated as a pair of AGX loops:
2264*61046927SAndroid Build Coastguard Worker  *
2265*61046927SAndroid Build Coastguard Worker  *    do {
2266*61046927SAndroid Build Coastguard Worker  *       do {
2267*61046927SAndroid Build Coastguard Worker  *          ...
2268*61046927SAndroid Build Coastguard Worker  *       } while (0);
2269*61046927SAndroid Build Coastguard Worker  *    } while (cond);
2270*61046927SAndroid Build Coastguard Worker  *
2271*61046927SAndroid Build Coastguard Worker  * By manipulating the nesting counter, we may break out of nested loops, so
2272*61046927SAndroid Build Coastguard Worker  * under the model, both break and continue may be implemented as breaks, where
2273*61046927SAndroid Build Coastguard Worker  * break breaks out of the outer loop (2 layers) and continue breaks out of the
2274*61046927SAndroid Build Coastguard Worker  * inner loop (1 layer).
2275*61046927SAndroid Build Coastguard Worker  *
2276*61046927SAndroid Build Coastguard Worker  * After manipulating the nesting counter directly, pop_exec #0 must be used to
2277*61046927SAndroid Build Coastguard Worker  * flush the update to the execution mask.
2278*61046927SAndroid Build Coastguard Worker  */
2279*61046927SAndroid Build Coastguard Worker static void
agx_emit_jump(agx_builder * b,nir_jump_instr * instr)2280*61046927SAndroid Build Coastguard Worker agx_emit_jump(agx_builder *b, nir_jump_instr *instr)
2281*61046927SAndroid Build Coastguard Worker {
2282*61046927SAndroid Build Coastguard Worker    agx_context *ctx = b->shader;
2283*61046927SAndroid Build Coastguard Worker    assert(instr->type == nir_jump_break || instr->type == nir_jump_continue);
2284*61046927SAndroid Build Coastguard Worker 
2285*61046927SAndroid Build Coastguard Worker    /* Break out of either one or two loops */
2286*61046927SAndroid Build Coastguard Worker    unsigned nestings = b->shader->loop_nesting;
2287*61046927SAndroid Build Coastguard Worker 
2288*61046927SAndroid Build Coastguard Worker    if (instr->type == nir_jump_continue) {
2289*61046927SAndroid Build Coastguard Worker       nestings += 1;
2290*61046927SAndroid Build Coastguard Worker       agx_block_add_successor(ctx->current_block, ctx->continue_block);
2291*61046927SAndroid Build Coastguard Worker    } else if (instr->type == nir_jump_break) {
2292*61046927SAndroid Build Coastguard Worker       nestings += ctx->loop_continues ? 2 : 1;
2293*61046927SAndroid Build Coastguard Worker       agx_block_add_successor(ctx->current_block, ctx->break_block);
2294*61046927SAndroid Build Coastguard Worker    }
2295*61046927SAndroid Build Coastguard Worker 
2296*61046927SAndroid Build Coastguard Worker    agx_break(b, nestings, ctx->break_block);
2297*61046927SAndroid Build Coastguard Worker    ctx->current_block->unconditional_jumps = true;
2298*61046927SAndroid Build Coastguard Worker }
2299*61046927SAndroid Build Coastguard Worker 
2300*61046927SAndroid Build Coastguard Worker static void
agx_emit_phi(agx_builder * b,nir_phi_instr * instr)2301*61046927SAndroid Build Coastguard Worker agx_emit_phi(agx_builder *b, nir_phi_instr *instr)
2302*61046927SAndroid Build Coastguard Worker {
2303*61046927SAndroid Build Coastguard Worker    agx_instr *I =
2304*61046927SAndroid Build Coastguard Worker       agx_phi_to(b, agx_def_index(&instr->def), exec_list_length(&instr->srcs));
2305*61046927SAndroid Build Coastguard Worker 
2306*61046927SAndroid Build Coastguard Worker    /* Deferred */
2307*61046927SAndroid Build Coastguard Worker    I->phi = instr;
2308*61046927SAndroid Build Coastguard Worker }
2309*61046927SAndroid Build Coastguard Worker 
2310*61046927SAndroid Build Coastguard Worker /* Look up the AGX block corresponding to a given NIR block. Used when
2311*61046927SAndroid Build Coastguard Worker  * translating phi nodes after emitting all blocks.
2312*61046927SAndroid Build Coastguard Worker  */
2313*61046927SAndroid Build Coastguard Worker static agx_block *
agx_from_nir_block(agx_context * ctx,nir_block * block)2314*61046927SAndroid Build Coastguard Worker agx_from_nir_block(agx_context *ctx, nir_block *block)
2315*61046927SAndroid Build Coastguard Worker {
2316*61046927SAndroid Build Coastguard Worker    return ctx->indexed_nir_blocks[block->index];
2317*61046927SAndroid Build Coastguard Worker }
2318*61046927SAndroid Build Coastguard Worker 
2319*61046927SAndroid Build Coastguard Worker static void
agx_emit_phi_deferred(agx_context * ctx,agx_block * block,agx_instr * I)2320*61046927SAndroid Build Coastguard Worker agx_emit_phi_deferred(agx_context *ctx, agx_block *block, agx_instr *I)
2321*61046927SAndroid Build Coastguard Worker {
2322*61046927SAndroid Build Coastguard Worker    nir_phi_instr *phi = I->phi;
2323*61046927SAndroid Build Coastguard Worker    I->phi = NULL;
2324*61046927SAndroid Build Coastguard Worker 
2325*61046927SAndroid Build Coastguard Worker    /* Guaranteed by lower_phis_to_scalar */
2326*61046927SAndroid Build Coastguard Worker    assert(phi->def.num_components == 1);
2327*61046927SAndroid Build Coastguard Worker 
2328*61046927SAndroid Build Coastguard Worker    nir_foreach_phi_src(src, phi) {
2329*61046927SAndroid Build Coastguard Worker       agx_block *pred = agx_from_nir_block(ctx, src->pred);
2330*61046927SAndroid Build Coastguard Worker       unsigned i = agx_predecessor_index(block, pred);
2331*61046927SAndroid Build Coastguard Worker       assert(i < I->nr_srcs);
2332*61046927SAndroid Build Coastguard Worker 
2333*61046927SAndroid Build Coastguard Worker       I->src[i] = agx_src_index(&src->src);
2334*61046927SAndroid Build Coastguard Worker    }
2335*61046927SAndroid Build Coastguard Worker }
2336*61046927SAndroid Build Coastguard Worker 
2337*61046927SAndroid Build Coastguard Worker static void
agx_emit_phis_deferred(agx_context * ctx)2338*61046927SAndroid Build Coastguard Worker agx_emit_phis_deferred(agx_context *ctx)
2339*61046927SAndroid Build Coastguard Worker {
2340*61046927SAndroid Build Coastguard Worker    agx_foreach_block(ctx, block) {
2341*61046927SAndroid Build Coastguard Worker       agx_foreach_phi_in_block(block, I)
2342*61046927SAndroid Build Coastguard Worker          agx_emit_phi_deferred(ctx, block, I);
2343*61046927SAndroid Build Coastguard Worker    }
2344*61046927SAndroid Build Coastguard Worker }
2345*61046927SAndroid Build Coastguard Worker 
2346*61046927SAndroid Build Coastguard Worker static void
agx_emit_undef(agx_builder * b,nir_undef_instr * instr)2347*61046927SAndroid Build Coastguard Worker agx_emit_undef(agx_builder *b, nir_undef_instr *instr)
2348*61046927SAndroid Build Coastguard Worker {
2349*61046927SAndroid Build Coastguard Worker    /* For now, just lower undefs to zero. This doesn't matter too much, since
2350*61046927SAndroid Build Coastguard Worker     * the lowering happens in NIR and this just allows for late lowering passes
2351*61046927SAndroid Build Coastguard Worker     * to result in undefs.
2352*61046927SAndroid Build Coastguard Worker     */
2353*61046927SAndroid Build Coastguard Worker    if (instr->def.num_components > 1) {
2354*61046927SAndroid Build Coastguard Worker       assert(instr->def.num_components <= 4);
2355*61046927SAndroid Build Coastguard Worker       agx_index zero = agx_mov_imm(b, instr->def.bit_size, 0);
2356*61046927SAndroid Build Coastguard Worker 
2357*61046927SAndroid Build Coastguard Worker       agx_emit_collect_to(b, agx_def_index(&instr->def),
2358*61046927SAndroid Build Coastguard Worker                           instr->def.num_components,
2359*61046927SAndroid Build Coastguard Worker                           (agx_index[4]){zero, zero, zero, zero});
2360*61046927SAndroid Build Coastguard Worker    } else {
2361*61046927SAndroid Build Coastguard Worker       agx_mov_imm_to(b, agx_def_index(&instr->def), 0);
2362*61046927SAndroid Build Coastguard Worker    }
2363*61046927SAndroid Build Coastguard Worker }
2364*61046927SAndroid Build Coastguard Worker 
2365*61046927SAndroid Build Coastguard Worker static void
agx_emit_instr(agx_builder * b,struct nir_instr * instr)2366*61046927SAndroid Build Coastguard Worker agx_emit_instr(agx_builder *b, struct nir_instr *instr)
2367*61046927SAndroid Build Coastguard Worker {
2368*61046927SAndroid Build Coastguard Worker    switch (instr->type) {
2369*61046927SAndroid Build Coastguard Worker    case nir_instr_type_load_const:
2370*61046927SAndroid Build Coastguard Worker       agx_emit_load_const(b, nir_instr_as_load_const(instr));
2371*61046927SAndroid Build Coastguard Worker       break;
2372*61046927SAndroid Build Coastguard Worker 
2373*61046927SAndroid Build Coastguard Worker    case nir_instr_type_intrinsic:
2374*61046927SAndroid Build Coastguard Worker       agx_emit_intrinsic(b, nir_instr_as_intrinsic(instr));
2375*61046927SAndroid Build Coastguard Worker       break;
2376*61046927SAndroid Build Coastguard Worker 
2377*61046927SAndroid Build Coastguard Worker    case nir_instr_type_alu:
2378*61046927SAndroid Build Coastguard Worker       agx_emit_alu(b, nir_instr_as_alu(instr));
2379*61046927SAndroid Build Coastguard Worker       break;
2380*61046927SAndroid Build Coastguard Worker 
2381*61046927SAndroid Build Coastguard Worker    case nir_instr_type_tex:
2382*61046927SAndroid Build Coastguard Worker       agx_emit_tex(b, nir_instr_as_tex(instr));
2383*61046927SAndroid Build Coastguard Worker       break;
2384*61046927SAndroid Build Coastguard Worker 
2385*61046927SAndroid Build Coastguard Worker    case nir_instr_type_jump:
2386*61046927SAndroid Build Coastguard Worker       agx_emit_jump(b, nir_instr_as_jump(instr));
2387*61046927SAndroid Build Coastguard Worker       break;
2388*61046927SAndroid Build Coastguard Worker 
2389*61046927SAndroid Build Coastguard Worker    case nir_instr_type_phi:
2390*61046927SAndroid Build Coastguard Worker       agx_emit_phi(b, nir_instr_as_phi(instr));
2391*61046927SAndroid Build Coastguard Worker       break;
2392*61046927SAndroid Build Coastguard Worker 
2393*61046927SAndroid Build Coastguard Worker    case nir_instr_type_undef:
2394*61046927SAndroid Build Coastguard Worker       agx_emit_undef(b, nir_instr_as_undef(instr));
2395*61046927SAndroid Build Coastguard Worker       break;
2396*61046927SAndroid Build Coastguard Worker 
2397*61046927SAndroid Build Coastguard Worker    default:
2398*61046927SAndroid Build Coastguard Worker       unreachable("should've been lowered");
2399*61046927SAndroid Build Coastguard Worker    }
2400*61046927SAndroid Build Coastguard Worker }
2401*61046927SAndroid Build Coastguard Worker 
2402*61046927SAndroid Build Coastguard Worker static agx_block *
agx_create_block(agx_context * ctx)2403*61046927SAndroid Build Coastguard Worker agx_create_block(agx_context *ctx)
2404*61046927SAndroid Build Coastguard Worker {
2405*61046927SAndroid Build Coastguard Worker    agx_block *blk = rzalloc(ctx, agx_block);
2406*61046927SAndroid Build Coastguard Worker 
2407*61046927SAndroid Build Coastguard Worker    util_dynarray_init(&blk->predecessors, blk);
2408*61046927SAndroid Build Coastguard Worker 
2409*61046927SAndroid Build Coastguard Worker    return blk;
2410*61046927SAndroid Build Coastguard Worker }
2411*61046927SAndroid Build Coastguard Worker 
2412*61046927SAndroid Build Coastguard Worker static agx_block *
emit_block(agx_context * ctx,nir_block * block)2413*61046927SAndroid Build Coastguard Worker emit_block(agx_context *ctx, nir_block *block)
2414*61046927SAndroid Build Coastguard Worker {
2415*61046927SAndroid Build Coastguard Worker    if (ctx->after_block) {
2416*61046927SAndroid Build Coastguard Worker       ctx->current_block = ctx->after_block;
2417*61046927SAndroid Build Coastguard Worker       ctx->after_block = NULL;
2418*61046927SAndroid Build Coastguard Worker    } else {
2419*61046927SAndroid Build Coastguard Worker       ctx->current_block = agx_create_block(ctx);
2420*61046927SAndroid Build Coastguard Worker    }
2421*61046927SAndroid Build Coastguard Worker 
2422*61046927SAndroid Build Coastguard Worker    agx_block *blk = ctx->current_block;
2423*61046927SAndroid Build Coastguard Worker    list_addtail(&blk->link, &ctx->blocks);
2424*61046927SAndroid Build Coastguard Worker    list_inithead(&blk->instructions);
2425*61046927SAndroid Build Coastguard Worker 
2426*61046927SAndroid Build Coastguard Worker    ctx->indexed_nir_blocks[block->index] = blk;
2427*61046927SAndroid Build Coastguard Worker 
2428*61046927SAndroid Build Coastguard Worker    agx_builder _b = agx_init_builder(ctx, agx_after_block(blk));
2429*61046927SAndroid Build Coastguard Worker 
2430*61046927SAndroid Build Coastguard Worker    nir_foreach_instr(instr, block) {
2431*61046927SAndroid Build Coastguard Worker       agx_emit_instr(&_b, instr);
2432*61046927SAndroid Build Coastguard Worker    }
2433*61046927SAndroid Build Coastguard Worker 
2434*61046927SAndroid Build Coastguard Worker    return blk;
2435*61046927SAndroid Build Coastguard Worker }
2436*61046927SAndroid Build Coastguard Worker 
2437*61046927SAndroid Build Coastguard Worker static agx_block *emit_cf_list(agx_context *ctx, struct exec_list *list);
2438*61046927SAndroid Build Coastguard Worker 
2439*61046927SAndroid Build Coastguard Worker /* Emit if-else as
2440*61046927SAndroid Build Coastguard Worker  *
2441*61046927SAndroid Build Coastguard Worker  *    if_icmp cond != 0
2442*61046927SAndroid Build Coastguard Worker  *       ...
2443*61046927SAndroid Build Coastguard Worker  *    else_icmp cond == 0
2444*61046927SAndroid Build Coastguard Worker  *       ...
2445*61046927SAndroid Build Coastguard Worker  *    pop_exec
2446*61046927SAndroid Build Coastguard Worker  *
2447*61046927SAndroid Build Coastguard Worker  * If the else is empty, we can omit the else_icmp. This happens elsewhere, as
2448*61046927SAndroid Build Coastguard Worker  * an empty else block can become nonempty after RA due to phi lowering. This is
2449*61046927SAndroid Build Coastguard Worker  * not usually optimal, but it's a start.
2450*61046927SAndroid Build Coastguard Worker  */
2451*61046927SAndroid Build Coastguard Worker 
2452*61046927SAndroid Build Coastguard Worker static void
emit_if(agx_context * ctx,nir_if * nif)2453*61046927SAndroid Build Coastguard Worker emit_if(agx_context *ctx, nir_if *nif)
2454*61046927SAndroid Build Coastguard Worker {
2455*61046927SAndroid Build Coastguard Worker    agx_block *first_block = ctx->current_block;
2456*61046927SAndroid Build Coastguard Worker    agx_builder _b = agx_init_builder(ctx, agx_after_block(first_block));
2457*61046927SAndroid Build Coastguard Worker    agx_index cond = agx_src_index(&nif->condition);
2458*61046927SAndroid Build Coastguard Worker 
2459*61046927SAndroid Build Coastguard Worker    agx_instr *if_ = agx_if_icmp(&_b, cond, agx_zero(), 1, AGX_ICOND_UEQ, true,
2460*61046927SAndroid Build Coastguard Worker                                 NULL /* filled in later */);
2461*61046927SAndroid Build Coastguard Worker    ctx->loop_nesting++;
2462*61046927SAndroid Build Coastguard Worker    ctx->total_nesting++;
2463*61046927SAndroid Build Coastguard Worker 
2464*61046927SAndroid Build Coastguard Worker    /* Emit the two subblocks. */
2465*61046927SAndroid Build Coastguard Worker    agx_block *if_block = emit_cf_list(ctx, &nif->then_list);
2466*61046927SAndroid Build Coastguard Worker    agx_block *end_then = ctx->current_block;
2467*61046927SAndroid Build Coastguard Worker 
2468*61046927SAndroid Build Coastguard Worker    _b.cursor = agx_after_block(ctx->current_block);
2469*61046927SAndroid Build Coastguard Worker 
2470*61046927SAndroid Build Coastguard Worker    agx_block *else_block = emit_cf_list(ctx, &nif->else_list);
2471*61046927SAndroid Build Coastguard Worker    agx_block *end_else = ctx->current_block;
2472*61046927SAndroid Build Coastguard Worker 
2473*61046927SAndroid Build Coastguard Worker    /* If the "if" fails, we fallthrough to the else */
2474*61046927SAndroid Build Coastguard Worker    if_->target = else_block;
2475*61046927SAndroid Build Coastguard Worker 
2476*61046927SAndroid Build Coastguard Worker    /* Insert an else instruction at the beginning of the else block. We use
2477*61046927SAndroid Build Coastguard Worker     * "else_fcmp 0.0, 0.0, eq" as unconditional else, matching the blob.
2478*61046927SAndroid Build Coastguard Worker     *
2479*61046927SAndroid Build Coastguard Worker     * If it fails, we fall through to the logical end of the last else block.
2480*61046927SAndroid Build Coastguard Worker     */
2481*61046927SAndroid Build Coastguard Worker    _b.cursor = agx_before_block(else_block);
2482*61046927SAndroid Build Coastguard Worker    agx_else_fcmp(&_b, agx_zero(), agx_zero(), 1, AGX_FCOND_EQ, false, end_else);
2483*61046927SAndroid Build Coastguard Worker 
2484*61046927SAndroid Build Coastguard Worker    ctx->after_block = agx_create_block(ctx);
2485*61046927SAndroid Build Coastguard Worker 
2486*61046927SAndroid Build Coastguard Worker    agx_block_add_successor(first_block, if_block);
2487*61046927SAndroid Build Coastguard Worker    agx_block_add_successor(first_block, else_block);
2488*61046927SAndroid Build Coastguard Worker    agx_block_add_successor(end_then, ctx->after_block);
2489*61046927SAndroid Build Coastguard Worker    agx_block_add_successor(end_else, ctx->after_block);
2490*61046927SAndroid Build Coastguard Worker 
2491*61046927SAndroid Build Coastguard Worker    _b.cursor = agx_after_block(ctx->current_block);
2492*61046927SAndroid Build Coastguard Worker    agx_pop_exec(&_b, 1);
2493*61046927SAndroid Build Coastguard Worker    ctx->loop_nesting--;
2494*61046927SAndroid Build Coastguard Worker    ctx->total_nesting--;
2495*61046927SAndroid Build Coastguard Worker }
2496*61046927SAndroid Build Coastguard Worker 
2497*61046927SAndroid Build Coastguard Worker static void
emit_loop(agx_context * ctx,nir_loop * nloop)2498*61046927SAndroid Build Coastguard Worker emit_loop(agx_context *ctx, nir_loop *nloop)
2499*61046927SAndroid Build Coastguard Worker {
2500*61046927SAndroid Build Coastguard Worker    assert(!nir_loop_has_continue_construct(nloop));
2501*61046927SAndroid Build Coastguard Worker    /* We only track nesting within the innermost loop, so push and reset */
2502*61046927SAndroid Build Coastguard Worker    unsigned pushed_nesting = ctx->loop_nesting;
2503*61046927SAndroid Build Coastguard Worker    ctx->loop_nesting = 0;
2504*61046927SAndroid Build Coastguard Worker    ctx->total_nesting++;
2505*61046927SAndroid Build Coastguard Worker 
2506*61046927SAndroid Build Coastguard Worker    bool old_continues = ctx->loop_continues;
2507*61046927SAndroid Build Coastguard Worker    ctx->loop_continues = loop_uses_continue(nloop);
2508*61046927SAndroid Build Coastguard Worker 
2509*61046927SAndroid Build Coastguard Worker    agx_block *popped_break = ctx->break_block;
2510*61046927SAndroid Build Coastguard Worker    agx_block *popped_continue = ctx->continue_block;
2511*61046927SAndroid Build Coastguard Worker 
2512*61046927SAndroid Build Coastguard Worker    ctx->break_block = agx_create_block(ctx);
2513*61046927SAndroid Build Coastguard Worker    ctx->continue_block = agx_create_block(ctx);
2514*61046927SAndroid Build Coastguard Worker 
2515*61046927SAndroid Build Coastguard Worker    /* If we are emitting a loop inside other control flow, there might be
2516*61046927SAndroid Build Coastguard Worker     * threads masked off (TODO: divergence analysis), so push_exec them so
2517*61046927SAndroid Build Coastguard Worker     * we get the lower nesting count values to ourselves.
2518*61046927SAndroid Build Coastguard Worker     */
2519*61046927SAndroid Build Coastguard Worker    agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block));
2520*61046927SAndroid Build Coastguard Worker    if (ctx->total_nesting > 1)
2521*61046927SAndroid Build Coastguard Worker       agx_push_exec(&_b, ctx->loop_continues ? 2 : 1);
2522*61046927SAndroid Build Coastguard Worker 
2523*61046927SAndroid Build Coastguard Worker    /* Fallthrough to body */
2524*61046927SAndroid Build Coastguard Worker    agx_block_add_successor(ctx->current_block, ctx->continue_block);
2525*61046927SAndroid Build Coastguard Worker 
2526*61046927SAndroid Build Coastguard Worker    /* Emit the body */
2527*61046927SAndroid Build Coastguard Worker    ctx->after_block = ctx->continue_block;
2528*61046927SAndroid Build Coastguard Worker    ctx->after_block->loop_header = true;
2529*61046927SAndroid Build Coastguard Worker    agx_block *start_block = emit_cf_list(ctx, &nloop->body);
2530*61046927SAndroid Build Coastguard Worker 
2531*61046927SAndroid Build Coastguard Worker    /* If we used any continue jumps, we need to reactivate the continued
2532*61046927SAndroid Build Coastguard Worker     * threads. We do this with an always true while_icmp, which behaves like:
2533*61046927SAndroid Build Coastguard Worker     *
2534*61046927SAndroid Build Coastguard Worker     *    if (r0l == 1) {
2535*61046927SAndroid Build Coastguard Worker     *       r0l = 0;
2536*61046927SAndroid Build Coastguard Worker     *    }
2537*61046927SAndroid Build Coastguard Worker     *    update_exec
2538*61046927SAndroid Build Coastguard Worker     *
2539*61046927SAndroid Build Coastguard Worker     * If we did not use continue, this would be a no-op so it is omitted.
2540*61046927SAndroid Build Coastguard Worker     */
2541*61046927SAndroid Build Coastguard Worker    _b.cursor = agx_after_block(ctx->current_block);
2542*61046927SAndroid Build Coastguard Worker 
2543*61046927SAndroid Build Coastguard Worker    if (ctx->loop_continues) {
2544*61046927SAndroid Build Coastguard Worker       agx_while_icmp(
2545*61046927SAndroid Build Coastguard Worker          &_b, agx_zero(), agx_zero(), 2, AGX_ICOND_UEQ, false,
2546*61046927SAndroid Build Coastguard Worker          NULL /* no semantic target, used purely for side effects */);
2547*61046927SAndroid Build Coastguard Worker    }
2548*61046927SAndroid Build Coastguard Worker 
2549*61046927SAndroid Build Coastguard Worker    agx_jmp_exec_any(&_b, start_block);
2550*61046927SAndroid Build Coastguard Worker    agx_pop_exec(&_b, ctx->loop_continues ? 2 : 1);
2551*61046927SAndroid Build Coastguard Worker    agx_block_add_successor(ctx->current_block, ctx->continue_block);
2552*61046927SAndroid Build Coastguard Worker 
2553*61046927SAndroid Build Coastguard Worker    /* Pop off */
2554*61046927SAndroid Build Coastguard Worker    ctx->after_block = ctx->break_block;
2555*61046927SAndroid Build Coastguard Worker    ctx->break_block = popped_break;
2556*61046927SAndroid Build Coastguard Worker    ctx->continue_block = popped_continue;
2557*61046927SAndroid Build Coastguard Worker 
2558*61046927SAndroid Build Coastguard Worker    /* Update shader-db stats */
2559*61046927SAndroid Build Coastguard Worker    ++ctx->loop_count;
2560*61046927SAndroid Build Coastguard Worker 
2561*61046927SAndroid Build Coastguard Worker    /* All nested control flow must have finished */
2562*61046927SAndroid Build Coastguard Worker    assert(ctx->loop_nesting == 0);
2563*61046927SAndroid Build Coastguard Worker 
2564*61046927SAndroid Build Coastguard Worker    /* Restore loop nesting (we might be inside an if inside an outer loop) */
2565*61046927SAndroid Build Coastguard Worker    ctx->loop_nesting = pushed_nesting;
2566*61046927SAndroid Build Coastguard Worker    ctx->total_nesting--;
2567*61046927SAndroid Build Coastguard Worker    ctx->loop_continues = old_continues;
2568*61046927SAndroid Build Coastguard Worker }
2569*61046927SAndroid Build Coastguard Worker 
2570*61046927SAndroid Build Coastguard Worker /* Before the first control flow structure, the nesting counter needs to be
2571*61046927SAndroid Build Coastguard Worker  * zeroed for correct operation. This only happens at most once, since by
2572*61046927SAndroid Build Coastguard Worker  * definition this occurs at the end of the first block, which dominates the
2573*61046927SAndroid Build Coastguard Worker  * rest of the program. */
2574*61046927SAndroid Build Coastguard Worker 
2575*61046927SAndroid Build Coastguard Worker static void
emit_first_cf(agx_context * ctx)2576*61046927SAndroid Build Coastguard Worker emit_first_cf(agx_context *ctx)
2577*61046927SAndroid Build Coastguard Worker {
2578*61046927SAndroid Build Coastguard Worker    if (ctx->any_cf)
2579*61046927SAndroid Build Coastguard Worker       return;
2580*61046927SAndroid Build Coastguard Worker 
2581*61046927SAndroid Build Coastguard Worker    agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block));
2582*61046927SAndroid Build Coastguard Worker    agx_begin_cf(&_b);
2583*61046927SAndroid Build Coastguard Worker    ctx->any_cf = true;
2584*61046927SAndroid Build Coastguard Worker }
2585*61046927SAndroid Build Coastguard Worker 
2586*61046927SAndroid Build Coastguard Worker static agx_block *
emit_cf_list(agx_context * ctx,struct exec_list * list)2587*61046927SAndroid Build Coastguard Worker emit_cf_list(agx_context *ctx, struct exec_list *list)
2588*61046927SAndroid Build Coastguard Worker {
2589*61046927SAndroid Build Coastguard Worker    agx_block *start_block = NULL;
2590*61046927SAndroid Build Coastguard Worker 
2591*61046927SAndroid Build Coastguard Worker    foreach_list_typed(nir_cf_node, node, node, list) {
2592*61046927SAndroid Build Coastguard Worker       switch (node->type) {
2593*61046927SAndroid Build Coastguard Worker       case nir_cf_node_block: {
2594*61046927SAndroid Build Coastguard Worker          agx_block *block = emit_block(ctx, nir_cf_node_as_block(node));
2595*61046927SAndroid Build Coastguard Worker 
2596*61046927SAndroid Build Coastguard Worker          if (!start_block)
2597*61046927SAndroid Build Coastguard Worker             start_block = block;
2598*61046927SAndroid Build Coastguard Worker 
2599*61046927SAndroid Build Coastguard Worker          break;
2600*61046927SAndroid Build Coastguard Worker       }
2601*61046927SAndroid Build Coastguard Worker 
2602*61046927SAndroid Build Coastguard Worker       case nir_cf_node_if:
2603*61046927SAndroid Build Coastguard Worker          emit_first_cf(ctx);
2604*61046927SAndroid Build Coastguard Worker          emit_if(ctx, nir_cf_node_as_if(node));
2605*61046927SAndroid Build Coastguard Worker          break;
2606*61046927SAndroid Build Coastguard Worker 
2607*61046927SAndroid Build Coastguard Worker       case nir_cf_node_loop:
2608*61046927SAndroid Build Coastguard Worker          emit_first_cf(ctx);
2609*61046927SAndroid Build Coastguard Worker          emit_loop(ctx, nir_cf_node_as_loop(node));
2610*61046927SAndroid Build Coastguard Worker          break;
2611*61046927SAndroid Build Coastguard Worker 
2612*61046927SAndroid Build Coastguard Worker       default:
2613*61046927SAndroid Build Coastguard Worker          unreachable("Unknown control flow");
2614*61046927SAndroid Build Coastguard Worker       }
2615*61046927SAndroid Build Coastguard Worker    }
2616*61046927SAndroid Build Coastguard Worker 
2617*61046927SAndroid Build Coastguard Worker    return start_block;
2618*61046927SAndroid Build Coastguard Worker }
2619*61046927SAndroid Build Coastguard Worker 
2620*61046927SAndroid Build Coastguard Worker static void
agx_set_st_vary_final(agx_context * ctx)2621*61046927SAndroid Build Coastguard Worker agx_set_st_vary_final(agx_context *ctx)
2622*61046927SAndroid Build Coastguard Worker {
2623*61046927SAndroid Build Coastguard Worker    agx_foreach_instr_global_rev(ctx, I) {
2624*61046927SAndroid Build Coastguard Worker       if (I->op == AGX_OPCODE_ST_VARY) {
2625*61046927SAndroid Build Coastguard Worker          I->last = true;
2626*61046927SAndroid Build Coastguard Worker          return;
2627*61046927SAndroid Build Coastguard Worker       }
2628*61046927SAndroid Build Coastguard Worker    }
2629*61046927SAndroid Build Coastguard Worker 
2630*61046927SAndroid Build Coastguard Worker    /* If we got here, there was no varying written. We need to mark that. */
2631*61046927SAndroid Build Coastguard Worker    agx_block *last_block = list_last_entry(&ctx->blocks, agx_block, link);
2632*61046927SAndroid Build Coastguard Worker    agx_builder _b = agx_init_builder(ctx, agx_after_block_logical(last_block));
2633*61046927SAndroid Build Coastguard Worker    agx_no_varyings(&_b);
2634*61046927SAndroid Build Coastguard Worker }
2635*61046927SAndroid Build Coastguard Worker 
2636*61046927SAndroid Build Coastguard Worker static int
agx_dump_stats(agx_context * ctx,unsigned size,char ** out)2637*61046927SAndroid Build Coastguard Worker agx_dump_stats(agx_context *ctx, unsigned size, char **out)
2638*61046927SAndroid Build Coastguard Worker {
2639*61046927SAndroid Build Coastguard Worker    unsigned nr_ins = 0, spills = 0, fills = 0;
2640*61046927SAndroid Build Coastguard Worker 
2641*61046927SAndroid Build Coastguard Worker    /* Count instructions */
2642*61046927SAndroid Build Coastguard Worker    agx_foreach_instr_global(ctx, I) {
2643*61046927SAndroid Build Coastguard Worker       nr_ins++;
2644*61046927SAndroid Build Coastguard Worker 
2645*61046927SAndroid Build Coastguard Worker       if (I->op == AGX_OPCODE_STACK_STORE)
2646*61046927SAndroid Build Coastguard Worker          spills++;
2647*61046927SAndroid Build Coastguard Worker       else if (I->op == AGX_OPCODE_STACK_LOAD)
2648*61046927SAndroid Build Coastguard Worker          fills++;
2649*61046927SAndroid Build Coastguard Worker    }
2650*61046927SAndroid Build Coastguard Worker 
2651*61046927SAndroid Build Coastguard Worker    struct agx_cycle_estimate cycles = agx_estimate_cycles(ctx);
2652*61046927SAndroid Build Coastguard Worker 
2653*61046927SAndroid Build Coastguard Worker    unsigned nr_threads =
2654*61046927SAndroid Build Coastguard Worker       agx_occupancy_for_register_count(ctx->max_reg).max_threads;
2655*61046927SAndroid Build Coastguard Worker 
2656*61046927SAndroid Build Coastguard Worker    return asprintf(
2657*61046927SAndroid Build Coastguard Worker       out,
2658*61046927SAndroid Build Coastguard Worker       "%s shader: %u inst, %u alu, %u fscib, %u ic, %u bytes, %u regs, "
2659*61046927SAndroid Build Coastguard Worker       "%u uniforms, %u scratch, %u threads, %u loops, "
2660*61046927SAndroid Build Coastguard Worker       "%u:%u spills:fills",
2661*61046927SAndroid Build Coastguard Worker       gl_shader_stage_name(ctx->stage), nr_ins, cycles.alu, cycles.f_scib,
2662*61046927SAndroid Build Coastguard Worker       cycles.ic, size, ctx->max_reg, ctx->out->push_count, ctx->scratch_size,
2663*61046927SAndroid Build Coastguard Worker       nr_threads, ctx->loop_count, spills, fills);
2664*61046927SAndroid Build Coastguard Worker }
2665*61046927SAndroid Build Coastguard Worker 
2666*61046927SAndroid Build Coastguard Worker static bool
agx_lower_sincos_filter(const nir_instr * instr,UNUSED const void * _)2667*61046927SAndroid Build Coastguard Worker agx_lower_sincos_filter(const nir_instr *instr, UNUSED const void *_)
2668*61046927SAndroid Build Coastguard Worker {
2669*61046927SAndroid Build Coastguard Worker    if (instr->type != nir_instr_type_alu)
2670*61046927SAndroid Build Coastguard Worker       return false;
2671*61046927SAndroid Build Coastguard Worker 
2672*61046927SAndroid Build Coastguard Worker    nir_alu_instr *alu = nir_instr_as_alu(instr);
2673*61046927SAndroid Build Coastguard Worker    return alu->op == nir_op_fsin || alu->op == nir_op_fcos;
2674*61046927SAndroid Build Coastguard Worker }
2675*61046927SAndroid Build Coastguard Worker 
2676*61046927SAndroid Build Coastguard Worker /* Sine and cosine are implemented via the sin_pt_1 and sin_pt_2 opcodes for
2677*61046927SAndroid Build Coastguard Worker  * heavy lifting. sin_pt_2 implements sinc in the first quadrant, expressed in
2678*61046927SAndroid Build Coastguard Worker  * turns (sin (tau x) / x), while sin_pt_1 implements a piecewise sign/offset
2679*61046927SAndroid Build Coastguard Worker  * fixup to transform a quadrant angle [0, 4] to [-1, 1]. The NIR opcode
2680*61046927SAndroid Build Coastguard Worker  * fsin_agx models the fixup, sinc, and multiply to obtain sine, so we just
2681*61046927SAndroid Build Coastguard Worker  * need to change units from radians to quadrants modulo turns. Cosine is
2682*61046927SAndroid Build Coastguard Worker  * implemented by shifting by one quadrant: cos(x) = sin(x + tau/4).
2683*61046927SAndroid Build Coastguard Worker  */
2684*61046927SAndroid Build Coastguard Worker 
2685*61046927SAndroid Build Coastguard Worker static nir_def *
agx_lower_sincos_impl(struct nir_builder * b,nir_instr * instr,UNUSED void * _)2686*61046927SAndroid Build Coastguard Worker agx_lower_sincos_impl(struct nir_builder *b, nir_instr *instr, UNUSED void *_)
2687*61046927SAndroid Build Coastguard Worker {
2688*61046927SAndroid Build Coastguard Worker    nir_alu_instr *alu = nir_instr_as_alu(instr);
2689*61046927SAndroid Build Coastguard Worker    nir_def *x = nir_mov_alu(b, alu->src[0], 1);
2690*61046927SAndroid Build Coastguard Worker    nir_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f);
2691*61046927SAndroid Build Coastguard Worker 
2692*61046927SAndroid Build Coastguard Worker    if (alu->op == nir_op_fcos)
2693*61046927SAndroid Build Coastguard Worker       turns = nir_fadd_imm(b, turns, 0.25f);
2694*61046927SAndroid Build Coastguard Worker 
2695*61046927SAndroid Build Coastguard Worker    nir_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0);
2696*61046927SAndroid Build Coastguard Worker    return nir_fsin_agx(b, quadrants);
2697*61046927SAndroid Build Coastguard Worker }
2698*61046927SAndroid Build Coastguard Worker 
2699*61046927SAndroid Build Coastguard Worker static bool
agx_lower_sincos(nir_shader * shader)2700*61046927SAndroid Build Coastguard Worker agx_lower_sincos(nir_shader *shader)
2701*61046927SAndroid Build Coastguard Worker {
2702*61046927SAndroid Build Coastguard Worker    return nir_shader_lower_instructions(shader, agx_lower_sincos_filter,
2703*61046927SAndroid Build Coastguard Worker                                         agx_lower_sincos_impl, NULL);
2704*61046927SAndroid Build Coastguard Worker }
2705*61046927SAndroid Build Coastguard Worker 
2706*61046927SAndroid Build Coastguard Worker static bool
agx_lower_front_face(struct nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)2707*61046927SAndroid Build Coastguard Worker agx_lower_front_face(struct nir_builder *b, nir_intrinsic_instr *intr,
2708*61046927SAndroid Build Coastguard Worker                      UNUSED void *data)
2709*61046927SAndroid Build Coastguard Worker {
2710*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic != nir_intrinsic_load_front_face)
2711*61046927SAndroid Build Coastguard Worker       return false;
2712*61046927SAndroid Build Coastguard Worker 
2713*61046927SAndroid Build Coastguard Worker    nir_def *def = &intr->def;
2714*61046927SAndroid Build Coastguard Worker    assert(def->bit_size == 1);
2715*61046927SAndroid Build Coastguard Worker 
2716*61046927SAndroid Build Coastguard Worker    b->cursor = nir_before_instr(&intr->instr);
2717*61046927SAndroid Build Coastguard Worker    nir_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1)));
2718*61046927SAndroid Build Coastguard Worker    return true;
2719*61046927SAndroid Build Coastguard Worker }
2720*61046927SAndroid Build Coastguard Worker 
2721*61046927SAndroid Build Coastguard Worker /*
2722*61046927SAndroid Build Coastguard Worker  * Standard NIR optimization loop. This is run in agx_preprocess_nir, then once
2723*61046927SAndroid Build Coastguard Worker  * again at shader variant compile time. Unless there was a complex shader key,
2724*61046927SAndroid Build Coastguard Worker  * the latter run should be almost a no-op.
2725*61046927SAndroid Build Coastguard Worker  */
2726*61046927SAndroid Build Coastguard Worker static void
agx_optimize_loop_nir(nir_shader * nir)2727*61046927SAndroid Build Coastguard Worker agx_optimize_loop_nir(nir_shader *nir)
2728*61046927SAndroid Build Coastguard Worker {
2729*61046927SAndroid Build Coastguard Worker    bool progress;
2730*61046927SAndroid Build Coastguard Worker 
2731*61046927SAndroid Build Coastguard Worker    do {
2732*61046927SAndroid Build Coastguard Worker       progress = false;
2733*61046927SAndroid Build Coastguard Worker 
2734*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_copy_prop);
2735*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_remove_phis);
2736*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_dce);
2737*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_dead_cf);
2738*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_cse);
2739*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
2740*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_phi_precision);
2741*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_algebraic);
2742*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_constant_folding);
2743*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_undef);
2744*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
2745*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_loop_unroll);
2746*61046927SAndroid Build Coastguard Worker    } while (progress);
2747*61046927SAndroid Build Coastguard Worker }
2748*61046927SAndroid Build Coastguard Worker 
2749*61046927SAndroid Build Coastguard Worker static bool
mem_vectorize_cb(unsigned align_mul,unsigned align_offset,unsigned bit_size,unsigned num_components,nir_intrinsic_instr * low,nir_intrinsic_instr * high,void * data)2750*61046927SAndroid Build Coastguard Worker mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
2751*61046927SAndroid Build Coastguard Worker                  unsigned num_components, nir_intrinsic_instr *low,
2752*61046927SAndroid Build Coastguard Worker                  nir_intrinsic_instr *high, void *data)
2753*61046927SAndroid Build Coastguard Worker {
2754*61046927SAndroid Build Coastguard Worker    /* Must be aligned to the size of the load */
2755*61046927SAndroid Build Coastguard Worker    unsigned align = nir_combined_align(align_mul, align_offset);
2756*61046927SAndroid Build Coastguard Worker    if ((bit_size / 8) > align)
2757*61046927SAndroid Build Coastguard Worker       return false;
2758*61046927SAndroid Build Coastguard Worker 
2759*61046927SAndroid Build Coastguard Worker    if (num_components > 4)
2760*61046927SAndroid Build Coastguard Worker       return false;
2761*61046927SAndroid Build Coastguard Worker 
2762*61046927SAndroid Build Coastguard Worker    if (bit_size > 32)
2763*61046927SAndroid Build Coastguard Worker       return false;
2764*61046927SAndroid Build Coastguard Worker 
2765*61046927SAndroid Build Coastguard Worker    return true;
2766*61046927SAndroid Build Coastguard Worker }
2767*61046927SAndroid Build Coastguard Worker 
2768*61046927SAndroid Build Coastguard Worker static bool
set_speculate(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * _)2769*61046927SAndroid Build Coastguard Worker set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
2770*61046927SAndroid Build Coastguard Worker {
2771*61046927SAndroid Build Coastguard Worker    if (!nir_intrinsic_has_access(intr))
2772*61046927SAndroid Build Coastguard Worker       return false;
2773*61046927SAndroid Build Coastguard Worker 
2774*61046927SAndroid Build Coastguard Worker    nir_intrinsic_set_access(intr,
2775*61046927SAndroid Build Coastguard Worker                             ACCESS_CAN_SPECULATE | nir_intrinsic_access(intr));
2776*61046927SAndroid Build Coastguard Worker    return true;
2777*61046927SAndroid Build Coastguard Worker }
2778*61046927SAndroid Build Coastguard Worker 
2779*61046927SAndroid Build Coastguard Worker static void
agx_optimize_nir(nir_shader * nir,bool soft_fault,unsigned * preamble_size)2780*61046927SAndroid Build Coastguard Worker agx_optimize_nir(nir_shader *nir, bool soft_fault, unsigned *preamble_size)
2781*61046927SAndroid Build Coastguard Worker {
2782*61046927SAndroid Build Coastguard Worker    /* This runs only once up front since other optimizations don't affect it */
2783*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_shrink_stores, true);
2784*61046927SAndroid Build Coastguard Worker 
2785*61046927SAndroid Build Coastguard Worker    agx_optimize_loop_nir(nir);
2786*61046927SAndroid Build Coastguard Worker 
2787*61046927SAndroid Build Coastguard Worker    /* If soft fault is enabled, we can freely speculate everything. That lets us
2788*61046927SAndroid Build Coastguard Worker     * peephole select and form preambles more aggressively.
2789*61046927SAndroid Build Coastguard Worker     */
2790*61046927SAndroid Build Coastguard Worker    if (soft_fault) {
2791*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_shader_intrinsics_pass, set_speculate,
2792*61046927SAndroid Build Coastguard Worker                nir_metadata_control_flow, NULL);
2793*61046927SAndroid Build Coastguard Worker    }
2794*61046927SAndroid Build Coastguard Worker 
2795*61046927SAndroid Build Coastguard Worker    /* Peephole select again after setting the speculate flag but before
2796*61046927SAndroid Build Coastguard Worker     * vectorizing. This cleans up short-circuit loads in unrolled loops.
2797*61046927SAndroid Build Coastguard Worker     *
2798*61046927SAndroid Build Coastguard Worker     * XXX: Set indirect_load_ok once we can investigate CTS flakes.
2799*61046927SAndroid Build Coastguard Worker     */
2800*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
2801*61046927SAndroid Build Coastguard Worker 
2802*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_load_store_vectorize,
2803*61046927SAndroid Build Coastguard Worker             &(const nir_load_store_vectorize_options){
2804*61046927SAndroid Build Coastguard Worker                .modes = nir_var_mem_global | nir_var_mem_constant,
2805*61046927SAndroid Build Coastguard Worker                .callback = mem_vectorize_cb,
2806*61046927SAndroid Build Coastguard Worker             });
2807*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_pack);
2808*61046927SAndroid Build Coastguard Worker 
2809*61046927SAndroid Build Coastguard Worker    nir_convert_to_lcssa(nir, true, true);
2810*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_divergence_analysis);
2811*61046927SAndroid Build Coastguard Worker    bool progress = false;
2812*61046927SAndroid Build Coastguard Worker 
2813*61046927SAndroid Build Coastguard Worker    static const nir_lower_subgroups_options subgroups_options = {
2814*61046927SAndroid Build Coastguard Worker       .ballot_bit_size = 32,
2815*61046927SAndroid Build Coastguard Worker       .ballot_components = 1,
2816*61046927SAndroid Build Coastguard Worker       .lower_elect = true,
2817*61046927SAndroid Build Coastguard Worker       .lower_subgroup_masks = true,
2818*61046927SAndroid Build Coastguard Worker    };
2819*61046927SAndroid Build Coastguard Worker 
2820*61046927SAndroid Build Coastguard Worker    NIR_PASS(progress, nir, nir_opt_uniform_atomics, true);
2821*61046927SAndroid Build Coastguard Worker    NIR_PASS(progress, nir, nir_opt_uniform_subgroup, &subgroups_options);
2822*61046927SAndroid Build Coastguard Worker 
2823*61046927SAndroid Build Coastguard Worker    /* The above create operations that need lowering/optimizing */
2824*61046927SAndroid Build Coastguard Worker    if (progress) {
2825*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, agx_nir_lower_subgroups);
2826*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_opt_algebraic);
2827*61046927SAndroid Build Coastguard Worker    }
2828*61046927SAndroid Build Coastguard Worker 
2829*61046927SAndroid Build Coastguard Worker    progress = false;
2830*61046927SAndroid Build Coastguard Worker    NIR_PASS(progress, nir, agx_nir_lower_address);
2831*61046927SAndroid Build Coastguard Worker 
2832*61046927SAndroid Build Coastguard Worker    /* If address lowering made progress, clean up before forming preambles.
2833*61046927SAndroid Build Coastguard Worker     * Otherwise the optimized preambles might just be constants! Do it before
2834*61046927SAndroid Build Coastguard Worker     * lowering int64 too, to avoid lowering constant int64 arithmetic.
2835*61046927SAndroid Build Coastguard Worker     */
2836*61046927SAndroid Build Coastguard Worker    if (progress) {
2837*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_opt_constant_folding);
2838*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_opt_dce);
2839*61046927SAndroid Build Coastguard Worker    }
2840*61046927SAndroid Build Coastguard Worker 
2841*61046927SAndroid Build Coastguard Worker    /* Only lower int64 after optimizing address arithmetic, so that u2u64/i2i64
2842*61046927SAndroid Build Coastguard Worker     * conversions remain.
2843*61046927SAndroid Build Coastguard Worker     */
2844*61046927SAndroid Build Coastguard Worker    progress = false;
2845*61046927SAndroid Build Coastguard Worker    NIR_PASS(progress, nir, nir_lower_int64);
2846*61046927SAndroid Build Coastguard Worker 
2847*61046927SAndroid Build Coastguard Worker    /* If we lowered actual int64 arithmetic (not folded into the address
2848*61046927SAndroid Build Coastguard Worker     * calculations), then clean up after the lowering.
2849*61046927SAndroid Build Coastguard Worker     */
2850*61046927SAndroid Build Coastguard Worker    if (progress) {
2851*61046927SAndroid Build Coastguard Worker       do {
2852*61046927SAndroid Build Coastguard Worker          progress = false;
2853*61046927SAndroid Build Coastguard Worker 
2854*61046927SAndroid Build Coastguard Worker          NIR_PASS(progress, nir, nir_opt_algebraic);
2855*61046927SAndroid Build Coastguard Worker          NIR_PASS(progress, nir, nir_opt_constant_folding);
2856*61046927SAndroid Build Coastguard Worker          NIR_PASS(progress, nir, nir_opt_dce);
2857*61046927SAndroid Build Coastguard Worker       } while (progress);
2858*61046927SAndroid Build Coastguard Worker    }
2859*61046927SAndroid Build Coastguard Worker 
2860*61046927SAndroid Build Coastguard Worker    if (preamble_size && (!(agx_compiler_debug & AGX_DBG_NOPREAMBLE)))
2861*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, agx_nir_opt_preamble, preamble_size);
2862*61046927SAndroid Build Coastguard Worker 
2863*61046927SAndroid Build Coastguard Worker    /* Forming preambles may dramatically reduce the instruction count
2864*61046927SAndroid Build Coastguard Worker     * in certain blocks, causing some if-else statements to become
2865*61046927SAndroid Build Coastguard Worker     * trivial. We want to peephole select those, given that control flow
2866*61046927SAndroid Build Coastguard Worker     * prediction instructions are costly.
2867*61046927SAndroid Build Coastguard Worker     *
2868*61046927SAndroid Build Coastguard Worker     * We need to lower int64 again to deal with the resulting 64-bit csels.
2869*61046927SAndroid Build Coastguard Worker     */
2870*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
2871*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_int64);
2872*61046927SAndroid Build Coastguard Worker 
2873*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_algebraic_late);
2874*61046927SAndroid Build Coastguard Worker 
2875*61046927SAndroid Build Coastguard Worker    /* Fuse add/sub/multiplies/shifts after running opt_algebraic_late to fuse
2876*61046927SAndroid Build Coastguard Worker     * isub but before shifts are lowered.
2877*61046927SAndroid Build Coastguard Worker     */
2878*61046927SAndroid Build Coastguard Worker    do {
2879*61046927SAndroid Build Coastguard Worker       progress = false;
2880*61046927SAndroid Build Coastguard Worker 
2881*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_dce);
2882*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_cse);
2883*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, agx_nir_fuse_algebraic_late);
2884*61046927SAndroid Build Coastguard Worker    } while (progress);
2885*61046927SAndroid Build Coastguard Worker 
2886*61046927SAndroid Build Coastguard Worker    /* Do remaining lowering late, since this inserts &s for shifts so we want to
2887*61046927SAndroid Build Coastguard Worker     * do it after fusing constant shifts. Constant folding will clean up.
2888*61046927SAndroid Build Coastguard Worker     */
2889*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, agx_nir_lower_algebraic_late);
2890*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, agx_nir_fuse_selects);
2891*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_constant_folding);
2892*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_combine_barriers, NULL, NULL);
2893*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_copy_prop);
2894*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_dce);
2895*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_cse);
2896*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
2897*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
2898*61046927SAndroid Build Coastguard Worker 
2899*61046927SAndroid Build Coastguard Worker    /* Cleanup optimizations */
2900*61046927SAndroid Build Coastguard Worker    nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo |
2901*61046927SAndroid Build Coastguard Worker                                nir_move_load_input | nir_move_comparisons |
2902*61046927SAndroid Build Coastguard Worker                                nir_move_copies | nir_move_load_ssbo |
2903*61046927SAndroid Build Coastguard Worker                                nir_move_alu;
2904*61046927SAndroid Build Coastguard Worker 
2905*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_sink, move_all);
2906*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_move, move_all);
2907*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_phis_to_scalar, true);
2908*61046927SAndroid Build Coastguard Worker }
2909*61046927SAndroid Build Coastguard Worker 
2910*61046927SAndroid Build Coastguard Worker /*
2911*61046927SAndroid Build Coastguard Worker  * Varyings that are used as texture coordinates should be kept at fp32, because
2912*61046927SAndroid Build Coastguard Worker  * fp16 does not have enough precision for large textures. It's technically
2913*61046927SAndroid Build Coastguard Worker  * conformant not to, but every app gets this wrong.
2914*61046927SAndroid Build Coastguard Worker  */
2915*61046927SAndroid Build Coastguard Worker static bool
gather_texcoords(nir_builder * b,nir_instr * instr,void * data)2916*61046927SAndroid Build Coastguard Worker gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
2917*61046927SAndroid Build Coastguard Worker {
2918*61046927SAndroid Build Coastguard Worker    uint64_t *mask = data;
2919*61046927SAndroid Build Coastguard Worker 
2920*61046927SAndroid Build Coastguard Worker    if (instr->type != nir_instr_type_tex)
2921*61046927SAndroid Build Coastguard Worker       return false;
2922*61046927SAndroid Build Coastguard Worker 
2923*61046927SAndroid Build Coastguard Worker    nir_tex_instr *tex = nir_instr_as_tex(instr);
2924*61046927SAndroid Build Coastguard Worker 
2925*61046927SAndroid Build Coastguard Worker    int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
2926*61046927SAndroid Build Coastguard Worker    if (coord_idx < 0)
2927*61046927SAndroid Build Coastguard Worker       return false;
2928*61046927SAndroid Build Coastguard Worker 
2929*61046927SAndroid Build Coastguard Worker    nir_src src = tex->src[coord_idx].src;
2930*61046927SAndroid Build Coastguard Worker    nir_scalar x = nir_scalar_resolved(src.ssa, 0);
2931*61046927SAndroid Build Coastguard Worker    nir_scalar y = nir_scalar_resolved(src.ssa, 1);
2932*61046927SAndroid Build Coastguard Worker 
2933*61046927SAndroid Build Coastguard Worker    if (x.def != y.def)
2934*61046927SAndroid Build Coastguard Worker       return false;
2935*61046927SAndroid Build Coastguard Worker 
2936*61046927SAndroid Build Coastguard Worker    nir_instr *parent = x.def->parent_instr;
2937*61046927SAndroid Build Coastguard Worker 
2938*61046927SAndroid Build Coastguard Worker    if (parent->type != nir_instr_type_intrinsic)
2939*61046927SAndroid Build Coastguard Worker       return false;
2940*61046927SAndroid Build Coastguard Worker 
2941*61046927SAndroid Build Coastguard Worker    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
2942*61046927SAndroid Build Coastguard Worker 
2943*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
2944*61046927SAndroid Build Coastguard Worker       return false;
2945*61046927SAndroid Build Coastguard Worker 
2946*61046927SAndroid Build Coastguard Worker    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
2947*61046927SAndroid Build Coastguard Worker    *mask |= BITFIELD64_BIT(sem.location);
2948*61046927SAndroid Build Coastguard Worker    return false;
2949*61046927SAndroid Build Coastguard Worker }
2950*61046927SAndroid Build Coastguard Worker 
2951*61046927SAndroid Build Coastguard Worker static bool
gather_interp(nir_builder * b,nir_intrinsic_instr * intr,void * data)2952*61046927SAndroid Build Coastguard Worker gather_interp(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2953*61046927SAndroid Build Coastguard Worker {
2954*61046927SAndroid Build Coastguard Worker    struct agx_interp_info *masks = data;
2955*61046927SAndroid Build Coastguard Worker 
2956*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic == nir_intrinsic_load_input) {
2957*61046927SAndroid Build Coastguard Worker       nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
2958*61046927SAndroid Build Coastguard Worker       masks->flat |= BITFIELD64_RANGE(sem.location, sem.num_slots);
2959*61046927SAndroid Build Coastguard Worker    } else if (intr->intrinsic == nir_intrinsic_load_interpolated_input &&
2960*61046927SAndroid Build Coastguard Worker               nir_intrinsic_interp_mode(nir_src_as_intrinsic(intr->src[0])) ==
2961*61046927SAndroid Build Coastguard Worker                  INTERP_MODE_NOPERSPECTIVE) {
2962*61046927SAndroid Build Coastguard Worker       nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
2963*61046927SAndroid Build Coastguard Worker       masks->linear |= BITFIELD64_RANGE(sem.location, sem.num_slots);
2964*61046927SAndroid Build Coastguard Worker    }
2965*61046927SAndroid Build Coastguard Worker 
2966*61046927SAndroid Build Coastguard Worker    return false;
2967*61046927SAndroid Build Coastguard Worker }
2968*61046927SAndroid Build Coastguard Worker 
2969*61046927SAndroid Build Coastguard Worker /*
2970*61046927SAndroid Build Coastguard Worker  * Build a bit mask of varyings (by location) that are flatshaded and linear
2971*61046927SAndroid Build Coastguard Worker  * shaded. This information is needed by the driver.
2972*61046927SAndroid Build Coastguard Worker  */
2973*61046927SAndroid Build Coastguard Worker struct agx_interp_info
agx_gather_interp_info(nir_shader * nir)2974*61046927SAndroid Build Coastguard Worker agx_gather_interp_info(nir_shader *nir)
2975*61046927SAndroid Build Coastguard Worker {
2976*61046927SAndroid Build Coastguard Worker    assert(nir->info.stage == MESA_SHADER_FRAGMENT);
2977*61046927SAndroid Build Coastguard Worker 
2978*61046927SAndroid Build Coastguard Worker    struct agx_interp_info masks = {0};
2979*61046927SAndroid Build Coastguard Worker    nir_shader_intrinsics_pass(nir, gather_interp, nir_metadata_all, &masks);
2980*61046927SAndroid Build Coastguard Worker    return masks;
2981*61046927SAndroid Build Coastguard Worker }
2982*61046927SAndroid Build Coastguard Worker 
2983*61046927SAndroid Build Coastguard Worker /*
2984*61046927SAndroid Build Coastguard Worker  * Build a bit mask of varyings (by location) that are used as texture
2985*61046927SAndroid Build Coastguard Worker  * coordinates. This information is needed by lower_mediump_io.
2986*61046927SAndroid Build Coastguard Worker  */
2987*61046927SAndroid Build Coastguard Worker uint64_t
agx_gather_texcoords(nir_shader * nir)2988*61046927SAndroid Build Coastguard Worker agx_gather_texcoords(nir_shader *nir)
2989*61046927SAndroid Build Coastguard Worker {
2990*61046927SAndroid Build Coastguard Worker    assert(nir->info.stage == MESA_SHADER_FRAGMENT);
2991*61046927SAndroid Build Coastguard Worker 
2992*61046927SAndroid Build Coastguard Worker    uint64_t mask = 0;
2993*61046927SAndroid Build Coastguard Worker    nir_shader_instructions_pass(nir, gather_texcoords, nir_metadata_all, &mask);
2994*61046927SAndroid Build Coastguard Worker    return mask;
2995*61046927SAndroid Build Coastguard Worker }
2996*61046927SAndroid Build Coastguard Worker 
2997*61046927SAndroid Build Coastguard Worker static nir_mem_access_size_align
mem_access_size_align_cb(nir_intrinsic_op intrin,uint8_t bytes,uint8_t bit_size,uint32_t align,uint32_t align_offset,bool offset_is_const,const void * cb_data)2998*61046927SAndroid Build Coastguard Worker mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
2999*61046927SAndroid Build Coastguard Worker                          uint8_t bit_size, uint32_t align,
3000*61046927SAndroid Build Coastguard Worker                          uint32_t align_offset, bool offset_is_const,
3001*61046927SAndroid Build Coastguard Worker                          const void *cb_data)
3002*61046927SAndroid Build Coastguard Worker {
3003*61046927SAndroid Build Coastguard Worker    align = nir_combined_align(align, align_offset);
3004*61046927SAndroid Build Coastguard Worker 
3005*61046927SAndroid Build Coastguard Worker    assert(util_is_power_of_two_nonzero(align));
3006*61046927SAndroid Build Coastguard Worker 
3007*61046927SAndroid Build Coastguard Worker    if ((bytes & 1) || (align == 1))
3008*61046927SAndroid Build Coastguard Worker       bit_size = 8;
3009*61046927SAndroid Build Coastguard Worker    else if ((bytes & 2) || (align == 2))
3010*61046927SAndroid Build Coastguard Worker       bit_size = 16;
3011*61046927SAndroid Build Coastguard Worker    else if (bit_size >= 32)
3012*61046927SAndroid Build Coastguard Worker       bit_size = 32;
3013*61046927SAndroid Build Coastguard Worker 
3014*61046927SAndroid Build Coastguard Worker    return (nir_mem_access_size_align){
3015*61046927SAndroid Build Coastguard Worker       .num_components = MIN2(bytes / (bit_size / 8), 4),
3016*61046927SAndroid Build Coastguard Worker       .bit_size = bit_size,
3017*61046927SAndroid Build Coastguard Worker       .align = bit_size / 8,
3018*61046927SAndroid Build Coastguard Worker    };
3019*61046927SAndroid Build Coastguard Worker }
3020*61046927SAndroid Build Coastguard Worker 
3021*61046927SAndroid Build Coastguard Worker static unsigned
lower_bit_size_callback(const nir_instr * instr,UNUSED void * _)3022*61046927SAndroid Build Coastguard Worker lower_bit_size_callback(const nir_instr *instr, UNUSED void *_)
3023*61046927SAndroid Build Coastguard Worker {
3024*61046927SAndroid Build Coastguard Worker    if (instr->type == nir_instr_type_intrinsic) {
3025*61046927SAndroid Build Coastguard Worker       /* Handle small subgroup ops */
3026*61046927SAndroid Build Coastguard Worker       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3027*61046927SAndroid Build Coastguard Worker 
3028*61046927SAndroid Build Coastguard Worker       switch (intr->intrinsic) {
3029*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_reduce:
3030*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_exclusive_scan:
3031*61046927SAndroid Build Coastguard Worker       case nir_intrinsic_inclusive_scan:
3032*61046927SAndroid Build Coastguard Worker          /* The identity for iand doesn't work for lowered 1-bit booleans, so
3033*61046927SAndroid Build Coastguard Worker           * lower that explicitly.
3034*61046927SAndroid Build Coastguard Worker           */
3035*61046927SAndroid Build Coastguard Worker          if (nir_intrinsic_reduction_op(intr) == nir_op_iand &&
3036*61046927SAndroid Build Coastguard Worker              intr->def.bit_size == 1)
3037*61046927SAndroid Build Coastguard Worker             return 16;
3038*61046927SAndroid Build Coastguard Worker 
3039*61046927SAndroid Build Coastguard Worker          /* In general, we have 16-bit ops instead of 8-bit, so lower those. */
3040*61046927SAndroid Build Coastguard Worker          return intr->def.bit_size == 8 ? 16 : 0;
3041*61046927SAndroid Build Coastguard Worker       default:
3042*61046927SAndroid Build Coastguard Worker          return 0;
3043*61046927SAndroid Build Coastguard Worker       }
3044*61046927SAndroid Build Coastguard Worker    } else if (instr->type == nir_instr_type_alu) {
3045*61046927SAndroid Build Coastguard Worker       /* Lower 8-bit ALU to 16-bit. We check the destination, as we do not want
3046*61046927SAndroid Build Coastguard Worker        * to lower conversions from 8-bit to larger types. Those conversions get
3047*61046927SAndroid Build Coastguard Worker        * implemented natively.
3048*61046927SAndroid Build Coastguard Worker        */
3049*61046927SAndroid Build Coastguard Worker       nir_alu_instr *alu = nir_instr_as_alu(instr);
3050*61046927SAndroid Build Coastguard Worker       if (alu->def.bit_size == 8 && !is_conversion_to_8bit(alu->op))
3051*61046927SAndroid Build Coastguard Worker          return 16;
3052*61046927SAndroid Build Coastguard Worker       else if (alu->def.bit_size == 1 && alu->src[0].src.ssa->bit_size == 8)
3053*61046927SAndroid Build Coastguard Worker          return 16 /* comparisons */;
3054*61046927SAndroid Build Coastguard Worker    }
3055*61046927SAndroid Build Coastguard Worker 
3056*61046927SAndroid Build Coastguard Worker    return 0;
3057*61046927SAndroid Build Coastguard Worker }
3058*61046927SAndroid Build Coastguard Worker 
3059*61046927SAndroid Build Coastguard Worker static bool
lower_load_from_texture_handle(nir_builder * b,nir_intrinsic_instr * intr,void * data)3060*61046927SAndroid Build Coastguard Worker lower_load_from_texture_handle(nir_builder *b, nir_intrinsic_instr *intr,
3061*61046927SAndroid Build Coastguard Worker                                void *data)
3062*61046927SAndroid Build Coastguard Worker {
3063*61046927SAndroid Build Coastguard Worker    if (intr->intrinsic != nir_intrinsic_load_from_texture_handle_agx)
3064*61046927SAndroid Build Coastguard Worker       return false;
3065*61046927SAndroid Build Coastguard Worker 
3066*61046927SAndroid Build Coastguard Worker    /* Bindless handles are a vec2, where the first source is the (constant)
3067*61046927SAndroid Build Coastguard Worker     * uniform register number and the second source is the byte offset.
3068*61046927SAndroid Build Coastguard Worker     */
3069*61046927SAndroid Build Coastguard Worker    nir_scalar uniform = nir_scalar_resolved(intr->src[0].ssa, 0);
3070*61046927SAndroid Build Coastguard Worker    unsigned uniform_idx = nir_scalar_as_uint(uniform);
3071*61046927SAndroid Build Coastguard Worker 
3072*61046927SAndroid Build Coastguard Worker    b->cursor = nir_instr_remove(&intr->instr);
3073*61046927SAndroid Build Coastguard Worker    nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
3074*61046927SAndroid Build Coastguard Worker    nir_def *offset = nir_u2u64(b, nir_channel(b, intr->src[0].ssa, 1));
3075*61046927SAndroid Build Coastguard Worker 
3076*61046927SAndroid Build Coastguard Worker    nir_def_rewrite_uses(&intr->def, nir_iadd(b, base, offset));
3077*61046927SAndroid Build Coastguard Worker    return true;
3078*61046927SAndroid Build Coastguard Worker }
3079*61046927SAndroid Build Coastguard Worker 
3080*61046927SAndroid Build Coastguard Worker static void
agx_remove_unreachable_block(agx_block * block)3081*61046927SAndroid Build Coastguard Worker agx_remove_unreachable_block(agx_block *block)
3082*61046927SAndroid Build Coastguard Worker {
3083*61046927SAndroid Build Coastguard Worker    /* Delete the edges */
3084*61046927SAndroid Build Coastguard Worker    agx_foreach_successor(block, succ) {
3085*61046927SAndroid Build Coastguard Worker       unsigned block_idx = agx_predecessor_index(succ, block);
3086*61046927SAndroid Build Coastguard Worker 
3087*61046927SAndroid Build Coastguard Worker       /* Remove the corresponding predecessor from the successor */
3088*61046927SAndroid Build Coastguard Worker       struct util_dynarray *blocks = &succ->predecessors;
3089*61046927SAndroid Build Coastguard Worker       int remaining = agx_num_predecessors(succ) - (block_idx + 1);
3090*61046927SAndroid Build Coastguard Worker       assert(remaining >= 0);
3091*61046927SAndroid Build Coastguard Worker 
3092*61046927SAndroid Build Coastguard Worker       memcpy(util_dynarray_element(blocks, agx_block *, block_idx),
3093*61046927SAndroid Build Coastguard Worker              util_dynarray_element(blocks, agx_block *, block_idx + 1),
3094*61046927SAndroid Build Coastguard Worker              remaining * sizeof(agx_block *));
3095*61046927SAndroid Build Coastguard Worker       blocks->size -= sizeof(agx_block *);
3096*61046927SAndroid Build Coastguard Worker 
3097*61046927SAndroid Build Coastguard Worker       /* Remove the corresponding source from the phis */
3098*61046927SAndroid Build Coastguard Worker       agx_foreach_phi_in_block(succ, phi) {
3099*61046927SAndroid Build Coastguard Worker          assert(block_idx + 1 <= phi->nr_srcs);
3100*61046927SAndroid Build Coastguard Worker 
3101*61046927SAndroid Build Coastguard Worker          memcpy(phi->src + block_idx, phi->src + block_idx + 1,
3102*61046927SAndroid Build Coastguard Worker                 (phi->nr_srcs - (block_idx + 1)) * sizeof(phi->src[0]));
3103*61046927SAndroid Build Coastguard Worker 
3104*61046927SAndroid Build Coastguard Worker          phi->nr_srcs--;
3105*61046927SAndroid Build Coastguard Worker 
3106*61046927SAndroid Build Coastguard Worker          /* This might cause phis to become trivial. Lower 1-source phis to
3107*61046927SAndroid Build Coastguard Worker           * moves and let copyprop take it from here.
3108*61046927SAndroid Build Coastguard Worker           */
3109*61046927SAndroid Build Coastguard Worker          if (phi->nr_srcs == 1) {
3110*61046927SAndroid Build Coastguard Worker             phi->op = AGX_OPCODE_MOV;
3111*61046927SAndroid Build Coastguard Worker          }
3112*61046927SAndroid Build Coastguard Worker       }
3113*61046927SAndroid Build Coastguard Worker    }
3114*61046927SAndroid Build Coastguard Worker 
3115*61046927SAndroid Build Coastguard Worker    /* Remove the successor from the predecessor. */
3116*61046927SAndroid Build Coastguard Worker    block->successors[0] = NULL;
3117*61046927SAndroid Build Coastguard Worker    block->successors[1] = NULL;
3118*61046927SAndroid Build Coastguard Worker 
3119*61046927SAndroid Build Coastguard Worker    /* Note: we do not remove the block itself, although it is now fully orphaned
3120*61046927SAndroid Build Coastguard Worker     * in the control flow graph. We still need it in source order if it has any
3121*61046927SAndroid Build Coastguard Worker     * pop_exec instructions, for a loop continue block.
3122*61046927SAndroid Build Coastguard Worker     *
3123*61046927SAndroid Build Coastguard Worker     * TODO: Is there a better way to handle this?
3124*61046927SAndroid Build Coastguard Worker     *
3125*61046927SAndroid Build Coastguard Worker     * Affects: dEQP-VK.graphicsfuzz.cov-matching-if-always-true-inside-loop
3126*61046927SAndroid Build Coastguard Worker     */
3127*61046927SAndroid Build Coastguard Worker }
3128*61046927SAndroid Build Coastguard Worker 
3129*61046927SAndroid Build Coastguard Worker /*
3130*61046927SAndroid Build Coastguard Worker  * NIR sometimes contains unreachable blocks (e.g. due to infinite loops). These
3131*61046927SAndroid Build Coastguard Worker  * blocks have no predecessors, but do have successors and can contribute to
3132*61046927SAndroid Build Coastguard Worker  * phis. They are dead and do not need to be here. Further, they violate the IR
3133*61046927SAndroid Build Coastguard Worker  * invariant:
3134*61046927SAndroid Build Coastguard Worker  *
3135*61046927SAndroid Build Coastguard Worker  *    Live-in sources are live-out in all predecessors.
3136*61046927SAndroid Build Coastguard Worker  *
3137*61046927SAndroid Build Coastguard Worker  * ...which RA depends on when handling live range splits. The simplest solution
3138*61046927SAndroid Build Coastguard Worker  * is to simply delete these dead blocks. Fortunately, because they are
3139*61046927SAndroid Build Coastguard Worker  * unreachable, this does not have any ill effects. Notably, this cannot
3140*61046927SAndroid Build Coastguard Worker  * introduce critical edges.
3141*61046927SAndroid Build Coastguard Worker  *
3142*61046927SAndroid Build Coastguard Worker  * Deleting a block may cause a successor to become unreachable, so we use a
3143*61046927SAndroid Build Coastguard Worker  * fixed-point algorithm to converge.
3144*61046927SAndroid Build Coastguard Worker  */
3145*61046927SAndroid Build Coastguard Worker static void
agx_remove_unreachable_blocks(agx_context * ctx)3146*61046927SAndroid Build Coastguard Worker agx_remove_unreachable_blocks(agx_context *ctx)
3147*61046927SAndroid Build Coastguard Worker {
3148*61046927SAndroid Build Coastguard Worker    agx_block *start = agx_start_block(ctx);
3149*61046927SAndroid Build Coastguard Worker    bool progress;
3150*61046927SAndroid Build Coastguard Worker 
3151*61046927SAndroid Build Coastguard Worker    do {
3152*61046927SAndroid Build Coastguard Worker       progress = false;
3153*61046927SAndroid Build Coastguard Worker 
3154*61046927SAndroid Build Coastguard Worker       agx_foreach_block_safe(ctx, pred) {
3155*61046927SAndroid Build Coastguard Worker          if (pred != start && agx_num_predecessors(pred) == 0 &&
3156*61046927SAndroid Build Coastguard Worker              agx_num_successors(pred) > 0) {
3157*61046927SAndroid Build Coastguard Worker 
3158*61046927SAndroid Build Coastguard Worker             agx_remove_unreachable_block(pred);
3159*61046927SAndroid Build Coastguard Worker             progress = true;
3160*61046927SAndroid Build Coastguard Worker          }
3161*61046927SAndroid Build Coastguard Worker       }
3162*61046927SAndroid Build Coastguard Worker    } while (progress);
3163*61046927SAndroid Build Coastguard Worker }
3164*61046927SAndroid Build Coastguard Worker 
3165*61046927SAndroid Build Coastguard Worker static bool
agx_should_dump(nir_shader * nir,unsigned agx_dbg_bit)3166*61046927SAndroid Build Coastguard Worker agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
3167*61046927SAndroid Build Coastguard Worker {
3168*61046927SAndroid Build Coastguard Worker    return (agx_compiler_debug & agx_dbg_bit) &&
3169*61046927SAndroid Build Coastguard Worker           !(nir->info.internal && !(agx_compiler_debug & AGX_DBG_INTERNAL));
3170*61046927SAndroid Build Coastguard Worker }
3171*61046927SAndroid Build Coastguard Worker 
3172*61046927SAndroid Build Coastguard Worker static unsigned
agx_compile_function_nir(nir_shader * nir,nir_function_impl * impl,struct agx_shader_key * key,struct util_debug_callback * debug,struct util_dynarray * binary,struct agx_shader_info * out)3173*61046927SAndroid Build Coastguard Worker agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
3174*61046927SAndroid Build Coastguard Worker                          struct agx_shader_key *key,
3175*61046927SAndroid Build Coastguard Worker                          struct util_debug_callback *debug,
3176*61046927SAndroid Build Coastguard Worker                          struct util_dynarray *binary,
3177*61046927SAndroid Build Coastguard Worker                          struct agx_shader_info *out)
3178*61046927SAndroid Build Coastguard Worker {
3179*61046927SAndroid Build Coastguard Worker    nir_index_blocks(impl);
3180*61046927SAndroid Build Coastguard Worker    nir_index_ssa_defs(impl);
3181*61046927SAndroid Build Coastguard Worker 
3182*61046927SAndroid Build Coastguard Worker    agx_context *ctx = rzalloc(NULL, agx_context);
3183*61046927SAndroid Build Coastguard Worker    ctx->nir = nir;
3184*61046927SAndroid Build Coastguard Worker    ctx->is_preamble = impl->function->is_preamble;
3185*61046927SAndroid Build Coastguard Worker    ctx->out = out;
3186*61046927SAndroid Build Coastguard Worker    ctx->key = key;
3187*61046927SAndroid Build Coastguard Worker    ctx->stage = nir->info.stage;
3188*61046927SAndroid Build Coastguard Worker    ctx->allocated_vec = _mesa_hash_table_u64_create(ctx);
3189*61046927SAndroid Build Coastguard Worker    ctx->indexed_nir_blocks = rzalloc_array(ctx, agx_block *, impl->num_blocks);
3190*61046927SAndroid Build Coastguard Worker    list_inithead(&ctx->blocks);
3191*61046927SAndroid Build Coastguard Worker 
3192*61046927SAndroid Build Coastguard Worker    if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_preamble) {
3193*61046927SAndroid Build Coastguard Worker       ctx->any_cf = key->fs.inside_sample_loop;
3194*61046927SAndroid Build Coastguard Worker    }
3195*61046927SAndroid Build Coastguard Worker 
3196*61046927SAndroid Build Coastguard Worker    ctx->alloc = impl->ssa_alloc;
3197*61046927SAndroid Build Coastguard Worker    emit_cf_list(ctx, &impl->body);
3198*61046927SAndroid Build Coastguard Worker    agx_emit_phis_deferred(ctx);
3199*61046927SAndroid Build Coastguard Worker 
3200*61046927SAndroid Build Coastguard Worker    /* Index blocks now that we're done emitting so the order is consistent. Do
3201*61046927SAndroid Build Coastguard Worker     * this before agx_remove_unreachable_blocks so we match NIR indexing. This
3202*61046927SAndroid Build Coastguard Worker     * makes for easier debugging.
3203*61046927SAndroid Build Coastguard Worker     */
3204*61046927SAndroid Build Coastguard Worker    agx_foreach_block(ctx, block) {
3205*61046927SAndroid Build Coastguard Worker       block->index = ctx->num_blocks++;
3206*61046927SAndroid Build Coastguard Worker    }
3207*61046927SAndroid Build Coastguard Worker 
3208*61046927SAndroid Build Coastguard Worker    agx_remove_unreachable_blocks(ctx);
3209*61046927SAndroid Build Coastguard Worker 
3210*61046927SAndroid Build Coastguard Worker    /* Only allocate scratch if it's statically used, regardless of if the NIR
3211*61046927SAndroid Build Coastguard Worker     * info claims otherwise.
3212*61046927SAndroid Build Coastguard Worker     */
3213*61046927SAndroid Build Coastguard Worker    if (ctx->any_scratch) {
3214*61046927SAndroid Build Coastguard Worker       assert(!ctx->is_preamble && "preambles don't use scratch");
3215*61046927SAndroid Build Coastguard Worker       ctx->scratch_size = ALIGN(nir->scratch_size, 16);
3216*61046927SAndroid Build Coastguard Worker    }
3217*61046927SAndroid Build Coastguard Worker 
3218*61046927SAndroid Build Coastguard Worker    /* Stop the main shader or preamble shader after the exit block. For real
3219*61046927SAndroid Build Coastguard Worker     * functions, we would return here.
3220*61046927SAndroid Build Coastguard Worker     */
3221*61046927SAndroid Build Coastguard Worker    if (!ctx->key->no_stop || ctx->is_preamble) {
3222*61046927SAndroid Build Coastguard Worker       agx_block *last_block = list_last_entry(&ctx->blocks, agx_block, link);
3223*61046927SAndroid Build Coastguard Worker       agx_builder _b = agx_init_builder(ctx, agx_after_block(last_block));
3224*61046927SAndroid Build Coastguard Worker       agx_stop(&_b);
3225*61046927SAndroid Build Coastguard Worker    }
3226*61046927SAndroid Build Coastguard Worker 
3227*61046927SAndroid Build Coastguard Worker    agx_validate(ctx, "IR translation");
3228*61046927SAndroid Build Coastguard Worker 
3229*61046927SAndroid Build Coastguard Worker    if (likely(!(agx_compiler_debug & AGX_DBG_NOOPT))) {
3230*61046927SAndroid Build Coastguard Worker       /* Eliminate dead instructions before CSE to avoid silly scheduling */
3231*61046927SAndroid Build Coastguard Worker       agx_dce(ctx, false);
3232*61046927SAndroid Build Coastguard Worker 
3233*61046927SAndroid Build Coastguard Worker       /* CSE before eliminating dead destinations so that subdivision is
3234*61046927SAndroid Build Coastguard Worker        * optimized properly.
3235*61046927SAndroid Build Coastguard Worker        */
3236*61046927SAndroid Build Coastguard Worker       agx_opt_cse(ctx);
3237*61046927SAndroid Build Coastguard Worker 
3238*61046927SAndroid Build Coastguard Worker       /* After DCE, use counts are right so we can run the optimizer. */
3239*61046927SAndroid Build Coastguard Worker       agx_optimizer(ctx);
3240*61046927SAndroid Build Coastguard Worker       agx_opt_compact_constants(ctx);
3241*61046927SAndroid Build Coastguard Worker 
3242*61046927SAndroid Build Coastguard Worker       /* After inlining constants, promote what's left */
3243*61046927SAndroid Build Coastguard Worker       if (key->promote_constants && !key->secondary &&
3244*61046927SAndroid Build Coastguard Worker           !(agx_compiler_debug & AGX_DBG_NOPROMOTE)) {
3245*61046927SAndroid Build Coastguard Worker          agx_opt_promote_constants(ctx);
3246*61046927SAndroid Build Coastguard Worker       }
3247*61046927SAndroid Build Coastguard Worker    }
3248*61046927SAndroid Build Coastguard Worker 
3249*61046927SAndroid Build Coastguard Worker    /* For correctness, lower uniform sources after copyprop (for correctness,
3250*61046927SAndroid Build Coastguard Worker     * as copyprop creates uniform sources). To keep register pressure in
3251*61046927SAndroid Build Coastguard Worker     * check, lower after CSE, since moves are cheaper than registers.
3252*61046927SAndroid Build Coastguard Worker     */
3253*61046927SAndroid Build Coastguard Worker    agx_lower_uniform_sources(ctx);
3254*61046927SAndroid Build Coastguard Worker 
3255*61046927SAndroid Build Coastguard Worker    /* RA correctness depends on DCE */
3256*61046927SAndroid Build Coastguard Worker    agx_dce(ctx, true);
3257*61046927SAndroid Build Coastguard Worker    agx_validate(ctx, "Pre-RA passes");
3258*61046927SAndroid Build Coastguard Worker 
3259*61046927SAndroid Build Coastguard Worker    if (agx_should_dump(nir, AGX_DBG_SHADERS))
3260*61046927SAndroid Build Coastguard Worker       agx_print_shader(ctx, stdout);
3261*61046927SAndroid Build Coastguard Worker 
3262*61046927SAndroid Build Coastguard Worker    if (likely(!(agx_compiler_debug & AGX_DBG_NOSCHED))) {
3263*61046927SAndroid Build Coastguard Worker       agx_pressure_schedule(ctx);
3264*61046927SAndroid Build Coastguard Worker       agx_validate(ctx, "Pre-RA scheduler");
3265*61046927SAndroid Build Coastguard Worker    }
3266*61046927SAndroid Build Coastguard Worker 
3267*61046927SAndroid Build Coastguard Worker    if (agx_should_dump(nir, AGX_DBG_SHADERS))
3268*61046927SAndroid Build Coastguard Worker       agx_print_shader(ctx, stdout);
3269*61046927SAndroid Build Coastguard Worker 
3270*61046927SAndroid Build Coastguard Worker    agx_ra(ctx);
3271*61046927SAndroid Build Coastguard Worker    agx_validate(ctx, "RA");
3272*61046927SAndroid Build Coastguard Worker    agx_lower_64bit_postra(ctx);
3273*61046927SAndroid Build Coastguard Worker 
3274*61046927SAndroid Build Coastguard Worker    if (ctx->scratch_size > 0) {
3275*61046927SAndroid Build Coastguard Worker       /* Apple always allocate 40 more bytes in the entrypoint and align to 4. */
3276*61046927SAndroid Build Coastguard Worker       uint64_t stack_size = ALIGN(DIV_ROUND_UP(ctx->scratch_size, 4) + 10, 4);
3277*61046927SAndroid Build Coastguard Worker 
3278*61046927SAndroid Build Coastguard Worker       assert(stack_size < INT16_MAX);
3279*61046927SAndroid Build Coastguard Worker 
3280*61046927SAndroid Build Coastguard Worker       agx_block *start_block = agx_start_block(ctx);
3281*61046927SAndroid Build Coastguard Worker       agx_builder _b = agx_init_builder(ctx, agx_before_block(start_block));
3282*61046927SAndroid Build Coastguard Worker       agx_stack_adjust(&_b, stack_size);
3283*61046927SAndroid Build Coastguard Worker 
3284*61046927SAndroid Build Coastguard Worker       /* If we're going to execute multiple times, make sure we clean up after
3285*61046927SAndroid Build Coastguard Worker        * ourselves, else the hardware faults.
3286*61046927SAndroid Build Coastguard Worker        */
3287*61046927SAndroid Build Coastguard Worker       if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->is_preamble &&
3288*61046927SAndroid Build Coastguard Worker           ctx->key->fs.inside_sample_loop) {
3289*61046927SAndroid Build Coastguard Worker 
3290*61046927SAndroid Build Coastguard Worker          _b = agx_init_builder(ctx, agx_after_block(agx_end_block(ctx)));
3291*61046927SAndroid Build Coastguard Worker          agx_stack_adjust(&_b, -stack_size);
3292*61046927SAndroid Build Coastguard Worker       }
3293*61046927SAndroid Build Coastguard Worker 
3294*61046927SAndroid Build Coastguard Worker       if (ctx->is_preamble)
3295*61046927SAndroid Build Coastguard Worker          out->preamble_scratch_size = stack_size;
3296*61046927SAndroid Build Coastguard Worker       else
3297*61046927SAndroid Build Coastguard Worker          out->scratch_size = stack_size;
3298*61046927SAndroid Build Coastguard Worker    }
3299*61046927SAndroid Build Coastguard Worker 
3300*61046927SAndroid Build Coastguard Worker    if (ctx->stage == MESA_SHADER_VERTEX && !impl->function->is_preamble &&
3301*61046927SAndroid Build Coastguard Worker        !ctx->key->secondary)
3302*61046927SAndroid Build Coastguard Worker       agx_set_st_vary_final(ctx);
3303*61046927SAndroid Build Coastguard Worker 
3304*61046927SAndroid Build Coastguard Worker    agx_insert_waits(ctx);
3305*61046927SAndroid Build Coastguard Worker    agx_opt_empty_else(ctx);
3306*61046927SAndroid Build Coastguard Worker    agx_opt_break_if(ctx);
3307*61046927SAndroid Build Coastguard Worker    agx_opt_jmp_none(ctx);
3308*61046927SAndroid Build Coastguard Worker 
3309*61046927SAndroid Build Coastguard Worker    if (ctx->any_quad_divergent_shuffle)
3310*61046927SAndroid Build Coastguard Worker       agx_lower_divergent_shuffle(ctx);
3311*61046927SAndroid Build Coastguard Worker 
3312*61046927SAndroid Build Coastguard Worker    agx_lower_pseudo(ctx);
3313*61046927SAndroid Build Coastguard Worker 
3314*61046927SAndroid Build Coastguard Worker    if (agx_should_dump(nir, AGX_DBG_SHADERS))
3315*61046927SAndroid Build Coastguard Worker       agx_print_shader(ctx, stdout);
3316*61046927SAndroid Build Coastguard Worker 
3317*61046927SAndroid Build Coastguard Worker    /* Pad binary */
3318*61046927SAndroid Build Coastguard Worker    if (binary->size % AGX_CODE_ALIGN) {
3319*61046927SAndroid Build Coastguard Worker       unsigned ngrow = AGX_CODE_ALIGN - (binary->size % AGX_CODE_ALIGN);
3320*61046927SAndroid Build Coastguard Worker       memset(util_dynarray_grow_bytes(binary, ngrow, 1), 0, ngrow);
3321*61046927SAndroid Build Coastguard Worker    }
3322*61046927SAndroid Build Coastguard Worker 
3323*61046927SAndroid Build Coastguard Worker    unsigned offset = binary->size;
3324*61046927SAndroid Build Coastguard Worker    assert((offset % AGX_CODE_ALIGN) == 0);
3325*61046927SAndroid Build Coastguard Worker 
3326*61046927SAndroid Build Coastguard Worker    agx_pack_binary(ctx, binary);
3327*61046927SAndroid Build Coastguard Worker 
3328*61046927SAndroid Build Coastguard Worker    unsigned nr_gprs = ctx->max_reg + 1;
3329*61046927SAndroid Build Coastguard Worker 
3330*61046927SAndroid Build Coastguard Worker    /* If the preamble uses scratch (due to spilling), we need to set maximal
3331*61046927SAndroid Build Coastguard Worker     * GPRs. Do it here so the driver doesn't have to worry about it.
3332*61046927SAndroid Build Coastguard Worker     */
3333*61046927SAndroid Build Coastguard Worker    if (impl->function->is_preamble)
3334*61046927SAndroid Build Coastguard Worker       out->nr_preamble_gprs = ctx->scratch_size ? 256 : nr_gprs;
3335*61046927SAndroid Build Coastguard Worker    else
3336*61046927SAndroid Build Coastguard Worker       out->nr_gprs = nr_gprs;
3337*61046927SAndroid Build Coastguard Worker 
3338*61046927SAndroid Build Coastguard Worker    /* Don't dump statistics for preambles, since they're not worth optimizing */
3339*61046927SAndroid Build Coastguard Worker    if (!impl->function->is_preamble) {
3340*61046927SAndroid Build Coastguard Worker       char *stats;
3341*61046927SAndroid Build Coastguard Worker       int ret = agx_dump_stats(ctx, binary->size, &stats);
3342*61046927SAndroid Build Coastguard Worker 
3343*61046927SAndroid Build Coastguard Worker       if (ret >= 0) {
3344*61046927SAndroid Build Coastguard Worker          if (agx_should_dump(nir, AGX_DBG_SHADERDB)) {
3345*61046927SAndroid Build Coastguard Worker             fprintf(stderr, "SHADER-DB: %s - %s\n", nir->info.label ?: "",
3346*61046927SAndroid Build Coastguard Worker                     stats);
3347*61046927SAndroid Build Coastguard Worker          }
3348*61046927SAndroid Build Coastguard Worker 
3349*61046927SAndroid Build Coastguard Worker          if (debug)
3350*61046927SAndroid Build Coastguard Worker             util_debug_message(debug, SHADER_INFO, "%s", stats);
3351*61046927SAndroid Build Coastguard Worker 
3352*61046927SAndroid Build Coastguard Worker          free(stats);
3353*61046927SAndroid Build Coastguard Worker       }
3354*61046927SAndroid Build Coastguard Worker    }
3355*61046927SAndroid Build Coastguard Worker 
3356*61046927SAndroid Build Coastguard Worker    ralloc_free(ctx);
3357*61046927SAndroid Build Coastguard Worker 
3358*61046927SAndroid Build Coastguard Worker    return offset;
3359*61046927SAndroid Build Coastguard Worker }
3360*61046927SAndroid Build Coastguard Worker 
3361*61046927SAndroid Build Coastguard Worker void
agx_link_libagx(nir_shader * nir,const nir_shader * libagx)3362*61046927SAndroid Build Coastguard Worker agx_link_libagx(nir_shader *nir, const nir_shader *libagx)
3363*61046927SAndroid Build Coastguard Worker {
3364*61046927SAndroid Build Coastguard Worker    nir_link_shader_functions(nir, libagx);
3365*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_inline_functions);
3366*61046927SAndroid Build Coastguard Worker    nir_remove_non_entrypoints(nir);
3367*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_deref);
3368*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_vars_to_ssa);
3369*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_remove_dead_derefs);
3370*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_remove_dead_variables,
3371*61046927SAndroid Build Coastguard Worker             nir_var_function_temp | nir_var_shader_temp, NULL);
3372*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3373*61046927SAndroid Build Coastguard Worker             nir_var_shader_temp | nir_var_function_temp,
3374*61046927SAndroid Build Coastguard Worker             glsl_get_cl_type_size_align);
3375*61046927SAndroid Build Coastguard Worker }
3376*61046927SAndroid Build Coastguard Worker 
3377*61046927SAndroid Build Coastguard Worker /*
3378*61046927SAndroid Build Coastguard Worker  * The hardware frcp instruction is sometimes off by 1 ULP. For correctly
3379*61046927SAndroid Build Coastguard Worker  * rounded frcp, a refinement step is required. This routine has been
3380*61046927SAndroid Build Coastguard Worker  * exhaustively tested with a modified math_bruteforce.
3381*61046927SAndroid Build Coastguard Worker  *
3382*61046927SAndroid Build Coastguard Worker  * While Khronos APIs allow 2.5 ULP error for divides, nir_lower_idiv relies on
3383*61046927SAndroid Build Coastguard Worker  * correctly rounded frcp. This is therefore load bearing for integer division
3384*61046927SAndroid Build Coastguard Worker  * on all APIs.
3385*61046927SAndroid Build Coastguard Worker  */
3386*61046927SAndroid Build Coastguard Worker static nir_def *
libagx_frcp(nir_builder * b,nir_def * x)3387*61046927SAndroid Build Coastguard Worker libagx_frcp(nir_builder *b, nir_def *x)
3388*61046927SAndroid Build Coastguard Worker {
3389*61046927SAndroid Build Coastguard Worker    nir_def *u = nir_frcp(b, x);
3390*61046927SAndroid Build Coastguard Worker 
3391*61046927SAndroid Build Coastguard Worker    /* Do 1 Newton-Raphson refinement step.
3392*61046927SAndroid Build Coastguard Worker     *
3393*61046927SAndroid Build Coastguard Worker     * Define f(u) = xu - 1. Then f(u) = 0 iff u = 1/x. Newton's method gives:
3394*61046927SAndroid Build Coastguard Worker     *
3395*61046927SAndroid Build Coastguard Worker     * u_2 = u - f(u) / f'(u) = u - (xu - 1) / x
3396*61046927SAndroid Build Coastguard Worker     *
3397*61046927SAndroid Build Coastguard Worker     * Our original guess is close, so we approximate (1 / x) by u:
3398*61046927SAndroid Build Coastguard Worker     *
3399*61046927SAndroid Build Coastguard Worker     * u_2 = u - u(xu - 1) = u + u(1 - xu)
3400*61046927SAndroid Build Coastguard Worker     *     = fma(fma(-x, u, 1), u, u)
3401*61046927SAndroid Build Coastguard Worker     */
3402*61046927SAndroid Build Coastguard Worker    nir_def *one = nir_imm_float(b, 1.0);
3403*61046927SAndroid Build Coastguard Worker    nir_def *u_2 = nir_ffma(b, nir_ffma(b, nir_fneg(b, x), u, one), u, u);
3404*61046927SAndroid Build Coastguard Worker 
3405*61046927SAndroid Build Coastguard Worker    /* If the original value was infinite, frcp will generate the correct zero.
3406*61046927SAndroid Build Coastguard Worker     * However, the Newton-Raphson step would multiply 0 * Inf and get a NaN. So
3407*61046927SAndroid Build Coastguard Worker     * skip the refinement step for infinite inputs. We do this backwards,
3408*61046927SAndroid Build Coastguard Worker     * checking whether the refined result is NaN, since we can implement this
3409*61046927SAndroid Build Coastguard Worker     * check in a single fcmpsel instruction. The other case where the refinement
3410*61046927SAndroid Build Coastguard Worker     * is NaN is a NaN input, in which skipping refinement is acceptable.
3411*61046927SAndroid Build Coastguard Worker     */
3412*61046927SAndroid Build Coastguard Worker    return nir_bcsel(b, nir_fisnan(b, u_2), u, u_2);
3413*61046927SAndroid Build Coastguard Worker }
3414*61046927SAndroid Build Coastguard Worker 
3415*61046927SAndroid Build Coastguard Worker static bool
agx_nir_lower_fdiv(nir_builder * b,nir_alu_instr * alu,void * _)3416*61046927SAndroid Build Coastguard Worker agx_nir_lower_fdiv(nir_builder *b, nir_alu_instr *alu, void *_)
3417*61046927SAndroid Build Coastguard Worker {
3418*61046927SAndroid Build Coastguard Worker    if (alu->op != nir_op_frcp || !alu->exact || alu->def.bit_size != 32)
3419*61046927SAndroid Build Coastguard Worker       return false;
3420*61046927SAndroid Build Coastguard Worker 
3421*61046927SAndroid Build Coastguard Worker    b->cursor = nir_before_instr(&alu->instr);
3422*61046927SAndroid Build Coastguard Worker    nir_def_replace(&alu->def, libagx_frcp(b, nir_ssa_for_alu_src(b, alu, 0)));
3423*61046927SAndroid Build Coastguard Worker    return true;
3424*61046927SAndroid Build Coastguard Worker }
3425*61046927SAndroid Build Coastguard Worker 
3426*61046927SAndroid Build Coastguard Worker /* Preprocess NIR independent of shader state */
3427*61046927SAndroid Build Coastguard Worker void
agx_preprocess_nir(nir_shader * nir,const nir_shader * libagx)3428*61046927SAndroid Build Coastguard Worker agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx)
3429*61046927SAndroid Build Coastguard Worker {
3430*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_vars_to_ssa);
3431*61046927SAndroid Build Coastguard Worker 
3432*61046927SAndroid Build Coastguard Worker    /* Lower large arrays to scratch and small arrays to csel */
3433*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
3434*61046927SAndroid Build Coastguard Worker             glsl_get_natural_size_align_bytes,
3435*61046927SAndroid Build Coastguard Worker             glsl_get_natural_size_align_bytes);
3436*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
3437*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_split_var_copies);
3438*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
3439*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_var_copies);
3440*61046927SAndroid Build Coastguard Worker 
3441*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
3442*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, agx_nir_lower_frag_sidefx);
3443*61046927SAndroid Build Coastguard Worker    }
3444*61046927SAndroid Build Coastguard Worker 
3445*61046927SAndroid Build Coastguard Worker    /* Clean up deref gunk after lowering I/O */
3446*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_dce);
3447*61046927SAndroid Build Coastguard Worker 
3448*61046927SAndroid Build Coastguard Worker    agx_link_libagx(nir, libagx);
3449*61046927SAndroid Build Coastguard Worker 
3450*61046927SAndroid Build Coastguard Worker    /* Runs before we lower away idiv, to work at all. But runs after lowering
3451*61046927SAndroid Build Coastguard Worker     * textures, since the cube map array lowering generates division by 6.
3452*61046927SAndroid Build Coastguard Worker     */
3453*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_idiv_const, 16);
3454*61046927SAndroid Build Coastguard Worker 
3455*61046927SAndroid Build Coastguard Worker    nir_lower_idiv_options idiv_options = {
3456*61046927SAndroid Build Coastguard Worker       .allow_fp16 = true,
3457*61046927SAndroid Build Coastguard Worker    };
3458*61046927SAndroid Build Coastguard Worker 
3459*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_idiv, &idiv_options);
3460*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_frexp);
3461*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_alu);
3462*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
3463*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
3464*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_flrp, 16 | 32 | 64, false);
3465*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, agx_lower_sincos);
3466*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_lower_front_face,
3467*61046927SAndroid Build Coastguard Worker             nir_metadata_control_flow, NULL);
3468*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, agx_nir_lower_subgroups);
3469*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_phis_to_scalar, true);
3470*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_shader_alu_pass, agx_nir_lower_fdiv,
3471*61046927SAndroid Build Coastguard Worker             nir_metadata_control_flow, NULL);
3472*61046927SAndroid Build Coastguard Worker 
3473*61046927SAndroid Build Coastguard Worker    /* After lowering, run through the standard suite of NIR optimizations. We
3474*61046927SAndroid Build Coastguard Worker     * will run through the loop later, once we have the shader key, but if we
3475*61046927SAndroid Build Coastguard Worker     * run now, that run will ideally be almost a no-op.
3476*61046927SAndroid Build Coastguard Worker     */
3477*61046927SAndroid Build Coastguard Worker    agx_optimize_loop_nir(nir);
3478*61046927SAndroid Build Coastguard Worker 
3479*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_deref);
3480*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_vars_to_ssa);
3481*61046927SAndroid Build Coastguard Worker 
3482*61046927SAndroid Build Coastguard Worker    /* We're lowered away all variables. Remove them all for smaller shaders. */
3483*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
3484*61046927SAndroid Build Coastguard Worker    nir->info.io_lowered = true;
3485*61046927SAndroid Build Coastguard Worker 
3486*61046927SAndroid Build Coastguard Worker    /* Move before lowering */
3487*61046927SAndroid Build Coastguard Worker    nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo |
3488*61046927SAndroid Build Coastguard Worker                                nir_move_load_input | nir_move_comparisons |
3489*61046927SAndroid Build Coastguard Worker                                nir_move_copies | nir_move_load_ssbo |
3490*61046927SAndroid Build Coastguard Worker                                nir_move_alu;
3491*61046927SAndroid Build Coastguard Worker 
3492*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_sink, move_all);
3493*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_move, move_all);
3494*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, agx_nir_lower_shared_bitsize);
3495*61046927SAndroid Build Coastguard Worker }
3496*61046927SAndroid Build Coastguard Worker 
3497*61046927SAndroid Build Coastguard Worker void
agx_compile_shader_nir(nir_shader * nir,struct agx_shader_key * key,struct util_debug_callback * debug,struct agx_shader_part * out)3498*61046927SAndroid Build Coastguard Worker agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
3499*61046927SAndroid Build Coastguard Worker                        struct util_debug_callback *debug,
3500*61046927SAndroid Build Coastguard Worker                        struct agx_shader_part *out)
3501*61046927SAndroid Build Coastguard Worker {
3502*61046927SAndroid Build Coastguard Worker    agx_compiler_debug = agx_get_compiler_debug();
3503*61046927SAndroid Build Coastguard Worker    struct agx_shader_info *info = &out->info;
3504*61046927SAndroid Build Coastguard Worker 
3505*61046927SAndroid Build Coastguard Worker    struct util_dynarray binary;
3506*61046927SAndroid Build Coastguard Worker    util_dynarray_init(&binary, NULL);
3507*61046927SAndroid Build Coastguard Worker 
3508*61046927SAndroid Build Coastguard Worker    memset(out, 0, sizeof *out);
3509*61046927SAndroid Build Coastguard Worker 
3510*61046927SAndroid Build Coastguard Worker    assert(nir->info.io_lowered &&
3511*61046927SAndroid Build Coastguard Worker           "agx_preprocess_nir is called first, then the shader is specalized,"
3512*61046927SAndroid Build Coastguard Worker           "then the specialized shader is compiled");
3513*61046927SAndroid Build Coastguard Worker 
3514*61046927SAndroid Build Coastguard Worker    /* If required, tag writes will be enabled by instruction selection */
3515*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT)
3516*61046927SAndroid Build Coastguard Worker       info->tag_write_disable = !nir->info.writes_memory;
3517*61046927SAndroid Build Coastguard Worker 
3518*61046927SAndroid Build Coastguard Worker    bool needs_libagx = true /* TODO: Optimize */;
3519*61046927SAndroid Build Coastguard Worker 
3520*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord);
3521*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_vars_to_ssa);
3522*61046927SAndroid Build Coastguard Worker 
3523*61046927SAndroid Build Coastguard Worker    if (needs_libagx) {
3524*61046927SAndroid Build Coastguard Worker       agx_link_libagx(nir, key->libagx);
3525*61046927SAndroid Build Coastguard Worker 
3526*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_opt_deref);
3527*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_lower_vars_to_ssa);
3528*61046927SAndroid Build Coastguard Worker       NIR_PASS(_, nir, nir_lower_explicit_io,
3529*61046927SAndroid Build Coastguard Worker                nir_var_shader_temp | nir_var_function_temp |
3530*61046927SAndroid Build Coastguard Worker                   nir_var_mem_shared | nir_var_mem_global,
3531*61046927SAndroid Build Coastguard Worker                nir_address_format_62bit_generic);
3532*61046927SAndroid Build Coastguard Worker    }
3533*61046927SAndroid Build Coastguard Worker 
3534*61046927SAndroid Build Coastguard Worker    /* Late sysval lowering creates large loads. Load lowering creates unpacks */
3535*61046927SAndroid Build Coastguard Worker    nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
3536*61046927SAndroid Build Coastguard Worker       .modes = nir_var_mem_ssbo | nir_var_mem_constant |
3537*61046927SAndroid Build Coastguard Worker                nir_var_mem_task_payload | nir_var_shader_temp |
3538*61046927SAndroid Build Coastguard Worker                nir_var_function_temp | nir_var_mem_global | nir_var_mem_shared,
3539*61046927SAndroid Build Coastguard Worker       .callback = mem_access_size_align_cb,
3540*61046927SAndroid Build Coastguard Worker    };
3541*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
3542*61046927SAndroid Build Coastguard Worker 
3543*61046927SAndroid Build Coastguard Worker    /* Cleanup 8-bit math before lowering */
3544*61046927SAndroid Build Coastguard Worker    bool progress;
3545*61046927SAndroid Build Coastguard Worker    do {
3546*61046927SAndroid Build Coastguard Worker       progress = false;
3547*61046927SAndroid Build Coastguard Worker 
3548*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_algebraic);
3549*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_constant_folding);
3550*61046927SAndroid Build Coastguard Worker       NIR_PASS(progress, nir, nir_opt_dce);
3551*61046927SAndroid Build Coastguard Worker    } while (progress);
3552*61046927SAndroid Build Coastguard Worker 
3553*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_bit_size, lower_bit_size_callback, NULL);
3554*61046927SAndroid Build Coastguard Worker 
3555*61046927SAndroid Build Coastguard Worker    /* Late blend lowering creates vectors */
3556*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
3557*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
3558*61046927SAndroid Build Coastguard Worker 
3559*61046927SAndroid Build Coastguard Worker    /* Late VBO lowering creates constant udiv instructions */
3560*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_idiv_const, 16);
3561*61046927SAndroid Build Coastguard Worker 
3562*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_opt_constant_folding);
3563*61046927SAndroid Build Coastguard Worker    NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_from_texture_handle,
3564*61046927SAndroid Build Coastguard Worker             nir_metadata_control_flow, NULL);
3565*61046927SAndroid Build Coastguard Worker 
3566*61046927SAndroid Build Coastguard Worker    info->push_count = key->reserved_preamble;
3567*61046927SAndroid Build Coastguard Worker    agx_optimize_nir(nir, key->dev.soft_fault,
3568*61046927SAndroid Build Coastguard Worker                     key->secondary ? NULL : &info->push_count);
3569*61046927SAndroid Build Coastguard Worker 
3570*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
3571*61046927SAndroid Build Coastguard Worker       info->varyings.fs.nr_cf = key->fs.cf_base;
3572*61046927SAndroid Build Coastguard Worker       assign_coefficient_regs(nir, &info->varyings.fs);
3573*61046927SAndroid Build Coastguard Worker    }
3574*61046927SAndroid Build Coastguard Worker 
3575*61046927SAndroid Build Coastguard Worker    if (agx_should_dump(nir, AGX_DBG_SHADERS))
3576*61046927SAndroid Build Coastguard Worker       nir_print_shader(nir, stdout);
3577*61046927SAndroid Build Coastguard Worker 
3578*61046927SAndroid Build Coastguard Worker    info->local_size = nir->info.shared_size;
3579*61046927SAndroid Build Coastguard Worker 
3580*61046927SAndroid Build Coastguard Worker    nir_foreach_function_with_impl(func, impl, nir) {
3581*61046927SAndroid Build Coastguard Worker       unsigned offset =
3582*61046927SAndroid Build Coastguard Worker          agx_compile_function_nir(nir, impl, key, debug, &binary, &out->info);
3583*61046927SAndroid Build Coastguard Worker 
3584*61046927SAndroid Build Coastguard Worker       if (func->is_preamble) {
3585*61046927SAndroid Build Coastguard Worker          info->preamble_offset = offset;
3586*61046927SAndroid Build Coastguard Worker          info->has_preamble = true;
3587*61046927SAndroid Build Coastguard Worker       } else if (func->is_entrypoint) {
3588*61046927SAndroid Build Coastguard Worker          info->main_offset = offset;
3589*61046927SAndroid Build Coastguard Worker          info->main_size = binary.size - offset;
3590*61046927SAndroid Build Coastguard Worker       } else {
3591*61046927SAndroid Build Coastguard Worker          unreachable("General functions not yet supported");
3592*61046927SAndroid Build Coastguard Worker       }
3593*61046927SAndroid Build Coastguard Worker    }
3594*61046927SAndroid Build Coastguard Worker 
3595*61046927SAndroid Build Coastguard Worker    info->stage = nir->info.stage;
3596*61046927SAndroid Build Coastguard Worker 
3597*61046927SAndroid Build Coastguard Worker    /* Check these outside the stage check since nir->info.stage is the hardware
3598*61046927SAndroid Build Coastguard Worker     * stage and these are read in the vertex *software* stage.
3599*61046927SAndroid Build Coastguard Worker     */
3600*61046927SAndroid Build Coastguard Worker    info->uses_draw_id =
3601*61046927SAndroid Build Coastguard Worker       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
3602*61046927SAndroid Build Coastguard Worker 
3603*61046927SAndroid Build Coastguard Worker    info->uses_base_param =
3604*61046927SAndroid Build Coastguard Worker       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) ||
3605*61046927SAndroid Build Coastguard Worker       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
3606*61046927SAndroid Build Coastguard Worker 
3607*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_VERTEX) {
3608*61046927SAndroid Build Coastguard Worker       info->nonzero_viewport = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
3609*61046927SAndroid Build Coastguard Worker 
3610*61046927SAndroid Build Coastguard Worker       info->writes_layer_viewport =
3611*61046927SAndroid Build Coastguard Worker          nir->info.outputs_written & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
3612*61046927SAndroid Build Coastguard Worker 
3613*61046927SAndroid Build Coastguard Worker    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
3614*61046927SAndroid Build Coastguard Worker       info->disable_tri_merging = nir->info.uses_wide_subgroup_intrinsics ||
3615*61046927SAndroid Build Coastguard Worker                                   nir->info.fs.needs_quad_helper_invocations ||
3616*61046927SAndroid Build Coastguard Worker                                   nir->info.writes_memory;
3617*61046927SAndroid Build Coastguard Worker 
3618*61046927SAndroid Build Coastguard Worker       /* Writing the sample mask requires tag writes */
3619*61046927SAndroid Build Coastguard Worker       info->tag_write_disable &= !info->writes_sample_mask;
3620*61046927SAndroid Build Coastguard Worker 
3621*61046927SAndroid Build Coastguard Worker       /* Report a canonical depth layout. This happens at the end because the
3622*61046927SAndroid Build Coastguard Worker        * sample mask lowering affects it.
3623*61046927SAndroid Build Coastguard Worker        */
3624*61046927SAndroid Build Coastguard Worker       enum gl_frag_depth_layout layout = nir->info.fs.depth_layout;
3625*61046927SAndroid Build Coastguard Worker 
3626*61046927SAndroid Build Coastguard Worker       if (!(nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)))
3627*61046927SAndroid Build Coastguard Worker          info->depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED;
3628*61046927SAndroid Build Coastguard Worker       else if (layout == FRAG_DEPTH_LAYOUT_NONE)
3629*61046927SAndroid Build Coastguard Worker          info->depth_layout = FRAG_DEPTH_LAYOUT_ANY;
3630*61046927SAndroid Build Coastguard Worker       else
3631*61046927SAndroid Build Coastguard Worker          info->depth_layout = layout;
3632*61046927SAndroid Build Coastguard Worker 
3633*61046927SAndroid Build Coastguard Worker       info->reads_tib = nir->info.fs.uses_fbfetch_output;
3634*61046927SAndroid Build Coastguard Worker       info->early_fragment_tests = nir->info.fs.early_fragment_tests;
3635*61046927SAndroid Build Coastguard Worker    } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
3636*61046927SAndroid Build Coastguard Worker       info->imageblock_stride = nir->info.cs.image_block_size_per_thread_agx;
3637*61046927SAndroid Build Coastguard Worker    }
3638*61046927SAndroid Build Coastguard Worker 
3639*61046927SAndroid Build Coastguard Worker    out->binary = binary.data;
3640*61046927SAndroid Build Coastguard Worker    out->binary_size = binary.size;
3641*61046927SAndroid Build Coastguard Worker }
3642