/*
 * Copyright 2023 Valve Corporation
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * Copyright 2016 Broadcom
 * SPDX-License-Identifier: MIT
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_nir_passes.h"
#include "glsl_types.h"
#include "libagx_shaders.h"
#include "nir_builder_opcodes.h"
#include "nir_builtin_builder.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"

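/*
 * Insert the appropriate texture cache fence after image writes and atomics
 * (nir_fence_pbe_to_tex_agx after stores, nir_fence_mem_to_tex_agx after
 * atomics) so that subsequent texture reads observe the new data. Write-only
 * images never feed a texture read, so they need no fence.
 */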
static bool
fence_image(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_after_instr(&intr->instr);

   /* If the image is write-only, there is no fencing needed */
   if (nir_intrinsic_has_access(intr) &&
       (nir_intrinsic_access(intr) & ACCESS_NON_READABLE)) {
      return false;
   }

   switch (intr->intrinsic) {
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_store:
      nir_fence_pbe_to_tex_agx(b);
      return true;

   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      nir_fence_mem_to_tex_agx(b);
      return true;

   default:
      return false;
   }
}

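/* Get the address of the texture descriptor. Textures must already have been
 * lowered to bindless handles by the driver before this pass runs.
 */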
static nir_def *
texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
{
   int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   assert(handle_idx >= 0 && "must be bindless");
   return nir_load_from_texture_handle_agx(b, tex->src[handle_idx].src.ssa);
}

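/* Return whether a texture instruction's LOD source might be nonzero, i.e. it
 * is present and not a constant zero.
 */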
static bool
has_nonzero_lod(nir_tex_instr *tex)
{
   int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (idx < 0)
      return false;

   nir_src src = tex->src[idx].src;
   return !(nir_src_is_const(src) && nir_src_as_uint(src) == 0);
}

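/*
 * Lower texture queries (txs, texture_samples, query_levels) to descriptor
 * crawls, reading the required fields from the texture descriptor via libagx
 * helpers.
 */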
static bool
lower_tex_crawl(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   if (tex->op != nir_texop_txs && tex->op != nir_texop_texture_samples &&
       tex->op != nir_texop_query_levels)
      return false;

   nir_def *ptr = texture_descriptor_ptr(b, tex);
   unsigned nr_comps = tex->def.num_components;
   assert(nr_comps <= 3);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   nir_def *lod = lod_idx >= 0 ? nir_u2u16(b, tex->src[lod_idx].src.ssa)
                               : nir_imm_intN_t(b, 0, 16);

   nir_def *res;
   if (tex->op == nir_texop_txs) {
      res =
         libagx_txs(b, ptr, lod, nir_imm_int(b, nr_comps),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_BUF),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_1D),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_2D),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE),
                    nir_imm_bool(b, tex->is_array));
   } else if (tex->op == nir_texop_query_levels) {
      res = libagx_texture_levels(b, ptr);
   } else {
      res = libagx_texture_samples(b, ptr);
   }

   nir_def_rewrite_uses(&tex->def, nir_trim_vector(b, res, nr_comps));
   nir_instr_remove(instr);
   return true;
}

/*
 * Given a 1D buffer texture coordinate, calculate the 2D coordinate vector that
 * will be used to access the linear 2D texture bound to the buffer.
 */
static nir_def *
coords_for_buffer_texture(nir_builder *b, nir_def *coord)
{
   return nir_vec2(b, nir_umod_imm(b, coord, AGX_TEXTURE_BUFFER_WIDTH),
                   nir_udiv_imm(b, coord, AGX_TEXTURE_BUFFER_WIDTH));
}

/*
 * Buffer textures are lowered to 2D (1024xN) textures in the driver to access
 * more storage. When lowering, we need to fix up the coordinate accordingly.
 *
 * Furthermore, RGB32 formats are emulated by lowering to global memory access,
 * so to read a buffer texture we generate code that looks like:
 *
 *    if (descriptor->format == RGB32)
 *       return ((uint32_t *) descriptor->address)[x];
 *    else
 *       return txf(texture_as_2d, vec2(x % 1024, x / 1024));
 */
static bool
lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *size = nir_get_texture_size(b, tex);
   nir_def *oob = nir_uge(b, coord, size);

   /* Apply the buffer offset after calculating oob but before remapping */
   nir_def *desc = texture_descriptor_ptr(b, tex);
   coord = libagx_buffer_texture_offset(b, desc, coord);

   /* Map out-of-bounds indices to out-of-bounds coordinates for robustness2
    * semantics from the hardware.
    */
   coord = nir_bcsel(b, oob, nir_imm_int(b, -1), coord);

   bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;

   /* Lower RGB32 reads if the format requires. If we are out-of-bounds, we use
    * the hardware path so we get a zero texel.
    */
   nir_if *nif = nir_push_if(
      b, nir_iand(b, libagx_texture_is_rgb32(b, desc), nir_inot(b, oob)));

   nir_def *rgb32 = nir_trim_vector(
      b, libagx_texture_load_rgb32(b, desc, coord, nir_imm_bool(b, is_float)),
      nir_tex_instr_dest_size(tex));

   nir_push_else(b, nif);

   /* Otherwise, lower the texture instruction to read from 2D */
   assert(coord->num_components == 1 && "buffer textures are 1D");
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_def *coord2d = coords_for_buffer_texture(b, coord);
   nir_instr_remove(&tex->instr);
   nir_builder_instr_insert(b, &tex->instr);
   nir_tex_instr_add_src(tex, nir_tex_src_backend1, coord2d);
   nir_steal_tex_src(tex, nir_tex_src_sampler_handle);
   nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   nir_block *else_block = nir_cursor_current_block(b->cursor);
   nir_pop_if(b, nif);

   /* Put it together with a phi */
   nir_def *phi = nir_if_phi(b, rgb32, &tex->def);
   nir_def_rewrite_uses(&tex->def, phi);
   nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
   nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
   nir_src_rewrite(&else_src->src, &tex->def);
   return true;
}

/*
 * NIR indexes into array textures with unclamped floats (integer for txf). AGX
 * requires the index to be a clamped integer. Lower tex_src_coord into
 * tex_src_backend1 for array textures by type-converting and clamping.
 */
static bool
lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   if (nir_tex_instr_is_query(tex) && tex->op != nir_texop_lod)
      return false;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
      return lower_buffer_texture(b, tex);

   /* Don't lower twice */
   if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) >= 0)
      return false;

   /* Get the coordinates */
   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);

   /* Apply txf workaround, see libagx_lower_txf_robustness */
   bool is_txf = ((tex->op == nir_texop_txf) || (tex->op == nir_texop_txf_ms));

   if (is_txf && (has_nonzero_lod(tex) || tex->is_array) &&
       !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP)) {
      int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
      nir_def *lod =
         lod_idx >= 0 ? tex->src[lod_idx].src.ssa : nir_undef(b, 1, 16);

      unsigned lidx = coord->num_components - 1;
      nir_def *layer = nir_channel(b, coord, lidx);

      nir_def *replaced = libagx_lower_txf_robustness(
         b, texture_descriptor_ptr(b, tex),
         nir_imm_bool(b, has_nonzero_lod(tex)), lod,
         nir_imm_bool(b, tex->is_array), layer, nir_channel(b, coord, 0));

      coord = nir_vector_insert_imm(b, coord, replaced, 0);
   }

   /* The layer is always the last component of the NIR coordinate, split it off
    * because we'll need to swizzle.
    */
   nir_def *layer = NULL;

   if (tex->is_array && tex->op != nir_texop_lod) {
      unsigned lidx = coord->num_components - 1;
      nir_def *unclamped_layer = nir_channel(b, coord, lidx);
      coord = nir_trim_vector(b, coord, lidx);

      /* Round layer to nearest even */
      if (!is_txf)
         unclamped_layer = nir_f2u32(b, nir_fround_even(b, unclamped_layer));

      /* For a cube array, the layer is zero-indexed component 3 of the
       * coordinate but the number of layers is component 2 of the txs result.
       */
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
         assert(lidx == 3 && "4 components");
         lidx = 2;
      }

      /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling.
       * Layer must be 16-bits for the hardware, drop top bits after clamping.
       *
       * For txf, we drop out-of-bounds components rather than clamp, see the
       * above txf robustness workaround.
       */
      if (!(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP) && !is_txf) {
         nir_def *txs = nir_get_texture_size(b, tex);
         nir_def *nr_layers = nir_channel(b, txs, lidx);
         nir_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
         layer = nir_umin(b, unclamped_layer, max_layer);
      } else {
         layer = unclamped_layer;
      }

      layer = nir_u2u16(b, layer);
   }

   /* Combine layer and multisample index into 32-bit so we don't need a vec5 or
    * vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
    * little benefit (a minor optimization, I guess).
    */
   nir_def *sample_array = (ms_idx && layer)
                              ? nir_pack_32_2x16_split(b, ms_idx, layer)
                           : ms_idx ? nir_u2u32(b, ms_idx)
                           : layer  ? nir_u2u32(b, layer)
                                    : NULL;

   /* Combine into the final 32-bit tuple */
   if (sample_array != NULL) {
      unsigned end = coord->num_components;
      coord = nir_pad_vector(b, coord, end + 1);
      coord = nir_vector_insert_imm(b, coord, sample_array, end);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_backend1, coord);

   /* Furthermore, if there is an offset vector, it must be packed */
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);

   if (offset != NULL) {
      nir_def *packed = NULL;

      for (unsigned c = 0; c < offset->num_components; ++c) {
         nir_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
         nir_def *shifted = nir_ishl_imm(b, nibble, 4 * c);

         if (packed != NULL)
            packed = nir_ior(b, packed, shifted);
         else
            packed = shifted;
      }

      nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed);
   }

   /* We reserve bound sampler #0, so we offset bound samplers by 1 and
    * otherwise map bound samplers as-is.
    */
   nir_def *sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   if (!sampler)
      sampler = nir_imm_intN_t(b, tex->sampler_index, 16);

   if (!is_txf &&
       nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) < 0) {

      nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
                            nir_iadd_imm(b, nir_u2u16(b, sampler), 1));
   }

   return true;
}

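/* Query the sampler's LOD bias as a 16-bit float (nir_texop_lod_bias_agx). */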
static nir_def *
bias_for_tex(nir_builder *b, nir_tex_instr *tex)
{
   return nir_build_texture_query(b, tex, nir_texop_lod_bias_agx, 1,
                                  nir_type_float16, false, false);
}

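/*
 * Apply the sampler's LOD bias in the shader: tex becomes txb, txb/txl add the
 * bias to their existing bias/LOD, and txd scales its derivatives by
 * exp2(bias). Operations that do not use a sampler are left alone.
 */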
static bool
lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   switch (tex->op) {
   case nir_texop_tex: {
      tex->op = nir_texop_txb;
      nir_tex_instr_add_src(tex, nir_tex_src_bias, bias_for_tex(b, tex));
      return true;
   }

   case nir_texop_txb:
   case nir_texop_txl: {
      nir_tex_src_type src =
         tex->op == nir_texop_txl ? nir_tex_src_lod : nir_tex_src_bias;

      nir_def *orig = nir_steal_tex_src(tex, src);
      assert(orig != NULL && "invalid NIR");

      if (orig->bit_size != 16)
         orig = nir_f2f16(b, orig);

      nir_tex_instr_add_src(tex, src, nir_fadd(b, orig, bias_for_tex(b, tex)));
      return true;
   }

   case nir_texop_txd: {
      /* For txd, the computed level-of-detail is log2(rho)
       * where rho should scale proportionally to all
       * derivatives. So scale derivatives by exp2(bias) to
       * get level-of-detail log2(exp2(bias) * rho) = bias + log2(rho).
       */
      nir_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
      nir_tex_src_type src[] = {nir_tex_src_ddx, nir_tex_src_ddy};

      for (unsigned s = 0; s < ARRAY_SIZE(src); ++s) {
         nir_def *orig = nir_steal_tex_src(tex, src[s]);
         assert(orig != NULL && "invalid");

         nir_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
         nir_tex_instr_add_src(tex, src[s], scaled);
      }

      return true;
   }

   case nir_texop_lod: {
      nir_tex_instr_add_src(tex, nir_tex_src_bias, bias_for_tex(b, tex));
      return true;
   }

   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_tg4:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
   case nir_texop_query_levels:
      /* These operations do not use a sampler */
      return false;

   default:
      unreachable("Unhandled texture operation");
   }
}

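/* Narrow 32-bit image LOD sources to 16-bit, as expected by the backend. */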
static bool
legalize_image_lod(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   nir_src *src;

#define CASE(op, idx)                                                          \
   case nir_intrinsic_##op:                                                    \
   case nir_intrinsic_bindless_##op:                                           \
      src = &intr->src[idx];                                                   \
      break;

   switch (intr->intrinsic) {
      CASE(image_load, 3)
      CASE(image_store, 4)
      CASE(image_size, 1)
   default:
      return false;
   }

#undef CASE

   if (src->ssa->bit_size == 16)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_src_rewrite(src, nir_i2i16(b, src->ssa));
   return true;
}

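/*
 * Answer an image_size/image_samples intrinsic by building the equivalent
 * txs/texture_samples instruction on the image's bindless handle.
 */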
static nir_def *
txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
              unsigned num_components, unsigned bit_size, bool query_samples)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, query_samples ? 1 : 2);
   tex->op = query_samples ? nir_texop_texture_samples : nir_texop_txs;
   tex->is_array = nir_intrinsic_image_array(intr);
   tex->dest_type = nir_type_uint32;
   tex->sampler_dim = nir_intrinsic_image_dim(intr);

   tex->src[0] =
      nir_tex_src_for_ssa(nir_tex_src_texture_handle, intr->src[0].ssa);

   if (!query_samples)
      tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, intr->src[1].ssa);

   nir_def_init(&tex->instr, &tex->def, num_components, bit_size);
   nir_builder_instr_insert(b, &tex->instr);
   nir_def *res = &tex->def;

   /* Cube images are implemented as 2D arrays, so we need to divide here. */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && res->num_components > 2 &&
       !query_samples) {
      nir_def *divided = nir_udiv_imm(b, nir_channel(b, res, 2), 6);
      res = nir_vector_insert_imm(b, res, divided, 2);
   }

   return res;
}

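/*
 * Calculate the memory address of a texel for an image intrinsic by crawling
 * the PBE descriptor, or the linear texel index if return_index is set. Used
 * to lower image atomics and multisampled image stores.
 */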
static nir_def *
image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
                    bool return_index)
{
   /* First, calculate the address of the PBE descriptor */
   nir_def *desc_address =
      nir_load_from_texture_handle_agx(b, intr->src[0].ssa);

   nir_def *coord = intr->src[1].ssa;

   /* For atomics, we always infer the format. We only go down this path with
    * formatless intrinsics when lowering multisampled image stores, but that
    * uses the return_index path that ignores the block size.
    */
   enum pipe_format format = nir_intrinsic_format(intr);
   assert(return_index || format != PIPE_FORMAT_NONE);

   nir_def *blocksize_B = nir_imm_int(b, util_format_get_blocksize(format));

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   bool layered = nir_intrinsic_image_array(intr) ||
                  (dim == GLSL_SAMPLER_DIM_CUBE) ||
                  (dim == GLSL_SAMPLER_DIM_3D);

   if (dim == GLSL_SAMPLER_DIM_BUF && return_index) {
      return nir_channel(b, coord, 0);
   } else if (dim == GLSL_SAMPLER_DIM_BUF) {
      return libagx_buffer_texel_address(b, desc_address, coord, blocksize_B);
   } else {
      return libagx_image_texel_address(
         b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
         nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_1D),
         nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered),
         nir_imm_bool(b, return_index));
   }
}

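/* Lower a buffer image load/store to a 2D access, remapping the 1D coordinate
 * onto the linear 2D (1024xN) image the driver binds for buffer images.
 */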
static void
lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *coord_vector = intr->src[1].ssa;
   nir_def *coord = nir_channel(b, coord_vector, 0);

   /* If we're not bindless, assume we don't need an offset (GL driver) */
   if (intr->intrinsic == nir_intrinsic_bindless_image_load) {
      nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
      coord = libagx_buffer_texture_offset(b, desc, coord);
   } else if (intr->intrinsic == nir_intrinsic_bindless_image_store) {
      nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
      coord = libagx_buffer_image_offset(b, desc, coord);
   }

   /* Lower the buffer load/store to a 2D image load/store, matching the 2D
    * texture/PBE descriptor the driver supplies for buffer images.
    */
   nir_def *coord2d = coords_for_buffer_texture(b, coord);
   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
}

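/* Lower a 1D (or 1D array) image access to the equivalent 2D access with a
 * y-coordinate of zero.
 */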
static void
lower_1d_image(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *coord = intr->src[1].ssa;
   bool is_array = nir_intrinsic_image_array(intr);
   nir_def *zero = nir_imm_intN_t(b, 0, coord->bit_size);

   if (is_array) {
      assert(coord->num_components >= 2);
      coord =
         nir_vec3(b, nir_channel(b, coord, 0), zero, nir_channel(b, coord, 1));
   } else {
      assert(coord->num_components >= 1);
      coord = nir_vec2(b, coord, zero);
   }

   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord, 4));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
}

/*
 * Just like for txf, we need special handling around layers (and LODs, but we
 * don't support mipmapped images yet) for robust image_loads. See
 * libagx_lower_txf_robustness for more info.
 */
static bool
lower_image_load_robustness(nir_builder *b, nir_intrinsic_instr *intr)
{
   if (nir_intrinsic_access(intr) & ACCESS_IN_BOUNDS_AGX)
      return false;

   /* We only need to worry about array-like loads */
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   if (!nir_intrinsic_image_array(intr) && dim != GLSL_SAMPLER_DIM_CUBE)
      return false;

   /* Determine the coordinate component of the layer. Cubes and cube arrays
    * keep their array in their last non-array coordinate component, other
    * arrays are immediately after.
    */
   unsigned lidx = glsl_get_sampler_dim_coordinate_components(dim);
   if (dim == GLSL_SAMPLER_DIM_CUBE)
      lidx--;

   nir_def *coord = intr->src[1].ssa;
   nir_def *lod = nir_undef(b, 1, 16);
   nir_def *layer = nir_channel(b, coord, lidx);

   /* image_load is effectively the same as txf, reuse the txf lower */
   nir_def *replaced = libagx_lower_txf_robustness(
      b, nir_load_from_texture_handle_agx(b, intr->src[0].ssa),
      nir_imm_bool(b, false /* lower LOD */), lod,
      nir_imm_bool(b, true /* lower layer */), layer, nir_channel(b, coord, 0));

   nir_src_rewrite(&intr->src[1], nir_vector_insert_imm(b, coord, replaced, 0));
   return true;
}

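/*
 * Main image intrinsic lowering: legalize multisample indices, apply
 * robustness handling for array/cube loads, remap 1D and buffer images to 2D,
 * and lower size/samples/texel_address queries to descriptor crawls.
 */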
static bool
lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_store: {
      /* Legalize MSAA index */
      nir_src_rewrite(&intr->src[2], nir_u2u16(b, intr->src[2].ssa));

      if (intr->intrinsic == nir_intrinsic_image_load ||
          intr->intrinsic == nir_intrinsic_bindless_image_load) {
         lower_image_load_robustness(b, intr);
      }

      switch (nir_intrinsic_image_dim(intr)) {
      case GLSL_SAMPLER_DIM_1D:
         lower_1d_image(b, intr);
         return true;

      case GLSL_SAMPLER_DIM_BUF:
         lower_buffer_image(b, intr);
         return true;

      default:
         return true;
      }
   }

   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_bindless_image_samples:
      nir_def_rewrite_uses(
         &intr->def,
         txs_for_image(
            b, intr, intr->def.num_components, intr->def.bit_size,
            intr->intrinsic == nir_intrinsic_bindless_image_samples));
      return true;

   case nir_intrinsic_bindless_image_texel_address:
      nir_def_rewrite_uses(&intr->def, image_texel_address(b, intr, false));
      return true;

   case nir_intrinsic_image_size:
   case nir_intrinsic_image_texel_address:
      unreachable("should've been lowered");

   default:
      return false;
   }
}

/*
 * Map out-of-bounds storage texel buffer accesses and multisampled image
 * stores to large sentinel coordinates (0xFFF0), which become an out-of-bounds
 * hardware access. This gives cheap robustness2.
 */
static bool
lower_robustness(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
      break;
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   bool array = nir_intrinsic_image_array(intr);
   unsigned size_components = nir_image_intrinsic_coord_components(intr);

   nir_def *deref = intr->src[0].ssa;
   nir_def *coord = intr->src[1].ssa;

   if (dim != GLSL_SAMPLER_DIM_BUF &&
       !(dim == GLSL_SAMPLER_DIM_MS &&
         intr->intrinsic == nir_intrinsic_image_deref_store))
      return false;

   /* Bounds check the coordinate */
   nir_def *size =
      nir_image_deref_size(b, size_components, 32, deref, nir_imm_int(b, 0),
                           .image_dim = dim, .image_array = array);
   nir_def *oob = nir_bany(b, nir_uge(b, coord, size));

   /* Bounds check the sample */
   if (dim == GLSL_SAMPLER_DIM_MS) {
      nir_def *samples = nir_image_deref_samples(b, 32, deref, .image_dim = dim,
                                                 .image_array = array);

      oob = nir_ior(b, oob, nir_uge(b, intr->src[2].ssa, samples));
   }

   /* Replace the last coordinate component with a large coordinate for
    * out-of-bounds. We pick 0xFFF0 as it fits in 16-bit, and it is not signed
    * as 32-bit so we won't get in-bounds coordinates for arrays due to two's
    * complement wraparound. Additionally it still meets this requirement after
    * adding 0xF, the maximum tail offset.
    *
    * This ensures the resulting hardware coordinate is definitely
    * out-of-bounds, giving hardware-level robustness2 behaviour.
    */
   unsigned c = size_components - 1;
   nir_def *r =
      nir_bcsel(b, oob, nir_imm_int(b, 0xFFF0), nir_channel(b, coord, c));

   nir_src_rewrite(&intr->src[1], nir_vector_insert_imm(b, coord, r, c));
   return true;
}

/*
 * Early texture lowering passes, called by the driver before lowering
 * descriptor bindings. That means these passes operate on texture derefs. The
 * purpose is to make descriptor crawls explicit in the NIR, so that the driver
 * can accurately lower descriptors after this pass but before calling
 * the full agx_nir_lower_texture.
 */
bool
agx_nir_lower_texture_early(nir_shader *s, bool support_lod_bias)
{
   bool progress = false;

   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower_robustness,
            nir_metadata_control_flow, NULL);

   nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0,
      .lower_invalid_implicit_lod = true,
      .lower_tg4_offsets = true,
      .lower_index_to_offset = true,

      /* Unclear if/how mipmapped 1D textures work in the hardware. */
      .lower_1d = true,

      /* XXX: Metal seems to handle this just like 3D txd, so why doesn't it
       * work? TODO: Stop using this lowering
       */
      .lower_txd_cube_map = true,
   };

   NIR_PASS(progress, s, nir_lower_tex, &lower_tex_options);

   /* Lower bias after nir_lower_tex (to get rid of txd) but before
    * lower_regular_texture (which will shuffle around the sources)
    */
   if (support_lod_bias) {
      NIR_PASS(progress, s, nir_shader_instructions_pass, lower_sampler_bias,
               nir_metadata_control_flow, NULL);
   }

   return progress;
}

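/*
 * Main texture/image lowering pass, run after the driver has lowered
 * descriptor bindings to bindless handles. Inserts image fences, legalizes
 * source types, lowers image intrinsics and texture coordinates into the
 * backend's form, and turns queries into descriptor crawls.
 */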
bool
agx_nir_lower_texture(nir_shader *s)
{
   bool progress = false;

   nir_tex_src_type_constraints tex_constraints = {
      [nir_tex_src_lod] = {true, 16},
      [nir_tex_src_bias] = {true, 16},
      [nir_tex_src_ms_index] = {true, 16},
      [nir_tex_src_min_lod] = {true, 16},
      [nir_tex_src_texture_offset] = {true, 16},
      [nir_tex_src_sampler_offset] = {true, 16},
   };

   /* Insert fences before lowering image atomics, since image atomics need
    * different fencing than other image operations.
    */
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, fence_image,
            nir_metadata_control_flow, NULL);

   NIR_PASS(progress, s, nir_lower_image_atomics_to_global);

   NIR_PASS(progress, s, nir_shader_intrinsics_pass, legalize_image_lod,
            nir_metadata_control_flow, NULL);
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower_images,
            nir_metadata_control_flow, NULL);
   NIR_PASS(progress, s, nir_legalize_16bit_sampler_srcs, tex_constraints);

   /* Fold constants after nir_legalize_16bit_sampler_srcs so we can detect 0 in
    * lower_regular_texture. This is required for correctness.
    */
   NIR_PASS(progress, s, nir_opt_constant_folding);

   /* Lower texture sources after legalizing types (as the lowering depends on
    * 16-bit multisample indices) but before lowering queries (as the lowering
    * generates txs for array textures).
    */
   NIR_PASS(progress, s, nir_shader_instructions_pass, lower_regular_texture,
            nir_metadata_none, NULL);
   NIR_PASS(progress, s, nir_shader_instructions_pass, lower_tex_crawl,
            nir_metadata_control_flow, NULL);

   return progress;
}

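/*
 * Lower a multisampled image store to a store into a 2D (1024xN) linear view
 * of the image, addressing the texel by its linear index computed from the
 * PBE descriptor.
 */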
static bool
lower_multisampled_store(nir_builder *b, nir_intrinsic_instr *intr,
                         UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   if (intr->intrinsic != nir_intrinsic_bindless_image_store)
      return false;

   if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_MS)
      return false;

   nir_def *index_px = nir_u2u32(b, image_texel_address(b, intr, true));
   nir_def *coord2d = coords_for_buffer_texture(b, index_px);

   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
   nir_src_rewrite(&intr->src[2], nir_imm_int(b, 0));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
   nir_intrinsic_set_image_array(intr, false);
   return true;
}

bool
agx_nir_lower_multisampled_image_store(nir_shader *s)
{
   return nir_shader_intrinsics_pass(s, lower_multisampled_store,
                                     nir_metadata_control_flow, NULL);
}

/*
 * Given a non-bindless instruction, return whether agx_nir_lower_texture will
 * lower it to something involving a descriptor crawl. This requires the driver
 * to lower the instruction to bindless before calling agx_nir_lower_texture.
 * The implementation just enumerates the cases handled in this file.
 */
bool
agx_nir_needs_texture_crawl(nir_instr *instr)
{
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      /* Queries, atomics always become a crawl */
      case nir_intrinsic_image_size:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_samples:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_atomic:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_atomic_swap:
      case nir_intrinsic_image_deref_atomic_swap:
         return true;

      /* Multisampled stores need a crawl, others do not */
      case nir_intrinsic_image_store:
      case nir_intrinsic_image_deref_store:
         return nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_MS;

      /* Array loads need a crawl, other loads do not */
      case nir_intrinsic_image_load:
         return nir_intrinsic_image_array(intr) ||
                nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_CUBE;

      default:
         return false;
      }
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

      /* Array textures get clamped to their size via txs */
      if (tex->is_array && !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP))
         return true;

      switch (tex->op) {
      /* Queries always become a crawl */
      case nir_texop_txs:
      case nir_texop_texture_samples:
      case nir_texop_query_levels:
         return true;

      /* Buffer textures need their format read and txf needs its LOD/layer
       * clamped.  Buffer textures are only read through txf.
       */
      case nir_texop_txf:
      case nir_texop_txf_ms:
         return has_nonzero_lod(tex) || tex->is_array ||
                tex->sampler_dim == GLSL_SAMPLER_DIM_BUF;

      default:
         return false;
      }
   }

   return false;
}