/*
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "asahi/lib/agx_nir_passes.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"
#include "util/bitset.h"
#include "agx_state.h"
#include "nir.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"

/*
 * Lower binding table textures and images to texture state registers and (if
 * necessary) bindless access into an internal table mapped like additional
 * texture state registers. The following layout is used:
 *
 *    1. Textures
 *    2. Images (read/write interleaved)
 */

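/*
 * Rewrite a texture instruction that needs a sampler to take a bindless
 * sampler handle (via load_sampler_handle_agx) instead of a sampler index.
 */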
static bool
lower_sampler(nir_builder *b, nir_tex_instr *tex)
{
   if (!nir_tex_instr_need_sampler(tex))
      return false;

   nir_def *index = nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   if (!index)
      index = nir_imm_int(b, tex->sampler_index);

   nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
                         nir_load_sampler_handle_agx(b, index));
   return true;
}

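/*
 * Per-instruction callback: lower image intrinsics and texture instructions
 * according to the layout above. Accesses with a constant index that fits in
 * the texture state registers are remapped directly; everything else is
 * lowered to a bindless handle.
 */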
static bool
lower(nir_builder *b, nir_instr *instr, void *data)
{
   bool *uses_bindless_samplers = data;
   bool progress = false;
   bool force_bindless = agx_nir_needs_texture_crawl(instr);
   b->cursor = nir_before_instr(instr);

   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      nir_intrinsic_op bindless_op;

#define CASE(op)                                                               \
   case nir_intrinsic_##op:                                                    \
      bindless_op = nir_intrinsic_bindless_##op;                               \
      break;

      switch (intr->intrinsic) {
         CASE(image_load)
         CASE(image_store)
         CASE(image_size)
         CASE(image_samples)
         CASE(image_atomic)
         CASE(image_atomic_swap)
      default:
         return false;
      }
#undef CASE

      nir_def *index = intr->src[0].ssa;
      nir_scalar index_scalar = nir_scalar_resolved(index, 0);

      /* Remap according to the driver layout */
      unsigned offset = BITSET_LAST_BIT(b->shader->info.textures_used);

      /* For reads and queries, we use the texture descriptor which is first.
       * Writes and atomics use the PBE descriptor.
       */
      if (intr->intrinsic != nir_intrinsic_image_load &&
          intr->intrinsic != nir_intrinsic_image_size &&
          intr->intrinsic != nir_intrinsic_image_samples)
         offset++;

      /* If we can determine statically that the image fits in texture state
       * registers, avoid lowering to bindless access.
       */
      if (nir_scalar_is_const(index_scalar) && !force_bindless) {
         unsigned idx = (nir_scalar_as_uint(index_scalar) * 2) + offset;

         if (idx < AGX_NUM_TEXTURE_STATE_REGS) {
            nir_src_rewrite(&intr->src[0], nir_imm_intN_t(b, idx, 16));
            return true;
         }
      }

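      /* The atomic op is stored at an opcode-specific intrinsic index, so read
       * it out before switching to the bindless opcode and write it back
       * afterwards.
       */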
      nir_atomic_op op = nir_atomic_op_iadd /* irrelevant */;
      if (nir_intrinsic_has_atomic_op(intr))
         op = nir_intrinsic_atomic_op(intr);

      /* Otherwise, lower to bindless */
      intr->intrinsic = bindless_op;

      if (nir_intrinsic_has_atomic_op(intr))
         nir_intrinsic_set_atomic_op(intr, op);

      /* The driver uploads enough null texture/PBE descriptors for robustness
       * given the shader limit, but we still need to clamp since we're
       * lowering to bindless so the hardware doesn't know the limit.
       *
       * The GL spec says out-of-bounds image indexing is undefined, but
       * faulting is not acceptable for robustness.
       */
      index = nir_umin(
         b, index,
         nir_imm_intN_t(b, b->shader->info.num_images - 1, index->bit_size));

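      /* Each image occupies two consecutive entries (texture descriptor, then
       * PBE descriptor), hence the multiply by 2 before adding the offset past
       * the real textures.
       */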
      index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
      nir_src_rewrite(&intr->src[0], nir_load_texture_handle_agx(b, index));
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

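      /* With more than 16 samplers bound, a sampler whose index is not known
       * to be below 16 does not fit in the hardware sampler state registers,
       * so switch it to a bindless sampler handle.
       */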
      if (((BITSET_COUNT(b->shader->info.samplers_used) > 16) &&
           (nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset) >= 0 ||
            tex->sampler_index >= 16)) &&
          lower_sampler(b, tex)) {
         progress = true;
         *uses_bindless_samplers = true;
      }

      /* Nothing to do for "real" bindless */
      if (nir_tex_instr_src_index(tex, nir_tex_src_texture_handle) >= 0)
         return progress;

      /* Textures are mapped 1:1, so if we can prove it fits in a texture state
       * register, use the texture state register.
       */
      if (tex->texture_index < AGX_NUM_TEXTURE_STATE_REGS &&
          nir_tex_instr_src_index(tex, nir_tex_src_texture_offset) == -1 &&
          !force_bindless)
         return progress;

      /* Otherwise, lower to bindless. Could be optimized. */
      nir_def *index = nir_steal_tex_src(tex, nir_tex_src_texture_offset);
      if (!index)
         index = nir_imm_int(b, tex->texture_index);

      /* As above */
      index = nir_umin(
         b, index,
         nir_imm_intN_t(b, b->shader->info.num_textures - 1, index->bit_size));

      nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
                            nir_load_texture_handle_agx(b, index));
   } else {
      /* Other instruction types are left untouched, so report no progress */
      return false;
   }

   return true;
}

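/*
 * Shader-level entry point. Reports through uses_bindless_samplers whether
 * any bindless sampler accesses were generated.
 */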
bool
agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers)
{
   /* First lower index to offset so we can lower more naturally */
   bool progress = nir_lower_tex(
      shader, &(nir_lower_tex_options){.lower_index_to_offset = true});

   /* Next, run constant folding so the constant-index fast paths above have a
    * chance to hit.
    */
   progress |= nir_opt_constant_folding(shader);

   progress |= nir_shader_instructions_pass(
      shader, lower, nir_metadata_control_flow, uses_bindless_samplers);
   return progress;
}