xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_nir_lower_tex_prefetch.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2019 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "ir3_nir.h"
7 
/**
 * A pass which detects tex instructions which are candidates to be executed
 * prior to FS shader start, and changes them to nir_texop_tex_prefetch.
 */
12 
13 static int
coord_offset(nir_def * ssa)14 coord_offset(nir_def *ssa)
15 {
16    nir_instr *parent_instr = ssa->parent_instr;
17 
18    /* The coordinate of a texture sampling instruction eligible for
19     * pre-fetch is either going to be a load_interpolated_input/
20     * load_input, or a vec2 assembling non-swizzled components of
21     * a load_interpolated_input/load_input (due to varying packing)
22     */
23 
24    if (parent_instr->type == nir_instr_type_alu) {
25       nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
26 
27       if (alu->op != nir_op_vec2)
28          return -1;
29 
30       int base_src_offset = coord_offset(alu->src[0].src.ssa);
31       if (base_src_offset < 0)
32          return -1;
33 
34       int base_offset = base_src_offset + alu->src[0].swizzle[0];
35 
36       /* NOTE it might be possible to support more than 2D? */
37       for (int i = 1; i < 2; i++) {
38          int nth_src_offset = coord_offset(alu->src[i].src.ssa);
39          if (nth_src_offset < 0)
40             return -1;
41          int nth_offset = nth_src_offset + alu->src[i].swizzle[0];
42 
43          if (nth_offset != (base_offset + i))
44             return -1;
45       }
46 
47       return base_offset;
48    }
49 
50    if (parent_instr->type != nir_instr_type_intrinsic)
51       return -1;
52 
53    nir_intrinsic_instr *input = nir_instr_as_intrinsic(parent_instr);
54 
55    if (input->intrinsic != nir_intrinsic_load_interpolated_input)
56       return -1;
57 
58    /* Happens with lowered load_barycentric_at_offset */
59    if (input->src[0].ssa->parent_instr->type != nir_instr_type_intrinsic)
60       return -1;
61 
62    nir_intrinsic_instr *interp =
63       nir_instr_as_intrinsic(input->src[0].ssa->parent_instr);
64 
65    if (interp->intrinsic != nir_intrinsic_load_barycentric_pixel)
66       return -1;
67 
68    /* interpolation modes such as noperspective aren't covered by the other
69     * test, we need to explicitly check for them here.
70     */
71    unsigned interp_mode = nir_intrinsic_interp_mode(interp);
72    if (interp_mode != INTERP_MODE_NONE && interp_mode != INTERP_MODE_SMOOTH)
73       return -1;
74 
75    /* we also need a const input offset: */
76    if (!nir_src_is_const(input->src[1]))
77       return -1;
78 
79    unsigned base = nir_src_as_uint(input->src[1]) + nir_intrinsic_base(input);
80    unsigned comp = nir_intrinsic_component(input);
81 
82    return (4 * base) + comp;
83 }
84 
85 int
ir3_nir_coord_offset(nir_def * ssa)86 ir3_nir_coord_offset(nir_def *ssa)
87 {
88 
89    assert(ssa->num_components == 2);
90    return coord_offset(ssa);
91 }
92 
93 static bool
has_src(nir_tex_instr * tex,nir_tex_src_type type)94 has_src(nir_tex_instr *tex, nir_tex_src_type type)
95 {
96    return nir_tex_instr_src_index(tex, type) >= 0;
97 }
98 
99 static bool
ok_bindless_src(nir_tex_instr * tex,nir_tex_src_type type)100 ok_bindless_src(nir_tex_instr *tex, nir_tex_src_type type)
101 {
102    int idx = nir_tex_instr_src_index(tex, type);
103    assert(idx >= 0);
104    nir_intrinsic_instr *bindless = ir3_bindless_resource(tex->src[idx].src);
105 
106    /* TODO from SP_FS_BINDLESS_PREFETCH[n] it looks like this limit should
107     * be 1<<8 ?
108     */
109    return nir_src_is_const(bindless->src[0]) &&
110           (nir_src_as_uint(bindless->src[0]) < (1 << 16));
111 }
112 
113 /**
114  * Check that we will be able to encode the tex/samp parameters
115  * successfully.  These limits are based on the layout of
116  * SP_FS_PREFETCH[n] and SP_FS_BINDLESS_PREFETCH[n], so at some
117  * point (if those regs changes) they may become generation
118  * specific.
119  */
120 static bool
ok_tex_samp(nir_tex_instr * tex)121 ok_tex_samp(nir_tex_instr *tex)
122 {
123    if (has_src(tex, nir_tex_src_texture_handle)) {
124       /* bindless case: */
125 
126       assert(has_src(tex, nir_tex_src_sampler_handle));
127 
128       return ok_bindless_src(tex, nir_tex_src_texture_handle) &&
129              ok_bindless_src(tex, nir_tex_src_sampler_handle);
130    } else {
131       assert(!has_src(tex, nir_tex_src_texture_offset));
132       assert(!has_src(tex, nir_tex_src_sampler_offset));
133 
134       return (tex->texture_index <= 0x1f) && (tex->sampler_index <= 0xf);
135    }
136 }
137 
138 static bool
lower_tex_prefetch_block(nir_block * block)139 lower_tex_prefetch_block(nir_block *block)
140 {
141    bool progress = false;
142 
143    nir_foreach_instr_safe (instr, block) {
144       if (instr->type != nir_instr_type_tex)
145          continue;
146 
147       nir_tex_instr *tex = nir_instr_as_tex(instr);
148       if (tex->op != nir_texop_tex)
149          continue;
150 
151       if (has_src(tex, nir_tex_src_bias) || has_src(tex, nir_tex_src_lod) ||
152           has_src(tex, nir_tex_src_comparator) ||
153           has_src(tex, nir_tex_src_projector) ||
154           has_src(tex, nir_tex_src_offset) || has_src(tex, nir_tex_src_ddx) ||
155           has_src(tex, nir_tex_src_ddy) || has_src(tex, nir_tex_src_ms_index) ||
156           has_src(tex, nir_tex_src_texture_offset) ||
157           has_src(tex, nir_tex_src_sampler_offset))
158          continue;
159 
160       /* only prefetch for simple 2d tex fetch case */
161       if (tex->sampler_dim != GLSL_SAMPLER_DIM_2D || tex->is_array)
162          continue;
163 
164       if (!ok_tex_samp(tex))
165          continue;
166 
167       int idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
168       /* First source should be the sampling coordinate. */
169       nir_tex_src *coord = &tex->src[idx];
170 
171       if (ir3_nir_coord_offset(coord->src.ssa) >= 0) {
172          tex->op = nir_texop_tex_prefetch;
173 
174          progress |= true;
175       }
176    }
177 
178    return progress;
179 }
180 
181 static bool
lower_tex_prefetch_func(nir_function_impl * impl)182 lower_tex_prefetch_func(nir_function_impl *impl)
183 {
184    /* Only instructions in the the outer-most block are considered eligible for
185     * pre-dispatch, because they need to be move-able to the beginning of the
186     * shader to avoid locking down the register holding the pre-fetched result
187     * for too long. However if there is a preamble we should skip the preamble
188     * and only look in the first block after the preamble instead, because that
189     * corresponds to the first block in the original program and texture fetches
190     * in the preamble are never pre-dispatchable.
191     */
192    nir_block *block = nir_start_block(impl);
193 
194    nir_if *nif = nir_block_get_following_if(block);
195    if (nif) {
196       nir_instr *cond = nif->condition.ssa->parent_instr;
197       if (cond->type == nir_instr_type_intrinsic &&
198           nir_instr_as_intrinsic(cond)->intrinsic ==
199           nir_intrinsic_preamble_start_ir3) {
200          block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
201       }
202    }
203 
204    bool progress = lower_tex_prefetch_block(block);
205 
206    if (progress) {
207       nir_metadata_preserve(impl,
208                             nir_metadata_control_flow);
209    }
210 
211    return progress;
212 }
213 
214 bool
ir3_nir_lower_tex_prefetch(nir_shader * shader)215 ir3_nir_lower_tex_prefetch(nir_shader *shader)
216 {
217    bool progress = false;
218 
219    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
220 
221    nir_foreach_function (function, shader) {
222       /* Only texture sampling instructions inside the main function
223        * are eligible for pre-dispatch.
224        */
225       if (!function->impl || !function->is_entrypoint)
226          continue;
227 
228       progress |= lower_tex_prefetch_func(function->impl);
229    }
230 
231    return progress;
232 }
233