/*
 * Copyright © 2023 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "ac_nir.h"
#include "nir_builder.h"

/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p deriv, for the face implied by \p id.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * \p ma; i.e., a positive out_ma means that deriv points towards the
 * major axis selected by \p ma.
 */
static void
build_cube_select(nir_builder *b, nir_def *ma, nir_def *id, nir_def *deriv,
                  nir_def **out_ma, nir_def **out_sc, nir_def **out_tc)
{
   nir_def *deriv_x = nir_channel(b, deriv, 0);
   nir_def *deriv_y = nir_channel(b, deriv, 1);
   nir_def *deriv_z = nir_channel(b, deriv, 2);

   nir_def *is_ma_positive = nir_fge_imm(b, ma, 0.0);
   nir_def *sgn_ma =
      nir_bcsel(b, is_ma_positive, nir_imm_float(b, 1.0), nir_imm_float(b, -1.0));
   nir_def *neg_sgn_ma = nir_fneg(b, sgn_ma);

   nir_def *is_ma_z = nir_fge_imm(b, id, 4.0);
   nir_def *is_ma_y = nir_fge_imm(b, id, 2.0);
   is_ma_y = nir_iand(b, is_ma_y, nir_inot(b, is_ma_z));
   nir_def *is_not_ma_x = nir_ior(b, is_ma_z, is_ma_y);

   /* Select sc */
   nir_def *tmp = nir_bcsel(b, is_not_ma_x, deriv_x, deriv_z);
   nir_def *sgn =
      nir_bcsel(b, is_ma_y, nir_imm_float(b, 1.0), nir_bcsel(b, is_ma_z, sgn_ma, neg_sgn_ma));
   *out_sc = nir_fmul(b, tmp, sgn);

   /* Select tc */
   tmp = nir_bcsel(b, is_ma_y, deriv_z, deriv_y);
   sgn = nir_bcsel(b, is_ma_y, sgn_ma, nir_imm_float(b, -1.0));
   *out_tc = nir_fmul(b, tmp, sgn);

   /* Select ma */
   tmp = nir_bcsel(b, is_ma_z, deriv_z, nir_bcsel(b, is_ma_y, deriv_y, deriv_x));
   *out_ma = nir_fmul_imm(b, nir_fabs(b, tmp), 2.0);
}
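
/* For intuition, a worked decode of the checks above (assuming the v_cubeid
 * face numbering 0..5 = +X, -X, +Y, -Y, +Z, -Z):
 *
 *    id = 3.0 (-Y face):
 *       is_ma_z     = (3.0 >= 4.0)             = false
 *       is_ma_y     = (3.0 >= 2.0) && !is_ma_z = true
 *       is_not_ma_x = is_ma_z || is_ma_y       = true
 *
 * so sc selects deriv_x with sign +1.0 and tc selects deriv_z with sign
 * sgn_ma = -1.0 (since ma = y < 0), matching the GL cube map table for the
 * -Y face (sc = +rx, tc = -rz).
 */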

static void
prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src *ddx,
                    nir_src *ddy, const ac_nir_lower_tex_options *options)
{
   nir_def *coords[NIR_MAX_VEC_COMPONENTS] = {0};
   for (unsigned i = 0; i < (*coord)->num_components; i++)
      coords[i] = nir_channel(b, *coord, i);

   /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
    *
    *    "For Array forms, the array layer used will be
    *
    *       max(0, min(d−1, floor(layer+0.5)))
    *
    *     where d is the depth of the texture array and layer
    *     comes from the component indicated in the tables below."
    *
    * This clamping also works around an issue where the layer is
    * taken from a helper invocation which happens to fall on a
    * different layer due to extrapolation.
    *
    * GFX8 and earlier attempt to implement this in hardware by
    * clamping the value of coords[2] = (8 * layer) + face.
    * Unfortunately, this means that we end up with the wrong
    * face when clamping occurs.
    *
    * Clamp the layer earlier to work around the issue.
    */
   if (tex->is_array && options->gfx_level <= GFX8 && coords[3])
      coords[3] = nir_fmax(b, coords[3], nir_imm_float(b, 0.0));

   nir_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3));
   nir_def *sc = nir_channel(b, cube_coords, 1);
   nir_def *tc = nir_channel(b, cube_coords, 0);
   nir_def *ma = nir_channel(b, cube_coords, 2);
   nir_def *invma = nir_frcp(b, nir_fabs(b, ma));
   nir_def *id = nir_channel(b, cube_coords, 3);

   if (ddx || ddy) {
      sc = nir_fmul(b, sc, invma);
      tc = nir_fmul(b, tc, invma);

      /* Convert cube derivatives to 2D derivatives. */
      for (unsigned i = 0; i < 2; i++) {
         /* Transform the derivative alongside the texture
          * coordinate. Mathematically, the correct formula is
          * as follows. Assume we're projecting onto the +Z face
          * and denote by dx/dh the derivative of the (original)
          * X texture coordinate with respect to horizontal
          * window coordinates. The projection onto the +Z face
          * plane is:
          *
          *   f(x,z) = x/z
          *
          * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
          *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
          *
          * This motivates the implementation below.
          *
          * Whether this actually gives the expected results for
          * apps that might feed in derivatives obtained via
          * finite differences is anyone's guess. The OpenGL spec
          * seems awfully quiet about how textureGrad for cube
          * maps should be handled.
          */
         nir_def *deriv_ma, *deriv_sc, *deriv_tc;
         build_cube_select(b, ma, id, i ? ddy->ssa : ddx->ssa, &deriv_ma, &deriv_sc, &deriv_tc);

         deriv_ma = nir_fmul(b, deriv_ma, invma);

         nir_def *x = nir_fsub(b, nir_fmul(b, deriv_sc, invma), nir_fmul(b, deriv_ma, sc));
         nir_def *y = nir_fsub(b, nir_fmul(b, deriv_tc, invma), nir_fmul(b, deriv_ma, tc));

         nir_src_rewrite(i ? ddy : ddx, nir_vec2(b, x, y));
      }

      sc = nir_fadd_imm(b, sc, 1.5);
      tc = nir_fadd_imm(b, tc, 1.5);
   } else {
      sc = nir_ffma_imm2(b, sc, invma, 1.5);
      tc = nir_ffma_imm2(b, tc, invma, 1.5);
   }

   if (tex->is_array && coords[3])
      id = nir_ffma_imm1(b, coords[3], 8.0, id);

   *coord = nir_vec3(b, sc, tc, id);

   tex->is_array = true;
}
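
/* An illustrative walk-through of the no-derivatives path above (assuming
 * the GL face conventions and that v_cubema returns 2 * the major axis):
 * for the direction (0.4, -0.2, 1.0) the +Z face wins, so id = 4.0,
 * ma = 2.0 and invma = 0.5. With sc = +x and tc = -y on +Z:
 *
 *    sc = fma(0.4, 0.5, 1.5) = 1.7
 *    tc = fma(0.2, 0.5, 1.5) = 1.6
 *
 * i.e. face coordinates in [1.0, 2.0], the range the image instructions
 * expect. For a cube array with layer 2, the combined slice becomes
 * 8 * 2 + 4 = 20.
 */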

static bool
lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex, nir_def **coords)
{
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   if (coord_index < 0 || nir_tex_instr_src_type(tex, coord_index) != nir_type_float)
      return false;

   unsigned layer = tex->coord_components - 1;
   nir_def *rounded_layer = nir_fround_even(b, nir_channel(b, *coords, layer));
   *coords = nir_vector_insert_imm(b, *coords, rounded_layer, layer);
   return true;
}
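
/* Why round-to-even helps: APIs that specify round-to-nearest-even for the
 * array layer disagree with hardware that computes floor(layer + 0.5)
 * exactly at half-way values (a hedged example, not tied to a specific
 * hardware generation):
 *
 *    layer = 2.5: fround_even -> 2.0, but floor(2.5 + 0.5) -> 3.0
 *
 * Rounding in the shader first makes the layer integral, so whatever
 * rounding the sampler applies afterwards is a no-op.
 */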

static bool
lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coords,
                 const ac_nir_lower_tex_options *options)
{
   bool progress = false;
   if ((options->lower_array_layer_round_even || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) &&
       tex->is_array && tex->op != nir_texop_lod)
      progress |= lower_array_layer_round_even(b, tex, coords);

   if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
      return progress;

   int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
   int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
   nir_src *ddx = ddx_idx >= 0 ? &tex->src[ddx_idx].src : NULL;
   nir_src *ddy = ddy_idx >= 0 ? &tex->src[ddy_idx].src : NULL;

   prepare_cube_coords(b, tex, coords, ddx, ddy, options);

   return true;
}

static bool
lower_tex(nir_builder *b, nir_instr *instr, void *options_)
{
   const ac_nir_lower_tex_options *options = options_;
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   if (coord_idx < 0 || nir_tex_instr_src_index(tex, nir_tex_src_backend1) >= 0)
      return false;

   b->cursor = nir_before_instr(instr);
   nir_def *coords = tex->src[coord_idx].src.ssa;
   if (lower_tex_coords(b, tex, &coords, options)) {
      tex->coord_components = coords->num_components;
      nir_src_rewrite(&tex->src[coord_idx].src, coords);
      return true;
   }

   return false;
}

typedef struct {
   nir_intrinsic_instr *bary;
   nir_intrinsic_instr *load;
} coord_info;

static bool
can_move_coord(nir_scalar scalar, coord_info *info)
{
   if (scalar.def->bit_size != 32)
      return false;

   if (nir_scalar_is_const(scalar))
      return true;

   if (!nir_scalar_is_intrinsic(scalar))
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
   if (intrin->intrinsic == nir_intrinsic_load_input ||
       intrin->intrinsic == nir_intrinsic_load_per_primitive_input) {
      info->bary = NULL;
      info->load = intrin;
      return true;
   }

   if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   nir_scalar coord_x = nir_scalar_resolved(intrin->src[0].ssa, 0);
   nir_scalar coord_y = nir_scalar_resolved(intrin->src[0].ssa, 1);
   if (!nir_scalar_is_intrinsic(coord_x) || coord_x.comp != 0 ||
       !nir_scalar_is_intrinsic(coord_y) || coord_y.comp != 1)
      return false;

   nir_intrinsic_instr *intrin_x = nir_instr_as_intrinsic(coord_x.def->parent_instr);
   nir_intrinsic_instr *intrin_y = nir_instr_as_intrinsic(coord_y.def->parent_instr);
   if (intrin_x->intrinsic != intrin_y->intrinsic ||
       (intrin_x->intrinsic != nir_intrinsic_load_barycentric_sample &&
        intrin_x->intrinsic != nir_intrinsic_load_barycentric_pixel &&
        intrin_x->intrinsic != nir_intrinsic_load_barycentric_centroid) ||
       nir_intrinsic_interp_mode(intrin_x) != nir_intrinsic_interp_mode(intrin_y))
      return false;

   info->bary = intrin_x;
   info->load = intrin;

   return true;
}
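
/* A GLSL-level sketch of what qualifies (names are hypothetical):
 *
 *    in vec2 uv;
 *    vec4 a = texture(s, uv);          // movable: the coordinates resolve to
 *                                      // load_interpolated_input fed by a
 *                                      // pixel/centroid/sample barycentric
 *    vec4 b = texture(s, uv * 2.0);    // not movable: an ALU result
 *
 * Constants and flat/per-primitive input loads are also accepted; anything
 * else (ALU results, UBO loads, function temporaries, ...) is rejected so
 * that the load can be rebuilt verbatim at top level.
 */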

struct move_tex_coords_state {
   const ac_nir_lower_tex_options *options;
   unsigned num_wqm_vgprs;
   nir_builder toplevel_b;
};

static nir_def *
build_coordinate(struct move_tex_coords_state *state, nir_scalar scalar, coord_info info)
{
   nir_builder *b = &state->toplevel_b;

   if (nir_scalar_is_const(scalar))
      return nir_imm_intN_t(b, nir_scalar_as_uint(scalar), scalar.def->bit_size);

   ASSERTED nir_src offset = *nir_get_io_offset_src(info.load);
   assert(nir_src_is_const(offset) && !nir_src_as_uint(offset));

   nir_def *zero = nir_imm_int(b, 0);
   nir_def *res;
   if (info.bary) {
      enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(info.bary);
      nir_def *bary = nir_load_system_value(b, info.bary->intrinsic, interp_mode, 2, 32);
      res = nir_load_interpolated_input(b, 1, 32, bary, zero);
   } else {
      res = nir_load_input(b, 1, 32, zero);
   }
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(res->parent_instr);
   nir_intrinsic_set_base(intrin, nir_intrinsic_base(info.load));
   nir_intrinsic_set_component(intrin, nir_intrinsic_component(info.load) + scalar.comp);
   nir_intrinsic_set_dest_type(intrin, nir_intrinsic_dest_type(info.load));
   nir_intrinsic_set_io_semantics(intrin, nir_intrinsic_io_semantics(info.load));
   return res;
}
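
/* E.g. a movable scalar that came from a load_interpolated_input with
 * base=1, component=2 is rebuilt at the top-level cursor with the same
 * base, component (plus the scalar's component offset), dest_type and
 * io_semantics; only the barycentric source is materialized anew.
 */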

static bool
move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, nir_instr *instr)
{
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->op != nir_texop_tex && tex->op != nir_texop_txb && tex->op != nir_texop_lod)
      return false;

   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_BUF:
   case GLSL_SAMPLER_DIM_MS:
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      return false; /* No LOD or can't be sampled. */
   }

   if (nir_tex_instr_src_index(tex, nir_tex_src_min_lod) != -1)
      return false;

   nir_tex_src *src = &tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)];
   nir_scalar components[NIR_MAX_VEC_COMPONENTS];
   coord_info infos[NIR_MAX_VEC_COMPONENTS];
   bool can_move_all = true;
   for (unsigned i = 0; i < tex->coord_components; i++) {
      components[i] = nir_scalar_resolved(src->src.ssa, i);
      can_move_all &= can_move_coord(components[i], &infos[i]);
   }
   if (!can_move_all)
      return false;

   int coord_base = 0;
   unsigned linear_vgpr_size = tex->coord_components;
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array)
      linear_vgpr_size--; /* cube array layer and face are combined */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_offset:
      case nir_tex_src_bias:
      case nir_tex_src_comparator:
         coord_base++;
         linear_vgpr_size++;
         break;
      default:
         break;
      }
   }

   if (state->num_wqm_vgprs + linear_vgpr_size > state->options->max_wqm_vgprs)
      return false;

   for (unsigned i = 0; i < tex->coord_components; i++)
      components[i] = nir_get_scalar(build_coordinate(state, components[i], infos[i]), 0);

   nir_def *linear_vgpr = nir_vec_scalars(&state->toplevel_b, components, tex->coord_components);
   lower_tex_coords(&state->toplevel_b, tex, &linear_vgpr, state->options);

   linear_vgpr = nir_strict_wqm_coord_amd(&state->toplevel_b, linear_vgpr, coord_base * 4);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_coord));
   tex->coord_components = 0;

   nir_tex_instr_add_src(tex, nir_tex_src_backend1, linear_vgpr);

   int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_src >= 0) /* Workaround requirement in nir_tex_instr_src_size(). */
      tex->src[offset_src].src_type = nir_tex_src_backend2;

   state->num_wqm_vgprs += linear_vgpr_size;

   return true;
}
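
/* A rough before/after sketch of the transform (abbreviated NIR):
 *
 *    // before: coordinates and sample both in divergent control flow
 *    if ssa_cond {
 *       ssa_uv = load_interpolated_input ...
 *       ssa_r  = (tex) ssa_uv ...
 *    }
 *
 *    // after: coordinates hoisted to top level
 *    ssa_uv = load_interpolated_input ...
 *    ssa_w  = strict_wqm_coord_amd ssa_uv      // becomes nir_tex_src_backend1
 *    if ssa_cond {
 *       ssa_r = (tex) ssa_w ...
 *    }
 *
 * The implicit derivatives are then computed while all lanes of each quad
 * are active, at the cost of linear-VGPR space tracked in num_wqm_vgprs.
 */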

static bool
move_ddxy(struct move_tex_coords_state *state, nir_function_impl *impl, nir_intrinsic_instr *instr)
{
   unsigned num_components = instr->def.num_components;
   nir_scalar components[NIR_MAX_VEC_COMPONENTS];
   coord_info infos[NIR_MAX_VEC_COMPONENTS];
   bool can_move_all = true;
   for (unsigned i = 0; i < num_components; i++) {
      components[i] = nir_scalar_resolved(instr->src[0].ssa, i);
      can_move_all &= can_move_coord(components[i], &infos[i]);
   }
   if (!can_move_all || state->num_wqm_vgprs + num_components > state->options->max_wqm_vgprs)
      return false;

   for (unsigned i = 0; i < num_components; i++) {
      nir_def *def = build_coordinate(state, components[i], infos[i]);
      components[i] = nir_get_scalar(def, 0);
   }

   nir_def *def = nir_vec_scalars(&state->toplevel_b, components, num_components);
   def = _nir_build_ddx(&state->toplevel_b, def->bit_size, def);
   nir_instr_as_intrinsic(def->parent_instr)->intrinsic = instr->intrinsic;
   nir_def_rewrite_uses(&instr->def, def);

   state->num_wqm_vgprs += num_components;

   return true;
}
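
/* E.g. (GLSL-level, names hypothetical):
 *
 *    if (divergentCond)
 *       w = dFdx(uv);     // ddx intrinsic in divergent control flow
 *
 * If uv resolves to plain input loads, the loads are rebuilt at top level
 * and the ddx/ddy is computed there, where every quad is still complete;
 * only the already-computed result is consumed inside the branch.
 */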

static bool
move_coords_from_divergent_cf(struct move_tex_coords_state *state, nir_function_impl *impl,
                              struct exec_list *cf_list, bool *divergent_discard, bool divergent_cf)
{
   bool progress = false;
   foreach_list_typed (nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(cf_node);

         bool top_level = cf_list == &impl->body;

         nir_foreach_instr (instr, block) {
            if (top_level && !*divergent_discard)
               state->toplevel_b.cursor = nir_before_instr(instr);

            if (instr->type == nir_instr_type_tex && (divergent_cf || *divergent_discard)) {
               progress |= move_tex_coords(state, impl, instr);
            } else if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               switch (intrin->intrinsic) {
               case nir_intrinsic_terminate:
                  if (divergent_cf)
                     *divergent_discard = true;
                  break;
               case nir_intrinsic_terminate_if:
                  if (divergent_cf || nir_src_is_divergent(intrin->src[0]))
                     *divergent_discard = true;
                  break;
               case nir_intrinsic_ddx:
               case nir_intrinsic_ddy:
               case nir_intrinsic_ddx_fine:
               case nir_intrinsic_ddy_fine:
               case nir_intrinsic_ddx_coarse:
               case nir_intrinsic_ddy_coarse:
                  if (divergent_cf || *divergent_discard)
                     progress |= move_ddxy(state, impl, intrin);
                  break;
               default:
                  break;
               }
            }
         }

         if (top_level && !*divergent_discard)
            state->toplevel_b.cursor = nir_after_block_before_jump(block);
         break;
      }
      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         bool divergent_discard_then = *divergent_discard;
         bool divergent_discard_else = *divergent_discard;
         bool then_else_divergent = divergent_cf || nir_src_is_divergent(nif->condition);
         progress |= move_coords_from_divergent_cf(state, impl, &nif->then_list,
                                                   &divergent_discard_then, then_else_divergent);
         progress |= move_coords_from_divergent_cf(state, impl, &nif->else_list,
                                                   &divergent_discard_else, then_else_divergent);
         *divergent_discard |= divergent_discard_then || divergent_discard_else;
         break;
      }
      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         assert(!nir_loop_has_continue_construct(loop));
         progress |=
            move_coords_from_divergent_cf(state, impl, &loop->body, divergent_discard, true);
         break;
      }
      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}
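
/* Why divergent_discard is tracked separately from divergent_cf: after a
 * divergent terminate, even straight-line code at top level may run with
 * parts of a quad already disabled, e.g.
 *
 *    if (cond)                  // divergent
 *       discard;
 *    vec4 c = texture(s, uv);   // quad may be incomplete here
 *
 * so implicit derivatives after the discard are just as unreliable as
 * inside the branch. This is also why the top-level cursor stops advancing
 * once *divergent_discard is set: rebuilt coordinates are inserted before
 * the first divergent discard.
 */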

bool
ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options)
{
   bool progress = false;
   if (options->fix_derivs_in_divergent_cf) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);

      struct move_tex_coords_state state;
      state.toplevel_b = nir_builder_create(impl);
      state.options = options;
      state.num_wqm_vgprs = 0;

      bool divergent_discard = false;
      if (move_coords_from_divergent_cf(&state, impl, &impl->body, &divergent_discard, false)) {
         nir_metadata_preserve(impl, nir_metadata_control_flow);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   progress |= nir_shader_instructions_pass(
      nir, lower_tex, nir_metadata_control_flow, (void *)options);

   return progress;
}
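
/* Usage sketch (field values are illustrative; the struct members are the
 * ones consumed above):
 *
 *    ac_nir_lower_tex_options options = {
 *       .gfx_level = gfx_level,
 *       .lower_array_layer_round_even = !has_round_even_layer_hw,
 *       .fix_derivs_in_divergent_cf = is_fragment_shader,
 *       .max_wqm_vgprs = 64,
 *    };
 *    NIR_PASS(progress, nir, ac_nir_lower_tex, &options);
 */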
508