xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_nir_lower_resinfo.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 /* Implement query_size, query_levels, and query_samples by extracting the information from
8  * descriptors. This is expected to be faster than image_resinfo.
9  */
10 
11 #include "ac_nir.h"
12 #include "nir_builder.h"
13 #include "amdgfxregs.h"
14 
/* Extract one bitfield from a descriptor dword.
 *
 * "index" selects the descriptor channel. "mask" has the bits of the field set (callers pass
 * the complement of a C_* clear mask); its lowest set bit gives the bit offset and its
 * population count gives the field width, so the mask is treated as contiguous.
 */
static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
{
   const unsigned first_bit = ffs(mask) - 1;
   const unsigned num_bits = util_bitcount(mask);
   nir_def *dword = nir_channel(b, desc, index);

   return nir_ubfe_imm(b, dword, first_bit, num_bits);
}
19 
/* Select 0 instead of "value" when the descriptor is null.
 *
 * A descriptor is treated as null when its dword 1 is equal to zero.
 */
static nir_def *handle_null_desc(nir_builder *b, nir_def *desc, nir_def *value)
{
   nir_def *dword1 = nir_channel(b, desc, 1);
   nir_def *desc_is_null = nir_ieq_imm(b, dword1, 0);
   nir_def *zero = nir_imm_int(b, 0);

   return nir_bcsel(b, desc_is_null, zero, value);
}
25 
/* Return the number of samples of a resource, or 0 for a null descriptor.
 *
 * Only GLSL_SAMPLER_DIM_MS reads the descriptor for the count; every other dimension
 * reports 1 sample.
 */
static nir_def *query_samples(nir_builder *b, nir_def *desc, enum glsl_sampler_dim dim,
                              enum amd_gfx_level gfx_level)
{
   if (dim != GLSL_SAMPLER_DIM_MS)
      return handle_null_desc(b, desc, nir_imm_int(b, 1));

   /* LAST_LEVEL contains log2(num_samples). The field mask differs on GFX12+. */
   const unsigned last_level_mask =
      gfx_level >= GFX12 ? ~C_00A00C_LAST_LEVEL_GFX12 : ~C_00A00C_LAST_LEVEL_GFX10;
   nir_def *log2_samples = get_field(b, desc, 3, last_level_mask);
   nir_def *one = nir_imm_int(b, 1);
   nir_def *num_samples = nir_ishl(b, one, log2_samples);

   return handle_null_desc(b, desc, num_samples);
}
44 
/* Return the number of mip levels (LAST_LEVEL - BASE_LEVEL + 1), or 0 for a null descriptor.
 *
 * On GFX12+, BASE_LEVEL is read from dword 1 and LAST_LEVEL uses the GFX12 mask; older chips
 * read both fields from dword 3.
 */
static nir_def *query_levels(nir_builder *b, nir_def *desc, enum amd_gfx_level gfx_level)
{
   const bool is_gfx12_plus = gfx_level >= GFX12;

   nir_def *first_level = is_gfx12_plus ? get_field(b, desc, 1, ~C_00A004_BASE_LEVEL)
                                        : get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
   nir_def *final_level = is_gfx12_plus ? get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL_GFX12)
                                        : get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL_GFX10);

   nir_def *num_levels = nir_isub(b, final_level, first_level);
   num_levels = nir_iadd_imm(b, num_levels, 1);

   return handle_null_desc(b, desc, num_levels);
}
61 
62 static nir_def *
lower_query_size(nir_builder * b,nir_def * desc,nir_src * lod,enum glsl_sampler_dim dim,bool is_array,enum amd_gfx_level gfx_level)63 lower_query_size(nir_builder *b, nir_def *desc, nir_src *lod,
64                  enum glsl_sampler_dim dim, bool is_array, enum amd_gfx_level gfx_level)
65 {
66    if (dim == GLSL_SAMPLER_DIM_BUF) {
67       nir_def *size = nir_channel(b, desc, 2);
68 
69       if (gfx_level == GFX8) {
70          /* On GFX8, the descriptor contains the size in bytes,
71           * but TXQ must return the size in elements.
72           * The stride is always non-zero for resources using TXQ.
73           * Divide the size by the stride.
74           */
75          size = nir_udiv(b, size, get_field(b, desc, 1, ~C_008F04_STRIDE));
76       }
77       return size;
78    }
79 
80    /* Cube textures return (height, height) instead of (width, height) because it's fewer
81     * instructions.
82     */
83    bool has_width = dim != GLSL_SAMPLER_DIM_CUBE;
84    bool has_height = dim != GLSL_SAMPLER_DIM_1D;
85    bool has_depth = dim == GLSL_SAMPLER_DIM_3D;
86    nir_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
87    nir_def *last_array = NULL, *depth = NULL;
88 
89    /* Get the width, height, depth, layers. */
90    if (gfx_level >= GFX10) {
91       if (has_width) {
92          nir_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
93          nir_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
94          /* Use iadd to get s_lshl2_add_u32 in the end. */
95          width = nir_iadd(b, width_lo, nir_ishl_imm(b, width_hi, 2));
96       }
97       if (has_height)
98          height = get_field(b, desc, 2, ~C_00A008_HEIGHT);
99       if (has_depth) {
100          if (gfx_level >= GFX12)
101             depth = get_field(b, desc, 4, ~C_00A010_DEPTH_GFX12);
102          else
103             depth = get_field(b, desc, 4, ~C_00A010_DEPTH_GFX10);
104       }
105 
106       if (is_array) {
107          if (gfx_level >= GFX12)
108             last_array = get_field(b, desc, 4, ~C_00A010_DEPTH_GFX12);
109          else
110             last_array = get_field(b, desc, 4, ~C_00A010_DEPTH_GFX10);
111          base_array = get_field(b, desc, 4, ~C_00A010_BASE_ARRAY);
112       }
113    } else {
114       if (has_width)
115          width = get_field(b, desc, 2, ~C_008F18_WIDTH);
116       if (has_height)
117          height = get_field(b, desc, 2, ~C_008F18_HEIGHT);
118       if (has_depth)
119          depth = get_field(b, desc, 4, ~C_008F20_DEPTH);
120 
121       if (is_array) {
122          base_array = get_field(b, desc, 5, ~C_008F24_BASE_ARRAY);
123 
124          if (gfx_level == GFX9) {
125             last_array = get_field(b, desc, 4, ~C_008F20_DEPTH);
126          } else {
127             last_array = get_field(b, desc, 5, ~C_008F24_LAST_ARRAY);
128          }
129       }
130    }
131 
132    /* On GFX10.3+, DEPTH contains the pitch if the type is 1D, 2D, or 2D_MSAA. We only program
133     * the pitch for 2D. We need to set depth and last_array to 0 in that case.
134     */
135    if (gfx_level >= GFX10_3 && (has_depth || is_array)) {
136       nir_def *type = get_field(b, desc, 3, ~C_00A00C_TYPE);
137       nir_def *is_2d = nir_ieq_imm(b, type, V_008F1C_SQ_RSRC_IMG_2D);
138 
139       if (has_depth)
140          depth = nir_bcsel(b, is_2d, nir_imm_int(b, 0), depth);
141       if (is_array)
142          last_array = nir_bcsel(b, is_2d, nir_imm_int(b, 0), last_array);
143    }
144 
145    /* All values are off by 1. */
146    if (has_width)
147       width = nir_iadd_imm(b, width, 1);
148    if (has_height)
149       height = nir_iadd_imm(b, height, 1);
150    if (has_depth)
151       depth = nir_iadd_imm(b, depth, 1);
152 
153    if (is_array) {
154       layers = nir_isub(b, last_array, base_array);
155       layers = nir_iadd_imm(b, layers, 1);
156    }
157 
158    /* Minify the dimensions according to base_level + lod. */
159    if (dim != GLSL_SAMPLER_DIM_MS && dim != GLSL_SAMPLER_DIM_RECT) {
160       nir_def *base_level;
161 
162       if (gfx_level >= GFX12)
163          base_level = get_field(b, desc, 1, ~C_00A004_BASE_LEVEL);
164       else
165          base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
166 
167       nir_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;
168 
169       if (has_width)
170          width = nir_ushr(b, width, level);
171       if (has_height)
172          height = nir_ushr(b, height, level);
173       if (has_depth)
174          depth = nir_ushr(b, depth, level);
175 
176       /* 1D and square texture can't have 0 size unless the lod is out-of-bounds, which is
177        * undefined. Only non-square targets can have one of the sizes 0 with an in-bounds lod
178        * after minification.
179        */
180       if (has_width && has_height) {
181          if (has_width)
182             width = nir_umax(b, width, nir_imm_int(b, 1));
183          if (has_height)
184             height = nir_umax(b, height, nir_imm_int(b, 1));
185          if (has_depth)
186             depth = nir_umax(b, depth, nir_imm_int(b, 1));
187       }
188    }
189 
190    /* Special case for sliced storage 3D views which shouldn't be minified. */
191    if (gfx_level >= GFX10 && has_depth) {
192       nir_def *uav3d, *uav_depth;
193 
194       if (gfx_level >= GFX12) {
195          uav_depth = get_field(b, desc, 4, ~C_00A010_DEPTH_GFX12);
196          uav3d = nir_ieq_imm(b, get_field(b, desc, 5, ~C_00A014_UAV3D), 1);
197       } else {
198          uav_depth = get_field(b, desc, 4, ~C_00A010_DEPTH_GFX10);
199          uav3d = nir_ieq_imm(b, get_field(b, desc, 5, ~C_00A014_ARRAY_PITCH), 1);
200       }
201 
202       nir_def *layers_3d =
203          nir_isub(b, uav_depth, get_field(b, desc, 4, ~C_00A010_BASE_ARRAY));
204       layers_3d = nir_iadd_imm(b, layers_3d, 1);
205       depth = nir_bcsel(b, uav3d, layers_3d, depth);
206    }
207 
208    nir_def *result = NULL;
209 
210    /* Construct the result. */
211    switch (dim) {
212    case GLSL_SAMPLER_DIM_1D:
213       result = is_array ? nir_vec2(b, width, layers) : width;
214       break;
215    case GLSL_SAMPLER_DIM_CUBE:
216       result = is_array ? nir_vec3(b, height, height, layers) : nir_vec2(b, height, height);
217       break;
218    case GLSL_SAMPLER_DIM_2D:
219    case GLSL_SAMPLER_DIM_MS:
220    case GLSL_SAMPLER_DIM_RECT:
221    case GLSL_SAMPLER_DIM_EXTERNAL:
222       result = is_array ? nir_vec3(b, width, height, layers) : nir_vec2(b, width, height);
223       break;
224    case GLSL_SAMPLER_DIM_3D:
225       result = nir_vec3(b, width, height, depth);
226       break;
227    default:
228       unreachable("invalid sampler dim");
229    }
230 
231    return handle_null_desc(b, desc, result);
232 }
233 
/* Per-instruction callback: replace size/levels/samples queries with descriptor arithmetic.
 *
 * Handles the image_size/image_samples intrinsics (plain, deref and bindless forms) and the
 * txs, query_levels and texture_samples texture ops. "data" points to the enum amd_gfx_level
 * to lower for.
 *
 * Returns true if the instruction was replaced and removed, false if it was left alone.
 */
static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
{
   const enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)data;
   nir_def *old_def = NULL;
   nir_def *lowered = NULL;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      enum glsl_sampler_dim dim;
      bool is_array;
      nir_def *desc;

      old_def = &intr->def;
      b->cursor = nir_before_instr(instr);

      /* Load the descriptor; buffer descriptors have 4 dwords, all others 8. */
      switch (intr->intrinsic) {
      case nir_intrinsic_image_size:
      case nir_intrinsic_image_samples: {
         dim = nir_intrinsic_image_dim(intr);
         is_array = nir_intrinsic_image_array(intr);
         const unsigned num_dwords = dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
         desc = nir_image_descriptor_amd(b, num_dwords, 32, intr->src[0].ssa);
         break;
      }

      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples: {
         /* The deref form carries dim/arrayness in the deref's GLSL type. */
         const struct glsl_type *type =
            nir_instr_as_deref(intr->src[0].ssa->parent_instr)->type;
         dim = glsl_get_sampler_dim(type);
         is_array = glsl_sampler_type_is_array(type);
         const unsigned num_dwords = dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
         desc = nir_image_deref_descriptor_amd(b, num_dwords, 32, intr->src[0].ssa);
         break;
      }

      case nir_intrinsic_bindless_image_size:
      case nir_intrinsic_bindless_image_samples: {
         dim = nir_intrinsic_image_dim(intr);
         is_array = nir_intrinsic_image_array(intr);
         const unsigned num_dwords = dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8;
         desc = nir_bindless_image_descriptor_amd(b, num_dwords, 32, intr->src[0].ssa);
         break;
      }

      default:
         return false;
      }

      /* Only the six intrinsics above get here: three sample queries, three size queries. */
      switch (intr->intrinsic) {
      case nir_intrinsic_image_samples:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_bindless_image_samples:
         lowered = query_samples(b, desc, dim, gfx_level);
         break;

      default:
         /* Image size queries are lowered without a LOD source. */
         lowered = lower_query_size(b, desc, NULL, dim, is_array, gfx_level);
         break;
      }
      break;
   }

   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

      old_def = &tex->def;
      b->cursor = nir_before_instr(instr);

      if (tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples)
         return false;

      nir_def *desc = NULL;
      nir_src *lod = NULL;

      /* Walk the sources: remember the LOD and turn the texture deref/handle into a
       * descriptor load.
       */
      for (unsigned i = 0; i < tex->num_srcs; i++) {
         if (tex->src[i].src_type == nir_tex_src_lod) {
            lod = &tex->src[i].src;
            continue;
         }

         if (tex->src[i].src_type != nir_tex_src_texture_deref &&
             tex->src[i].src_type != nir_tex_src_texture_handle)
            continue;

         /* Emit a single-source descriptor_amd tex op mirroring the original's state. */
         nir_tex_instr *load = nir_tex_instr_create(b->shader, 1);
         load->op = nir_texop_descriptor_amd;
         load->sampler_dim = tex->sampler_dim;
         load->is_array = tex->is_array;
         load->texture_index = tex->texture_index;
         load->sampler_index = tex->sampler_index;
         load->dest_type = nir_type_int32;
         load->src[0].src_type = tex->src[i].src_type;
         load->src[0].src = nir_src_for_ssa(tex->src[i].src.ssa);
         nir_def_init(&load->instr, &load->def, nir_tex_instr_dest_size(load), 32);
         nir_builder_instr_insert(b, &load->instr);
         desc = &load->def;
      }

      switch (tex->op) {
      case nir_texop_txs:
         lowered = lower_query_size(b, desc, lod, tex->sampler_dim, tex->is_array,
                                    gfx_level);
         break;
      case nir_texop_query_levels:
         lowered = query_levels(b, desc, gfx_level);
         break;
      case nir_texop_texture_samples:
         lowered = query_samples(b, desc, tex->sampler_dim, gfx_level);
         break;
      default:
         unreachable("shouldn't get here");
      }
      break;
   }

   default:
      break;
   }

   if (!lowered)
      return false;

   /* The computation is done in 32 bits; narrow it if the original result was 16-bit. */
   assert(old_def->bit_size == 32 || old_def->bit_size == 16);
   if (old_def->bit_size == 16)
      lowered = nir_u2u16(b, lowered);

   nir_def_rewrite_uses_after(old_def, lowered, instr);
   nir_instr_remove(instr);
   return true;
}
368 
/* Run lower_resinfo() over every instruction of the shader.
 *
 * Returns the result of nir_shader_instructions_pass(), invoked with
 * nir_metadata_control_flow.
 */
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level)
{
   /* The callback reads the GFX level back through its opaque data pointer. */
   void *cb_data = &gfx_level;
   const bool progress =
      nir_shader_instructions_pass(nir, lower_resinfo, nir_metadata_control_flow, cb_data);

   return progress;
}
375