/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

/*
 * This lowering pass converts index-based buffer/image/texture access to
 * explicit descriptor-based access, which simplifies the compiler backend
 * translation.
 *
 * For example: load_ubo(1) -> load_ubo(vec4), where the vec4 is the buffer
 * descriptor with index==1, so the compiler backend doesn't need to do the
 * index-to-descriptor lookup, which is the most complicated part (it is now
 * done in NIR).
 */
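
/*
 * Roughly (simplified, not literal NIR syntax), the pass turns
 *
 *    vec4 = load_ubo(index, offset)
 *
 * into
 *
 *    desc = load_smem_amd(descriptor_list, index * 16)
 *    vec4 = load_ubo(desc, offset)
 *
 * with fast paths when the descriptor lives in user SGPRs or can be built
 * inline.
 */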

#include "nir_builder.h"

#include "ac_nir.h"
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "sid.h"

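/* Per-shader state passed to the lowering callbacks below. */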
struct lower_resource_state {
   struct si_shader *shader;
   struct si_shader_args *args;
};

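/* Build the descriptor for constant buffer 0 inline from its 32-bit address
 * instead of loading it from the descriptor list.
 */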
static nir_def *load_ubo_desc_fast_path(nir_builder *b, nir_def *addr_lo,
                                        struct si_shader_selector *sel)
{
   const struct ac_buffer_state buffer_state = {
      .va = (uint64_t)sel->screen->info.address32_hi << 32,
      .size = sel->info.constbuf0_num_slots * 16,
      .format = PIPE_FORMAT_R32_FLOAT,
      .swizzle =
         {
            PIPE_SWIZZLE_X,
            PIPE_SWIZZLE_Y,
            PIPE_SWIZZLE_Z,
            PIPE_SWIZZLE_W,
         },
      .gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
   };
   uint32_t desc[4];

   ac_build_buffer_descriptor(sel->screen->info.gfx_level, &buffer_state, desc);

   return nir_vec4(b, addr_lo, nir_imm_int(b, desc[1]), nir_imm_int(b, desc[2]),
                   nir_imm_int(b, desc[3]));
}

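/* Clamp a dynamic index to [0, max - 1]; power-of-two sizes only need an AND. */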
static nir_def *clamp_index(nir_builder *b, nir_def *index, unsigned max)
{
   if (util_is_power_of_two_or_zero(max))
      return nir_iand_imm(b, index, max - 1);
   else {
      nir_def *clamp = nir_imm_int(b, max - 1);
      nir_def *cond = nir_uge(b, clamp, index);
      return nir_bcsel(b, cond, index, clamp);
   }
}

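/* Load a UBO descriptor from the constant/shader buffer descriptor list, or
 * build it inline when constant buffer 0 is the only buffer.
 */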
static nir_def *load_ubo_desc(nir_builder *b, nir_def *index,
                              struct lower_resource_state *s)
{
   struct si_shader_selector *sel = s->shader->selector;

   nir_def *addr = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers);

   if (sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0)
      return load_ubo_desc_fast_path(b, addr, sel);

   index = clamp_index(b, index, sel->info.base.num_ubos);
   index = nir_iadd_imm(b, index, SI_NUM_SHADER_BUFFERS);

   nir_def *offset = nir_ishl_imm(b, index, 4);
   return nir_load_smem_amd(b, 4, addr, offset);
}

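/* Load an SSBO descriptor, either from a compute user SGPR (fast path for a
 * constant index) or from the constant/shader buffer descriptor list.
 */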
static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index,
                               struct lower_resource_state *s)
{
   struct si_shader_selector *sel = s->shader->selector;

   /* Fast path if the shader buffer is in user SGPRs. */
   if (nir_src_is_const(*index)) {
      unsigned slot = nir_src_as_uint(*index);
      if (slot < sel->cs_num_shaderbufs_in_user_sgprs)
         return ac_nir_load_arg(b, &s->args->ac, s->args->cs_shaderbuf[slot]);
   }

   nir_def *addr = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers);
   nir_def *slot = clamp_index(b, index->ssa, sel->info.base.num_ssbos);
   slot = nir_isub_imm(b, SI_NUM_SHADER_BUFFERS - 1, slot);

   nir_def *offset = nir_ishl_imm(b, slot, 4);
   return nir_load_smem_amd(b, 4, addr, offset);
}

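/* Patch dword 6 of a 256-bit image descriptor to work around DCC-related
 * hardware issues (see the comments below).
 */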
static nir_def *fixup_image_desc(nir_builder *b, nir_def *rsrc, bool uses_store,
                                 struct lower_resource_state *s)
{
   struct si_shader_selector *sel = s->shader->selector;
   struct si_screen *screen = sel->screen;

   /**
    * Given a 256-bit resource descriptor, force the DCC enable bit to off.
    *
    * At least on Tonga, executing image stores on images with non-trivial DCC
    * enabled can eventually lead to lockups. This can occur when an
    * application binds an image as read-only but then uses a shader that
    * writes to it. The OpenGL spec allows almost arbitrarily bad behavior
    * (including program termination) in this case, but it doesn't cost much
    * to be a bit nicer: disabling DCC in the shader still leads to undefined
    * results but avoids the lockup.
    */
   if (uses_store &&
       screen->info.gfx_level <= GFX9 &&
       screen->info.gfx_level >= GFX8) {
      nir_def *tmp = nir_channel(b, rsrc, 6);
      tmp = nir_iand_imm(b, tmp, C_008F28_COMPRESSION_EN);
      rsrc = nir_vector_insert_imm(b, rsrc, tmp, 6);
   }

   if (!uses_store &&
       screen->info.has_image_load_dcc_bug &&
       screen->always_allow_dcc_stores) {
      nir_def *tmp = nir_channel(b, rsrc, 6);
      tmp = nir_iand_imm(b, tmp, C_00A018_WRITE_COMPRESS_ENABLE);
      rsrc = nir_vector_insert_imm(b, rsrc, tmp, 6);
   }

   return rsrc;
}

/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
 * adjust "index" to point to FMASK.
 */
static nir_def *load_image_desc(nir_builder *b, nir_def *list, nir_def *index,
                                enum ac_descriptor_type desc_type, bool uses_store,
                                struct lower_resource_state *s)
{
   /* The index is in uvec8 units; convert it to an offset in bytes. */
   nir_def *offset = nir_ishl_imm(b, index, 5);

   unsigned num_channels;
   if (desc_type == AC_DESC_BUFFER) {
      offset = nir_iadd_imm(b, offset, 16);
      num_channels = 4;
   } else {
      assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
      num_channels = 8;
   }

   nir_def *rsrc = nir_load_smem_amd(b, num_channels, list, offset);

   if (desc_type == AC_DESC_IMAGE)
      rsrc = fixup_image_desc(b, rsrc, uses_store, s);

   return rsrc;
}

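/* Flatten an image/sampler variable deref chain into a descriptor index,
 * optionally returning the constant and dynamic parts separately.
 */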
static nir_def *deref_to_index(nir_builder *b,
                               nir_deref_instr *deref,
                               unsigned max_slots,
                               nir_def **dynamic_index_ret,
                               unsigned *const_index_ret)
{
   unsigned const_index = 0;
   nir_def *dynamic_index = NULL;
   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);

      if (nir_src_is_const(deref->arr.index)) {
         const_index += array_size * nir_src_as_uint(deref->arr.index);
      } else {
         nir_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
         dynamic_index = dynamic_index ? nir_iadd(b, dynamic_index, tmp) : tmp;
      }

      deref = nir_deref_instr_parent(deref);
   }

   unsigned base_index = deref->var->data.binding;
   const_index += base_index;

   /* Redirect invalid resource indices to the first array element. */
   if (const_index >= max_slots)
      const_index = base_index;

   nir_def *index = nir_imm_int(b, const_index);
   if (dynamic_index) {
      index = nir_iadd(b, dynamic_index, index);

      /* From the GL_ARB_shader_image_load_store extension spec:
       *
       *    If a shader performs an image load, store, or atomic
       *    operation using an image variable declared as an array,
       *    and if the index used to select an individual element is
       *    negative or greater than or equal to the size of the
       *    array, the results of the operation are undefined but may
       *    not lead to termination.
       */
      index = clamp_index(b, index, max_slots);
   }

   if (dynamic_index_ret)
      *dynamic_index_ret = dynamic_index;
   if (const_index_ret)
      *const_index_ret = const_index;

   return index;
}

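/* Load an image descriptor addressed through a variable deref, with a fast
 * path for images whose descriptors live in user SGPRs.
 */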
static nir_def *load_deref_image_desc(nir_builder *b, nir_deref_instr *deref,
                                      enum ac_descriptor_type desc_type, bool is_load,
                                      struct lower_resource_state *s)
{
   unsigned const_index;
   nir_def *dynamic_index;
   nir_def *index = deref_to_index(b, deref, s->shader->selector->info.base.num_images,
                                   &dynamic_index, &const_index);

   nir_def *desc;
   if (!dynamic_index && desc_type != AC_DESC_FMASK &&
       const_index < s->shader->selector->cs_num_images_in_user_sgprs) {
      /* Fast path if the image is in user SGPRs. */
      desc = ac_nir_load_arg(b, &s->args->ac, s->args->cs_image[const_index]);

      if (desc_type == AC_DESC_IMAGE)
         desc = fixup_image_desc(b, desc, !is_load, s);
   } else {
      /* FMASKs are separate from images. */
      if (desc_type == AC_DESC_FMASK)
         index = nir_iadd_imm(b, index, SI_NUM_IMAGES);

      index = nir_isub_imm(b, SI_NUM_IMAGE_SLOTS - 1, index);

      nir_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->samplers_and_images);
      desc = load_image_desc(b, list, index, desc_type, !is_load, s);
   }

   return desc;
}

static nir_def *load_bindless_image_desc(nir_builder *b, nir_def *index,
                                         enum ac_descriptor_type desc_type, bool is_load,
                                         struct lower_resource_state *s)
{
   /* Bindless image descriptors use 16-dword slots. */
   index = nir_ishl_imm(b, index, 1);

   /* FMASK is right after the image. */
   if (desc_type == AC_DESC_FMASK)
      index = nir_iadd_imm(b, index, 1);

   nir_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->bindless_samplers_and_images);
   return load_image_desc(b, list, index, desc_type, !is_load, s);
}

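/* Replace the index/deref source of buffer and image intrinsics with the
 * loaded descriptor. Returns false if the intrinsic was left unchanged.
 */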
static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                                     struct lower_resource_state *s)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ubo: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      /* Check if the instruction already sources a descriptor and doesn't need to be lowered. */
      if (intrin->src[0].ssa->num_components == 4 && intrin->src[0].ssa->bit_size == 32)
         return false;

      nir_def *desc = load_ubo_desc(b, intrin->src[0].ssa, s);
      nir_src_rewrite(&intrin->src[0], desc);
      break;
   }
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      /* Check if the instruction already sources a descriptor and doesn't need to be lowered. */
      if (intrin->src[0].ssa->num_components == 4 && intrin->src[0].ssa->bit_size == 32)
         return false;

      nir_def *desc = load_ssbo_desc(b, &intrin->src[0], s);
      nir_src_rewrite(&intrin->src[0], desc);
      break;
   }
   case nir_intrinsic_store_ssbo: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      /* Check if the instruction already sources a descriptor and doesn't need to be lowered. */
      if (intrin->src[1].ssa->num_components == 4 && intrin->src[1].ssa->bit_size == 32)
         return false;

      nir_def *desc = load_ssbo_desc(b, &intrin->src[1], s);
      nir_src_rewrite(&intrin->src[1], desc);
      break;
   }
   case nir_intrinsic_load_ssbo_address: {
      assert(nir_src_as_uint(intrin->src[1]) == 0);
      nir_def *desc = load_ssbo_desc(b, &intrin->src[0], s);
      nir_def *lo = nir_channel(b, desc, 0);
      nir_def *hi = nir_i2i32(b, nir_u2u16(b, nir_channel(b, desc, 1)));
      nir_def_rewrite_uses(&intrin->def, nir_pack_64_2x32_split(b, lo, hi));
      nir_instr_remove(&intrin->instr);
      break;
   }
   case nir_intrinsic_get_ssbo_size: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      nir_def *desc = load_ssbo_desc(b, &intrin->src[0], s);
      nir_def *size = nir_channel(b, desc, 2);
      nir_def_replace(&intrin->def, size);
      break;
   }
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_fragment_mask_load_amd:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_deref_descriptor_amd: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

      enum ac_descriptor_type desc_type;
      if (intrin->intrinsic == nir_intrinsic_image_deref_fragment_mask_load_amd) {
         desc_type = AC_DESC_FMASK;
      } else {
         enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
         desc_type = dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
      }

      bool is_load =
         intrin->intrinsic == nir_intrinsic_image_deref_load ||
         intrin->intrinsic == nir_intrinsic_image_deref_sparse_load ||
         intrin->intrinsic == nir_intrinsic_image_deref_fragment_mask_load_amd ||
         intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd;

      nir_def *desc = load_deref_image_desc(b, deref, desc_type, is_load, s);

      if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
         nir_def_replace(&intrin->def, desc);
      } else {
         nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
         nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
         nir_rewrite_image_intrinsic(intrin, desc, true);
      }
      break;
   }
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_sparse_load:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_bindless_image_descriptor_amd: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      enum ac_descriptor_type desc_type;
      if (intrin->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
         desc_type = AC_DESC_FMASK;
      } else {
         enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intrin);
         desc_type = dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
      }

      /* Check if the instruction already sources a descriptor and doesn't need to be lowered. */
      if (intrin->src[0].ssa->num_components == (desc_type == AC_DESC_BUFFER ? 4 : 8) &&
          intrin->src[0].ssa->bit_size == 32)
         return false;

      bool is_load =
         intrin->intrinsic == nir_intrinsic_bindless_image_load ||
         intrin->intrinsic == nir_intrinsic_bindless_image_sparse_load ||
         intrin->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd ||
         intrin->intrinsic == nir_intrinsic_bindless_image_descriptor_amd;

      nir_def *index = nir_u2u32(b, intrin->src[0].ssa);

      nir_def *desc = load_bindless_image_desc(b, index, desc_type, is_load, s);

      if (intrin->intrinsic == nir_intrinsic_bindless_image_descriptor_amd) {
         nir_def_replace(&intrin->def, desc);
      } else {
         nir_src_rewrite(&intrin->src[0], desc);
      }
      break;
   }
   default:
      return false;
   }

   return true;
}

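/* Load one of the descriptors packed into a 16-dword combined image+sampler
 * slot from the given descriptor list.
 */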
static nir_def *load_sampler_desc(nir_builder *b, nir_def *list, nir_def *index,
                                  enum ac_descriptor_type desc_type)
{
   /* The index is in 16-dword units; convert it to an offset in bytes. */
   nir_def *offset = nir_ishl_imm(b, index, 6);

   unsigned num_channels = 0;
   switch (desc_type) {
   case AC_DESC_IMAGE:
      /* The image is at [0:7]. */
      num_channels = 8;
      break;
   case AC_DESC_BUFFER:
      /* The buffer is at [4:7]. */
      offset = nir_iadd_imm(b, offset, 16);
      num_channels = 4;
      break;
   case AC_DESC_FMASK:
      /* The FMASK is at [8:15]. */
      offset = nir_iadd_imm(b, offset, 32);
      num_channels = 8;
      break;
   case AC_DESC_SAMPLER:
      /* The sampler state is at [12:15]. */
      offset = nir_iadd_imm(b, offset, 48);
      num_channels = 4;
      break;
   default:
      unreachable("invalid desc type");
      break;
   }

   return nir_load_smem_amd(b, num_channels, list, offset);
}

static nir_def *load_deref_sampler_desc(nir_builder *b, nir_deref_instr *deref,
                                        enum ac_descriptor_type desc_type,
                                        struct lower_resource_state *s,
                                        bool return_descriptor)
{
   unsigned max_slots = BITSET_LAST_BIT(b->shader->info.textures_used);
   nir_def *index = deref_to_index(b, deref, max_slots, NULL, NULL);
   index = nir_iadd_imm(b, index, SI_NUM_IMAGE_SLOTS / 2);

   /* Return the actual descriptor when the caller requires it. */
   if (return_descriptor) {
      nir_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->samplers_and_images);
      return load_sampler_desc(b, list, index, desc_type);
   }

   /* Otherwise just return the index and let the NIR-to-LLVM backend translate
    * it to the actual descriptor, because a waterfall loop is needed there to
    * handle a non-dynamically-uniform index.
    */
   return index;
}

static nir_def *load_bindless_sampler_desc(nir_builder *b, nir_def *index,
                                           enum ac_descriptor_type desc_type,
                                           struct lower_resource_state *s)
{
   nir_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->bindless_samplers_and_images);

   /* 64-bit to 32-bit */
   index = nir_u2u32(b, index);

   return load_sampler_desc(b, list, index, desc_type);
}

static nir_def *fixup_sampler_desc(nir_builder *b,
                                   nir_tex_instr *tex,
                                   nir_def *sampler,
                                   struct lower_resource_state *s)
{
   const struct si_shader_selector *sel = s->shader->selector;

   if (tex->op != nir_texop_tg4 || sel->screen->info.conformant_trunc_coord)
      return sampler;

   /* Set TRUNC_COORD=0 for textureGather(). */
   nir_def *dword0 = nir_channel(b, sampler, 0);
   dword0 = nir_iand_imm(b, dword0, C_008F30_TRUNC_COORD);
   sampler = nir_vector_insert_imm(b, sampler, dword0, 0);
   return sampler;
}

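/* Rewrite texture instruction sources: derefs and bindless handles are
 * replaced with descriptors, or with a descriptor index when the backend
 * still has to handle a non-uniform index itself.
 */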
static bool lower_resource_tex(nir_builder *b, nir_tex_instr *tex,
                               struct lower_resource_state *s)
{
   nir_deref_instr *texture_deref = NULL;
   nir_deref_instr *sampler_deref = NULL;
   nir_def *texture_handle = NULL;
   nir_def *sampler_handle = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         texture_deref = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_texture_handle:
         texture_handle = tex->src[i].src.ssa;
         break;
      case nir_tex_src_sampler_handle:
         sampler_handle = tex->src[i].src.ssa;
         break;
      default:
         break;
      }
   }

   enum ac_descriptor_type desc_type;
   if (tex->op == nir_texop_fragment_mask_fetch_amd)
      desc_type = AC_DESC_FMASK;
   else
      desc_type = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;

   if (tex->op == nir_texop_descriptor_amd) {
      nir_def *image;
      if (texture_deref)
         image = load_deref_sampler_desc(b, texture_deref, desc_type, s, true);
      else
         image = load_bindless_sampler_desc(b, texture_handle, desc_type, s);
      nir_def_replace(&tex->def, image);
      return true;
   }

   if (tex->op == nir_texop_sampler_descriptor_amd) {
      nir_def *sampler;
      if (sampler_deref)
         sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, true);
      else
         sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s);
      nir_def_replace(&tex->def, sampler);
      return true;
   }

   nir_def *image = texture_deref ?
      load_deref_sampler_desc(b, texture_deref, desc_type, s, !tex->texture_non_uniform) :
      load_bindless_sampler_desc(b, texture_handle, desc_type, s);

   nir_def *sampler = NULL;
   if (sampler_deref)
      sampler = load_deref_sampler_desc(b, sampler_deref, AC_DESC_SAMPLER, s, !tex->sampler_non_uniform);
   else if (sampler_handle)
      sampler = load_bindless_sampler_desc(b, sampler_handle, AC_DESC_SAMPLER, s);

   if (sampler && sampler->num_components > 1)
      sampler = fixup_sampler_desc(b, tex, sampler, s);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         tex->src[i].src_type = nir_tex_src_texture_handle;
         FALLTHROUGH;
      case nir_tex_src_texture_handle:
         nir_src_rewrite(&tex->src[i].src, image);
         break;
      case nir_tex_src_sampler_deref:
         tex->src[i].src_type = nir_tex_src_sampler_handle;
         FALLTHROUGH;
      case nir_tex_src_sampler_handle:
         nir_src_rewrite(&tex->src[i].src, sampler);
         break;
      default:
         break;
      }
   }

   return true;
}

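/* Per-instruction callback for nir_shader_instructions_pass. */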
static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
{
   struct lower_resource_state *s = (struct lower_resource_state *)state;

   b->cursor = nir_before_instr(instr);

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      return lower_resource_intrinsic(b, intrin, s);
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      return lower_resource_tex(b, tex, s);
   }
   default:
      return false;
   }
}

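/* Entry point: lower index/deref-based resource access to descriptor-based
 * access for the whole shader.
 */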
bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
                           struct si_shader_args *args)
{
   struct lower_resource_state state = {
      .shader = shader,
      .args = args,
   };

   return nir_shader_instructions_pass(nir, lower_resource_instr,
                                       nir_metadata_control_flow,
                                       &state);
}