1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_cmd_buffer.h"
6 #include "nvk_descriptor_set_layout.h"
7 #include "nvk_descriptor_types.h"
8 #include "nvk_shader.h"
9 
10 #include "vk_pipeline.h"
11 
12 #include "nir_builder.h"
13 #include "nir_deref.h"
14 
15 #include "clc397.h"
16 #include "clc597.h"
17 
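/* A candidate cbuf discovered while scanning the shader: the nvk_cbuf key it
 * would bind, how many instructions use it, and the [start, end) byte range
 * of descriptor data those uses touch.
 */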
18 struct lower_desc_cbuf {
19    struct nvk_cbuf key;
20 
21    uint32_t use_count;
22 
23    uint64_t start;
24    uint64_t end;
25 };
26 
27 DERIVE_HASH_TABLE(nvk_cbuf);
28 
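/* qsort comparator for build_cbuf_map(): most-used cbufs sort first so they
 * win the limited number of hardware cbuf bindings; ties are broken on the
 * key fields to keep the order deterministic.
 */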
29 static int
30 compar_cbufs(const void *_a, const void *_b)
31 {
32    const struct lower_desc_cbuf *a = _a;
33    const struct lower_desc_cbuf *b = _b;
34 
35 #define COMPAR(field, pos) \
36    if (a->field < b->field) return -(pos); \
37    if (a->field > b->field) return (pos);
38 
39    /* Sort by most used first */
40    COMPAR(use_count, -1)
41 
42    /* Keep the list stable by then sorting by key fields. */
43    COMPAR(key.type, 1)
44    COMPAR(key.desc_set, 1)
45    COMPAR(key.dynamic_idx, 1)
46    COMPAR(key.desc_offset, 1)
47 
48 #undef COMPAR
49 
50    return 0;
51 }
52 
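/* Per-shader state for the descriptor lowering pass: the bound descriptor
 * set layouts, a few device/feature knobs, and (while building the cbuf map)
 * a hash table of candidate cbufs keyed by struct nvk_cbuf.
 */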
53 struct lower_descriptors_ctx {
54    const struct nv_device_info *dev_info;
55    const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
56 
57    bool use_bindless_cbuf;
58    bool use_edb_buffer_views;
59    bool clamp_desc_array_bounds;
60    nir_address_format ubo_addr_format;
61    nir_address_format ssbo_addr_format;
62 
63    struct hash_table *cbufs;
64    struct nvk_cbuf_map *cbuf_map;
65 };
66 
67 static bool
68 descriptor_type_is_ubo(VkDescriptorType desc_type)
69 {
70    switch (desc_type) {
71    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
72    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
73    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
74       return true;
75 
76    default:
77       return false;
78    }
79 }
80 
81 static bool
82 descriptor_type_is_ssbo(VkDescriptorType desc_type)
83 {
84    switch (desc_type) {
85    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
86    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
87       return true;
88 
89    default:
90       return false;
91    }
92 }
93 
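/* Record a use of [start, end) bytes of the cbuf described by *key, merging
 * it with any previously recorded use of the same key.
 */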
94 static void
95 record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
96                 struct lower_descriptors_ctx *ctx)
97 {
98    struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
99    if (entry != NULL) {
100       struct lower_desc_cbuf *cbuf = entry->data;
101       cbuf->use_count++;
102       cbuf->start = MIN2(cbuf->start, start);
103       cbuf->end = MAX2(cbuf->end, end);
104    } else {
105       struct lower_desc_cbuf *cbuf =
106          ralloc(ctx->cbufs, struct lower_desc_cbuf);
107       *cbuf = (struct lower_desc_cbuf) {
108          .key = *key,
109          .use_count = 1,
110          .start = start,
111          .end = end,
112       };
113       _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
114    }
115 }
116 
117 static const struct nvk_descriptor_set_binding_layout *
118 get_binding_layout(uint32_t set, uint32_t binding,
119                    const struct lower_descriptors_ctx *ctx)
120 {
121    assert(set < NVK_MAX_SETS);
122    assert(ctx->set_layouts[set] != NULL);
123 
124    const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
125 
126    assert(binding < set_layout->binding_count);
127    return &set_layout->binding[binding];
128 }
129 
130 static void
131 record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
132                            struct lower_descriptors_ctx *ctx)
133 {
134    const struct nvk_descriptor_set_binding_layout *binding_layout =
135       get_binding_layout(set, binding, ctx);
136 
137    const struct nvk_cbuf key = {
138       .type = NVK_CBUF_TYPE_DESC_SET,
139       .desc_set = set,
140    };
141 
142    uint64_t start, end;
143    if (index == NULL) {
144       /* When we don't have an index, assume 0 */
145       start = binding_layout->offset;
146       end = start + binding_layout->stride;
147    } else if (nir_src_is_const(*index)) {
148       start = binding_layout->offset +
149               nir_src_as_uint(*index) * binding_layout->stride;
150       end = start + binding_layout->stride;
151    } else {
152       start = binding_layout->offset;
153       end = start + binding_layout->array_size * binding_layout->stride;
154    }
155 
156    record_cbuf_use(&key, start, end, ctx);
157 }
158 
159 static void
160 record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
161                                 struct lower_descriptors_ctx *ctx)
162 {
163    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
164 
165    /* UBOs we'll handle later, when we see the actual load */
166    if (descriptor_type_is_ubo(nir_intrinsic_desc_type(intrin)))
167       return;
168 
169    record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
170                               nir_intrinsic_binding(intrin),
171                               &intrin->src[0], ctx);
172 }
173 
174 static void
175 record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
176                                  struct lower_descriptors_ctx *ctx)
177 {
178    nir_src *index_src = NULL;
179    if (deref->deref_type == nir_deref_type_array) {
180       index_src = &deref->arr.index;
181       deref = nir_deref_instr_parent(deref);
182    }
183 
184    assert(deref->deref_type == nir_deref_type_var);
185    nir_variable *var = deref->var;
186 
187    record_descriptor_cbuf_use(var->data.descriptor_set,
188                               var->data.binding,
189                               index_src, ctx);
190 }
191 
192 static void
193 record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
194                                struct lower_descriptors_ctx *ctx)
195 {
196    const int texture_src_idx =
197       nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
198    const int sampler_src_idx =
199       nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
200 
201    if (texture_src_idx >= 0) {
202       nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
203       record_deref_descriptor_cbuf_use(deref, ctx);
204    }
205 
206    if (sampler_src_idx >= 0) {
207       nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
208       record_deref_descriptor_cbuf_use(deref, ctx);
209    }
210 }
211 
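/* Walk a UBO deref chain back to its vulkan_resource_index intrinsic and
 * translate it into an nvk_cbuf key.  On success, *offset_out is the offset
 * of the UBO's data within that cbuf and [*start_out, *end_out) is the byte
 * range this deref may access.  A key of NVK_CBUF_TYPE_INVALID is returned
 * when the access can't be mapped to a single statically-known cbuf.
 */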
212 static struct nvk_cbuf
213 ubo_deref_to_cbuf(nir_deref_instr *deref,
214                   nir_intrinsic_instr **resource_index_out,
215                   uint64_t *offset_out,
216                   uint64_t *start_out, uint64_t *end_out,
217                   const struct lower_descriptors_ctx *ctx)
218 {
219    assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
220 
221    /* In case we early return */
222    *offset_out = 0;
223    *start_out = 0;
224    *end_out = UINT64_MAX;
225    *resource_index_out = NULL;
226 
227    const struct nvk_cbuf invalid = {
228       .type = NVK_CBUF_TYPE_INVALID,
229    };
230 
231    uint64_t offset = 0;
232    uint64_t range = glsl_get_explicit_size(deref->type, false);
233    bool offset_valid = true;
234    while (deref->deref_type != nir_deref_type_cast) {
235       nir_deref_instr *parent = nir_deref_instr_parent(deref);
236 
237       switch (deref->deref_type) {
238       case nir_deref_type_var:
239          unreachable("Buffers don't use variables in Vulkan");
240 
241       case nir_deref_type_array:
242       case nir_deref_type_array_wildcard: {
243          uint32_t stride = nir_deref_instr_array_stride(deref);
244          if (deref->deref_type == nir_deref_type_array &&
245              nir_src_is_const(deref->arr.index)) {
246             offset += nir_src_as_uint(deref->arr.index) * stride;
247          } else {
248             range = glsl_get_length(parent->type) * stride;
249          }
250          break;
251       }
252 
253       case nir_deref_type_ptr_as_array:
254          /* All bets are off.  We shouldn't see these most of the time
255           * anyway, even with variable pointers.
256           */
257          offset_valid = false;
258          unreachable("Variable pointers aren't allowed on UBOs");
259          break;
260 
261       case nir_deref_type_struct: {
262          offset += glsl_get_struct_field_offset(parent->type,
263                                                 deref->strct.index);
264          break;
265       }
266 
267       default:
268          unreachable("Unknown deref type");
269       }
270 
271       deref = parent;
272    }
273 
274    nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
275    if (load_desc == NULL ||
276        load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
277       return invalid;
278 
279    nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
280    if (res_index == NULL ||
281        res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
282       return invalid;
283 
284    /* We try to early return as little as possible prior to this point so we
285     * can return the resource index intrinsic in as many cases as possible.
286     * After this point, though, early returns are fair game.
287     */
288    *resource_index_out = res_index;
289 
290    if (!offset_valid || !nir_src_is_const(res_index->src[0]))
291       return invalid;
292 
293    uint32_t set = nir_intrinsic_desc_set(res_index);
294    uint32_t binding = nir_intrinsic_binding(res_index);
295    uint32_t index = nir_src_as_uint(res_index->src[0]);
296 
297    const struct nvk_descriptor_set_binding_layout *binding_layout =
298       get_binding_layout(set, binding, ctx);
299 
300    switch (binding_layout->type) {
301    case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
302    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
303       *offset_out = 0;
304       *start_out = offset;
305       *end_out = offset + range;
306       return (struct nvk_cbuf) {
307          .type = NVK_CBUF_TYPE_UBO_DESC,
308          .desc_set = set,
309          .desc_offset = binding_layout->offset +
310                         index * binding_layout->stride,
311       };
312    }
313 
314    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
315       *offset_out = 0;
316       *start_out = offset;
317       *end_out = offset + range;
318 
319       return (struct nvk_cbuf) {
320          .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
321          .desc_set = set,
322          .dynamic_idx = binding_layout->dynamic_buffer_index + index,
323       };
324    }
325 
326    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
327       *offset_out = binding_layout->offset;
328       *start_out = binding_layout->offset + offset;
329       *end_out = *start_out + range;
330 
331       return (struct nvk_cbuf) {
332          .type = NVK_CBUF_TYPE_DESC_SET,
333          .desc_set = set,
334       };
335    }
336 
337    default:
338       return invalid;
339    }
340 }
341 
342 static void
343 record_load_ubo_cbuf_uses(nir_deref_instr *deref,
344                           struct lower_descriptors_ctx *ctx)
345 {
346    assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
347 
348    UNUSED uint64_t offset;
349    uint64_t start, end;
350    nir_intrinsic_instr *res_index;
351    struct nvk_cbuf cbuf =
352       ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
353 
354    if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
355       record_cbuf_use(&cbuf, start, end, ctx);
356    } else if (res_index != NULL) {
357       record_vulkan_resource_cbuf_use(res_index, ctx);
358    }
359 }
360 
361 static bool
362 record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
363 {
364    struct lower_descriptors_ctx *ctx = _ctx;
365 
366    switch (instr->type) {
367    case nir_instr_type_intrinsic: {
368       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
369       switch (intrin->intrinsic) {
370       case nir_intrinsic_vulkan_resource_index:
371          record_vulkan_resource_cbuf_use(intrin, ctx);
372          return false;
373 
374       case nir_intrinsic_load_deref: {
375          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
376          if (nir_deref_mode_is(deref, nir_var_mem_ubo))
377             record_load_ubo_cbuf_uses(deref, ctx);
378          return false;
379       }
380 
381       case nir_intrinsic_image_deref_load:
382       case nir_intrinsic_image_deref_store:
383       case nir_intrinsic_image_deref_atomic:
384       case nir_intrinsic_image_deref_atomic_swap:
385       case nir_intrinsic_image_deref_size:
386       case nir_intrinsic_image_deref_samples: {
387          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
388          record_deref_descriptor_cbuf_use(deref, ctx);
389          return false;
390       }
391 
392       default:
393          return false;
394       }
395       unreachable("All cases return false");
396    }
397 
398    case nir_instr_type_tex:
399       record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
400       return false;
401 
402    default:
403       return false;
404    }
405 }
406 
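/* Build the shader's cbuf map: cbuf 0 is always the root descriptor table
 * and, if the shader has constant data, cbuf 1 holds it.  The remaining
 * slots go to the most-used descriptor ranges and UBOs recorded by
 * record_cbuf_uses_instr(), up to the per-stage binding limit.
 */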
407 static void
408 build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
409 {
410    ctx->cbuf_map->cbuf_count = 0;
411 
412    /* Root descriptors always go in cbuf 0 */
413    ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
414       .type = NVK_CBUF_TYPE_ROOT_DESC,
415    };
416 
417    /* If we have constant data, put it at cbuf 1 */
418    if (nir->constant_data_size > 0) {
419       ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
420          .type = NVK_CBUF_TYPE_SHADER_DATA,
421       };
422    }
423 
424    ctx->cbufs = nvk_cbuf_table_create(NULL);
425    nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
426                                 nir_metadata_all, (void *)ctx);
427 
428    struct lower_desc_cbuf *cbufs =
429       ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
430                    _mesa_hash_table_num_entries(ctx->cbufs));
431 
432    uint32_t num_cbufs = 0;
433    hash_table_foreach(ctx->cbufs, entry) {
434       struct lower_desc_cbuf *cbuf = entry->data;
435 
436       /* We currently only bind cbufs starting at offset 0, so if the first
437        * use starts beyond the max cbuf size, there's no point in listing it.
438        */
439       if (cbuf->start > NVK_MAX_CBUF_SIZE)
440          continue;
441 
442       cbufs[num_cbufs++] = *cbuf;
443    }
444 
445    qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
446 
447    uint8_t max_cbuf_bindings;
448    if (nir->info.stage == MESA_SHADER_COMPUTE ||
449        nir->info.stage == MESA_SHADER_KERNEL) {
450       max_cbuf_bindings = 8;
451    } else {
452       max_cbuf_bindings = 16;
453    }
454 
455    for (uint32_t i = 0; i < num_cbufs; i++) {
456       if (ctx->cbuf_map->cbuf_count >= max_cbuf_bindings)
457          break;
458 
459       /* We can't support indirect cbufs in compute yet */
460       if ((nir->info.stage == MESA_SHADER_COMPUTE ||
461            nir->info.stage == MESA_SHADER_KERNEL) &&
462           cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
463          continue;
464 
465       /* Prior to Turing, indirect cbufs require splitting the pushbuf and
466        * pushing bits of the descriptor set.  Doing this every draw call is
467        * probably more overhead than it's worth.
468        */
469       if (ctx->dev_info->cls_eng3d < TURING_A &&
470           cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
471          continue;
472 
473       ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = cbufs[i].key;
474    }
475 
476    ralloc_free(ctx->cbufs);
477    ctx->cbufs = NULL;
478 }
479 
480 static int
481 get_mapped_cbuf_idx(const struct nvk_cbuf *key,
482                     const struct lower_descriptors_ctx *ctx)
483 {
484    if (ctx->cbuf_map == NULL)
485       return -1;
486 
487    for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
488       if (nvk_cbuf_equal(&ctx->cbuf_map->cbufs[c], key)) {
489          return c;
490       }
491    }
492 
493    return -1;
494 }
495 
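/* Rewrite a UBO load whose source maps to one of the cbufs chosen in
 * build_cbuf_map() into a direct cbuf access using
 * nir_address_format_32bit_index_offset.
 */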
496 static bool
497 lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
498 {
499    const struct lower_descriptors_ctx *ctx = _ctx;
500 
501    if (load->intrinsic != nir_intrinsic_load_deref)
502       return false;
503 
504    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
505    if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
506       return false;
507 
508    uint64_t offset, end;
509    UNUSED uint64_t start;
510    UNUSED nir_intrinsic_instr *res_index;
511    struct nvk_cbuf cbuf_key =
512       ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
513 
514    if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
515       return false;
516 
517    if (end > NVK_MAX_CBUF_SIZE)
518       return false;
519 
520    int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
521    if (cbuf_idx < 0)
522       return false;
523 
524    b->cursor = nir_before_instr(&load->instr);
525 
526    nir_deref_path path;
527    nir_deref_path_init(&path, deref, NULL);
528 
529    nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
530    nir_address_format addr_format = nir_address_format_32bit_index_offset;
531    for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
532       addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);
533 
534    nir_deref_path_finish(&path);
535 
536    nir_lower_explicit_io_instr(b, load, addr, addr_format);
537 
538    return true;
539 }
540 
541 static bool
542 lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
543                     const struct lower_descriptors_ctx *ctx)
544 {
545    assert(load->intrinsic == nir_intrinsic_load_constant);
546 
547    const struct nvk_cbuf cbuf_key = {
548       .type = NVK_CBUF_TYPE_SHADER_DATA,
549    };
550    int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
551    assert(cbuf_idx >= 0);
552 
553    uint32_t base = nir_intrinsic_base(load);
554 
555    b->cursor = nir_before_instr(&load->instr);
556 
557    nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
558    nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
559                               nir_imm_int(b, cbuf_idx), offset,
560                               .align_mul = nir_intrinsic_align_mul(load),
561                               .align_offset = nir_intrinsic_align_offset(load));
562 
563    nir_def_rewrite_uses(&load->def, data);
564 
565    return true;
566 }
567 
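/* Load the GPU address of a descriptor set from the root descriptor table,
 * which is always bound at cbuf 0.
 */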
568 static nir_def *
569 load_descriptor_set_addr(nir_builder *b, uint32_t set,
570                          UNUSED const struct lower_descriptors_ctx *ctx)
571 {
572    uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
573       set * sizeof(struct nvk_buffer_address);
574 
575    return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
576                      nir_imm_int(b, set_addr_offset),
577                      .align_mul = 8, .align_offset = 0);
578 }
579 
580 static nir_def *
581 load_dynamic_buffer_start(nir_builder *b, uint32_t set,
582                           const struct lower_descriptors_ctx *ctx)
583 {
584    int dynamic_buffer_start_imm = 0;
585    for (uint32_t s = 0; s < set; s++) {
586       if (ctx->set_layouts[s] == NULL) {
587          dynamic_buffer_start_imm = -1;
588          break;
589       }
590 
591       dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
592    }
593 
594    if (dynamic_buffer_start_imm >= 0) {
595       return nir_imm_int(b, dynamic_buffer_start_imm);
596    } else {
597       uint32_t root_offset =
598          nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
599 
600       return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
601                                      nir_imm_int(b, root_offset),
602                                      .align_mul = 1, .align_offset = 0));
603    }
604 }
605 
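/* Load a descriptor for the given set/binding/index.  Dynamic buffers come
 * from the root descriptor table, inline uniform blocks become a buffer
 * address or bindless cbuf handle directly, and everything else is read from
 * the descriptor set, preferring a mapped cbuf over a global memory load.
 */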
606 static nir_def *
607 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
608                 uint32_t set, uint32_t binding, nir_def *index,
609                 unsigned offset_B, const struct lower_descriptors_ctx *ctx)
610 {
611    const struct nvk_descriptor_set_binding_layout *binding_layout =
612       get_binding_layout(set, binding, ctx);
613 
614    if (ctx->clamp_desc_array_bounds)
615       index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
616 
617    switch (binding_layout->type) {
618    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
619    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
620       /* Get the index in the root descriptor table dynamic_buffers array. */
621       nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
622 
623       index = nir_iadd(b, index,
624                        nir_iadd_imm(b, dynamic_buffer_start,
625                                     binding_layout->dynamic_buffer_index));
626       uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
627       nir_def *root_desc_offset =
628          nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
629                       nvk_root_descriptor_offset(dynamic_buffers));
630 
631       assert(num_components * bit_size <= desc_size * 8);
632       return nir_ldc_nv(b, num_components, bit_size,
633                         nir_imm_int(b, 0), root_desc_offset,
634                         .align_mul = 16, .align_offset = 0);
635    }
636 
637    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
638       nir_def *base_addr =
639          nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
640                           binding_layout->offset);
641 
642       assert(binding_layout->stride == 1);
643       const uint32_t binding_size = binding_layout->array_size;
644 
645       if (ctx->use_bindless_cbuf) {
646          assert(num_components == 1 && bit_size == 64);
647          const uint32_t size = align(binding_size, 16);
648          return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
649                                ((uint64_t)size >> 4) << 45);
650       } else {
651          /* Convert it to nir_address_format_64bit_bounded_global */
652          assert(num_components == 4 && bit_size == 32);
653          return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
654                             nir_unpack_64_2x32_split_y(b, base_addr),
655                             nir_imm_int(b, binding_size),
656                             nir_imm_int(b, 0));
657       }
658    }
659 
660    default: {
661       assert(binding_layout->stride > 0);
662       nir_def *desc_ubo_offset =
663          nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
664                          binding_layout->offset + offset_B);
665 
666       uint64_t max_desc_ubo_offset = binding_layout->offset +
667          binding_layout->array_size * binding_layout->stride;
668 
669       unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
670       desc_align_mul = MIN2(desc_align_mul, 16);
671       unsigned desc_align_offset = binding_layout->offset + offset_B;
672       desc_align_offset %= desc_align_mul;
673 
674       const struct nvk_cbuf cbuf_key = {
675          .type = NVK_CBUF_TYPE_DESC_SET,
676          .desc_set = set,
677       };
678       int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
679 
680       if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
681          return nir_ldc_nv(b, num_components, bit_size,
682                            nir_imm_int(b, cbuf_idx),
683                            desc_ubo_offset,
684                            .align_mul = desc_align_mul,
685                            .align_offset = desc_align_offset);
686       } else {
687          nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
688          return nir_load_global_constant_offset(b, num_components, bit_size,
689                                                 set_addr, desc_ubo_offset,
690                                                 .align_mul = desc_align_mul,
691                                                 .align_offset = desc_align_offset);
692       }
693    }
694    }
695 }
696 
697 static bool
698 is_idx_intrin(nir_intrinsic_instr *intrin)
699 {
700    while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
701       intrin = nir_src_as_intrinsic(intrin->src[0]);
702       if (intrin == NULL)
703          return false;
704    }
705 
706    return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
707 }
708 
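/* Convert a (base_addr_lo, base_addr_hi, size, offset) buffer address vector
 * into a bindless cbuf handle plus offset: the base address in 16-byte units
 * in the low bits with the size packed above it, which appears to mirror the
 * handle load_descriptor() builds when use_bindless_cbuf is set.
 */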
709 static nir_def *
710 buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
711 {
712    nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
713    nir_def *size = nir_channel(b, addr, 2);
714    nir_def *offset = nir_channel(b, addr, 3);
715 
716    nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
717    nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
718    nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);
719 
720    /* If we assume the top bits of the address are 0, as well as the bottom
721     * two bits of the size (we can trust this since it's a descriptor), then
722     *
723     *    ((size >> 4) << 13) | addr
724     *
725     * is just an imad.
726     */
727    nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);
728 
729    return nir_vec3(b, addr16_lo, handle_hi, offset);
730 }
731 
732 static nir_def *
733 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
734                                const struct lower_descriptors_ctx *ctx)
735 {
736    nir_def *index = nir_imm_int(b, 0);
737 
738    while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
739       index = nir_iadd(b, index, intrin->src[1].ssa);
740       intrin = nir_src_as_intrinsic(intrin->src[0]);
741    }
742 
743    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
744    uint32_t set = nir_intrinsic_desc_set(intrin);
745    uint32_t binding = nir_intrinsic_binding(intrin);
746    index = nir_iadd(b, index, intrin->src[0].ssa);
747 
748    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
749    if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
750       nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);
751 
752       /* The descriptor is just the handle.  NIR also needs an offset. */
753       return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
754                          nir_unpack_64_2x32_split_y(b, desc),
755                          nir_imm_int(b, 0));
756    } else {
757       nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
758 
759       /* We know a priori that the .w component (offset) is zero */
760       return nir_vec4(b, nir_channel(b, desc, 0),
761                          nir_channel(b, desc, 1),
762                          nir_channel(b, desc, 2),
763                          nir_imm_int(b, 0));
764    }
765 }
766 
767 static bool
768 try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
769                                  const struct lower_descriptors_ctx *ctx)
770 {
771    ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
772    b->cursor = nir_before_instr(&intrin->instr);
773 
774    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
775    if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
776       assert(descriptor_type_is_ssbo(desc_type));
777       return false;
778    }
779 
780    nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
781 
782    nir_def_rewrite_uses(&intrin->def, desc);
783 
784    return true;
785 }
786 
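/* Replace a system-value intrinsic with a load from the root descriptor
 * table (always bound at cbuf 0) at the given offset.
 */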
787 static bool
788 _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
789                             uint32_t root_table_offset,
790                             const struct lower_descriptors_ctx *ctx)
791 {
792    b->cursor = nir_instr_remove(&intrin->instr);
793 
794    nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
795                              intrin->def.bit_size,
796                              nir_imm_int(b, 0), /* Root table */
797                              nir_imm_int(b, root_table_offset),
798                              .align_mul = 4,
799                              .align_offset = 0);
800 
801    nir_def_rewrite_uses(&intrin->def, val);
802 
803    return true;
804 }
805 
806 #define lower_sysval_to_root_table(b, intrin, member, ctx)           \
807    _lower_sysval_to_root_table(b, intrin,                            \
808                                nvk_root_descriptor_offset(member),   \
809                                ctx)
810 
811 static bool
812 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
813                          const struct lower_descriptors_ctx *ctx)
814 {
815    const uint32_t push_region_offset =
816       nvk_root_descriptor_offset(push);
817    const uint32_t base = nir_intrinsic_base(load);
818 
819    b->cursor = nir_before_instr(&load->instr);
820 
821    nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
822                                          push_region_offset + base);
823 
824    nir_def *val =
825       nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
826                  nir_imm_int(b, 0), offset,
827                  .align_mul = load->def.bit_size / 8,
828                  .align_offset = 0);
829 
830    nir_def_rewrite_uses(&load->def, val);
831 
832    return true;
833 }
834 
835 static void
836 get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
837                            uint32_t *set, uint32_t *binding,
838                            nir_def **index)
839 {
840    if (deref->deref_type == nir_deref_type_array) {
841       *index = deref->arr.index.ssa;
842       deref = nir_deref_instr_parent(deref);
843    } else {
844       *index = nir_imm_int(b, 0);
845    }
846 
847    assert(deref->deref_type == nir_deref_type_var);
848    nir_variable *var = deref->var;
849 
850    *set = var->data.descriptor_set;
851    *binding = var->data.binding;
852 }
853 
854 static nir_def *
855 load_resource_deref_desc(nir_builder *b,
856                          unsigned num_components, unsigned bit_size,
857                          nir_deref_instr *deref, unsigned offset_B,
858                          const struct lower_descriptors_ctx *ctx)
859 {
860    uint32_t set, binding;
861    nir_def *index;
862    get_resource_deref_binding(b, deref, &set, &binding, &index);
863    return load_descriptor(b, num_components, bit_size,
864                           set, binding, index, offset_B, ctx);
865 }
866 
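/* Multisampled storage images are lowered to 2D images with the sample index
 * folded into the x/y coordinates.  The 32-bit descriptor packs the image
 * index in bits [19:0] and the log2 sample width/height in bits [21:20] and
 * [23:22].
 */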
867 static void
868 lower_msaa_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
869                         const struct lower_descriptors_ctx *ctx)
870 {
871    assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS);
872 
873    b->cursor = nir_before_instr(&intrin->instr);
874    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
875    nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
876 
877    nir_def *img_index = nir_ubitfield_extract_imm(b, desc, 0, 20);
878    nir_rewrite_image_intrinsic(intrin, img_index, true);
879 
880    nir_def *sw_log2 = nir_ubitfield_extract_imm(b, desc, 20, 2);
881    nir_def *sh_log2 = nir_ubitfield_extract_imm(b, desc, 22, 2);
882 
883    nir_def *sw = nir_ishl(b, nir_imm_int(b, 1), sw_log2);
884    nir_def *sh = nir_ishl(b, nir_imm_int(b, 1), sh_log2);
885    nir_def *num_samples = nir_imul(b, sw, sh);
886 
887    switch (intrin->intrinsic) {
888    case nir_intrinsic_bindless_image_load:
889    case nir_intrinsic_bindless_image_store:
890    case nir_intrinsic_bindless_image_atomic:
891    case nir_intrinsic_bindless_image_atomic_swap: {
892       nir_def *x = nir_channel(b, intrin->src[1].ssa, 0);
893       nir_def *y = nir_channel(b, intrin->src[1].ssa, 1);
894       nir_def *z = nir_channel(b, intrin->src[1].ssa, 2);
895       nir_def *w = nir_channel(b, intrin->src[1].ssa, 3);
896       nir_def *s = intrin->src[2].ssa;
897 
898       nir_def *sw_mask = nir_iadd_imm(b, sw, -1);
899       nir_def *sx = nir_iand(b, s, sw_mask);
900       nir_def *sy = nir_ishr(b, s, sw_log2);
901 
902       x = nir_imad(b, x, sw, sx);
903       y = nir_imad(b, y, sh, sy);
904 
905       /* Make OOB sample indices OOB X/Y indices */
906       x = nir_bcsel(b, nir_ult(b, s, num_samples), x, nir_imm_int(b, -1));
907 
908       nir_src_rewrite(&intrin->src[1], nir_vec4(b, x, y, z, w));
909       nir_src_rewrite(&intrin->src[2], nir_undef(b, 1, 32));
910       break;
911    }
912 
913    case nir_intrinsic_bindless_image_size: {
914       b->cursor = nir_after_instr(&intrin->instr);
915 
916       nir_def *size = &intrin->def;
917       nir_def *w = nir_channel(b, size, 0);
918       nir_def *h = nir_channel(b, size, 1);
919 
920       w = nir_ushr(b, w, sw_log2);
921       h = nir_ushr(b, h, sh_log2);
922 
923       size = nir_vector_insert_imm(b, size, w, 0);
924       size = nir_vector_insert_imm(b, size, h, 1);
925 
926       nir_def_rewrite_uses_after(&intrin->def, size, size->parent_instr);
927       break;
928    }
929 
930    case nir_intrinsic_bindless_image_samples: {
931       /* We need to handle NULL descriptors explicitly */
932       nir_def *samples =
933          nir_bcsel(b, nir_ieq(b, desc, nir_imm_int(b, 0)),
934                       nir_imm_int(b, 0), num_samples);
935       nir_def_rewrite_uses(&intrin->def, samples);
936       break;
937    }
938 
939    default:
940       unreachable("Unknown image intrinsic");
941    }
942 
943    nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
944 }
945 
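/* EDB buffer views are bounds-checked in the shader: the descriptor is a
 * (texture index, element offset, element count, out-of-bounds alpha) vec4
 * which the helpers below pick apart.  They're used whenever
 * use_edb_buffer_views is set or the set layout was created with
 * VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT.
 */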
946 static bool
947 is_edb_buffer_view(nir_deref_instr *deref,
948                    const struct lower_descriptors_ctx *ctx)
949 {
950    if (glsl_get_sampler_dim(deref->type) != GLSL_SAMPLER_DIM_BUF)
951       return false;
952 
953    if (ctx->use_edb_buffer_views)
954       return true;
955 
956    nir_variable *var = nir_deref_instr_get_variable(deref);
957    uint8_t set = var->data.descriptor_set;
958 
959    return ctx->set_layouts[set]->flags &
960           VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
961 }
962 
963 static nir_def *
964 edb_buffer_view_is_null(nir_builder *b, nir_def *desc)
965 {
966    assert(desc->num_components == 4);
967    nir_def *index = nir_channel(b, desc, 0);
968    return nir_ieq_imm(b, index, 0);
969 }
970 
971 static nir_def *
972 edb_buffer_view_offset_el(nir_builder *b, nir_def *desc)
973 {
974    assert(desc->num_components == 4);
975    return nir_channel(b, desc, 1);
976 }
977 
978 static nir_def *
979 edb_buffer_view_size_el(nir_builder *b, nir_def *desc)
980 {
981    assert(desc->num_components == 4);
982    return nir_channel(b, desc, 2);
983 }
984 
985 static nir_def *
986 edb_buffer_view_oob_alpha(nir_builder *b, nir_def *desc)
987 {
988    assert(desc->num_components == 4);
989    return nir_channel(b, desc, 3);
990 }
991 
992 static nir_def *
993 edb_buffer_view_coord_is_in_bounds(nir_builder *b, nir_def *desc,
994                                    nir_def *coord)
995 {
996    assert(desc->num_components == 4);
997    return nir_ult(b, coord, edb_buffer_view_size_el(b, desc));
998 }
999 
1000 static nir_def *
1001 edb_buffer_view_index(nir_builder *b, nir_def *desc, nir_def *in_bounds)
1002 {
1003    assert(desc->num_components == 4);
1004    nir_def *index = nir_channel(b, desc, 0);
1005 
1006    /* Use the NULL descriptor for OOB access */
1007    return nir_bcsel(b, in_bounds, index, nir_imm_int(b, 0));
1008 }
1009 
1010 static nir_def *
1011 adjust_edb_buffer_view_coord(nir_builder *b, nir_def *desc, nir_def *coord)
1012 {
1013    return nir_iadd(b, coord, edb_buffer_view_offset_el(b, desc));
1014 }
1015 
1016 static nir_def *
1017 fixup_edb_buffer_view_result(nir_builder *b, nir_def *desc, nir_def *in_bounds,
1018                              nir_def *res, nir_alu_type dest_type)
1019 {
1020    if (res->num_components < 4)
1021       return res;
1022 
1023    nir_def *is_null = edb_buffer_view_is_null(b, desc);
1024    nir_def *oob_alpha = edb_buffer_view_oob_alpha(b, desc);
1025 
1026    nir_def *a = nir_channel(b, res, 3);
1027    a = nir_bcsel(b, nir_ior(b, in_bounds, is_null), a, oob_alpha);
1028    return nir_vector_insert_imm(b, res, a, 3);
1029 }
1030 
1031 static void
1032 lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1033                               const struct lower_descriptors_ctx *ctx)
1034 {
1035    assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF);
1036 
1037    b->cursor = nir_before_instr(&intrin->instr);
1038    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1039    nir_def *desc = load_resource_deref_desc(b, 4, 32, deref, 0, ctx);
1040 
1041    switch (intrin->intrinsic) {
1042    case nir_intrinsic_image_deref_load:
1043    case nir_intrinsic_image_deref_store:
1044    case nir_intrinsic_image_deref_atomic:
1045    case nir_intrinsic_image_deref_atomic_swap: {
1046       nir_def *pos = intrin->src[1].ssa;
1047       nir_def *x = nir_channel(b, pos, 0);
1048 
1049       nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, x);
1050       nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1051 
1052       nir_def *new_x = adjust_edb_buffer_view_coord(b, desc, x);
1053       pos = nir_vector_insert_imm(b, pos, new_x, 0);
1054       nir_src_rewrite(&intrin->src[1], pos);
1055 
1056       if (intrin->intrinsic == nir_intrinsic_image_deref_load) {
1057          b->cursor = nir_after_instr(&intrin->instr);
1058          nir_def *res = &intrin->def;
1059          res = fixup_edb_buffer_view_result(b, desc, in_bounds, res,
1060                                             nir_intrinsic_dest_type(intrin));
1061          nir_def_rewrite_uses_after(&intrin->def, res, res->parent_instr);
1062       }
1063 
1064       nir_rewrite_image_intrinsic(intrin, index, true);
1065       break;
1066    }
1067 
1068    case nir_intrinsic_image_deref_size: {
1069       assert(intrin->def.num_components == 1);
1070       nir_def *size_el = nir_channel(b, desc, 2);
1071       nir_def_rewrite_uses(&intrin->def, size_el);
1072       break;
1073    }
1074 
1075    default:
1076       unreachable("Unknown image intrinsic");
1077    }
1078 }
1079 
1080 static bool
1081 lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1082                    const struct lower_descriptors_ctx *ctx)
1083 {
1084    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1085 
1086    if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_MS) {
1087       lower_msaa_image_intrin(b, intrin, ctx);
1088    } else if (is_edb_buffer_view(deref, ctx)) {
1089       lower_edb_buffer_image_intrin(b, intrin, ctx);
1090    } else {
1091       b->cursor = nir_before_instr(&intrin->instr);
1092       nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
1093       nir_rewrite_image_intrinsic(intrin, desc, true);
1094    }
1095 
1096    return true;
1097 }
1098 
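/* interpolateAtSample() is lowered to interpolateAtOffset() using the sample
 * locations stored in the root descriptor table.  Each sample gets one byte
 * with x in bits [3:0] and y in bits [7:4], both in 1/16ths of a pixel,
 * which we turn into an offset relative to the pixel center.
 */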
1099 static bool
1100 lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
1101                        const struct lower_descriptors_ctx *ctx)
1102 {
1103    const uint32_t root_table_offset =
1104       nvk_root_descriptor_offset(draw.sample_locations);
1105 
1106    nir_def *sample = interp->src[1].ssa;
1107 
1108    b->cursor = nir_before_instr(&interp->instr);
1109 
1110    nir_def *loc = nir_ldc_nv(b, 1, 64,
1111                              nir_imm_int(b, 0), /* Root table */
1112                              nir_imm_int(b, root_table_offset),
1113                              .align_mul = 8,
1114                              .align_offset = 0);
1115 
1116    /* Yay little endian */
1117    loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
1118    nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
1119    nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
1120    nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
1121    nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
1122    nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);
1123 
1124    assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
1125    interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
1126    nir_src_rewrite(&interp->src[1], offset);
1127 
1128    return true;
1129 }
1130 
1131 static bool
1132 try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1133                  const struct lower_descriptors_ctx *ctx)
1134 {
1135    switch (intrin->intrinsic) {
1136    case nir_intrinsic_load_constant:
1137       return lower_load_constant(b, intrin, ctx);
1138 
1139    case nir_intrinsic_load_vulkan_descriptor:
1140       return try_lower_load_vulkan_descriptor(b, intrin, ctx);
1141 
1142    case nir_intrinsic_load_workgroup_size:
1143       unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
1144 
1145    case nir_intrinsic_load_num_workgroups:
1146       return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);
1147 
1148    case nir_intrinsic_load_base_workgroup_id:
1149       return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);
1150 
1151    case nir_intrinsic_load_push_constant:
1152       return lower_load_push_constant(b, intrin, ctx);
1153 
1154    case nir_intrinsic_load_base_vertex:
1155    case nir_intrinsic_load_first_vertex:
1156       return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);
1157 
1158    case nir_intrinsic_load_base_instance:
1159       return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);
1160 
1161    case nir_intrinsic_load_draw_id:
1162       return lower_sysval_to_root_table(b, intrin, draw.draw_index, ctx);
1163 
1164    case nir_intrinsic_load_view_index:
1165       return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);
1166 
1167    case nir_intrinsic_image_deref_load:
1168    case nir_intrinsic_image_deref_sparse_load:
1169    case nir_intrinsic_image_deref_store:
1170    case nir_intrinsic_image_deref_atomic:
1171    case nir_intrinsic_image_deref_atomic_swap:
1172    case nir_intrinsic_image_deref_size:
1173    case nir_intrinsic_image_deref_samples:
1174       return lower_image_intrin(b, intrin, ctx);
1175 
1176    case nir_intrinsic_interp_deref_at_sample:
1177       return lower_interp_at_sample(b, intrin, ctx);
1178 
1179    default:
1180       return false;
1181    }
1182 }
1183 
1184 static void
1185 lower_edb_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex,
1186                            const struct lower_descriptors_ctx *ctx)
1187 {
1188    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_BUF);
1189 
1190    b->cursor = nir_before_instr(&tex->instr);
1191 
1192    const int texture_src_idx =
1193       nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1194    nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1195 
1196    nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1197    ASSERTED const uint32_t plane =
1198       plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1199    assert(plane == 0);
1200 
1201    nir_def *desc = load_resource_deref_desc(b, 4, 32, texture, 0, ctx);
1202 
1203    switch (tex->op) {
1204    case nir_texop_txf: {
1205       const int coord_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1206       assert(coord_src_idx >= 0);
1207       nir_def *coord = tex->src[coord_src_idx].src.ssa;
1208 
1209       nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, coord);
1210 
1211       nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1212       nir_src_rewrite(&tex->src[texture_src_idx].src, index);
1213       tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1214 
1215       nir_def *new_coord = adjust_edb_buffer_view_coord(b, desc, coord);
1216       nir_src_rewrite(&tex->src[coord_src_idx].src, new_coord);
1217 
1218       b->cursor = nir_after_instr(&tex->instr);
1219       nir_def *res = &tex->def;
1220       res = fixup_edb_buffer_view_result(b, desc, in_bounds,
1221                                          res, tex->dest_type);
1222       nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1223       break;
1224    }
1225 
1226    case nir_texop_txs: {
1227       assert(tex->def.num_components == 1);
1228       nir_def *size_el = edb_buffer_view_size_el(b, desc);
1229       nir_def_rewrite_uses(&tex->def, size_el);
1230       break;
1231    }
1232 
1233    default:
1234       unreachable("Invalid buffer texture op");
1235    }
1236 }
1237 
1238 static bool
1239 lower_tex(nir_builder *b, nir_tex_instr *tex,
1240           const struct lower_descriptors_ctx *ctx)
1241 {
1242    const int texture_src_idx =
1243       nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1244    const int sampler_src_idx =
1245       nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
1246    if (texture_src_idx < 0) {
1247       assert(sampler_src_idx < 0);
1248       return false;
1249    }
1250 
1251    nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1252    nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
1253                               nir_src_as_deref(tex->src[sampler_src_idx].src);
1254    assert(texture);
1255 
1256    if (is_edb_buffer_view(texture, ctx)) {
1257       lower_edb_buffer_tex_instr(b, tex, ctx);
1258       return true;
1259    }
1260 
1261    b->cursor = nir_before_instr(&tex->instr);
1262 
1263    nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1264    const uint32_t plane =
1265       plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1266    const uint64_t plane_offset_B =
1267       plane * sizeof(struct nvk_sampled_image_descriptor);
1268 
1269    nir_def *texture_desc =
1270          load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
1271 
1272    nir_def *combined_handle;
1273    if (texture == sampler) {
1274       combined_handle = texture_desc;
1275    } else {
1276       combined_handle = nir_iand_imm(b, texture_desc,
1277                                      NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1278 
1279       if (sampler != NULL) {
1280          nir_def *sampler_desc =
1281             load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
1282          nir_def *sampler_index =
1283             nir_iand_imm(b, sampler_desc,
1284                          NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
1285          combined_handle = nir_ior(b, combined_handle, sampler_index);
1286       }
1287    }
1288 
1289    /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
1290    combined_handle = nir_u2u64(b, combined_handle);
1291 
1292    /* TODO: The nv50 back-end assumes it gets handles in both places, even for
1293     * texelFetch.
1294     */
1295    nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
1296    tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1297 
1298    if (sampler_src_idx < 0) {
1299       nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
1300    } else {
1301       nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
1302       tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
1303    }
1304 
1305    /* On pre-Volta hardware, we don't have real null descriptors.  Null
1306     * descriptors work well enough for sampling but they may not return the
1307     * correct query results.
1308     */
1309    if (ctx->dev_info->cls_eng3d < VOLTA_A && nir_tex_instr_is_query(tex)) {
1310       b->cursor = nir_after_instr(&tex->instr);
1311 
1312       /* This should get CSE'd with the earlier load */
1313       nir_def *texture_handle =
1314          nir_iand_imm(b, texture_desc, NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1315       nir_def *is_null = nir_ieq_imm(b, texture_handle, 0);
1316       nir_def *zero = nir_imm_zero(b, tex->def.num_components,
1317                                       tex->def.bit_size);
1318       nir_def *res = nir_bcsel(b, is_null, zero, &tex->def);
1319       nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1320    }
1321 
1322    return true;
1323 }
1324 
1325 static bool
1326 try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
1327                             void *_data)
1328 {
1329    const struct lower_descriptors_ctx *ctx = _data;
1330 
1331    switch (instr->type) {
1332    case nir_instr_type_tex:
1333       return lower_tex(b, nir_instr_as_tex(instr), ctx);
1334    case nir_instr_type_intrinsic:
1335       return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
1336    default:
1337       return false;
1338    }
1339 }
1340 
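/* Sentinel "high address" used to tag SSBO binding addresses that actually
 * point into the root descriptor table (dynamic SSBOs) rather than GPU
 * memory.  lower_load_ssbo_descriptor() tests for it to choose between a
 * root-table (cbuf 0) load and a global load.
 */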
1341 #define ROOT_DESC_BASE_ADDR_HI 0x0057de3c
1342 
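/* SSBO resource indices are lowered to a bounded-global style vec4:
 *
 *    (base_addr_lo, base_addr_hi | (stride << 24), binding_size, offset)
 *
 * The binding stride lives in the top 8 bits of the 64-bit base address so
 * that lower_ssbo_resource_reindex() can recover it when adjusting the
 * offset.
 */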
1343 static bool
1344 lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
1345                           const struct lower_descriptors_ctx *ctx)
1346 {
1347    if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1348       return false;
1349 
1350    b->cursor = nir_instr_remove(&intrin->instr);
1351 
1352    uint32_t set = nir_intrinsic_desc_set(intrin);
1353    uint32_t binding = nir_intrinsic_binding(intrin);
1354    nir_def *index = intrin->src[0].ssa;
1355 
1356    const struct nvk_descriptor_set_binding_layout *binding_layout =
1357       get_binding_layout(set, binding, ctx);
1358 
1359    nir_def *binding_addr;
1360    uint8_t binding_stride;
1361    switch (binding_layout->type) {
1362    case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
1363    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
1364       nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
1365       binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
1366       binding_stride = binding_layout->stride;
1367       break;
1368    }
1369 
1370    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1371       nir_def *dynamic_buffer_start =
1372          nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
1373                       binding_layout->dynamic_buffer_index);
1374 
1375       nir_def *dynamic_binding_offset =
1376          nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
1377                                       sizeof(struct nvk_buffer_address)),
1378                       nvk_root_descriptor_offset(dynamic_buffers));
1379 
1380       binding_addr =
1381          nir_pack_64_2x32_split(b, dynamic_binding_offset,
1382                                 nir_imm_int(b, ROOT_DESC_BASE_ADDR_HI));
1383       binding_stride = sizeof(struct nvk_buffer_address);
1384       break;
1385    }
1386 
1387    default:
1388       unreachable("Not an SSBO descriptor");
1389    }
1390 
1391    /* Tuck the stride in the top 8 bits of the binding address */
1392    binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
1393 
1394    const uint32_t binding_size = binding_layout->array_size * binding_stride;
1395    nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
1396 
1397    /* We depend on this when we load descriptors */
1398    assert(binding_layout->array_size >= 1);
1399 
1400    nir_def *addr;
1401    switch (ctx->ssbo_addr_format) {
1402    case nir_address_format_64bit_global_32bit_offset:
1403    case nir_address_format_64bit_bounded_global:
1404       addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
1405                          nir_unpack_64_2x32_split_y(b, binding_addr),
1406                          nir_imm_int(b, binding_size),
1407                          offset_in_binding);
1408       break;
1409 
1410    default:
1411       unreachable("Unknown address mode");
1412    }
1413 
1414    nir_def_rewrite_uses(&intrin->def, addr);
1415 
1416    return true;
1417 }
1418 
1419 static bool
1420 lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
1421                             const struct lower_descriptors_ctx *ctx)
1422 {
1423    if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1424       return false;
1425 
1426    b->cursor = nir_instr_remove(&intrin->instr);
1427 
1428    nir_def *addr = intrin->src[0].ssa;
1429    nir_def *index = intrin->src[1].ssa;
1430 
1431    nir_def *addr_high32;
1432    switch (ctx->ssbo_addr_format) {
1433    case nir_address_format_64bit_global_32bit_offset:
1434    case nir_address_format_64bit_bounded_global:
1435       addr_high32 = nir_channel(b, addr, 1);
1436       break;
1437 
1438    default:
1439       unreachable("Unknown address mode");
1440    }
1441 
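   /* lower_ssbo_resource_index() packed the binding stride into bits [63:56]
    * of the address.  addr_high32 is bits [63:32], so shifting it right by
    * 24 recovers the stride, and re-indexing just advances the offset by
    * index * stride.
    */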
1442    nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
1443    nir_def *offset = nir_imul(b, index, stride);
1444 
1445    addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
1446                               nir_var_mem_ssbo, offset);
1447    nir_def_rewrite_uses(&intrin->def, addr);
1448 
1449    return true;
1450 }
1451 
1452 static bool
1453 lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1454                            const struct lower_descriptors_ctx *ctx)
1455 {
1456    if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1457       return false;
1458 
1459    b->cursor = nir_instr_remove(&intrin->instr);
1460 
1461    nir_def *addr = intrin->src[0].ssa;
1462 
1463    nir_def *base, *offset, *size = NULL;
1464    switch (ctx->ssbo_addr_format) {
1465    case nir_address_format_64bit_global_32bit_offset: {
1466       base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1467       offset = nir_channel(b, addr, 3);
1468       break;
1469    }
1470 
1471    case nir_address_format_64bit_bounded_global: {
1472       base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1473       size = nir_channel(b, addr, 2);
1474       offset = nir_channel(b, addr, 3);
1475       break;
1476    }
1477 
1478    default:
1479       unreachable("Unknown address mode");
1480    }
1481 
1482    /* Mask off the binding stride */
1483    base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1484 
1485    nir_def *base_lo = nir_unpack_64_2x32_split_x(b, base);
1486    nir_def *base_hi = nir_unpack_64_2x32_split_y(b, base);
1487 
1488    nir_def *desc_root, *desc_global;
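   /* A high word of ROOT_DESC_BASE_ADDR_HI marks a descriptor that lives in
    * the root descriptor table, which is bound as cbuf 0, so it is fetched
    * with a UBO load at base_lo + offset.  Anything else is a real GPU
    * address into descriptor set memory and goes through a global constant
    * load (bounds-checked when the address format carries a size).
    */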
1489    nir_push_if(b, nir_ieq_imm(b, base_hi, ROOT_DESC_BASE_ADDR_HI));
1490    {
1491       desc_root = nir_load_ubo(b, 4, 32, nir_imm_int(b, 0),
1492                                nir_iadd(b, base_lo, offset),
1493                                .align_mul = 16, .align_offset = 0,
1494                                .range = ~0);
1495       if (size != NULL) {
1496          /* assert(binding_layout->array_size >= 1); */
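         /* The 16-byte (4 x 32-bit) descriptor load above runs past the end
          * of the binding if offset > size - 16.  In that case, hand back a
          * null descriptor instead.
          */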
1497          nir_def *is_oob = nir_ult(b, nir_iadd_imm(b, size, -16), offset);
1498          desc_root = nir_bcsel(b, is_oob, nir_imm_zero(b, 4, 32), desc_root);
1499       }
1500    }
1501    nir_push_else(b, NULL);
1502    {
1503       if (size != NULL) {
1504          desc_global = nir_load_global_constant_bounded(b, 4, 32, base,
1505                                                         offset, size,
1506                                                         .align_mul = 16,
1507                                                         .align_offset = 0);
1508       } else {
1509          desc_global = nir_load_global_constant_offset(b, 4, 32, base,
1510                                                        offset,
1511                                                        .align_mul = 16,
1512                                                        .align_offset = 0);
1513       }
1514    }
1515    nir_pop_if(b, NULL);
1516    nir_def *desc = nir_if_phi(b, desc_root, desc_global);
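   /* The in-memory descriptor appears to be laid out to match the bounded
    * global address format (64-bit base address, 32-bit size, 32-bit zero),
    * so the loaded vec4 can be used directly as the SSBO address.
    */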
1517 
1518    nir_def_rewrite_uses(&intrin->def, desc);
1519 
1520    return true;
1521 }
1522 
1523 static bool
1524 lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
1525                             void *_data)
1526 {
1527    const struct lower_descriptors_ctx *ctx = _data;
1528 
1529    if (instr->type != nir_instr_type_intrinsic)
1530       return false;
1531 
1532    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1533    switch (intrin->intrinsic) {
1534    case nir_intrinsic_vulkan_resource_index:
1535       return lower_ssbo_resource_index(b, intrin, ctx);
1536    case nir_intrinsic_vulkan_resource_reindex:
1537       return lower_ssbo_resource_reindex(b, intrin, ctx);
1538    case nir_intrinsic_load_vulkan_descriptor:
1539       return lower_load_ssbo_descriptor(b, intrin, ctx);
1540    default:
1541       return false;
1542    }
1543 }
1544 
1545 bool
1546 nvk_nir_lower_descriptors(nir_shader *nir,
1547                           const struct nvk_physical_device *pdev,
1548                           const struct vk_pipeline_robustness_state *rs,
1549                           uint32_t set_layout_count,
1550                           struct vk_descriptor_set_layout * const *set_layouts,
1551                           struct nvk_cbuf_map *cbuf_map_out)
1552 {
1553    struct lower_descriptors_ctx ctx = {
1554       .dev_info = &pdev->info,
1555       .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
1556       .use_edb_buffer_views = nvk_use_edb_buffer_views(pdev),
1557       .clamp_desc_array_bounds =
1558          rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1559          rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1560          rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
1561       .ssbo_addr_format = nvk_ssbo_addr_format(pdev, rs),
1562       .ubo_addr_format = nvk_ubo_addr_format(pdev, rs),
1563    };
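   /* Descriptor array indices only get clamped when at least one buffer or
    * image robustness behavior is still enabled; with robustness fully
    * disabled the extra clamp is skipped.
    */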
1564 
1565    assert(set_layout_count <= NVK_MAX_SETS);
1566    for (uint32_t s = 0; s < set_layout_count; s++) {
1567       if (set_layouts[s] != NULL)
1568          ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
1569    }
1570 
1571    /* We run in four passes:
1572     *
1573     *  1. Find ranges of UBOs that we can promote to bound UBOs.  Nothing is
1574     *     actually lowered in this pass.  It's just analysis.
1575     *
1576     *  2. Try to lower UBO loads to cbufs based on the map we just created.
1577     *     We need to do this before the main lowering pass because it relies
1578     *     on the original descriptor load intrinsics.
1579     *
1580     *  3. Attempt to lower everything with direct descriptors.  This may fail
1581     *     to lower some SSBO intrinsics when variable pointers are used.
1582     *
1583     *  4. Clean up any SSBO intrinsics which are left and lower them to
1584     *     slightly less efficient but variable-pointers-correct versions.
1585     */
1586 
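   /* Passes 1 and 2 only run when the caller actually wants a cbuf map. */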
1587    bool pass_lower_ubo = false;
1588    if (cbuf_map_out != NULL) {
1589       ctx.cbuf_map = cbuf_map_out;
1590       build_cbuf_map(nir, &ctx);
1591 
1592       pass_lower_ubo =
1593          nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
1594                                     nir_metadata_control_flow,
1595                                     (void *)&ctx);
1596    }
1597 
1598    bool pass_lower_descriptors =
1599       nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
1600                                    nir_metadata_control_flow,
1601                                    (void *)&ctx);
1602    bool pass_lower_ssbo =
1603       nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
1604                                    nir_metadata_control_flow,
1605                                    (void *)&ctx);
1606    return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
1607 }
1608