1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_cmd_buffer.h"
6 #include "nvk_descriptor_set_layout.h"
7 #include "nvk_descriptor_types.h"
8 #include "nvk_shader.h"
9
10 #include "vk_pipeline.h"
11
12 #include "nir_builder.h"
13 #include "nir_deref.h"
14
15 #include "clc397.h"
16 #include "clc597.h"
17
18 struct lower_desc_cbuf {
19 struct nvk_cbuf key;
20
21 uint32_t use_count;
22
23 uint64_t start;
24 uint64_t end;
25 };
26
27 DERIVE_HASH_TABLE(nvk_cbuf);
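/* DERIVE_HASH_TABLE(nvk_cbuf) derives hash-table helpers keyed on struct
 * nvk_cbuf; the generated nvk_cbuf_table_create() and nvk_cbuf_equal() are
 * what build_cbuf_map() and get_mapped_cbuf_idx() below rely on.
 */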
28
29 static int
30 compar_cbufs(const void *_a, const void *_b)
31 {
32 const struct lower_desc_cbuf *a = _a;
33 const struct lower_desc_cbuf *b = _b;
34
35 #define COMPAR(field, pos) \
36 if (a->field < b->field) return -(pos); \
37 if (a->field > b->field) return (pos);
38
39 /* Sort by most used first */
40 COMPAR(use_count, -1)
41
42 /* Keep the list stable by then sorting by key fields. */
43 COMPAR(key.type, 1)
44 COMPAR(key.desc_set, 1)
45 COMPAR(key.dynamic_idx, 1)
46 COMPAR(key.desc_offset, 1)
47
48 #undef COMPAR
49
50 return 0;
51 }
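/* Illustration of the resulting order (counts invented for the example):
 * with A = { .use_count = 5 } and B = { .use_count = 2 }, A sorts before B
 * because COMPAR(use_count, -1) inverts the comparison.  Entries with equal
 * use_count fall through to the key-field comparisons, which only exist to
 * make the qsort() in build_cbuf_map() deterministic.
 */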
52
53 struct lower_descriptors_ctx {
54 const struct nv_device_info *dev_info;
55 const struct nvk_descriptor_set_layout *set_layouts[NVK_MAX_SETS];
56
57 bool use_bindless_cbuf;
58 bool use_edb_buffer_views;
59 bool clamp_desc_array_bounds;
60 nir_address_format ubo_addr_format;
61 nir_address_format ssbo_addr_format;
62
63 struct hash_table *cbufs;
64 struct nvk_cbuf_map *cbuf_map;
65 };
66
67 static bool
68 descriptor_type_is_ubo(VkDescriptorType desc_type)
69 {
70 switch (desc_type) {
71 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
72 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
73 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
74 return true;
75
76 default:
77 return false;
78 }
79 }
80
81 static bool
82 descriptor_type_is_ssbo(VkDescriptorType desc_type)
83 {
84 switch (desc_type) {
85 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
86 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
87 return true;
88
89 default:
90 return false;
91 }
92 }
93
94 static void
95 record_cbuf_use(const struct nvk_cbuf *key, uint64_t start, uint64_t end,
96 struct lower_descriptors_ctx *ctx)
97 {
98 struct hash_entry *entry = _mesa_hash_table_search(ctx->cbufs, key);
99 if (entry != NULL) {
100 struct lower_desc_cbuf *cbuf = entry->data;
101 cbuf->use_count++;
102 cbuf->start = MIN2(cbuf->start, start);
103 cbuf->end = MAX2(cbuf->end, end);
104 } else {
105 struct lower_desc_cbuf *cbuf =
106 ralloc(ctx->cbufs, struct lower_desc_cbuf);
107 *cbuf = (struct lower_desc_cbuf) {
108 .key = *key,
109 .use_count = 1,
110 .start = start,
111 .end = end,
112 };
113 _mesa_hash_table_insert(ctx->cbufs, &cbuf->key, cbuf);
114 }
115 }
116
117 static const struct nvk_descriptor_set_binding_layout *
118 get_binding_layout(uint32_t set, uint32_t binding,
119 const struct lower_descriptors_ctx *ctx)
120 {
121 assert(set < NVK_MAX_SETS);
122 assert(ctx->set_layouts[set] != NULL);
123
124 const struct nvk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
125
126 assert(binding < set_layout->binding_count);
127 return &set_layout->binding[binding];
128 }
129
130 static void
131 record_descriptor_cbuf_use(uint32_t set, uint32_t binding, nir_src *index,
132 struct lower_descriptors_ctx *ctx)
133 {
134 const struct nvk_descriptor_set_binding_layout *binding_layout =
135 get_binding_layout(set, binding, ctx);
136
137 const struct nvk_cbuf key = {
138 .type = NVK_CBUF_TYPE_DESC_SET,
139 .desc_set = set,
140 };
141
142 uint64_t start, end;
143 if (index == NULL) {
144 /* When we don't have an index, assume 0 */
145 start = binding_layout->offset;
146 end = start + binding_layout->stride;
147 } else if (nir_src_is_const(*index)) {
148 start = binding_layout->offset +
149 nir_src_as_uint(*index) * binding_layout->stride;
150 end = start + binding_layout->stride;
151 } else {
152 start = binding_layout->offset;
153 end = start + binding_layout->array_size * binding_layout->stride;
154 }
155
156 record_cbuf_use(&key, start, end, ctx);
157 }
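/* Worked example (layout numbers invented): for a binding with offset = 64
 * and stride = 16, a constant array index of 2 records the byte range
 * [96, 112) of the set, while a non-constant index records the whole
 * binding, [64, 64 + array_size * 16).
 */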
158
159 static void
160 record_vulkan_resource_cbuf_use(nir_intrinsic_instr *intrin,
161 struct lower_descriptors_ctx *ctx)
162 {
163 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
164
165 /* These we'll handle later */
166 if (descriptor_type_is_ubo(nir_intrinsic_desc_type(intrin)))
167 return;
168
169 record_descriptor_cbuf_use(nir_intrinsic_desc_set(intrin),
170 nir_intrinsic_binding(intrin),
171 &intrin->src[0], ctx);
172 }
173
174 static void
175 record_deref_descriptor_cbuf_use(nir_deref_instr *deref,
176 struct lower_descriptors_ctx *ctx)
177 {
178 nir_src *index_src = NULL;
179 if (deref->deref_type == nir_deref_type_array) {
180 index_src = &deref->arr.index;
181 deref = nir_deref_instr_parent(deref);
182 }
183
184 assert(deref->deref_type == nir_deref_type_var);
185 nir_variable *var = deref->var;
186
187 record_descriptor_cbuf_use(var->data.descriptor_set,
188 var->data.binding,
189 index_src, ctx);
190 }
191
192 static void
193 record_tex_descriptor_cbuf_use(nir_tex_instr *tex,
194 struct lower_descriptors_ctx *ctx)
195 {
196 const int texture_src_idx =
197 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
198 const int sampler_src_idx =
199 nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
200
201 if (texture_src_idx >= 0) {
202 nir_deref_instr *deref = nir_src_as_deref(tex->src[texture_src_idx].src);
203 record_deref_descriptor_cbuf_use(deref, ctx);
204 }
205
206 if (sampler_src_idx >= 0) {
207 nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
208 record_deref_descriptor_cbuf_use(deref, ctx);
209 }
210 }
211
212 static struct nvk_cbuf
213 ubo_deref_to_cbuf(nir_deref_instr *deref,
214 nir_intrinsic_instr **resource_index_out,
215 uint64_t *offset_out,
216 uint64_t *start_out, uint64_t *end_out,
217 const struct lower_descriptors_ctx *ctx)
218 {
219 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
220
221 /* In case we early return */
222 *offset_out = 0;
223 *start_out = 0;
224 *end_out = UINT64_MAX;
225 *resource_index_out = NULL;
226
227 const struct nvk_cbuf invalid = {
228 .type = NVK_CBUF_TYPE_INVALID,
229 };
230
231 uint64_t offset = 0;
232 uint64_t range = glsl_get_explicit_size(deref->type, false);
233 bool offset_valid = true;
234 while (deref->deref_type != nir_deref_type_cast) {
235 nir_deref_instr *parent = nir_deref_instr_parent(deref);
236
237 switch (deref->deref_type) {
238 case nir_deref_type_var:
239 unreachable("Buffers don't use variables in Vulkan");
240
241 case nir_deref_type_array:
242 case nir_deref_type_array_wildcard: {
243 uint32_t stride = nir_deref_instr_array_stride(deref);
244 if (deref->deref_type == nir_deref_type_array &&
245 nir_src_is_const(deref->arr.index)) {
246 offset += nir_src_as_uint(deref->arr.index) * stride;
247 } else {
248 range = glsl_get_length(parent->type) * stride;
249 }
250 break;
251 }
252
253 case nir_deref_type_ptr_as_array:
254 /* All bets are off. We shouldn't see these most of the time
255 * anyway, even with variable pointers.
256 */
257 offset_valid = false;
258 unreachable("Variable pointers aren't allowed on UBOs");
259 break;
260
261 case nir_deref_type_struct: {
262 offset += glsl_get_struct_field_offset(parent->type,
263 deref->strct.index);
264 break;
265 }
266
267 default:
268 unreachable("Unknown deref type");
269 }
270
271 deref = parent;
272 }
273
274 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
275 if (load_desc == NULL ||
276 load_desc->intrinsic != nir_intrinsic_load_vulkan_descriptor)
277 return invalid;
278
279 nir_intrinsic_instr *res_index = nir_src_as_intrinsic(load_desc->src[0]);
280 if (res_index == NULL ||
281 res_index->intrinsic != nir_intrinsic_vulkan_resource_index)
282 return invalid;
283
284 /* We try to early return as little as possible prior to this point so we
285 * can return the resource index intrinsic in as many cases as possible.
286 * After this point, though, early returns are fair game.
287 */
288 *resource_index_out = res_index;
289
290 if (!offset_valid || !nir_src_is_const(res_index->src[0]))
291 return invalid;
292
293 uint32_t set = nir_intrinsic_desc_set(res_index);
294 uint32_t binding = nir_intrinsic_binding(res_index);
295 uint32_t index = nir_src_as_uint(res_index->src[0]);
296
297 const struct nvk_descriptor_set_binding_layout *binding_layout =
298 get_binding_layout(set, binding, ctx);
299
300 switch (binding_layout->type) {
301 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
302 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
303 *offset_out = 0;
304 *start_out = offset;
305 *end_out = offset + range;
306 return (struct nvk_cbuf) {
307 .type = NVK_CBUF_TYPE_UBO_DESC,
308 .desc_set = set,
309 .desc_offset = binding_layout->offset +
310 index * binding_layout->stride,
311 };
312 }
313
314 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: {
315 *offset_out = 0;
316 *start_out = offset;
317 *end_out = offset + range;
318
319 return (struct nvk_cbuf) {
320 .type = NVK_CBUF_TYPE_DYNAMIC_UBO,
321 .desc_set = set,
322 .dynamic_idx = binding_layout->dynamic_buffer_index + index,
323 };
324 }
325
326 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
327 *offset_out = binding_layout->offset;
328 *start_out = binding_layout->offset + offset;
329 *end_out = *start_out + range;
330
331 return (struct nvk_cbuf) {
332 .type = NVK_CBUF_TYPE_DESC_SET,
333 .desc_set = set,
334 };
335 }
336
337 default:
338 return invalid;
339 }
340 }
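/* Rough example of what ubo_deref_to_cbuf() computes (types and offsets
 * invented): for a chain block.arr[3].f where arr has an explicit array
 * stride of 16 and f sits at struct offset 4, the walk above accumulates
 * offset = 3 * 16 + 4 = 52 with range = 4, so a plain UNIFORM_BUFFER
 * binding returns an NVK_CBUF_TYPE_UBO_DESC key with *start_out = 52 and
 * *end_out = 56.
 */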
341
342 static void
343 record_load_ubo_cbuf_uses(nir_deref_instr *deref,
344 struct lower_descriptors_ctx *ctx)
345 {
346 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
347
348 UNUSED uint64_t offset;
349 uint64_t start, end;
350 nir_intrinsic_instr *res_index;
351 struct nvk_cbuf cbuf =
352 ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
353
354 if (cbuf.type != NVK_CBUF_TYPE_INVALID) {
355 record_cbuf_use(&cbuf, start, end, ctx);
356 } else if (res_index != NULL) {
357 record_vulkan_resource_cbuf_use(res_index, ctx);
358 }
359 }
360
361 static bool
362 record_cbuf_uses_instr(UNUSED nir_builder *b, nir_instr *instr, void *_ctx)
363 {
364 struct lower_descriptors_ctx *ctx = _ctx;
365
366 switch (instr->type) {
367 case nir_instr_type_intrinsic: {
368 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
369 switch (intrin->intrinsic) {
370 case nir_intrinsic_vulkan_resource_index:
371 record_vulkan_resource_cbuf_use(intrin, ctx);
372 return false;
373
374 case nir_intrinsic_load_deref: {
375 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
376 if (nir_deref_mode_is(deref, nir_var_mem_ubo))
377 record_load_ubo_cbuf_uses(deref, ctx);
378 return false;
379 }
380
381 case nir_intrinsic_image_deref_load:
382 case nir_intrinsic_image_deref_store:
383 case nir_intrinsic_image_deref_atomic:
384 case nir_intrinsic_image_deref_atomic_swap:
385 case nir_intrinsic_image_deref_size:
386 case nir_intrinsic_image_deref_samples: {
387 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
388 record_deref_descriptor_cbuf_use(deref, ctx);
389 return false;
390 }
391
392 default:
393 return false;
394 }
395 unreachable("All cases return false");
396 }
397
398 case nir_instr_type_tex:
399 record_tex_descriptor_cbuf_use(nir_instr_as_tex(instr), ctx);
400 return false;
401
402 default:
403 return false;
404 }
405 }
406
407 static void
408 build_cbuf_map(nir_shader *nir, struct lower_descriptors_ctx *ctx)
409 {
410 ctx->cbuf_map->cbuf_count = 0;
411
412 /* Root descriptors always go in cbuf 0 */
413 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
414 .type = NVK_CBUF_TYPE_ROOT_DESC,
415 };
416
417 /* If we have constant data, put it at cbuf 1 */
418 if (nir->constant_data_size > 0) {
419 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = (struct nvk_cbuf) {
420 .type = NVK_CBUF_TYPE_SHADER_DATA,
421 };
422 }
423
424 ctx->cbufs = nvk_cbuf_table_create(NULL);
425 nir_shader_instructions_pass(nir, record_cbuf_uses_instr,
426 nir_metadata_all, (void *)ctx);
427
428 struct lower_desc_cbuf *cbufs =
429 ralloc_array(ctx->cbufs, struct lower_desc_cbuf,
430 _mesa_hash_table_num_entries(ctx->cbufs));
431
432 uint32_t num_cbufs = 0;
433 hash_table_foreach(ctx->cbufs, entry) {
434 struct lower_desc_cbuf *cbuf = entry->data;
435
436 /* We currently only bind cbufs starting at offset 0, so if the used range
437 * starts past the maximum cbuf size, there's no point in including it in the list.
438 */
439 if (cbuf->start > NVK_MAX_CBUF_SIZE)
440 continue;
441
442 cbufs[num_cbufs++] = *cbuf;
443 }
444
445 qsort(cbufs, num_cbufs, sizeof(*cbufs), compar_cbufs);
446
447 uint8_t max_cbuf_bindings;
448 if (nir->info.stage == MESA_SHADER_COMPUTE ||
449 nir->info.stage == MESA_SHADER_KERNEL) {
450 max_cbuf_bindings = 8;
451 } else {
452 max_cbuf_bindings = 16;
453 }
454
455 for (uint32_t i = 0; i < num_cbufs; i++) {
456 if (ctx->cbuf_map->cbuf_count >= max_cbuf_bindings)
457 break;
458
459 /* We can't support indirect cbufs in compute yet */
460 if ((nir->info.stage == MESA_SHADER_COMPUTE ||
461 nir->info.stage == MESA_SHADER_KERNEL) &&
462 cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
463 continue;
464
465 /* Prior to Turing, indirect cbufs require splitting the pushbuf and
466 * pushing bits of the descriptor set. Doing this every draw call is
467 * probably more overhead than it's worth.
468 */
469 if (ctx->dev_info->cls_eng3d < TURING_A &&
470 cbufs[i].key.type == NVK_CBUF_TYPE_UBO_DESC)
471 continue;
472
473 ctx->cbuf_map->cbufs[ctx->cbuf_map->cbuf_count++] = cbufs[i].key;
474 }
475
476 ralloc_free(ctx->cbufs);
477 ctx->cbufs = NULL;
478 }
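/* Net effect of build_cbuf_map(): cbuf 0 is always the root descriptor
 * table, cbuf 1 holds the shader's constant data when there is any, and
 * the remaining slots (8 total for compute, 16 for graphics) are handed
 * out to the most-used descriptor-set and UBO cbufs found above.  Anything
 * that doesn't get a slot is left for the generic lowering paths below.
 */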
479
480 static int
481 get_mapped_cbuf_idx(const struct nvk_cbuf *key,
482 const struct lower_descriptors_ctx *ctx)
483 {
484 if (ctx->cbuf_map == NULL)
485 return -1;
486
487 for (uint32_t c = 0; c < ctx->cbuf_map->cbuf_count; c++) {
488 if (nvk_cbuf_equal(&ctx->cbuf_map->cbufs[c], key)) {
489 return c;
490 }
491 }
492
493 return -1;
494 }
495
496 static bool
497 lower_load_ubo_intrin(nir_builder *b, nir_intrinsic_instr *load, void *_ctx)
498 {
499 const struct lower_descriptors_ctx *ctx = _ctx;
500
501 if (load->intrinsic != nir_intrinsic_load_deref)
502 return false;
503
504 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
505 if (!nir_deref_mode_is(deref, nir_var_mem_ubo))
506 return false;
507
508 uint64_t offset, end;
509 UNUSED uint64_t start;
510 UNUSED nir_intrinsic_instr *res_index;
511 struct nvk_cbuf cbuf_key =
512 ubo_deref_to_cbuf(deref, &res_index, &offset, &start, &end, ctx);
513
514 if (cbuf_key.type == NVK_CBUF_TYPE_INVALID)
515 return false;
516
517 if (end > NVK_MAX_CBUF_SIZE)
518 return false;
519
520 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
521 if (cbuf_idx < 0)
522 return false;
523
524 b->cursor = nir_before_instr(&load->instr);
525
526 nir_deref_path path;
527 nir_deref_path_init(&path, deref, NULL);
528
529 nir_def *addr = nir_imm_ivec2(b, cbuf_idx, offset);
530 nir_address_format addr_format = nir_address_format_32bit_index_offset;
531 for (nir_deref_instr **p = &path.path[1]; *p != NULL; p++)
532 addr = nir_explicit_io_address_from_deref(b, *p, addr, addr_format);
533
534 nir_deref_path_finish(&path);
535
536 nir_lower_explicit_io_instr(b, load, addr, addr_format);
537
538 return true;
539 }
540
541 static bool
542 lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
543 const struct lower_descriptors_ctx *ctx)
544 {
545 assert(load->intrinsic == nir_intrinsic_load_constant);
546
547 const struct nvk_cbuf cbuf_key = {
548 .type = NVK_CBUF_TYPE_SHADER_DATA,
549 };
550 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
551 assert(cbuf_idx >= 0);
552
553 uint32_t base = nir_intrinsic_base(load);
554
555 b->cursor = nir_before_instr(&load->instr);
556
557 nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
558 nir_def *data = nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
559 nir_imm_int(b, cbuf_idx), offset,
560 .align_mul = nir_intrinsic_align_mul(load),
561 .align_offset = nir_intrinsic_align_offset(load));
562
563 nir_def_rewrite_uses(&load->def, data);
564
565 return true;
566 }
567
568 static nir_def *
569 load_descriptor_set_addr(nir_builder *b, uint32_t set,
570 UNUSED const struct lower_descriptors_ctx *ctx)
571 {
572 uint32_t set_addr_offset = nvk_root_descriptor_offset(sets) +
573 set * sizeof(struct nvk_buffer_address);
574
575 return nir_ldc_nv(b, 1, 64, nir_imm_int(b, 0),
576 nir_imm_int(b, set_addr_offset),
577 .align_mul = 8, .align_offset = 0);
578 }
579
580 static nir_def *
581 load_dynamic_buffer_start(nir_builder *b, uint32_t set,
582 const struct lower_descriptors_ctx *ctx)
583 {
584 int dynamic_buffer_start_imm = 0;
585 for (uint32_t s = 0; s < set; s++) {
586 if (ctx->set_layouts[s] == NULL) {
587 dynamic_buffer_start_imm = -1;
588 break;
589 }
590
591 dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
592 }
593
594 if (dynamic_buffer_start_imm >= 0) {
595 return nir_imm_int(b, dynamic_buffer_start_imm);
596 } else {
597 uint32_t root_offset =
598 nvk_root_descriptor_offset(set_dynamic_buffer_start) + set;
599
600 return nir_u2u32(b, nir_ldc_nv(b, 1, 8, nir_imm_int(b, 0),
601 nir_imm_int(b, root_offset),
602 .align_mul = 1, .align_offset = 0));
603 }
604 }
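/* For illustration: with set layouts declaring 2, 1 and 3 dynamic buffers,
 * load_dynamic_buffer_start(b, 2, ctx) is just nir_imm_int(b, 3), i.e.
 * 2 + 1 from sets 0 and 1.  Only when some lower-numbered set layout is
 * unknown at compile time do we fall back to reading the per-set start
 * index from the root descriptor table.
 */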
605
606 static nir_def *
607 load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
608 uint32_t set, uint32_t binding, nir_def *index,
609 unsigned offset_B, const struct lower_descriptors_ctx *ctx)
610 {
611 const struct nvk_descriptor_set_binding_layout *binding_layout =
612 get_binding_layout(set, binding, ctx);
613
614 if (ctx->clamp_desc_array_bounds)
615 index = nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
616
617 switch (binding_layout->type) {
618 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
619 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
620 /* Get the index in the root descriptor table dynamic_buffers array. */
621 nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
622
623 index = nir_iadd(b, index,
624 nir_iadd_imm(b, dynamic_buffer_start,
625 binding_layout->dynamic_buffer_index));
626 uint32_t desc_size = sizeof(union nvk_buffer_descriptor);
627 nir_def *root_desc_offset =
628 nir_iadd_imm(b, nir_imul_imm(b, index, desc_size),
629 nvk_root_descriptor_offset(dynamic_buffers));
630
631 assert(num_components * bit_size <= desc_size * 8);
632 return nir_ldc_nv(b, num_components, bit_size,
633 nir_imm_int(b, 0), root_desc_offset,
634 .align_mul = 16, .align_offset = 0);
635 }
636
637 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
638 nir_def *base_addr =
639 nir_iadd_imm(b, load_descriptor_set_addr(b, set, ctx),
640 binding_layout->offset);
641
642 assert(binding_layout->stride == 1);
643 const uint32_t binding_size = binding_layout->array_size;
644
645 if (ctx->use_bindless_cbuf) {
646 assert(num_components == 1 && bit_size == 64);
647 const uint32_t size = align(binding_size, 16);
648 return nir_ior_imm(b, nir_ishr_imm(b, base_addr, 4),
649 ((uint64_t)size >> 4) << 45);
650 } else {
651 /* Convert it to nir_address_format_64bit_bounded_global */
652 assert(num_components == 4 && bit_size == 32);
653 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
654 nir_unpack_64_2x32_split_y(b, base_addr),
655 nir_imm_int(b, binding_size),
656 nir_imm_int(b, 0));
657 }
658 }
659
660 default: {
661 assert(binding_layout->stride > 0);
662 nir_def *desc_ubo_offset =
663 nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
664 binding_layout->offset + offset_B);
665
666 uint64_t max_desc_ubo_offset = binding_layout->offset +
667 binding_layout->array_size * binding_layout->stride;
668
669 unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
670 desc_align_mul = MIN2(desc_align_mul, 16);
671 unsigned desc_align_offset = binding_layout->offset + offset_B;
672 desc_align_offset %= desc_align_mul;
673
674 const struct nvk_cbuf cbuf_key = {
675 .type = NVK_CBUF_TYPE_DESC_SET,
676 .desc_set = set,
677 };
678 int cbuf_idx = get_mapped_cbuf_idx(&cbuf_key, ctx);
679
680 if (cbuf_idx >= 0 && max_desc_ubo_offset <= NVK_MAX_CBUF_SIZE) {
681 return nir_ldc_nv(b, num_components, bit_size,
682 nir_imm_int(b, cbuf_idx),
683 desc_ubo_offset,
684 .align_mul = desc_align_mul,
685 .align_offset = desc_align_offset);
686 } else {
687 nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
688 return nir_load_global_constant_offset(b, num_components, bit_size,
689 set_addr, desc_ubo_offset,
690 .align_mul = desc_align_mul,
691 .align_offset = desc_align_offset);
692 }
693 }
694 }
695 }
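/* load_descriptor() boils down to three paths: dynamic buffers read their
 * descriptor straight out of the root table, inline uniform blocks
 * synthesize a descriptor from the set's base address, and everything else
 * reads from the descriptor set itself, through a bound cbuf when
 * build_cbuf_map() gave the set one and through a global load otherwise.
 */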
696
697 static bool
698 is_idx_intrin(nir_intrinsic_instr *intrin)
699 {
700 while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
701 intrin = nir_src_as_intrinsic(intrin->src[0]);
702 if (intrin == NULL)
703 return false;
704 }
705
706 return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
707 }
708
709 static nir_def *
710 buffer_address_to_ldcx_handle(nir_builder *b, nir_def *addr)
711 {
712 nir_def *base_addr = nir_pack_64_2x32(b, nir_channels(b, addr, 0x3));
713 nir_def *size = nir_channel(b, addr, 2);
714 nir_def *offset = nir_channel(b, addr, 3);
715
716 nir_def *addr16 = nir_ushr_imm(b, base_addr, 4);
717 nir_def *addr16_lo = nir_unpack_64_2x32_split_x(b, addr16);
718 nir_def *addr16_hi = nir_unpack_64_2x32_split_y(b, addr16);
719
720 /* If we assume the top bits of the address are 0, as well as the bottom two
721 * bits of the size (we can trust this since it's a descriptor), then
722 *
723 * ((size >> 4) << 13) | addr
724 *
725 * is just an imad.
726 */
727 nir_def *handle_hi = nir_imad(b, size, nir_imm_int(b, 1 << 9), addr16_hi);
728
729 return nir_vec3(b, addr16_lo, handle_hi, offset);
730 }
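/* Sketch of the bindless cbuf handle built above and in load_descriptor()'s
 * inline-uniform-block case (bit positions read off the construction in
 * this file, not from hardware documentation):
 *
 *    handle = (base_addr >> 4) | ((size_B >> 4) << 45)
 *
 * i.e. the low bits carry the 16 B-aligned base address and the high bits
 * the size in 16 B units.  The vec3 returned here is that handle split into
 * two 32-bit halves plus a byte offset, the same shape that
 * load_descriptor_for_idx_intrin() produces when use_bindless_cbuf is set.
 */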
731
732 static nir_def *
733 load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
734 const struct lower_descriptors_ctx *ctx)
735 {
736 nir_def *index = nir_imm_int(b, 0);
737
738 while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
739 index = nir_iadd(b, index, intrin->src[1].ssa);
740 intrin = nir_src_as_intrinsic(intrin->src[0]);
741 }
742
743 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
744 uint32_t set = nir_intrinsic_desc_set(intrin);
745 uint32_t binding = nir_intrinsic_binding(intrin);
746 index = nir_iadd(b, index, intrin->src[0].ssa);
747
748 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
749 if (descriptor_type_is_ubo(desc_type) && ctx->use_bindless_cbuf) {
750 nir_def *desc = load_descriptor(b, 1, 64, set, binding, index, 0, ctx);
751
752 /* The descriptor is just the handle. NIR also needs an offset. */
753 return nir_vec3(b, nir_unpack_64_2x32_split_x(b, desc),
754 nir_unpack_64_2x32_split_y(b, desc),
755 nir_imm_int(b, 0));
756 } else {
757 nir_def *desc = load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
758
759 /* We know a priori that the .w component (offset) is zero */
760 return nir_vec4(b, nir_channel(b, desc, 0),
761 nir_channel(b, desc, 1),
762 nir_channel(b, desc, 2),
763 nir_imm_int(b, 0));
764 }
765 }
766
767 static bool
768 try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
769 const struct lower_descriptors_ctx *ctx)
770 {
771 ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
772 b->cursor = nir_before_instr(&intrin->instr);
773
774 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
775 if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
776 assert(descriptor_type_is_ssbo(desc_type));
777 return false;
778 }
779
780 nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
781
782 nir_def_rewrite_uses(&intrin->def, desc);
783
784 return true;
785 }
786
787 static bool
788 _lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
789 uint32_t root_table_offset,
790 const struct lower_descriptors_ctx *ctx)
791 {
792 b->cursor = nir_instr_remove(&intrin->instr);
793
794 nir_def *val = nir_ldc_nv(b, intrin->def.num_components,
795 intrin->def.bit_size,
796 nir_imm_int(b, 0), /* Root table */
797 nir_imm_int(b, root_table_offset),
798 .align_mul = 4,
799 .align_offset = 0);
800
801 nir_def_rewrite_uses(&intrin->def, val);
802
803 return true;
804 }
805
806 #define lower_sysval_to_root_table(b, intrin, member, ctx) \
807 _lower_sysval_to_root_table(b, intrin, \
808 nvk_root_descriptor_offset(member), \
809 ctx)
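/* Example expansion: lower_sysval_to_root_table(b, intrin, cs.group_count,
 * ctx) replaces the intrinsic with an ldc from cbuf 0 (the root descriptor
 * table) at nvk_root_descriptor_offset(cs.group_count); see the sysval
 * cases in try_lower_intrin() below.
 */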
810
811 static bool
812 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
813 const struct lower_descriptors_ctx *ctx)
814 {
815 const uint32_t push_region_offset =
816 nvk_root_descriptor_offset(push);
817 const uint32_t base = nir_intrinsic_base(load);
818
819 b->cursor = nir_before_instr(&load->instr);
820
821 nir_def *offset = nir_iadd_imm(b, load->src[0].ssa,
822 push_region_offset + base);
823
824 nir_def *val =
825 nir_ldc_nv(b, load->def.num_components, load->def.bit_size,
826 nir_imm_int(b, 0), offset,
827 .align_mul = load->def.bit_size / 8,
828 .align_offset = 0);
829
830 nir_def_rewrite_uses(&load->def, val);
831
832 return true;
833 }
834
835 static void
836 get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
837 uint32_t *set, uint32_t *binding,
838 nir_def **index)
839 {
840 if (deref->deref_type == nir_deref_type_array) {
841 *index = deref->arr.index.ssa;
842 deref = nir_deref_instr_parent(deref);
843 } else {
844 *index = nir_imm_int(b, 0);
845 }
846
847 assert(deref->deref_type == nir_deref_type_var);
848 nir_variable *var = deref->var;
849
850 *set = var->data.descriptor_set;
851 *binding = var->data.binding;
852 }
853
854 static nir_def *
855 load_resource_deref_desc(nir_builder *b,
856 unsigned num_components, unsigned bit_size,
857 nir_deref_instr *deref, unsigned offset_B,
858 const struct lower_descriptors_ctx *ctx)
859 {
860 uint32_t set, binding;
861 nir_def *index;
862 get_resource_deref_binding(b, deref, &set, &binding, &index);
863 return load_descriptor(b, num_components, bit_size,
864 set, binding, index, offset_B, ctx);
865 }
866
867 static void
868 lower_msaa_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
869 const struct lower_descriptors_ctx *ctx)
870 {
871 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS);
872
873 b->cursor = nir_before_instr(&intrin->instr);
874 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
875 nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
876
877 nir_def *img_index = nir_ubitfield_extract_imm(b, desc, 0, 20);
878 nir_rewrite_image_intrinsic(intrin, img_index, true);
879
880 nir_def *sw_log2 = nir_ubitfield_extract_imm(b, desc, 20, 2);
881 nir_def *sh_log2 = nir_ubitfield_extract_imm(b, desc, 22, 2);
882
883 nir_def *sw = nir_ishl(b, nir_imm_int(b, 1), sw_log2);
884 nir_def *sh = nir_ishl(b, nir_imm_int(b, 1), sh_log2);
885 nir_def *num_samples = nir_imul(b, sw, sh);
886
887 switch (intrin->intrinsic) {
888 case nir_intrinsic_bindless_image_load:
889 case nir_intrinsic_bindless_image_store:
890 case nir_intrinsic_bindless_image_atomic:
891 case nir_intrinsic_bindless_image_atomic_swap: {
892 nir_def *x = nir_channel(b, intrin->src[1].ssa, 0);
893 nir_def *y = nir_channel(b, intrin->src[1].ssa, 1);
894 nir_def *z = nir_channel(b, intrin->src[1].ssa, 2);
895 nir_def *w = nir_channel(b, intrin->src[1].ssa, 3);
896 nir_def *s = intrin->src[2].ssa;
897
898 nir_def *sw_mask = nir_iadd_imm(b, sw, -1);
899 nir_def *sx = nir_iand(b, s, sw_mask);
900 nir_def *sy = nir_ishr(b, s, sw_log2);
901
902 x = nir_imad(b, x, sw, sx);
903 y = nir_imad(b, y, sh, sy);
904
905 /* Make OOB sample indices OOB X/Y indices */
906 x = nir_bcsel(b, nir_ult(b, s, num_samples), x, nir_imm_int(b, -1));
907
908 nir_src_rewrite(&intrin->src[1], nir_vec4(b, x, y, z, w));
909 nir_src_rewrite(&intrin->src[2], nir_undef(b, 1, 32));
910 break;
911 }
912
913 case nir_intrinsic_bindless_image_size: {
914 b->cursor = nir_after_instr(&intrin->instr);
915
916 nir_def *size = &intrin->def;
917 nir_def *w = nir_channel(b, size, 0);
918 nir_def *h = nir_channel(b, size, 1);
919
920 w = nir_ushr(b, w, sw_log2);
921 h = nir_ushr(b, h, sh_log2);
922
923 size = nir_vector_insert_imm(b, size, w, 0);
924 size = nir_vector_insert_imm(b, size, h, 1);
925
926 nir_def_rewrite_uses_after(&intrin->def, size, size->parent_instr);
927 break;
928 }
929
930 case nir_intrinsic_bindless_image_samples: {
931 /* We need to handle NULL descriptors explicitly */
932 nir_def *samples =
933 nir_bcsel(b, nir_ieq(b, desc, nir_imm_int(b, 0)),
934 nir_imm_int(b, 0), num_samples);
935 nir_def_rewrite_uses(&intrin->def, samples);
936 break;
937 }
938
939 default:
940 unreachable("Unknown image intrinsic");
941 }
942
943 nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
944 }
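/* Worked example for the lowering above (sample layout invented): with
 * sw_log2 = 2 and sh_log2 = 1 the descriptor describes a 4x2 sample grid,
 * so pixel (x, y) sample s maps to texel (x * 4 + (s & 3), y * 2 + (s >> 2))
 * of the underlying 2D image, and image size queries shift the stored
 * width and height back down by 2 and 1 to recover the API-visible size.
 */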
945
946 static bool
947 is_edb_buffer_view(nir_deref_instr *deref,
948 const struct lower_descriptors_ctx *ctx)
949 {
950 if (glsl_get_sampler_dim(deref->type) != GLSL_SAMPLER_DIM_BUF)
951 return false;
952
953 if (ctx->use_edb_buffer_views)
954 return true;
955
956 nir_variable *var = nir_deref_instr_get_variable(deref);
957 uint8_t set = var->data.descriptor_set;
958
959 return ctx->set_layouts[set]->flags &
960 VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
961 }
962
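/* The helpers below decode the EDB texel-buffer descriptor as a 4 x 32-bit
 * vector (interpretation inferred from the uses in this file):
 *
 *    .x  image index, 0 for the NULL descriptor
 *    .y  view offset in elements
 *    .z  view size in elements
 *    .w  value returned in the alpha channel for out-of-bounds reads
 */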
963 static nir_def *
964 edb_buffer_view_is_null(nir_builder *b, nir_def *desc)
965 {
966 assert(desc->num_components == 4);
967 nir_def *index = nir_channel(b, desc, 0);
968 return nir_ieq_imm(b, index, 0);
969 }
970
971 static nir_def *
972 edb_buffer_view_offset_el(nir_builder *b, nir_def *desc)
973 {
974 assert(desc->num_components == 4);
975 return nir_channel(b, desc, 1);
976 }
977
978 static nir_def *
979 edb_buffer_view_size_el(nir_builder *b, nir_def *desc)
980 {
981 assert(desc->num_components == 4);
982 return nir_channel(b, desc, 2);
983 }
984
985 static nir_def *
986 edb_buffer_view_oob_alpha(nir_builder *b, nir_def *desc)
987 {
988 assert(desc->num_components == 4);
989 return nir_channel(b, desc, 3);
990 }
991
992 static nir_def *
993 edb_buffer_view_coord_is_in_bounds(nir_builder *b, nir_def *desc,
994 nir_def *coord)
995 {
996 assert(desc->num_components == 4);
997 return nir_ult(b, coord, edb_buffer_view_size_el(b, desc));
998 }
999
1000 static nir_def *
1001 edb_buffer_view_index(nir_builder *b, nir_def *desc, nir_def *in_bounds)
1002 {
1003 assert(desc->num_components == 4);
1004 nir_def *index = nir_channel(b, desc, 0);
1005
1006 /* Use the NULL descriptor for OOB access */
1007 return nir_bcsel(b, in_bounds, index, nir_imm_int(b, 0));
1008 }
1009
1010 static nir_def *
1011 adjust_edb_buffer_view_coord(nir_builder *b, nir_def *desc, nir_def *coord)
1012 {
1013 return nir_iadd(b, coord, edb_buffer_view_offset_el(b, desc));
1014 }
1015
1016 static nir_def *
1017 fixup_edb_buffer_view_result(nir_builder *b, nir_def *desc, nir_def *in_bounds,
1018 nir_def *res, nir_alu_type dest_type)
1019 {
1020 if (res->num_components < 4)
1021 return res;
1022
1023 nir_def *is_null = edb_buffer_view_is_null(b, desc);
1024 nir_def *oob_alpha = edb_buffer_view_oob_alpha(b, desc);
1025
1026 nir_def *a = nir_channel(b, res, 3);
1027 a = nir_bcsel(b, nir_ior(b, in_bounds, is_null), a, oob_alpha);
1028 return nir_vector_insert_imm(b, res, a, 3);
1029 }
1030
1031 static void
1032 lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1033 const struct lower_descriptors_ctx *ctx)
1034 {
1035 assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF);
1036
1037 b->cursor = nir_before_instr(&intrin->instr);
1038 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1039 nir_def *desc = load_resource_deref_desc(b, 4, 32, deref, 0, ctx);
1040
1041 switch (intrin->intrinsic) {
1042 case nir_intrinsic_image_deref_load:
1043 case nir_intrinsic_image_deref_store:
1044 case nir_intrinsic_image_deref_atomic:
1045 case nir_intrinsic_image_deref_atomic_swap: {
1046 nir_def *pos = intrin->src[1].ssa;
1047 nir_def *x = nir_channel(b, pos, 0);
1048
1049 nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, x);
1050 nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1051
1052 nir_def *new_x = adjust_edb_buffer_view_coord(b, desc, x);
1053 pos = nir_vector_insert_imm(b, pos, new_x, 0);
1054 nir_src_rewrite(&intrin->src[1], pos);
1055
1056 if (intrin->intrinsic == nir_intrinsic_image_deref_load) {
1057 b->cursor = nir_after_instr(&intrin->instr);
1058 nir_def *res = &intrin->def;
1059 res = fixup_edb_buffer_view_result(b, desc, in_bounds, res,
1060 nir_intrinsic_dest_type(intrin));
1061 nir_def_rewrite_uses_after(&intrin->def, res, res->parent_instr);
1062 }
1063
1064 nir_rewrite_image_intrinsic(intrin, index, true);
1065 break;
1066 }
1067
1068 case nir_intrinsic_image_deref_size: {
1069 assert(intrin->def.num_components == 1);
1070 nir_def *size_el = nir_channel(b, desc, 2);
1071 nir_def_rewrite_uses(&intrin->def, size_el);
1072 break;
1073 }
1074
1075 default:
1076 unreachable("Unknown image intrinsic");
1077 }
1078 }
1079
1080 static bool
1081 lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1082 const struct lower_descriptors_ctx *ctx)
1083 {
1084 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1085
1086 if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_MS) {
1087 lower_msaa_image_intrin(b, intrin, ctx);
1088 } else if (is_edb_buffer_view(deref, ctx)) {
1089 lower_edb_buffer_image_intrin(b, intrin, ctx);
1090 } else {
1091 b->cursor = nir_before_instr(&intrin->instr);
1092 nir_def *desc = load_resource_deref_desc(b, 1, 32, deref, 0, ctx);
1093 nir_rewrite_image_intrinsic(intrin, desc, true);
1094 }
1095
1096 return true;
1097 }
1098
1099 static bool
1100 lower_interp_at_sample(nir_builder *b, nir_intrinsic_instr *interp,
1101 const struct lower_descriptors_ctx *ctx)
1102 {
1103 const uint32_t root_table_offset =
1104 nvk_root_descriptor_offset(draw.sample_locations);
1105
1106 nir_def *sample = interp->src[1].ssa;
1107
1108 b->cursor = nir_before_instr(&interp->instr);
1109
1110 nir_def *loc = nir_ldc_nv(b, 1, 64,
1111 nir_imm_int(b, 0), /* Root table */
1112 nir_imm_int(b, root_table_offset),
1113 .align_mul = 8,
1114 .align_offset = 0);
1115
1116 /* Yay little endian */
1117 loc = nir_ushr(b, loc, nir_imul_imm(b, sample, 8));
1118 nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
1119 nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
1120 nir_def *loc_u4 = nir_vec2(b, loc_x_u4, loc_y_u4);
1121 nir_def *loc_f = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
1122 nir_def *offset = nir_fadd_imm(b, loc_f, -0.5);
1123
1124 assert(interp->intrinsic == nir_intrinsic_interp_deref_at_sample);
1125 interp->intrinsic = nir_intrinsic_interp_deref_at_offset;
1126 nir_src_rewrite(&interp->src[1], offset);
1127
1128 return true;
1129 }
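/* This assumes the root table packs the sample locations one byte per
 * sample, 4-bit x in the low nibble and 4-bit y in the high nibble, both
 * in 1/16-pixel units; hence the shift by sample * 8 and the rescale to
 * the [-0.5, 0.5) offset range that interp_deref_at_offset expects.
 */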
1130
1131 static bool
1132 try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
1133 const struct lower_descriptors_ctx *ctx)
1134 {
1135 switch (intrin->intrinsic) {
1136 case nir_intrinsic_load_constant:
1137 return lower_load_constant(b, intrin, ctx);
1138
1139 case nir_intrinsic_load_vulkan_descriptor:
1140 return try_lower_load_vulkan_descriptor(b, intrin, ctx);
1141
1142 case nir_intrinsic_load_workgroup_size:
1143 unreachable("Should have been lowered by nir_lower_cs_intrinsics()");
1144
1145 case nir_intrinsic_load_num_workgroups:
1146 return lower_sysval_to_root_table(b, intrin, cs.group_count, ctx);
1147
1148 case nir_intrinsic_load_base_workgroup_id:
1149 return lower_sysval_to_root_table(b, intrin, cs.base_group, ctx);
1150
1151 case nir_intrinsic_load_push_constant:
1152 return lower_load_push_constant(b, intrin, ctx);
1153
1154 case nir_intrinsic_load_base_vertex:
1155 case nir_intrinsic_load_first_vertex:
1156 return lower_sysval_to_root_table(b, intrin, draw.base_vertex, ctx);
1157
1158 case nir_intrinsic_load_base_instance:
1159 return lower_sysval_to_root_table(b, intrin, draw.base_instance, ctx);
1160
1161 case nir_intrinsic_load_draw_id:
1162 return lower_sysval_to_root_table(b, intrin, draw.draw_index, ctx);
1163
1164 case nir_intrinsic_load_view_index:
1165 return lower_sysval_to_root_table(b, intrin, draw.view_index, ctx);
1166
1167 case nir_intrinsic_image_deref_load:
1168 case nir_intrinsic_image_deref_sparse_load:
1169 case nir_intrinsic_image_deref_store:
1170 case nir_intrinsic_image_deref_atomic:
1171 case nir_intrinsic_image_deref_atomic_swap:
1172 case nir_intrinsic_image_deref_size:
1173 case nir_intrinsic_image_deref_samples:
1174 return lower_image_intrin(b, intrin, ctx);
1175
1176 case nir_intrinsic_interp_deref_at_sample:
1177 return lower_interp_at_sample(b, intrin, ctx);
1178
1179 default:
1180 return false;
1181 }
1182 }
1183
1184 static void
1185 lower_edb_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex,
1186 const struct lower_descriptors_ctx *ctx)
1187 {
1188 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_BUF);
1189
1190 b->cursor = nir_before_instr(&tex->instr);
1191
1192 const int texture_src_idx =
1193 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1194 nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1195
1196 nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1197 ASSERTED const uint32_t plane =
1198 plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1199 assert(plane == 0);
1200
1201 nir_def *desc = load_resource_deref_desc(b, 4, 32, texture, 0, ctx);
1202
1203 switch (tex->op) {
1204 case nir_texop_txf: {
1205 const int coord_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1206 assert(coord_src_idx >= 0);
1207 nir_def *coord = tex->src[coord_src_idx].src.ssa;
1208
1209 nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, coord);
1210
1211 nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
1212 nir_src_rewrite(&tex->src[texture_src_idx].src, index);
1213 tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1214
1215 nir_def *new_coord = adjust_edb_buffer_view_coord(b, desc, coord);
1216 nir_src_rewrite(&tex->src[coord_src_idx].src, new_coord);
1217
1218 b->cursor = nir_after_instr(&tex->instr);
1219 nir_def *res = &tex->def;
1220 res = fixup_edb_buffer_view_result(b, desc, in_bounds,
1221 res, tex->dest_type);
1222 nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1223 break;
1224 }
1225
1226 case nir_texop_txs: {
1227 assert(tex->def.num_components == 1);
1228 nir_def *size_el = edb_buffer_view_size_el(b, desc);
1229 nir_def_rewrite_uses(&tex->def, size_el);
1230 break;
1231 }
1232
1233 default:
1234 unreachable("Invalid buffer texture op");
1235 }
1236 }
1237
1238 static bool
1239 lower_tex(nir_builder *b, nir_tex_instr *tex,
1240 const struct lower_descriptors_ctx *ctx)
1241 {
1242 const int texture_src_idx =
1243 nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1244 const int sampler_src_idx =
1245 nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
1246 if (texture_src_idx < 0) {
1247 assert(sampler_src_idx < 0);
1248 return false;
1249 }
1250
1251 nir_deref_instr *texture = nir_src_as_deref(tex->src[texture_src_idx].src);
1252 nir_deref_instr *sampler = sampler_src_idx < 0 ? NULL :
1253 nir_src_as_deref(tex->src[sampler_src_idx].src);
1254 assert(texture);
1255
1256 if (is_edb_buffer_view(texture, ctx)) {
1257 lower_edb_buffer_tex_instr(b, tex, ctx);
1258 return true;
1259 }
1260
1261 b->cursor = nir_before_instr(&tex->instr);
1262
1263 nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
1264 const uint32_t plane =
1265 plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
1266 const uint64_t plane_offset_B =
1267 plane * sizeof(struct nvk_sampled_image_descriptor);
1268
1269 nir_def *texture_desc =
1270 load_resource_deref_desc(b, 1, 32, texture, plane_offset_B, ctx);
1271
1272 nir_def *combined_handle;
1273 if (texture == sampler) {
1274 combined_handle = texture_desc;
1275 } else {
1276 combined_handle = nir_iand_imm(b, texture_desc,
1277 NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1278
1279 if (sampler != NULL) {
1280 nir_def *sampler_desc =
1281 load_resource_deref_desc(b, 1, 32, sampler, plane_offset_B, ctx);
1282 nir_def *sampler_index =
1283 nir_iand_imm(b, sampler_desc,
1284 NVK_IMAGE_DESCRIPTOR_SAMPLER_INDEX_MASK);
1285 combined_handle = nir_ior(b, combined_handle, sampler_index);
1286 }
1287 }
1288
1289 /* TODO: The nv50 back-end assumes it's 64-bit because of GL */
1290 combined_handle = nir_u2u64(b, combined_handle);
1291
1292 /* TODO: The nv50 back-end assumes it gets handles both places, even for
1293 * texelFetch.
1294 */
1295 nir_src_rewrite(&tex->src[texture_src_idx].src, combined_handle);
1296 tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;
1297
1298 if (sampler_src_idx < 0) {
1299 nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, combined_handle);
1300 } else {
1301 nir_src_rewrite(&tex->src[sampler_src_idx].src, combined_handle);
1302 tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
1303 }
1304
1305 /* On pre-Volta hardware, we don't have real null descriptors. Null
1306 * descriptors work well enough for sampling but they may not return the
1307 * correct query results.
1308 */
1309 if (ctx->dev_info->cls_eng3d < VOLTA_A && nir_tex_instr_is_query(tex)) {
1310 b->cursor = nir_after_instr(&tex->instr);
1311
1312 /* This should get CSE'd with the earlier load */
1313 nir_def *texture_handle =
1314 nir_iand_imm(b, texture_desc, NVK_IMAGE_DESCRIPTOR_IMAGE_INDEX_MASK);
1315 nir_def *is_null = nir_ieq_imm(b, texture_handle, 0);
1316 nir_def *zero = nir_imm_zero(b, tex->def.num_components,
1317 tex->def.bit_size);
1318 nir_def *res = nir_bcsel(b, is_null, zero, &tex->def);
1319 nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
1320 }
1321
1322 return true;
1323 }
1324
1325 static bool
1326 try_lower_descriptors_instr(nir_builder *b, nir_instr *instr,
1327 void *_data)
1328 {
1329 const struct lower_descriptors_ctx *ctx = _data;
1330
1331 switch (instr->type) {
1332 case nir_instr_type_tex:
1333 return lower_tex(b, nir_instr_as_tex(instr), ctx);
1334 case nir_instr_type_intrinsic:
1335 return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
1336 default:
1337 return false;
1338 }
1339 }
1340
1341 #define ROOT_DESC_BASE_ADDR_HI 0x0057de3c
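/* ROOT_DESC_BASE_ADDR_HI is a sentinel rather than a real address: dynamic
 * SSBO bindings get it stuffed into the top 32 bits of their "base address"
 * by lower_ssbo_resource_index(), and lower_load_ssbo_descriptor() tests
 * for it to decide whether to read the descriptor from the root table
 * (cbuf 0) or from global memory.
 */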
1342
1343 static bool
1344 lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
1345 const struct lower_descriptors_ctx *ctx)
1346 {
1347 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1348 return false;
1349
1350 b->cursor = nir_instr_remove(&intrin->instr);
1351
1352 uint32_t set = nir_intrinsic_desc_set(intrin);
1353 uint32_t binding = nir_intrinsic_binding(intrin);
1354 nir_def *index = intrin->src[0].ssa;
1355
1356 const struct nvk_descriptor_set_binding_layout *binding_layout =
1357 get_binding_layout(set, binding, ctx);
1358
1359 nir_def *binding_addr;
1360 uint8_t binding_stride;
1361 switch (binding_layout->type) {
1362 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
1363 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
1364 nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
1365 binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
1366 binding_stride = binding_layout->stride;
1367 break;
1368 }
1369
1370 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
1371 nir_def *dynamic_buffer_start =
1372 nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
1373 binding_layout->dynamic_buffer_index);
1374
1375 nir_def *dynamic_binding_offset =
1376 nir_iadd_imm(b, nir_imul_imm(b, dynamic_buffer_start,
1377 sizeof(struct nvk_buffer_address)),
1378 nvk_root_descriptor_offset(dynamic_buffers));
1379
1380 binding_addr =
1381 nir_pack_64_2x32_split(b, dynamic_binding_offset,
1382 nir_imm_int(b, ROOT_DESC_BASE_ADDR_HI));
1383 binding_stride = sizeof(struct nvk_buffer_address);
1384 break;
1385 }
1386
1387 default:
1388 unreachable("Not an SSBO descriptor");
1389 }
1390
1391 /* Tuck the stride in the top 8 bits of the binding address */
1392 binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
1393
1394 const uint32_t binding_size = binding_layout->array_size * binding_stride;
1395 nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
1396
1397 /* We depend on this when we load descriptors */
1398 assert(binding_layout->array_size >= 1);
1399
1400 nir_def *addr;
1401 switch (ctx->ssbo_addr_format) {
1402 case nir_address_format_64bit_global_32bit_offset:
1403 case nir_address_format_64bit_bounded_global:
1404 addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
1405 nir_unpack_64_2x32_split_y(b, binding_addr),
1406 nir_imm_int(b, binding_size),
1407 offset_in_binding);
1408 break;
1409
1410 default:
1411 unreachable("Unknown address mode");
1412 }
1413
1414 nir_def_rewrite_uses(&intrin->def, addr);
1415
1416 return true;
1417 }
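/* Shape of the SSBO descriptor "address" built above for the bounded
 * address formats handled here:
 *
 *    .xy  64-bit address of the binding's descriptor array, with the
 *         descriptor stride packed in the top 8 bits
 *    .z   size in bytes of the whole binding (array_size * stride)
 *    .w   byte offset of the selected array element
 *
 * lower_ssbo_resource_reindex() advances .w and lower_load_ssbo_descriptor()
 * consumes the result.
 */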
1418
1419 static bool
1420 lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
1421 const struct lower_descriptors_ctx *ctx)
1422 {
1423 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1424 return false;
1425
1426 b->cursor = nir_instr_remove(&intrin->instr);
1427
1428 nir_def *addr = intrin->src[0].ssa;
1429 nir_def *index = intrin->src[1].ssa;
1430
1431 nir_def *addr_high32;
1432 switch (ctx->ssbo_addr_format) {
1433 case nir_address_format_64bit_global_32bit_offset:
1434 case nir_address_format_64bit_bounded_global:
1435 addr_high32 = nir_channel(b, addr, 1);
1436 break;
1437
1438 default:
1439 unreachable("Unknown address mode");
1440 }
1441
1442 nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
1443 nir_def *offset = nir_imul(b, index, stride);
1444
1445 addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format,
1446 nir_var_mem_ssbo, offset);
1447 nir_def_rewrite_uses(&intrin->def, addr);
1448
1449 return true;
1450 }
1451
1452 static bool
1453 lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1454 const struct lower_descriptors_ctx *ctx)
1455 {
1456 if (!descriptor_type_is_ssbo(nir_intrinsic_desc_type(intrin)))
1457 return false;
1458
1459 b->cursor = nir_instr_remove(&intrin->instr);
1460
1461 nir_def *addr = intrin->src[0].ssa;
1462
1463 nir_def *base, *offset, *size = NULL;
1464 switch (ctx->ssbo_addr_format) {
1465 case nir_address_format_64bit_global_32bit_offset: {
1466 base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1467 offset = nir_channel(b, addr, 3);
1468 break;
1469 }
1470
1471 case nir_address_format_64bit_bounded_global: {
1472 base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1473 size = nir_channel(b, addr, 2);
1474 offset = nir_channel(b, addr, 3);
1475 break;
1476 }
1477
1478 default:
1479 unreachable("Unknown address mode");
1480 }
1481
1482 /* Mask off the binding stride */
1483 base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
1484
1485 nir_def *base_lo = nir_unpack_64_2x32_split_x(b, base);
1486 nir_def *base_hi = nir_unpack_64_2x32_split_y(b, base);
1487
1488 nir_def *desc_root, *desc_global;
1489 nir_push_if(b, nir_ieq_imm(b, base_hi, ROOT_DESC_BASE_ADDR_HI));
1490 {
1491 desc_root = nir_load_ubo(b, 4, 32, nir_imm_int(b, 0),
1492 nir_iadd(b, base_lo, offset),
1493 .align_mul = 16, .align_offset = 0,
1494 .range = ~0);
1495 if (size != NULL) {
1496 /* assert(binding_layout->array_size >= 1); */
1497 nir_def *is_oob = nir_ult(b, nir_iadd_imm(b, size, -16), offset);
1498 desc_root = nir_bcsel(b, is_oob, nir_imm_zero(b, 4, 32), desc_root);
1499 }
1500 }
1501 nir_push_else(b, NULL);
1502 {
1503 if (size != NULL) {
1504 desc_global = nir_load_global_constant_bounded(b, 4, 32, base,
1505 offset, size,
1506 .align_mul = 16,
1507 .align_offset = 0);
1508 } else {
1509 desc_global = nir_load_global_constant_offset(b, 4, 32, base,
1510 offset,
1511 .align_mul = 16,
1512 .align_offset = 0);
1513 }
1514 }
1515 nir_pop_if(b, NULL);
1516 nir_def *desc = nir_if_phi(b, desc_root, desc_global);
1517
1518 nir_def_rewrite_uses(&intrin->def, desc);
1519
1520 return true;
1521 }
1522
1523 static bool
1524 lower_ssbo_descriptor_instr(nir_builder *b, nir_instr *instr,
1525 void *_data)
1526 {
1527 const struct lower_descriptors_ctx *ctx = _data;
1528
1529 if (instr->type != nir_instr_type_intrinsic)
1530 return false;
1531
1532 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1533 switch (intrin->intrinsic) {
1534 case nir_intrinsic_vulkan_resource_index:
1535 return lower_ssbo_resource_index(b, intrin, ctx);
1536 case nir_intrinsic_vulkan_resource_reindex:
1537 return lower_ssbo_resource_reindex(b, intrin, ctx);
1538 case nir_intrinsic_load_vulkan_descriptor:
1539 return lower_load_ssbo_descriptor(b, intrin, ctx);
1540 default:
1541 return false;
1542 }
1543 }
1544
1545 bool
1546 nvk_nir_lower_descriptors(nir_shader *nir,
1547 const struct nvk_physical_device *pdev,
1548 const struct vk_pipeline_robustness_state *rs,
1549 uint32_t set_layout_count,
1550 struct vk_descriptor_set_layout * const *set_layouts,
1551 struct nvk_cbuf_map *cbuf_map_out)
1552 {
1553 struct lower_descriptors_ctx ctx = {
1554 .dev_info = &pdev->info,
1555 .use_bindless_cbuf = nvk_use_bindless_cbuf(&pdev->info),
1556 .use_edb_buffer_views = nvk_use_edb_buffer_views(pdev),
1557 .clamp_desc_array_bounds =
1558 rs->storage_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1559 rs->uniform_buffers != VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
1560 rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
1561 .ssbo_addr_format = nvk_ssbo_addr_format(pdev, rs),
1562 .ubo_addr_format = nvk_ubo_addr_format(pdev, rs),
1563 };
1564
1565 assert(set_layout_count <= NVK_MAX_SETS);
1566 for (uint32_t s = 0; s < set_layout_count; s++) {
1567 if (set_layouts[s] != NULL)
1568 ctx.set_layouts[s] = vk_to_nvk_descriptor_set_layout(set_layouts[s]);
1569 }
1570
1571 /* We run in four passes:
1572 *
1573 * 1. Find ranges of UBOs that we can promote to bound UBOs. Nothing is
1574 * actually lowered in this pass. It's just analysis.
1575 *
1576 * 2. Try to lower UBO loads to cbufs based on the map we just created.
1577 * We need to do this before the main lowering pass because it relies
1578 * on the original descriptor load intrinsics.
1579 *
1580 * 3. Attempt to lower everything with direct descriptors. This may fail
1581 * to lower some SSBO intrinsics when variable pointers are used.
1582 *
1583 * 4. Clean up any SSBO intrinsics which are left and lower them to
1584 * slightly less efficient but variable-pointers-correct versions.
1585 */
1586
1587 bool pass_lower_ubo = false;
1588 if (cbuf_map_out != NULL) {
1589 ctx.cbuf_map = cbuf_map_out;
1590 build_cbuf_map(nir, &ctx);
1591
1592 pass_lower_ubo =
1593 nir_shader_intrinsics_pass(nir, lower_load_ubo_intrin,
1594 nir_metadata_control_flow,
1595 (void *)&ctx);
1596 }
1597
1598 bool pass_lower_descriptors =
1599 nir_shader_instructions_pass(nir, try_lower_descriptors_instr,
1600 nir_metadata_control_flow,
1601 (void *)&ctx);
1602 bool pass_lower_ssbo =
1603 nir_shader_instructions_pass(nir, lower_ssbo_descriptor_instr,
1604 nir_metadata_control_flow,
1605 (void *)&ctx);
1606 return pass_lower_ubo || pass_lower_descriptors || pass_lower_ssbo;
1607 }
1608