/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "compiler/elk/elk_nir.h"
#include "util/mesa-sha1.h"
#include "util/set.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
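/* Sentinel written into the per-binding offset tables for bindings that
 * don't get a table slot and must instead be accessed through bindless
 * handles.
 */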
#define BINDLESS_OFFSET 255

#define sizeof_field(type, field) sizeof(((type *)0)->field)

struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   const struct anv_pipeline_layout *layout;
   nir_address_format ssbo_addr_format;
   nir_address_format ubo_addr_format;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   bool uses_constants;
   bool has_dynamic_buffers;
   uint8_t constants_offset;
   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

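      /* Per-binding arrays, indexed by binding number within the set and
       * sized from the set layout's binding count in
       * anv_nir_apply_pipeline_layout().
       */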
      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};

static nir_address_format
addr_format_for_desc_type(VkDescriptorType desc_type,
                          struct apply_pipeline_layout_state *state)
{
   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return state->ssbo_addr_format;

   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      return state->ubo_addr_format;

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
      return nir_address_format_32bit_index_offset;

   default:
      unreachable("Unsupported descriptor type");
   }
}

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding. This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (bind_layout->descriptor_stride)
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

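/* Pre-pass over the shader that records, per descriptor set and binding, how
 * many instructions use each binding. The use counts are later turned into
 * scores that decide which bindings get the limited binding table slots, and
 * any binding with real descriptor data marks its set's descriptor buffer as
 * used.
 */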
static bool
get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         add_binding(state, nir_intrinsic_desc_set(intrin),
                     nir_intrinsic_binding(intrin));
         break;

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel:
         add_deref_src_binding(state, intrin->src[0]);
         break;

      case nir_intrinsic_load_constant:
         state->uses_constants = true;
         break;

      default:
         break;
      }
      break;
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
      add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
      break;
   }
   default:
      break;
   }

   return false;
}

static nir_intrinsic_instr *
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return NULL;

   return intrin;
}

static bool
descriptor_has_bti(nir_intrinsic_instr *intrin,
                   struct apply_pipeline_layout_state *state)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index;
   if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
      surface_index = state->set[set].desc_offset;
   else
      surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index. */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}

static nir_address_format
descriptor_address_format(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
}

static nir_intrinsic_instr *
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return NULL;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

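/* Loads num_components x bit_size bits of descriptor data at desc_offset
 * bytes into the descriptor pointed to by desc_addr. The address is a
 * (descriptor buffer surface index, byte offset) pair as produced by
 * build_desc_addr(), so the load is a plain UBO read from the descriptor
 * buffer.
 */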
static nir_def *
build_load_descriptor_mem(nir_builder *b,
                          nir_def *desc_addr, unsigned desc_offset,
                          unsigned num_components, unsigned bit_size,
                          struct apply_pipeline_layout_state *state)
{
   nir_def *surface_index = nir_channel(b, desc_addr, 0);
   nir_def *offset32 =
      nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);

   return nir_load_ubo(b, num_components, bit_size,
                       surface_index, offset32,
                       .align_mul = 8,
                       .align_offset = desc_offset % 8,
                       .range_base = 0,
                       .range = ~0);
}

/** Build a Vulkan resource index
 *
 * A "resource index" is the term used by our SPIR-V parser and the relevant
 * NIR intrinsics for a reference into a descriptor set. It acts much like a
 * deref in NIR except that it accesses opaque descriptors instead of memory.
 *
 * Coming out of SPIR-V, both the resource indices (in the form of
 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
 * of nir_deref_instr) use the same vector component/bit size. The meaning
 * of those values for memory derefs (nir_deref_instr) is given by the
 * nir_address_format associated with the descriptor type. For resource
 * indices, it's an encoding entirely internal to ANV which describes, in
 * some sense, the address of the descriptor. Thanks to the NIR/SPIR-V rules,
 * it must be packed into the same size SSA values as a memory address. For
 * this reason, the actual encoding may depend both on the address format for
 * memory derefs and the descriptor address format.
 *
 * The load_vulkan_descriptor intrinsic exists to provide a transition point
 * between these two forms of derefs: descriptor and memory.
 */
static nir_def *
build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
                nir_def *array_index, nir_address_format addr_format,
                struct apply_pipeline_layout_state *state)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t array_size = bind_layout->array_size;

   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
      uint32_t set_idx = state->set[set].desc_offset;

      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t packed =
         (bind_layout->descriptor_stride << 16) |
         (set_idx << 8) |
         dynamic_offset_index;

      return nir_vec4(b, nir_imm_int(b, packed),
                         nir_imm_int(b, bind_layout->descriptor_offset),
                         nir_imm_int(b, array_size - 1),
                         array_index);
   }

   case nir_address_format_32bit_index_offset: {
      if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
         uint32_t surface_index = state->set[set].desc_offset;
         return nir_imm_ivec2(b, surface_index,
                                 bind_layout->descriptor_offset);
      } else {
         uint32_t surface_index = state->set[set].surface_offsets[binding];
         assert(array_size > 0 && array_size <= UINT16_MAX);
         assert(surface_index <= UINT16_MAX);
         uint32_t packed = ((array_size - 1) << 16) | surface_index;
         return nir_vec2(b, array_index, nir_imm_int(b, packed));
      }
   }

   default:
      unreachable("Unsupported address format");
   }
}

struct res_index_defs {
   nir_def *set_idx;
   nir_def *dyn_offset_base;
   nir_def *desc_offset_base;
   nir_def *array_index;
   nir_def *desc_stride;
};

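/* Unpacks the vec4 produced by build_res_index() for the 64-bit address
 * formats: channel 0 packs (descriptor stride << 16) | (set index << 8) |
 * dynamic offset base, channel 1 is the descriptor offset within the set,
 * channel 2 is array_size - 1, and channel 3 is the array index. Taking the
 * umin of channels 2 and 3 clamps the array index to the declared array size
 * for bounds checking.
 */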
static struct res_index_defs
unpack_res_index(nir_builder *b, nir_def *index)
{
   struct res_index_defs defs;

   nir_def *packed = nir_channel(b, index, 0);
   defs.desc_stride = nir_extract_u8(b, packed, nir_imm_int(b, 2));
   defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
   defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));

   defs.desc_offset_base = nir_channel(b, index, 1);
   defs.array_index = nir_umin(b, nir_channel(b, index, 2),
                               nir_channel(b, index, 3));

   return defs;
}

/** Adjust a Vulkan resource index
 *
 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
 * For array descriptors, it allows us to adjust the array index. Thanks to
 * variable pointers, we cannot always fold this re-index operation into the
 * vulkan_resource_index intrinsic and we have to do it based on nothing but
 * the address format.
 */
static nir_def *
build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
                  nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      return nir_vec4(b, nir_channel(b, orig, 0),
                         nir_channel(b, orig, 1),
                         nir_channel(b, orig, 2),
                         nir_iadd(b, nir_channel(b, orig, 3), delta));

   case nir_address_format_32bit_index_offset:
      return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta),
                         nir_channel(b, orig, 1));

   default:
      unreachable("Unhandled address format");
   }
}

/** Get the address for a descriptor given its resource index
 *
 * Because of the re-indexing operations, we can't bounds check descriptor
 * array access until we have the final index. That means we end up doing the
 * bounds check here, if needed. See unpack_res_index() for more details.
 *
 * This function takes both a bind_layout and a desc_type which are used to
 * determine the descriptor stride for array descriptors. The bind_layout is
 * optional for buffer descriptor types.
 */
static nir_def *
build_desc_addr(nir_builder *b,
                const struct anv_descriptor_set_binding_layout *bind_layout,
                const VkDescriptorType desc_type,
                nir_def *index, nir_address_format addr_format,
                struct apply_pipeline_layout_state *state)
{
   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      struct res_index_defs res = unpack_res_index(b, index);

      nir_def *desc_offset = res.desc_offset_base;
      if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
         /* Compute the actual descriptor offset. For inline uniform blocks,
          * the array index is ignored as they are only allowed to be a single
          * descriptor (not an array) and there is no concept of a "stride".
          */
         desc_offset =
            nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
      }

      return nir_vec2(b, res.set_idx, desc_offset);
   }

   case nir_address_format_32bit_index_offset:
      assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
      return index;

   default:
      unreachable("Unhandled address format");
   }
}

/** Convert a Vulkan resource index into a buffer address
 *
 * In some cases, this does a memory load from the descriptor set and, in
 * others, it simply converts from one form to another.
 *
 * See build_res_index for details about each resource index format.
 */
static nir_def *
build_buffer_addr_for_res_index(nir_builder *b,
                                const VkDescriptorType desc_type,
                                nir_def *res_index,
                                nir_address_format addr_format,
                                struct apply_pipeline_layout_state *state)
{
   if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
      assert(addr_format == nir_address_format_32bit_index_offset);
      return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
   } else if (addr_format == nir_address_format_32bit_index_offset) {
      nir_def *array_index = nir_channel(b, res_index, 0);
      nir_def *packed = nir_channel(b, res_index, 1);
      nir_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));

      return nir_vec2(b, nir_iadd(b, surface_index, array_index),
                         nir_imm_int(b, 0));
   }

   nir_def *desc_addr =
      build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);

   nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);

   if (state->has_dynamic_buffers) {
      struct res_index_defs res = unpack_res_index(b, res_index);

      /* This shader has dynamic offsets and we have no way of knowing
       * (save for the dynamic offset base index) if this buffer has a
       * dynamic offset.
       */
      nir_def *dyn_offset_idx =
         nir_iadd(b, res.dyn_offset_base, res.array_index);

      nir_def *dyn_load =
         nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
                                .base = offsetof(struct anv_push_constants, dynamic_offsets),
                                .range = MAX_DYNAMIC_BUFFERS * 4);

      nir_def *dynamic_offset =
         nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
                      nir_imm_int(b, 0), dyn_load);

      /* The dynamic offset gets added to the base pointer so that we
       * have a sliding window range.
       */
      nir_def *base_ptr =
         nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
      base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
      desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                         nir_unpack_64_2x32_split_y(b, base_ptr),
                         nir_channel(b, desc, 2),
                         nir_channel(b, desc, 3));
   }

   /* The last element of the vec4 is always zero.
    *
    * See also struct anv_address_range_descriptor
    */
   return nir_vec4(b, nir_channel(b, desc, 0),
                      nir_channel(b, desc, 1),
                      nir_channel(b, desc, 2),
                      nir_imm_int(b, 0));
}

/** Loads descriptor memory for a variable-based deref chain
 *
 * The deref chain has to terminate at a variable with a descriptor_set and
 * binding set. This is used for images, textures, and samplers.
 */
static nir_def *
build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref,
                                    unsigned desc_offset,
                                    unsigned num_components, unsigned bit_size,
                                    struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   const uint32_t set = var->data.descriptor_set;
   const uint32_t binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_def *array_index;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
      array_index = deref->arr.index.ssa;
   } else {
      array_index = nir_imm_int(b, 0);
   }

   /* It doesn't really matter what address format we choose as everything
    * will constant-fold nicely. Choose one that uses the actual descriptor
    * buffer so we don't run into issues with index/offset assumptions.
    */
   const nir_address_format addr_format =
      nir_address_format_64bit_bounded_global;

   nir_def *res_index =
      build_res_index(b, set, binding, array_index, addr_format, state);

   nir_def *desc_addr =
      build_desc_addr(b, bind_layout, bind_layout->type,
                      res_index, addr_format, state);

   return build_load_descriptor_mem(b, desc_addr, desc_offset,
                                    num_components, bit_size, state);
}

/** A recursive form of build_res_index()
 *
 * This recursively walks a resource [re]index chain and builds the resource
 * index. It places the new code with the resource [re]index operation in the
 * hopes of better CSE. This means the cursor is not where you left it when
 * this function returns.
 */
static nir_def *
build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
                          nir_address_format addr_format,
                          uint32_t *set, uint32_t *binding,
                          struct apply_pipeline_layout_state *state)
{
   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
      b->cursor = nir_before_instr(&intrin->instr);
      *set = nir_intrinsic_desc_set(intrin);
      *binding = nir_intrinsic_binding(intrin);
      return build_res_index(b, *set, *binding, intrin->src[0].ssa,
                             addr_format, state);
   } else {
      assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
      nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
      nir_def *index =
         build_res_index_for_chain(b, parent, addr_format,
                                   set, binding, state);

      b->cursor = nir_before_instr(&intrin->instr);

      return build_res_reindex(b, index, intrin->src[1].ssa, addr_format);
   }
}

/** Builds a buffer address for a given vulkan [re]index intrinsic
 *
 * The cursor is not where you left it when this function returns.
 */
static nir_def *
build_buffer_addr_for_idx_intrin(nir_builder *b,
                                 nir_intrinsic_instr *idx_intrin,
                                 nir_address_format addr_format,
                                 struct apply_pipeline_layout_state *state)
{
   uint32_t set = UINT32_MAX, binding = UINT32_MAX;
   nir_def *res_index =
      build_res_index_for_chain(b, idx_intrin, addr_format,
                                &set, &binding, state);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   return build_buffer_addr_for_res_index(b, bind_layout->type,
                                          res_index, addr_format, state);
}

/** Builds a buffer address for a deref chain
 *
 * This assumes that you can chase the chain all the way back to the original
 * vulkan_resource_index intrinsic.
 *
 * The cursor is not where you left it when this function returns.
 */
static nir_def *
build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
                            nir_address_format addr_format,
                            struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_def *addr =
         build_buffer_addr_for_deref(b, parent, addr_format, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);

   b->cursor = nir_before_instr(&deref->instr);

   return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
}

static bool
try_lower_direct_buffer_intrinsic(nir_builder *b,
                                  nir_intrinsic_instr *intrin,
                                  struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
      return false;

   nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
   if (desc == NULL) {
      /* We should always be able to find the descriptor for UBO access. */
      assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
      return false;
   }

   nir_address_format addr_format = descriptor_address_format(desc, state);

   if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
      /* Normal binding table-based messages can't handle non-uniform access
       * so we have to fall back to A64.
       */
      if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
         return false;

      if (!descriptor_has_bti(desc, state))
         return false;

      /* Rewrite to 32bit_index_offset whenever we can */
      addr_format = nir_address_format_32bit_index_offset;
   } else {
      assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

      /* Rewrite to 32bit_index_offset whenever we can */
      if (descriptor_has_bti(desc, state))
         addr_format = nir_address_format_32bit_index_offset;
   }

   nir_def *addr =
      build_buffer_addr_for_deref(b, deref, addr_format, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr, addr_format);

   return true;
}

static bool
lower_load_accel_struct_desc(nir_builder *b,
                             nir_intrinsic_instr *load_desc,
                             struct apply_pipeline_layout_state *state)
{
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);

   /* It doesn't really matter what address format we choose as
    * everything will constant-fold nicely. Choose one that uses the
    * actual descriptor buffer.
    */
   const nir_address_format addr_format =
      nir_address_format_64bit_bounded_global;

   uint32_t set = UINT32_MAX, binding = UINT32_MAX;
   nir_def *res_index =
      build_res_index_for_chain(b, idx_intrin, addr_format,
                                &set, &binding, state);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   b->cursor = nir_before_instr(&load_desc->instr);

   nir_def *desc_addr =
      build_desc_addr(b, bind_layout, bind_layout->type,
                      res_index, addr_format, state);

   /* Acceleration structure descriptors are always uint64_t */
   nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);

   assert(load_desc->def.bit_size == 64);
   assert(load_desc->def.num_components == 1);
   nir_def_replace(&load_desc->def, desc);

   return true;
}

static bool
lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_deref_atomic_swap:
      return try_lower_direct_buffer_intrinsic(b, intrin, state);

   case nir_intrinsic_load_vulkan_descriptor:
      if (nir_intrinsic_desc_type(intrin) ==
          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
         return lower_load_accel_struct_desc(b, intrin, state);
      return false;

   default:
      return false;
   }
}

static bool
lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);

   nir_def *index =
      build_res_index(b, nir_intrinsic_desc_set(intrin),
                      nir_intrinsic_binding(intrin),
                      intrin->src[0].ssa,
                      addr_format, state);

   assert(intrin->def.bit_size == index->bit_size);
   assert(intrin->def.num_components == index->num_components);
   nir_def_replace(&intrin->def, index);

   return true;
}

static bool
lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);

   nir_def *index =
      build_res_reindex(b, intrin->src[0].ssa,
                        intrin->src[1].ssa,
                        addr_format);

   assert(intrin->def.bit_size == index->bit_size);
   assert(intrin->def.num_components == index->num_components);
   nir_def_replace(&intrin->def, index);

   return true;
}

static bool
lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);

   nir_def *desc =
      build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa,
                                      addr_format, state);

   assert(intrin->def.bit_size == desc->bit_size);
   assert(intrin->def.num_components == desc->num_components);
   nir_def_replace(&intrin->def, desc);

   return true;
}

static bool
lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state);

   nir_def *desc =
      build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                      intrin->src[0].ssa, addr_format, state);

   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      nir_def *size = nir_channel(b, desc, 2);
      nir_def_replace(&intrin->def, size);
      break;
   }

   case nir_address_format_32bit_index_offset:
      /* The binding table index is the first component of the address. The
       * back-end wants a scalar binding table index source.
       */
      nir_src_rewrite(&intrin->src[0], nir_channel(b, desc, 0));
      break;

   default:
      unreachable("Unsupported address format");
   }

   return true;
}

static bool
image_binding_needs_lowered_surface(nir_variable *var)
{
   return !(var->data.access & ACCESS_NON_READABLE) &&
          var->data.image.format != PIPE_FORMAT_NONE;
}

static bool
lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned binding_offset = state->set[set].surface_offsets[binding];

   b->cursor = nir_before_instr(&intrin->instr);

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      const unsigned param = nir_intrinsic_base(intrin);

      nir_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, param * 16,
                                             intrin->def.num_components,
                                             intrin->def.bit_size, state);

      nir_def_rewrite_uses(&intrin->def, desc);
   } else {
      nir_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = deref->arr.index.ssa;
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }

   return true;
}

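/* Lowers load_constant intrinsics. Without relocations, the constant data is
 * accessed through a 64-bit global address built from
 * ELK_SHADER_RELOC_CONST_DATA_ADDR_{LOW,HIGH}; with relocations, it is read
 * as a UBO through the binding table slot reserved in constants_offset.
 */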
static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   /* Any constant-offset load_constant instructions should have been removed
    * by constant folding.
    */
   assert(!nir_src_is_const(intrin->src[0]));
   nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
                                  nir_intrinsic_base(intrin));

   nir_def *data;
   if (!anv_use_relocations(state->pdevice)) {
      unsigned load_size = intrin->def.num_components *
                           intrin->def.bit_size / 8;
      unsigned load_align = intrin->def.bit_size / 8;

      assert(load_size < b->shader->constant_data_size);
      unsigned max_offset = b->shader->constant_data_size - load_size;
      offset = nir_umin(b, offset, nir_imm_int(b, max_offset));

      nir_def *const_data_base_addr = nir_pack_64_2x32_split(b,
         nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_LOW),
         nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_HIGH));

      data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr,
                                                     nir_u2u64(b, offset)),
                                      load_align,
                                      intrin->def.num_components,
                                      intrin->def.bit_size);
   } else {
      nir_def *index = nir_imm_int(b, state->constants_offset);

      data = nir_load_ubo(b, intrin->num_components, intrin->def.bit_size,
                          index, offset,
                          .align_mul = intrin->def.bit_size / 8,
                          .align_offset = 0,
                          .range_base = nir_intrinsic_base(intrin),
                          .range = nir_intrinsic_range(intrin));
   }

   nir_def_rewrite_uses(&intrin->def, data);

   return true;
}

static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
                        struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *base_workgroup_id =
      nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
                             .base = offsetof(struct anv_push_constants, cs.base_work_group_id),
                             .range = 3 * sizeof(uint32_t));
   nir_def_rewrite_uses(&intrin->def, base_workgroup_id);

   return true;
}

static void
lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
                nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_tex_src_type offset_src_type;
   nir_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, plane_offset,
                                             2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
            struct anv_sampler **immutable_samplers =
               state->layout->set[set].layout->binding[binding].immutable_samplers;
            if (immutable_samplers) {
               /* Arrays of YCbCr samplers are tightly packed in the binding
                * tables, so compute the offset of an element in the array by
                * adding the number of planes of all preceding elements.
                */
               unsigned desc_arr_index = 0;
               for (int i = 0; i < arr_index; i++)
                  desc_arr_index += immutable_samplers[i]->n_planes;
               *base_index += desc_arr_index;
            } else {
               *base_index += arr_index;
            }
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             *    If sampler Y’CBCR conversion is enabled, the combined image
             *    sampler must be indexed only by constant integral expressions
             *    when aggregated into arrays in shader code, irrespective of
             *    the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = deref->arr.index.ssa;
         }
      }
   }

   if (index) {
      nir_src_rewrite(&tex->src[deref_src_idx].src, index);
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

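/* Returns the constant plane index from a nir_tex_src_plane source (used for
 * multi-planar YCbCr formats) and removes the source, since the plane gets
 * folded into the binding table index or descriptor offset below.
 */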
static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);

   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

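/* Selects srcs[idx] from the range [start, end) by emitting a binary tree of
 * nir_bcsel instructions, so the lookup costs O(log n) selects rather than a
 * linear chain.
 */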
static nir_def *
build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
                       unsigned start, unsigned end)
{
   if (start == end - 1) {
      return srcs[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
                       build_def_array_select(b, srcs, idx, start, mid),
                       build_def_array_select(b, srcs, idx, mid, end));
   }
}

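/* On Ivy Bridge and Bay Trail the texture swizzle has to be applied in the
 * shader, so this reads the swizzle from the anv_texture_swizzle_descriptor
 * in the descriptor buffer and applies it to the texture result with a small
 * bcsel tree inserted after the tex instruction.
 */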
static void
lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
                       struct apply_pipeline_layout_state *state)
{
   assert(state->pdevice->info.verx10 == 70);
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
       nir_tex_instr_is_query(tex) ||
       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
       (tex->is_shadow && tex->is_new_style_shadow))
      return;

   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   const unsigned plane_offset =
      plane * sizeof(struct anv_texture_swizzle_descriptor);
   nir_def *swiz =
      build_load_var_deref_descriptor_mem(b, deref, plane_offset,
                                          1, 32, state);

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->def.bit_size == 32);
   assert(tex->def.num_components == 4);

   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
   nir_def *undef = nir_undef(b, 1, 32);
   nir_def *comps[8];
   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
      comps[i] = undef;

   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
   else
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->def, 0);
   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->def, 1);
   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->def, 2);
   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->def, 3);

   nir_def *swiz_comps[4];
   for (unsigned i = 0; i < 4; i++) {
      nir_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
   }
   nir_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);

   /* Rewrite uses before we insert so we don't rewrite this use */
   nir_def_rewrite_uses_after(&tex->def,
                              swiz_tex_res,
                              swiz_tex_res->parent_instr);
}

static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct apply_pipeline_layout_state *state)
{
   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
    * before we lower the derefs away so we can still find the descriptor.
    */
   if (state->pdevice->info.verx10 == 70)
      lower_gfx7_tex_swizzle(b, tex, plane, state);

   b->cursor = nir_before_instr(&tex->instr);

   lower_tex_deref(b, tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   return true;
}

static bool
apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         return lower_res_index_intrinsic(b, intrin, state);
      case nir_intrinsic_vulkan_resource_reindex:
         return lower_res_reindex_intrinsic(b, intrin, state);
      case nir_intrinsic_load_vulkan_descriptor:
         return lower_load_vulkan_descriptor(b, intrin, state);
      case nir_intrinsic_get_ssbo_size:
         return lower_get_ssbo_size(b, intrin, state);
      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel:
         return lower_image_intrinsic(b, intrin, state);
      case nir_intrinsic_load_constant:
         return lower_load_constant(b, intrin, state);
      case nir_intrinsic_load_base_workgroup_id:
         return lower_base_workgroup_id(b, intrin, state);
      default:
         return false;
      }
      break;
   }
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), state);
   default:
      return false;
   }
}

struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

void
anv_nir_apply_pipeline_layout(nir_shader *shader,
                              const struct anv_physical_device *pdevice,
                              enum elk_robustness_flags robust_flags,
                              const struct anv_pipeline_layout *layout,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .layout = layout,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
      .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_shader_instructions_pass(shader, get_used_bindings,
                                nir_metadata_all, &state);

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .index = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants && anv_use_relocations(pdevice)) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         const struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size. We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless strictly higher
          * priority than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first. If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

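   /* Assign binding table and sampler table slots in priority order. Because
    * of the sort above, bindings that cannot be bindless are guaranteed to be
    * considered before anything that can fall back to bindless handles.
    */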
   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      const struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      const uint32_t array_size = binding->array_size;

      if (binding->dynamic_offset_index >= 0)
         state.has_dynamic_buffers = true;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         assert(map->surface_count + array_size <= MAX_BINDING_TABLE_SIZE);
         assert(!anv_descriptor_requires_bindless(pdevice, binding, false));
         state.set[set].surface_offsets[b] = map->surface_count;
         if (binding->dynamic_offset_index < 0) {
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .plane = p,
                     };
               }
            }
         } else {
            for (unsigned i = 0; i < binding->array_size; i++) {
               map->surface_to_descriptor[map->surface_count++] =
                  (struct anv_pipeline_binding) {
                     .set = set,
                     .index = binding->descriptor_index + i,
                     .dynamic_offset_index =
                        layout->set[set].dynamic_offset_start +
                        binding->dynamic_offset_index + i,
                  };
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   nir_foreach_image_variable(var, shader) {
      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const struct anv_descriptor_set_binding_layout *bind_layout =
         &layout->set[set].layout->binding[binding];
      const uint32_t array_size = bind_layout->array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].index == bind_layout->descriptor_index + i);

         pipe_binding[i].lowered_storage_surface =
            image_binding_needs_lowered_surface(var);
      }
   }

   /* Before we do the normal lowering, we look for any SSBO operations
    * that we can lower to the BTI model and lower them up-front. The BTI
    * model can perform better than the A64 model for a couple reasons:
    *
    *  1. 48-bit address calculations are potentially expensive and using
    *     the BTI model lets us simply compute 32-bit offsets and the
    *     hardware adds the 64-bit surface base address.
    *
    *  2. The BTI messages, because they use surface states, do bounds
    *     checking for us. With the A64 model, we have to do our own
    *     bounds checking and this means wider pointers and extra
    *     calculations and branching in the shader.
    *
    * The solution to both of these is to convert things to the BTI model
    * opportunistically. We need to do this as a pre-pass for two reasons:
    *
    *  1. The BTI model requires nir_address_format_32bit_index_offset
    *     pointers which are not the same type as the pointers needed for
    *     the A64 model. Because all our derefs are set up for the A64
    *     model (in case we have variable pointers), we have to crawl all
    *     the way back to the vulkan_resource_index intrinsic and build a
    *     completely fresh index+offset calculation.
    *
    *  2. Because the variable-pointers-capable lowering that we do as part
    *     of apply_pipeline_layout() is destructive (it really has to be to
    *     handle variable pointers properly), we've lost the deref
    *     information by the time we get to the load/store/atomic
    *     intrinsics in that pass.
    */
   nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
                                nir_metadata_control_flow,
                                &state);

   /* We just got rid of all the direct access. Delete it so it's not in the
    * way when we do our indirect lowering.
    */
   nir_opt_dce(shader);

   nir_shader_instructions_pass(shader, apply_pipeline_layout,
                                nir_metadata_control_flow,
                                &state);

   ralloc_free(mem_ctx);

   /* Now that we're done computing the surface and sampler portions of the
    * bind map, hash them. This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->surface_to_descriptor,
                      map->surface_count * sizeof(struct anv_pipeline_binding),
                      map->surface_sha1);
   _mesa_sha1_compute(map->sampler_to_descriptor,
                      map->sampler_count * sizeof(struct anv_pipeline_binding),
                      map->sampler_sha1);
}