xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_nir_apply_pipeline_layout.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_nir.h"
25 #include "nir/nir_builder.h"
26 #include "compiler/brw_nir.h"
27 #include "util/mesa-sha1.h"
28 #include "util/set.h"
29 
30 #include "vk_enum_to_str.h"
31 
32 #include "genxml/genX_bits.h"
33 
34 /* Sampler tables don't actually have a maximum size but we pick one just so
35  * that we don't end up emitting too much state on-the-fly.
36  */
37 #define MAX_SAMPLER_TABLE_SIZE 128
38 #define BINDLESS_OFFSET        255
39 
40 enum binding_property {
41    BINDING_PROPERTY_NORMAL            = BITFIELD_BIT(0),
42    BINDING_PROPERTY_PUSHABLE          = BITFIELD_BIT(1),
43    BINDING_PROPERTY_EMBEDDED_SAMPLER  = BITFIELD_BIT(2),
44    BINDING_PROPERTY_NO_BINDING_TABLE  = BITFIELD_BIT(3),
45 };
46 
47 struct apply_pipeline_layout_state {
48    const struct anv_physical_device *pdevice;
49 
50    const struct anv_pipeline_sets_layout *layout;
51    nir_address_format desc_addr_format;
52    nir_address_format ssbo_addr_format;
53    nir_address_format ubo_addr_format;
54 
55    /* Place to flag lowered instructions so we don't lower them twice */
56    struct set *lowered_instrs;
57 
58    bool uses_constants;
59    bool has_dynamic_buffers;
60    bool has_independent_sets;
61    uint8_t constants_offset;
62    struct {
63       bool desc_buffer_used;
64       uint8_t desc_offset;
65 
66       struct anv_binding_apply_layout {
67          uint8_t use_count;
68 
69          /* Binding table offset */
70          uint8_t surface_offset;
71 
72          /* Sampler table offset */
73          uint8_t sampler_offset;
74 
75          /* Embedded sampler index */
76          uint16_t embedded_sampler_index;
77 
78          /* Properties of the binding */
79          enum binding_property properties;
80 
81          /* Each binding is given a unique identifier used for push
82           * computation.
83           */
84          uint32_t push_block;
85       } *binding;
86    } set[MAX_SETS];
87 };
88 
89 /* For a given binding, tells us how many binding table entries are needed per
90  * element.
91  */
92 static uint32_t
93 bti_multiplier(const struct apply_pipeline_layout_state *state,
94                uint32_t set, uint32_t binding)
95 {
96    const struct anv_descriptor_set_layout *set_layout =
97       state->layout->set[set].layout;
98    const struct anv_descriptor_set_binding_layout *bind_layout =
99       &set_layout->binding[binding];
100 
101    return bind_layout->max_plane_count;
102 }
103 
104 static nir_address_format
105 addr_format_for_desc_type(VkDescriptorType desc_type,
106                           struct apply_pipeline_layout_state *state)
107 {
108    switch (desc_type) {
109    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
110    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
111       return state->ssbo_addr_format;
112 
113    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
114    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
115       return state->ubo_addr_format;
116 
117    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
118       return state->desc_addr_format;
119 
120    default:
121       unreachable("Unsupported descriptor type");
122    }
123 }
124 
125 static struct anv_binding_apply_layout *
126 add_binding(struct apply_pipeline_layout_state *state,
127             uint32_t set, uint32_t binding)
128 {
129    const struct anv_descriptor_set_layout *set_layout =
130       state->layout->set[set].layout;
131    const struct anv_descriptor_set_binding_layout *bind_layout =
132       &set_layout->binding[binding];
133 
134    assert(set < state->layout->num_sets);
135    assert(binding < state->layout->set[set].layout->binding_count);
136 
137    if (state->set[set].binding[binding].use_count < UINT8_MAX)
138       state->set[set].binding[binding].use_count++;
139 
140    /* Only flag the descriptor buffer as used if there's actually data for
141     * this binding.  This lets us be lazy and call this function constantly
142     * without worrying about unnecessarily enabling the buffer.
143     */
144    if (bind_layout->descriptor_surface_stride)
145       state->set[set].desc_buffer_used = true;
146 
147    if (bind_layout->dynamic_offset_index >= 0)
148       state->has_dynamic_buffers = true;
149 
150    state->set[set].binding[binding].properties |= BINDING_PROPERTY_NORMAL;
151 
152    if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT)
153       state->set[set].binding[binding].properties |= BINDING_PROPERTY_EMBEDDED_SAMPLER;
154 
155    return &state->set[set].binding[binding];
156 }
157 
158 const VkDescriptorSetLayoutCreateFlags non_pushable_set_flags =
159    VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
160    VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT;
161 
162 const VkDescriptorBindingFlags non_pushable_binding_flags =
163    VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
164    VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT |
165    VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT;
166 
167 static void
168 add_binding_type(struct apply_pipeline_layout_state *state,
169                  uint32_t set, uint32_t binding, VkDescriptorType type)
170 {
171    add_binding(state, set, binding);
172 
173    const struct anv_descriptor_set_layout *set_layout =
174       state->layout->set[set].layout;
175    const struct anv_descriptor_set_binding_layout *bind_layout =
176       &set_layout->binding[binding];
177 
178    /* We can't push descriptor buffers but we can for push descriptors */
179    const bool is_set_pushable =
180       (set_layout->flags & non_pushable_set_flags) == 0 ||
181       set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
182    const bool is_binding_pushable =
183       (bind_layout->flags & non_pushable_binding_flags) == 0;
184 
185    if (is_set_pushable && is_binding_pushable &&
186        (state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
187         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
188         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
189         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) &&
190        (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
191         type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK))
192       state->set[set].binding[binding].properties |= BINDING_PROPERTY_PUSHABLE;
193 }
194 
195 static struct anv_binding_apply_layout *
196 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
197 {
198    nir_deref_instr *deref = nir_src_as_deref(src);
199    nir_variable *var = nir_deref_instr_get_variable(deref);
200    return add_binding(state, var->data.descriptor_set, var->data.binding);
201 }
202 
203 static void
204 add_tex_src_binding(struct apply_pipeline_layout_state *state,
205                     nir_tex_instr *tex, nir_tex_src_type deref_src_type)
206 {
207    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
208    if (deref_src_idx < 0)
209       return;
210 
211    struct anv_binding_apply_layout *layout =
212       add_deref_src_binding(state, tex->src[deref_src_idx].src);
213 
214    /* This is likely a fallout of Wa_14020375314 but hasn't been fully
215     * understood by HW people yet.
216     *
217     * In HSD-18037984222 we reported that the render target index given
218     * through a descriptor in the address register is broken. I think the same
219     * issue is happening here when we use a descriptor given by the address
220     * register for the sampler and when the
221     * RENDER_SURFACE_STATE::EnableSamplerRoutetoLSC bit is enabled. This seems
222     * to affect only texelFetch() operations.
223     *
224     * We probably don't want to lose the performance benefit of the route to
225     * LSC so instead we disable dynamic descriptors by checking if a binding
226     * array is accessed with a non constant value.
227     *
228     * Fixes a bunch of tests in dEQP-VK.binding_model.*.index_push_constant.*
229     */
230    if (state->pdevice->info.ver >= 20 && tex->op == nir_texop_txf) {
231       nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
232       if (deref->deref_type != nir_deref_type_var) {
233          assert(deref->deref_type == nir_deref_type_array);
234          if (!nir_src_is_const(deref->arr.index))
235             layout->properties |= BINDING_PROPERTY_NO_BINDING_TABLE;
236       }
237    }
238 }
239 
240 static bool
241 get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
242 {
243    struct apply_pipeline_layout_state *state = _state;
244 
245    switch (instr->type) {
246    case nir_instr_type_intrinsic: {
247       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
248       switch (intrin->intrinsic) {
249       case nir_intrinsic_vulkan_resource_index:
250          add_binding_type(state,
251                           nir_intrinsic_desc_set(intrin),
252                           nir_intrinsic_binding(intrin),
253                           nir_intrinsic_desc_type(intrin));
254          break;
255 
256       case nir_intrinsic_image_deref_load:
257       case nir_intrinsic_image_deref_store:
258       case nir_intrinsic_image_deref_atomic:
259       case nir_intrinsic_image_deref_atomic_swap:
260       case nir_intrinsic_image_deref_size:
261       case nir_intrinsic_image_deref_samples:
262       case nir_intrinsic_image_deref_load_param_intel:
263       case nir_intrinsic_image_deref_load_raw_intel:
264       case nir_intrinsic_image_deref_store_raw_intel:
265       case nir_intrinsic_image_deref_sparse_load:
266          add_deref_src_binding(state, intrin->src[0]);
267          break;
268 
269       case nir_intrinsic_load_constant:
270          state->uses_constants = true;
271          break;
272 
273       default:
274          break;
275       }
276       break;
277    }
278    case nir_instr_type_tex: {
279       nir_tex_instr *tex = nir_instr_as_tex(instr);
280       add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
281       add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
282       break;
283    }
284    default:
285       break;
286    }
287 
288    return false;
289 }
290 
291 static nir_intrinsic_instr *
292 find_descriptor_for_index_src(nir_src src,
293                               struct apply_pipeline_layout_state *state)
294 {
295    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
296 
297    while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
298       intrin = nir_src_as_intrinsic(intrin->src[0]);
299 
300    if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
301       return NULL;
302 
303    return intrin;
304 }
305 
306 static bool
307 descriptor_has_bti(nir_intrinsic_instr *intrin,
308                    struct apply_pipeline_layout_state *state)
309 {
310    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
311 
312    uint32_t set = nir_intrinsic_desc_set(intrin);
313    uint32_t binding = nir_intrinsic_binding(intrin);
314    const struct anv_descriptor_set_binding_layout *bind_layout =
315       &state->layout->set[set].layout->binding[binding];
316 
317    if (state->set[set].binding[binding].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
318       return false;
319 
320    uint32_t surface_index;
321    if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
322       surface_index = state->set[set].desc_offset;
323    else
324       surface_index = state->set[set].binding[binding].surface_offset;
325 
326    /* Only lower to a BTI message if we have a valid binding table index. */
327    return surface_index < MAX_BINDING_TABLE_SIZE;
328 }
329 
330 static nir_address_format
331 descriptor_address_format(nir_intrinsic_instr *intrin,
332                           struct apply_pipeline_layout_state *state)
333 {
334    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
335 
336    return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
337 }
338 
339 static nir_intrinsic_instr *
340 nir_deref_find_descriptor(nir_deref_instr *deref,
341                           struct apply_pipeline_layout_state *state)
342 {
343    while (1) {
344       /* Nothing we will use this on has a variable */
345       assert(deref->deref_type != nir_deref_type_var);
346 
347       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
348       if (!parent)
349          break;
350 
351       deref = parent;
352    }
353    assert(deref->deref_type == nir_deref_type_cast);
354 
355    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
356    if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
357       return NULL;
358 
359    return find_descriptor_for_index_src(intrin->src[0], state);
360 }
361 
362 static nir_def *
363 build_load_descriptor_mem(nir_builder *b,
364                           nir_def *desc_addr, unsigned desc_offset,
365                           unsigned num_components, unsigned bit_size,
366                           const struct apply_pipeline_layout_state *state)
367 
368 {
369    switch (state->desc_addr_format) {
370    case nir_address_format_64bit_global_32bit_offset: {
371       nir_def *base_addr =
372          nir_pack_64_2x32(b, nir_trim_vector(b, desc_addr, 2));
373       nir_def *offset32 =
374          nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);
375 
376       return nir_load_global_constant_offset(b, num_components, bit_size,
377                                              base_addr, offset32,
378                                              .align_mul = 8,
379                                              .align_offset = desc_offset % 8);
380    }
381 
382    case nir_address_format_32bit_index_offset: {
383       nir_def *surface_index = nir_channel(b, desc_addr, 0);
384       nir_def *offset32 =
385          nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
386 
387       return nir_load_ubo(b, num_components, bit_size,
388                           surface_index, offset32,
389                           .align_mul = 8,
390                           .align_offset = desc_offset % 8,
391                           .range_base = 0,
392                           .range = num_components * bit_size / 8);
393    }
394 
395    default:
396       unreachable("Unsupported address format");
397    }
398 }
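
/* Illustrative usage sketch (not from the original source): to read the two
 * 32-bit dwords of an anv_sampled_image_descriptor at a descriptor address,
 * one would call
 *
 *    nir_def *desc_data =
 *       build_load_descriptor_mem(b, desc_addr, 0, 2, 32, state);
 *
 * which is exactly what build_sampler_handle_for_binding() does further down
 * in this file.
 */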
399 
400 /* When using direct descriptors, we do not have a structure to read in memory
401  * like anv_address_range_descriptor where all the fields perfectly match the
402  * vec4 address format we need to generate for A64 messages. Instead we need
403  * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. Easy
404  * enough for the surface address, a lot less fun for the size where you have
405  * to combine 3 fields scattered over multiple dwords, add one to the total
406  * and do a check against the surface type to deal with the null descriptors.
407  *
408  * Fortunately we can reuse the Auxiliary surface address field to stash our
409  * buffer size and just load a vec4.
410  */
411 static nir_def *
412 build_optimized_load_render_surface_state_address(nir_builder *b,
413                                                   nir_def *desc_addr,
414                                                   struct apply_pipeline_layout_state *state)
415 
416 {
417    const struct intel_device_info *devinfo = &state->pdevice->info;
418 
419    nir_def *surface_addr =
420       build_load_descriptor_mem(b, desc_addr,
421                                 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
422                                 4, 32, state);
423    nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
424    nir_def *addr_udw = nir_channel(b, surface_addr, 1);
425    nir_def *length = nir_channel(b, surface_addr, 3);
426 
427    return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
428 }
429 
430 /* When using direct descriptors, we do not have a structure to read in memory
431  * like anv_address_range_descriptor where all the fields perfectly match the
432  * vec4 address format we need to generate for A64 messages. Instead we need
433  * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. Easy
434  * enough for the surface address, a lot less fun for the size.
435  */
436 static nir_def *
437 build_non_optimized_load_render_surface_state_address(nir_builder *b,
438                                                       nir_def *desc_addr,
439                                                       struct apply_pipeline_layout_state *state)
440 
441 {
442    const struct intel_device_info *devinfo = &state->pdevice->info;
443 
444    assert(((RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) +
445             RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo) - 1) -
446            RENDER_SURFACE_STATE_Width_start(devinfo)) / 8 <= 32);
447 
448    nir_def *surface_addr =
449       build_load_descriptor_mem(b, desc_addr,
450                                 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
451                                 DIV_ROUND_UP(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo), 32),
452                                 32, state);
453    nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
454    nir_def *addr_udw = nir_channel(b, surface_addr, 1);
455 
456    /* Take all the RENDER_SURFACE_STATE fields from the beginning of the
457     * structure up to the Depth field.
458     */
459    const uint32_t type_sizes_dwords =
460       DIV_ROUND_UP(RENDER_SURFACE_STATE_Depth_start(devinfo) +
461                    RENDER_SURFACE_STATE_Depth_bits(devinfo), 32);
462    nir_def *type_sizes =
463       build_load_descriptor_mem(b, desc_addr, 0, type_sizes_dwords, 32, state);
464 
465    const unsigned width_start = RENDER_SURFACE_STATE_Width_start(devinfo);
466    /* SKL PRMs, Volume 2d: Command Reference: Structures, RENDER_SURFACE_STATE
467     *
468     *    Width:  "bits [6:0]   of the number of entries in the buffer - 1"
469     *    Height: "bits [20:7]  of the number of entries in the buffer - 1"
470     *    Depth:  "bits [31:21] of the number of entries in the buffer - 1"
471     */
472    const unsigned width_bits = 7;
473    nir_def *width =
474       nir_iand_imm(b,
475                    nir_ishr_imm(b,
476                                 nir_channel(b, type_sizes, width_start / 32),
477                                 width_start % 32),
478                    (1u << width_bits) - 1);
479 
480    const unsigned height_start = RENDER_SURFACE_STATE_Height_start(devinfo);
481    const unsigned height_bits = RENDER_SURFACE_STATE_Height_bits(devinfo);
482    nir_def *height =
483       nir_iand_imm(b,
484                    nir_ishr_imm(b,
485                                 nir_channel(b, type_sizes, height_start / 32),
486                                 height_start % 32),
487                    (1u << height_bits) - 1);
488 
489    const unsigned depth_start = RENDER_SURFACE_STATE_Depth_start(devinfo);
490    const unsigned depth_bits = RENDER_SURFACE_STATE_Depth_bits(devinfo);
491    nir_def *depth =
492       nir_iand_imm(b,
493                    nir_ishr_imm(b,
494                                 nir_channel(b, type_sizes, depth_start / 32),
495                                 depth_start % 32),
496                    (1u << depth_bits) - 1);
497 
498    nir_def *length = width;
499    length = nir_ior(b, length, nir_ishl_imm(b, height, width_bits));
500    length = nir_ior(b, length, nir_ishl_imm(b, depth, width_bits + height_bits));
501    length = nir_iadd_imm(b, length, 1);
502 
503    /* Check the surface type; if it's SURFTYPE_NULL, set the length of the
504     * buffer to 0.
505     */
506    const unsigned type_start = RENDER_SURFACE_STATE_SurfaceType_start(devinfo);
507    const unsigned type_dw = type_start / 32;
508    nir_def *type =
509       nir_iand_imm(b,
510                    nir_ishr_imm(b,
511                                 nir_channel(b, type_sizes, type_dw),
512                                 type_start % 32),
513                    (1u << RENDER_SURFACE_STATE_SurfaceType_bits(devinfo)) - 1);
514 
515    length = nir_bcsel(b,
516                       nir_ieq_imm(b, type, 7 /* SURFTYPE_NULL */),
517                       nir_imm_int(b, 0), length);
518 
519    return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
520 }
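
/* Worked example (illustrative): a buffer surface with 1000 entries is
 * programmed with "number of entries - 1" = 999 = 0x3e7, i.e. Width = 0x67,
 * Height = 0x7 and Depth = 0.  The code above recombines
 * 0x67 | (0x7 << 7) | (0 << 21) = 999 and adds 1 to recover the size, which
 * is then forced to 0 if SurfaceType is SURFTYPE_NULL.
 */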
521 
522 static inline nir_def *
523 build_load_render_surface_state_address(nir_builder *b,
524                                         nir_def *desc_addr,
525                                         struct apply_pipeline_layout_state *state)
526 {
527    if (state->pdevice->isl_dev.buffer_length_in_aux_addr)
528       return build_optimized_load_render_surface_state_address(b, desc_addr, state);
529    /* Wa_14019708328 */
530    return build_non_optimized_load_render_surface_state_address(b, desc_addr, state);
531 }
532 
533 /* Load the depth of a 3D storage image.
534  *
535  * Either by reading the indirect descriptor value, or reading the value from
536  * RENDER_SURFACE_STATE.
537  *
538  * This is necessary for VK_EXT_image_sliced_view_of_3d.
539  */
540 static nir_def *
541 build_load_storage_3d_image_depth(nir_builder *b,
542                                   nir_def *desc_addr,
543                                   nir_def *resinfo_depth,
544                                   struct apply_pipeline_layout_state *state)
545 
546 {
547    const struct intel_device_info *devinfo = &state->pdevice->info;
548 
549    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
550       return build_load_descriptor_mem(
551          b, desc_addr,
552          offsetof(struct anv_storage_image_descriptor, image_depth),
553          1, 32, state);
554    } else {
555       nir_def *data = build_load_descriptor_mem(
556          b, desc_addr,
557          RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) / 8,
558          1, 32, state);
559       nir_def *depth =
560          nir_ushr_imm(
561             b, data,
562             RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) % 32);
563       depth = nir_iand_imm(
564          b, depth,
565          (1u << RENDER_SURFACE_STATE_RenderTargetViewExtent_bits(devinfo)) - 1);
566       depth = nir_iadd_imm(b, depth, 1);
567 
568       /* Return the minimum between the RESINFO value and the
569        * RENDER_SURFACE_STATE::RenderTargetViewExtent value.
570        *
571        * Both are expressed for the current view LOD, but in the case of a
572        * SURFTYPE_NULL, RESINFO will return the right value, while the -1
573        * value in RENDER_SURFACE_STATE should be ignored.
574        */
575       return nir_umin(b, resinfo_depth, depth);
576    }
577 }
578 
579 static nir_def *
580 build_load_desc_set_dynamic_index(nir_builder *b, unsigned set_idx)
581 {
582    return nir_iand_imm(
583       b,
584       anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx]),
585       ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
586 }
587 
588 static nir_def *
589 build_load_desc_address(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm,
590                         const struct apply_pipeline_layout_state *state)
591 {
592    nir_def *desc_offset = set_idx != NULL ?
593       anv_load_driver_uniform_indexed(b, 1, desc_surface_offsets, set_idx) :
594       anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx_imm]);
595    desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
596    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
597        !state->pdevice->uses_ex_bso) {
598       nir_def *bindless_base_offset =
599          anv_load_driver_uniform(b, 1, surfaces_base_offset);
600       desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);
601    }
602    return nir_pack_64_2x32_split(
603       b, desc_offset,
604       nir_load_reloc_const_intel(
605          b,
606          state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
607          BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
608          BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
609 }
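
/* Note on the code above (illustrative reading): the low 32 bits of the
 * returned address are the masked descriptor set offset, while the high 32
 * bits come from a shader relocation constant
 * (BRW_SHADER_RELOC_DESCRIPTORS*_ADDR_HIGH) that is expected to resolve to
 * the high 32 bits of the descriptor heap or descriptor buffer address.
 */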
610 
611 /** Build a Vulkan resource index
612  *
613  * A "resource index" is the term used by our SPIR-V parser and the relevant
614  * NIR intrinsics for a reference into a descriptor set.  It acts much like a
615  * deref in NIR except that it accesses opaque descriptors instead of memory.
616  *
617  * Coming out of SPIR-V, both the resource indices (in the form of
618  * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
619  * of nir_deref_instr) use the same vector component/bit size.  The meaning
620  * of those values for memory derefs (nir_deref_instr) is given by the
621  * nir_address_format associated with the descriptor type.  For resource
622  * indices, it's an encoding entirely internal to ANV which describes, in some
623  * sense, the address of the descriptor.  Thanks to the NIR/SPIR-V rules, it
624  * must be packed into the same size SSA values as a memory address.  For this
625  * reason, the actual encoding may depend both on the address format for
626  * memory derefs and the descriptor address format.
627  *
628  * The load_vulkan_descriptor intrinsic exists to provide a transition point
629  * between these two forms of derefs: descriptor and memory.
630  */
631 static nir_def *
632 build_res_index(nir_builder *b,
633                 uint32_t set, uint32_t binding,
634                 nir_def *array_index,
635                 struct apply_pipeline_layout_state *state)
636 {
637    const struct anv_descriptor_set_binding_layout *bind_layout =
638       &state->layout->set[set].layout->binding[binding];
639 
640    uint32_t array_size = bind_layout->array_size;
641 
642    uint32_t set_idx;
643    switch (state->desc_addr_format) {
644    case nir_address_format_64bit_global_32bit_offset:
645       /* Descriptor set buffer accesses will go through A64 messages, so the
646        * index to get the descriptor set buffer address is located in the
647        * anv_push_constants::desc_surface_offsets and it's indexed by the set
648        * number.
649        */
650       set_idx = set;
651       break;
652 
653    case nir_address_format_32bit_index_offset:
654       /* Descriptor set buffer accesses will go through the binding table. The
655        * offset is the entry in the binding table.
656        */
657       assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
658       set_idx = state->set[set].desc_offset;
659       break;
660 
661    default:
662       unreachable("Unsupported address format");
663    }
664 
665    assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
666    nir_def *dynamic_offset_index;
667    if (bind_layout->dynamic_offset_index >= 0) {
668       if (state->has_independent_sets) {
669          nir_def *dynamic_offset_start =
670             build_load_desc_set_dynamic_index(b, set);
671          dynamic_offset_index =
672             nir_iadd_imm(b, dynamic_offset_start,
673                          bind_layout->dynamic_offset_index);
674       } else {
675          dynamic_offset_index =
676             nir_imm_int(b,
677                         state->layout->set[set].dynamic_offset_start +
678                         bind_layout->dynamic_offset_index);
679       }
680    } else {
681       dynamic_offset_index = nir_imm_int(b, 0xff); /* No dynamic offset */
682    }
683 
684    const uint32_t desc_bti = state->set[set].binding[binding].surface_offset;
685    /* We don't care about the stride field for inline uniforms (see
686     * build_desc_addr_for_res_index), but for anything else we should be
687     * aligned to 8 bytes because we store a multiple of 8 in the packed info
688     * to be able to encode a stride up to 2040 (8 * 255).
689     */
690    assert(bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
691           bind_layout->descriptor_surface_stride % 8 == 0);
692    const uint32_t desc_stride =
693       bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ? 0 :
694       bind_layout->descriptor_surface_stride / 8;
695 
696    nir_def *packed =
697       nir_ior_imm(b,
698                   dynamic_offset_index,
699                   (desc_stride << 24) |
700                   (desc_bti << 16)    |
701                   (set_idx << 8));
702 
703 
704    return nir_vec4(b, packed,
705                       nir_imm_int(b, bind_layout->descriptor_surface_offset),
706                       nir_imm_int(b, array_size - 1),
707                       array_index);
708 }
709 
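/* Layout sketch of the resource index vec4 built by build_res_index() and
 * consumed by unpack_res_index() below (derived from the packing code above):
 *
 *    .x = dyn_offset_index | (set_idx << 8) | (bti << 16) | ((stride / 8) << 24)
 *    .y = descriptor_surface_offset of the binding within the set
 *    .z = array_size - 1
 *    .w = array_index
 *
 * dyn_offset_index is 0xff when the binding has no dynamic offset.
 */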
710 struct res_index_defs {
711    nir_def *bti_idx;
712    nir_def *set_idx;
713    nir_def *dyn_offset_base;
714    nir_def *desc_offset_base;
715    nir_def *array_index;
716    nir_def *desc_stride;
717 };
718 
719 static struct res_index_defs
720 unpack_res_index(nir_builder *b, nir_def *index)
721 {
722    struct res_index_defs defs;
723 
724    nir_def *packed = nir_channel(b, index, 0);
725    defs.desc_stride =
726       nir_imul_imm(b, nir_extract_u8(b, packed, nir_imm_int(b, 3)), 8);
727    defs.bti_idx = nir_extract_u8(b, packed, nir_imm_int(b, 2));
728    defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
729    defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
730 
731    defs.desc_offset_base = nir_channel(b, index, 1);
732    defs.array_index = nir_channel(b, index, 3);
733 
734    return defs;
735 }
736 
737 /** Whether a surface is accessed through the bindless surface state heap */
738 static bool
739 is_binding_bindless(unsigned set, unsigned binding, bool sampler,
740                     const struct apply_pipeline_layout_state *state)
741 {
742    /* Has a binding table entry been allocated for this binding? */
743    if (sampler &&
744        state->set[set].binding[binding].sampler_offset != BINDLESS_OFFSET)
745       return false;
746    if (!sampler &&
747        state->set[set].binding[binding].surface_offset != BINDLESS_OFFSET)
748       return false;
749 
750    return true;
751 }
752 
753 /** Adjust a Vulkan resource index
754  *
755  * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
756  * For array descriptors, it allows us to adjust the array index.  Thanks to
757  * variable pointers, we cannot always fold this re-index operation into the
758  * vulkan_resource_index intrinsic and we have to do it based on nothing but
759  * the address format.
760  */
761 static nir_def *
762 build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta)
763 {
764    return nir_vec4(b, nir_channel(b, orig, 0),
765                       nir_channel(b, orig, 1),
766                       nir_channel(b, orig, 2),
767                       nir_iadd(b, nir_channel(b, orig, 3), delta));
768 }
769 
770 /** Get the address for a descriptor given its resource index
771  *
772  * Because of the re-indexing operations, we can't bounds check descriptor
773  * array access until we have the final index.  That means we end up doing the
774  * bounds check here, if needed.  See unpack_res_index() for more details.
775  *
776  * This function takes a desc_type which is used to tell whether the array
777  * index and descriptor stride should be applied: inline uniform blocks are
778  * a single descriptor and have no stride.
779  */
780 static nir_def *
781 build_desc_addr_for_res_index(nir_builder *b,
782                               const VkDescriptorType desc_type,
783                               nir_def *index, nir_address_format addr_format,
784                               struct apply_pipeline_layout_state *state)
785 {
786    struct res_index_defs res = unpack_res_index(b, index);
787 
788    nir_def *desc_offset = res.desc_offset_base;
789    if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
790       /* Compute the actual descriptor offset.  For inline uniform blocks,
791        * the array index is ignored as they are only allowed to be a single
792        * descriptor (not an array) and there is no concept of a "stride".
793        *
794        */
795       desc_offset =
796          nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
797    }
798 
799    switch (addr_format) {
800    case nir_address_format_64bit_global_32bit_offset:
801    case nir_address_format_64bit_bounded_global: {
802       switch (state->desc_addr_format) {
803       case nir_address_format_64bit_global_32bit_offset: {
804          nir_def *base_addr =
805             build_load_desc_address(b, res.set_idx, 0, state);
806          return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
807                             nir_unpack_64_2x32_split_y(b, base_addr),
808                             nir_imm_int(b, UINT32_MAX),
809                             desc_offset);
810       }
811 
812       case nir_address_format_32bit_index_offset:
813          return nir_vec2(b, res.set_idx, desc_offset);
814 
815       default:
816          unreachable("Unhandled address format");
817       }
818    }
819 
820    case nir_address_format_32bit_index_offset:
821       assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
822       assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
823       return nir_vec2(b, res.set_idx, desc_offset);
824 
825    default:
826       unreachable("Unhandled address format");
827    }
828 }
829 
830 static nir_def *
831 build_desc_addr_for_binding(nir_builder *b,
832                             unsigned set, unsigned binding,
833                             nir_def *array_index, unsigned plane,
834                             const struct apply_pipeline_layout_state *state)
835 {
836    const struct anv_descriptor_set_binding_layout *bind_layout =
837       &state->layout->set[set].layout->binding[binding];
838 
839    switch (state->desc_addr_format) {
840    case nir_address_format_64bit_global_32bit_offset:
841    case nir_address_format_64bit_bounded_global: {
842       nir_def *set_addr = build_load_desc_address(b, NULL, set, state);
843       nir_def *desc_offset =
844          nir_iadd_imm(b,
845                       nir_imul_imm(b,
846                                    array_index,
847                                    bind_layout->descriptor_surface_stride),
848                       bind_layout->descriptor_surface_offset);
849       if (plane != 0) {
850          desc_offset = nir_iadd_imm(
851             b, desc_offset, plane * bind_layout->descriptor_data_surface_size);
852       }
853 
854       return nir_vec4(b, nir_unpack_64_2x32_split_x(b, set_addr),
855                          nir_unpack_64_2x32_split_y(b, set_addr),
856                          nir_imm_int(b, UINT32_MAX),
857                          desc_offset);
858    }
859 
860    case nir_address_format_32bit_index_offset: {
861       nir_def *desc_offset =
862          nir_iadd_imm(b,
863                       nir_imul_imm(b,
864                                    array_index,
865                                    bind_layout->descriptor_surface_stride),
866                       bind_layout->descriptor_surface_offset);
867       if (plane != 0) {
868          desc_offset = nir_iadd_imm(
869             b, desc_offset, plane * bind_layout->descriptor_data_surface_size);
870       }
871       return nir_vec2(b,
872                       nir_imm_int(b, state->set[set].desc_offset),
873                       desc_offset);
874    }
875 
876    default:
877       unreachable("Unhandled address format");
878    }
879 }
880 
881 static unsigned
882 binding_descriptor_offset(const struct apply_pipeline_layout_state *state,
883                           const struct anv_descriptor_set_binding_layout *bind_layout,
884                           bool sampler)
885 {
886    if (sampler &&
887        state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
888       return bind_layout->descriptor_sampler_offset;
889 
890    return bind_layout->descriptor_surface_offset;
891 }
892 
893 static unsigned
894 binding_descriptor_stride(const struct apply_pipeline_layout_state *state,
895                           const struct anv_descriptor_set_binding_layout *bind_layout,
896                           bool sampler)
897 {
898    if (sampler &&
899        state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
900       return bind_layout->descriptor_sampler_stride;
901 
902    return bind_layout->descriptor_surface_stride;
903 }
904 
905 static nir_def *
906 build_surface_index_for_binding(nir_builder *b,
907                                 unsigned set, unsigned binding,
908                                 nir_def *array_index,
909                                 unsigned plane,
910                                 bool non_uniform,
911                                 const struct apply_pipeline_layout_state *state)
912 {
913    const struct anv_descriptor_set_binding_layout *bind_layout =
914       &state->layout->set[set].layout->binding[binding];
915    const unsigned descriptor_offset =
916       binding_descriptor_offset(state, bind_layout, false /* sampler */);
917    const unsigned descriptor_stride =
918       binding_descriptor_stride(state, bind_layout, false /* sampler */);
919    const bool is_bindless =
920       is_binding_bindless(set, binding, false /* sampler */, state);
921 
922    nir_def *set_offset, *surface_index;
923    if (is_bindless) {
924       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
925          set_offset = nir_imm_int(b, 0xdeaddead);
926 
927          nir_def *desc_addr =
928             build_desc_addr_for_binding(b, set, binding, array_index,
929                                         plane, state);
930 
931          surface_index =
932             build_load_descriptor_mem(b, desc_addr, 0, 1, 32, state);
933       } else {
934          set_offset = anv_load_driver_uniform(b, 1, desc_surface_offsets[set]);
935 
936          /* With bindless, indexes are offsets in the descriptor buffer */
937          surface_index =
938             nir_iadd_imm(b,
939                          nir_imul_imm(b, array_index, descriptor_stride),
940                          descriptor_offset);
941          if (plane != 0) {
942             assert(plane < bind_layout->max_plane_count);
943             surface_index = nir_iadd_imm(b, surface_index,
944                                          plane * (descriptor_stride /
945                                                   bind_layout->max_plane_count));
946          }
947 
948          assert(descriptor_offset % 64 == 0);
949          assert(descriptor_stride % 64 == 0);
950       }
951    } else {
952       /* Unused */
953       set_offset = nir_imm_int(b, 0xdeaddead);
954 
955       unsigned bti_stride = bti_multiplier(state, set, binding);
956       assert(bti_stride >= 1);
957 
958       /* For Ycbcr descriptors, add the plane offset */
959       unsigned element_index = plane;
960 
961       /* With the binding table, it's an index in the table */
962       surface_index =
963          nir_iadd_imm(b, nir_imul_imm(b, array_index, bti_stride),
964                          state->set[set].binding[binding].surface_offset + element_index);
965       assert(state->set[set].binding[binding].surface_offset < MAX_BINDING_TABLE_SIZE);
966    }
967 
968    return nir_resource_intel(b,
969                              set_offset,
970                              surface_index,
971                              array_index,
972                              nir_imm_int(b, 0) /* bindless_base_offset */,
973                              .desc_set = set,
974                              .binding = binding,
975                              .resource_block_intel = state->set[set].binding[binding].push_block,
976                              .resource_access_intel =
977                                 (is_bindless ? nir_resource_intel_bindless : 0) |
978                                 (non_uniform ? nir_resource_intel_non_uniform : 0) |
979                                 ((state->set[set].binding[binding].properties &
980                                   BINDING_PROPERTY_PUSHABLE) ? nir_resource_intel_pushable : 0));
981 }
982 
983 static nir_def *
984 build_sampler_handle_for_binding(nir_builder *b,
985                                  unsigned set, unsigned binding,
986                                  nir_def *array_index,
987                                  unsigned plane,
988                                  bool non_uniform,
989                                  const struct apply_pipeline_layout_state *state)
990 {
991    const struct anv_descriptor_set_binding_layout *bind_layout =
992       &state->layout->set[set].layout->binding[binding];
993    const unsigned descriptor_offset =
994       binding_descriptor_offset(state, bind_layout, true /* sampler */);
995    const unsigned descriptor_stride =
996       binding_descriptor_stride(state, bind_layout, true /* sampler */);
997    const bool is_embedded =
998       state->set[set].binding[binding].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER;
999    const bool is_bindless =
1000       is_binding_bindless(set, binding, true /* sampler */, state);
1001    nir_def *set_offset, *sampler_index, *sampler_base_offset = nir_imm_int(b, 0);
1002 
1003    if (is_embedded) {
1004       set_offset = nir_imm_int(b, 0xdeaddead);
1005       sampler_index = nir_load_reloc_const_intel(
1006          b, BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE +
1007          state->set[set].binding[binding].embedded_sampler_index);
1008    } else if (is_bindless) {
1009       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
1010          set_offset = nir_imm_int(b, 0xdeaddead);
1011 
1012          nir_def *desc_addr =
1013             build_desc_addr_for_binding(b, set, binding, array_index,
1014                                         plane, state);
1015 
1016          /* This is anv_sampled_image_descriptor; the sampler handle is always
1017           * in component 1.
1018           */
1019          nir_def *desc_data =
1020             build_load_descriptor_mem(b, desc_addr, 0, 2, 32, state);
1021 
1022          sampler_index = nir_channel(b, desc_data, 1);
1023       } else {
1024          set_offset = anv_load_driver_uniform(b, 1, desc_sampler_offsets[set]);
1025 
1026          uint32_t base_offset = descriptor_offset;
1027 
1028          /* The SAMPLER_STATE can only be located at a 64 byte offset in the
1029           * combined image/sampler case. Combined image/sampler is not
1030           * supported for use with mutable descriptor types.
1031           */
1032          if (bind_layout->data & ANV_DESCRIPTOR_SURFACE_SAMPLER)
1033             base_offset += ANV_SURFACE_STATE_SIZE;
1034 
1035          if (plane != 0) {
1036             assert(plane < bind_layout->max_plane_count);
1037             base_offset += plane * (descriptor_stride /
1038                                     bind_layout->max_plane_count);
1039          }
1040 
1041          sampler_index =
1042             nir_iadd_imm(b,
1043                          nir_imul_imm(b, array_index, descriptor_stride),
1044                          base_offset);
1045       }
1046    } else {
1047       /* Unused */
1048       set_offset = nir_imm_int(b, 0xdeaddead);
1049 
1050       sampler_index =
1051          nir_iadd_imm(b, array_index,
1052                       state->set[set].binding[binding].sampler_offset + plane);
1053    }
1054 
1055    nir_resource_data_intel sampler_resource = nir_resource_intel_sampler;
1056    if (is_bindless)
1057       sampler_resource |= nir_resource_intel_bindless;
1058    if (is_embedded)
1059       sampler_resource |= nir_resource_intel_sampler_embedded;
1060    if (non_uniform)
1061       sampler_resource |= nir_resource_intel_non_uniform;
1062 
1063    return nir_resource_intel(b,
1064                              set_offset,
1065                              sampler_index,
1066                              array_index,
1067                              sampler_base_offset,
1068                              .desc_set = set,
1069                              .binding = binding,
1070                              .resource_access_intel = sampler_resource);
1071 }
1072 
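/* Resolve the dynamic offset for a (dyn_offset_base, array_index) pair taken
 * from a resource index.  build_res_index() stores 0xff as the dynamic offset
 * index when the binding has no dynamic offset, in which case this resolves
 * to 0.
 */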
1073 static nir_def *
1074 build_buffer_dynamic_offset_for_res_index(nir_builder *b,
1075                                           nir_def *dyn_offset_base,
1076                                           nir_def *array_index,
1077                                           struct apply_pipeline_layout_state *state)
1078 {
1079    nir_def *dyn_offset_idx = nir_iadd(b, dyn_offset_base, array_index);
1080 
1081    nir_def *dyn_load =
1082       anv_load_driver_uniform_indexed(b, 1, dynamic_offsets, dyn_offset_idx);
1083 
1084    return nir_bcsel(b, nir_ieq_imm(b, dyn_offset_base, 0xff),
1085                        nir_imm_int(b, 0), dyn_load);
1086 }
1087 
1088 /** Convert a Vulkan resource index into a buffer address
1089  *
1090  * In some cases, this does a memory load from the descriptor set and, in
1091  * others, it simply converts from one form to another.
1092  *
1093  * See build_res_index for details about each resource index format.
1094  */
1095 static nir_def *
1096 build_indirect_buffer_addr_for_res_index(nir_builder *b,
1097                                          const VkDescriptorType desc_type,
1098                                          nir_def *res_index,
1099                                          nir_address_format addr_format,
1100                                          struct apply_pipeline_layout_state *state)
1101 {
1102    struct res_index_defs res = unpack_res_index(b, res_index);
1103 
1104    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1105       assert(addr_format == state->desc_addr_format);
1106       return build_desc_addr_for_res_index(b, desc_type, res_index,
1107                                            addr_format, state);
1108    } else if (addr_format == nir_address_format_32bit_index_offset) {
1109       return nir_vec2(b, nir_iadd(b, res.bti_idx, res.array_index),
1110                          nir_imm_int(b, 0));
1111    }
1112 
1113    nir_def *desc_addr =
1114       build_desc_addr_for_res_index(b, desc_type, res_index,
1115                                     addr_format, state);
1116 
1117    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1118 
1119    if (state->has_dynamic_buffers) {
1120       /* This shader has dynamic offsets and we have no way of knowing
1121        * (save for the dynamic offset base index) if this buffer has a
1122        * dynamic offset.
1123        */
1124       nir_def *dyn_offset_idx =
1125          nir_iadd(b, res.dyn_offset_base, res.array_index);
1126 
1127       nir_def *dyn_load =
1128          anv_load_driver_uniform_indexed(b, 1, dynamic_offsets, dyn_offset_idx);
1129 
1130       nir_def *dynamic_offset =
1131          nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
1132                       nir_imm_int(b, 0), dyn_load);
1133 
1134       /* The dynamic offset gets added to the base pointer so that we
1135        * have a sliding window range.
1136        */
1137       nir_def *base_ptr =
1138          nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
1139       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1140       desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1141                          nir_unpack_64_2x32_split_y(b, base_ptr),
1142                          nir_channel(b, desc, 2),
1143                          nir_channel(b, desc, 3));
1144    }
1145 
1146    /* The last element of the vec4 is always zero.
1147     *
1148     * See also struct anv_address_range_descriptor
1149     */
1150    return nir_vec4(b, nir_channel(b, desc, 0),
1151                       nir_channel(b, desc, 1),
1152                       nir_channel(b, desc, 2),
1153                       nir_imm_int(b, 0));
1154 }
1155 
1156 static nir_def *
1157 build_direct_buffer_addr_for_res_index(nir_builder *b,
1158                                        const VkDescriptorType desc_type,
1159                                        nir_def *res_index,
1160                                        nir_address_format addr_format,
1161                                        struct apply_pipeline_layout_state *state)
1162 {
1163    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1164       assert(addr_format == state->desc_addr_format);
1165       return build_desc_addr_for_res_index(b, desc_type, res_index,
1166                                            addr_format, state);
1167    } else if (addr_format == nir_address_format_32bit_index_offset) {
1168       struct res_index_defs res = unpack_res_index(b, res_index);
1169 
1170       return nir_vec2(b, nir_iadd(b, res.desc_offset_base,
1171                                   nir_imul(b, res.array_index, res.desc_stride)),
1172                       nir_imm_int(b, 0));
1173    }
1174 
1175    nir_def *desc_addr =
1176       build_desc_addr_for_res_index(b, desc_type, res_index,
1177                                     addr_format, state);
1178 
1179    nir_def *addr =
1180       build_load_render_surface_state_address(b, desc_addr, state);
1181 
1182    if (state->has_dynamic_buffers) {
1183       struct res_index_defs res = unpack_res_index(b, res_index);
1184 
1185       /* This shader has dynamic offsets and we have no way of knowing (save
1186        * for the dynamic offset base index) if this buffer has a dynamic
1187        * offset.
1188        */
1189       nir_def *dynamic_offset =
1190          build_buffer_dynamic_offset_for_res_index(
1191             b, res.dyn_offset_base, res.array_index, state);
1192 
1193       /* The dynamic offset gets added to the base pointer so that we
1194        * have a sliding window range.
1195        */
1196       nir_def *base_ptr =
1197          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1198       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1199       addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1200                          nir_unpack_64_2x32_split_y(b, base_ptr),
1201                          nir_channel(b, addr, 2),
1202                          nir_channel(b, addr, 3));
1203    }
1204 
1205    /* The last element of the vec4 is always zero.
1206     *
1207     * See also struct anv_address_range_descriptor
1208     */
1209    return nir_vec4(b, nir_channel(b, addr, 0),
1210                       nir_channel(b, addr, 1),
1211                       nir_channel(b, addr, 2),
1212                       nir_imm_int(b, 0));
1213 }
1214 
1215 static nir_def *
1216 build_buffer_addr_for_res_index(nir_builder *b,
1217                                 const VkDescriptorType desc_type,
1218                                 nir_def *res_index,
1219                                 nir_address_format addr_format,
1220                                 struct apply_pipeline_layout_state *state)
1221 {
1222    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT)
1223       return build_indirect_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1224    else
1225       return build_direct_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1226 }
1227 
1228 static nir_def *
1229 build_buffer_addr_for_binding(nir_builder *b,
1230                               const VkDescriptorType desc_type,
1231                               unsigned set,
1232                               unsigned binding,
1233                               nir_def *res_index,
1234                               nir_address_format addr_format,
1235                               struct apply_pipeline_layout_state *state)
1236 {
1237    if (addr_format != nir_address_format_32bit_index_offset)
1238       return build_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1239 
1240    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1241       const struct anv_descriptor_set_binding_layout *bind_layout =
1242          &state->layout->set[set].layout->binding[binding];
1243       return nir_vec2(b,
1244                       nir_imm_int(b, state->set[set].desc_offset),
1245                       nir_imm_int(b, bind_layout->descriptor_surface_offset));
1246    }
1247 
1248    struct res_index_defs res = unpack_res_index(b, res_index);
1249 
1250    return nir_vec2(b,
1251                    build_surface_index_for_binding(b, set, binding, res.array_index,
1252                                                    0 /* plane */,
1253                                                    false /* non_uniform */,
1254                                                    state),
1255                    nir_imm_int(b, 0));
1256 }
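
/* For illustration: in the binding-table case above, the result is a uvec2
 * of (surface index, offset).  Inline uniform blocks are special-cased to
 * (descriptor buffer surface, offset of the binding within the set's
 * descriptor buffer), since their data lives directly in the descriptor
 * buffer rather than behind a surface of its own.
 */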
1257 
1258 /** Loads descriptor memory for a variable-based deref chain
1259  *
1260  * The deref chain has to terminate at a variable whose descriptor_set and
1261  * binding are set.  This is used for images, textures, and samplers.
1262  */
1263 static nir_def *
1264 build_load_var_deref_surface_handle(nir_builder *b, nir_deref_instr *deref,
1265                                     bool non_uniform,
1266                                     bool *out_is_bindless,
1267                                     struct apply_pipeline_layout_state *state)
1268 {
1269    nir_variable *var = nir_deref_instr_get_variable(deref);
1270 
1271    const uint32_t set = var->data.descriptor_set;
1272    const uint32_t binding = var->data.binding;
1273 
1274    *out_is_bindless =
1275       is_binding_bindless(set, binding, false /* sampler */, state);
1276 
1277    nir_def *array_index;
1278    if (deref->deref_type != nir_deref_type_var) {
1279       assert(deref->deref_type == nir_deref_type_array);
1280       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1281       array_index = deref->arr.index.ssa;
1282    } else {
1283       array_index = nir_imm_int(b, 0);
1284    }
1285 
1286    return build_surface_index_for_binding(b, set, binding, array_index,
1287                                           0 /* plane */, non_uniform, state);
1288 }
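
/* For illustration: an access to `imgs[i]` reaches this function as
 * deref_var(imgs) -> deref_array(i), so array_index is i, while a
 * non-arrayed `img` has no array deref and array_index becomes 0.
 */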
1289 
1290 /** A recursive form of build_res_index()
1291  *
1292  * This recursively walks a resource [re]index chain and builds the resource
1293  * index.  It places the new code with the resource [re]index operation in the
1294  * hopes of better CSE.  This means the cursor is not where you left it when
1295  * this function returns.
1296  */
1297 static nir_def *
1298 build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
1299                           nir_address_format addr_format,
1300                           uint32_t *set, uint32_t *binding,
1301                           struct apply_pipeline_layout_state *state)
1302 {
1303    if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
1304       b->cursor = nir_before_instr(&intrin->instr);
1305       *set = nir_intrinsic_desc_set(intrin);
1306       *binding = nir_intrinsic_binding(intrin);
1307       return build_res_index(b, *set, *binding, intrin->src[0].ssa, state);
1308    } else {
1309       assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
1310       nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
1311       nir_def *index =
1312          build_res_index_for_chain(b, parent, addr_format,
1313                                    set, binding, state);
1314 
1315       b->cursor = nir_before_instr(&intrin->instr);
1316 
1317       return build_res_reindex(b, index, intrin->src[1].ssa);
1318    }
1319 }
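
/* Sketch of the recursion above: a chain such as
 *
 *    %idx = vulkan_resource_index set=1 binding=3 (%i)
 *    %re  = vulkan_resource_reindex %idx, %j
 *
 * gets build_res_index(1, 3, %i) emitted at %idx and a build_res_reindex()
 * emitted at %re, with *set and *binding reporting the original (1, 3).
 */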
1320 
1321 /** Builds a buffer address for a given vulkan [re]index intrinsic
1322  *
1323  * The cursor is not where you left it when this function returns.
1324  */
1325 static nir_def *
1326 build_buffer_addr_for_idx_intrin(nir_builder *b,
1327                                  nir_intrinsic_instr *idx_intrin,
1328                                  nir_address_format addr_format,
1329                                  struct apply_pipeline_layout_state *state)
1330 {
1331    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1332    nir_def *res_index =
1333       build_res_index_for_chain(b, idx_intrin, addr_format,
1334                                 &set, &binding, state);
1335 
1336    const struct anv_descriptor_set_binding_layout *bind_layout =
1337       &state->layout->set[set].layout->binding[binding];
1338 
1339    return build_buffer_addr_for_binding(b, bind_layout->type,
1340                                         set, binding, res_index,
1341                                         addr_format, state);
1342 }
1343 
1344 /** Builds a buffer address for a deref chain
1345  *
1346  * This assumes that you can chase the chain all the way back to the original
1347  * vulkan_resource_index intrinsic.
1348  *
1349  * The cursor is not where you left it when this function returns.
1350  */
1351 static nir_def *
1352 build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
1353                             nir_address_format addr_format,
1354                             struct apply_pipeline_layout_state *state)
1355 {
1356    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1357    if (parent) {
1358       nir_def *addr =
1359          build_buffer_addr_for_deref(b, parent, addr_format, state);
1360 
1361       b->cursor = nir_before_instr(&deref->instr);
1362       return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
1363    }
1364 
1365    nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
1366    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1367 
1368    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1369 
1370    b->cursor = nir_before_instr(&deref->instr);
1371 
1372    return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
1373 }
1374 
1375 static bool
1376 try_lower_direct_buffer_intrinsic(nir_builder *b,
1377                                   nir_intrinsic_instr *intrin, bool is_atomic,
1378                                   struct apply_pipeline_layout_state *state)
1379 {
1380    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1381    if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
1382       return false;
1383 
1384    nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
1385    if (desc == NULL) {
1386       /* We should always be able to find the descriptor for UBO access. */
1387       assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
1388       return false;
1389    }
1390 
1391    const unsigned set = nir_intrinsic_desc_set(desc);
1392    const unsigned binding = nir_intrinsic_binding(desc);
1393 
1394    const struct anv_descriptor_set_binding_layout *bind_layout =
1395       &state->layout->set[set].layout->binding[binding];
1396 
1397    nir_address_format addr_format = descriptor_address_format(desc, state);
1398 
1399    /* Although we could lower non-uniform binding table accesses with
1400     * nir_opt_non_uniform_access, we might as well use an A64 message and
1401     * avoid the loops inserted by that lowering pass.
1402     */
1403    if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
1404       return false;
1405 
1406    if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
1407       /* 64-bit atomics only support A64 messages so we can't lower them to
1408        * the index+offset model.
1409        */
1410       if (is_atomic && intrin->def.bit_size == 64 &&
1411           !state->pdevice->info.has_lsc)
1412          return false;
1413 
1414       /* If we don't have a BTI for this binding and we're using indirect
1415        * descriptors, we'll use A64 messages. This is handled in the main
1416        * lowering path.
1417        */
1418       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1419           !descriptor_has_bti(desc, state))
1420          return false;
1421 
1422       /* Rewrite to 32bit_index_offset whenever we can */
1423       addr_format = nir_address_format_32bit_index_offset;
1424    } else {
1425       assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
1426 
1427       /* If we don't have a BTI for this binding and we're using indirect
1428        * descriptors, we'll use A64 messages. This is handled in the main
1429        * lowering path.
1430        *
1431        * We make an exception for uniform blocks which are built from the
1432        * descriptor set base address + offset. There is no indirect data to
1433        * fetch.
1434        */
1435       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1436           bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK &&
1437           !descriptor_has_bti(desc, state))
1438          return false;
1439 
1440       /* If this is an inline uniform and the shader stage is bindless, we
1441        * can't switch to 32bit_index_offset.
1442        */
1443       if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
1444           !brw_shader_stage_requires_bindless_resources(b->shader->info.stage))
1445          addr_format = nir_address_format_32bit_index_offset;
1446    }
1447 
1448    /* If a dynamic buffer has not been assigned a binding table entry, we
1449     * need to bail here.
1450     */
1451    if ((bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1452         bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) &&
1453        !descriptor_has_bti(desc, state))
1454       return false;
1455 
1456    nir_def *addr =
1457       build_buffer_addr_for_deref(b, deref, addr_format, state);
1458 
1459    b->cursor = nir_before_instr(&intrin->instr);
1460    nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
1461 
1462    return true;
1463 }
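
/* When the function above succeeds, nir_lower_explicit_io_instr() rewrites
 * the load/store/atomic against the index+offset (binding table) address
 * built here, rather than the A64 pointer it would otherwise get from the
 * later lowering path.
 */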
1464 
1465 static bool
1466 lower_load_accel_struct_desc(nir_builder *b,
1467                              nir_intrinsic_instr *load_desc,
1468                              struct apply_pipeline_layout_state *state)
1469 {
1470    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1471 
1472    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1473 
1474    /* It doesn't really matter what address format we choose as
1475     * everything will constant-fold nicely.  Choose one that uses the
1476     * actual descriptor buffer.
1477     */
1478    const nir_address_format addr_format =
1479       nir_address_format_64bit_bounded_global;
1480 
1481    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1482    nir_def *res_index =
1483       build_res_index_for_chain(b, idx_intrin, addr_format,
1484                                 &set, &binding, state);
1485 
1486    b->cursor = nir_before_instr(&load_desc->instr);
1487 
1488    struct res_index_defs res = unpack_res_index(b, res_index);
1489    nir_def *desc_addr =
1490       build_desc_addr_for_binding(b, set, binding, res.array_index,
1491                                   0 /* plane */, state);
1492 
1493    /* Acceleration structure descriptors are always uint64_t */
1494    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
1495 
1496    assert(load_desc->def.bit_size == 64);
1497    assert(load_desc->def.num_components == 1);
1498    nir_def_replace(&load_desc->def, desc);
1499 
1500    return true;
1501 }
1502 
1503 static bool
1504 lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
1505 {
1506    struct apply_pipeline_layout_state *state = _state;
1507 
1508    if (instr->type != nir_instr_type_intrinsic)
1509       return false;
1510 
1511    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1512    switch (intrin->intrinsic) {
1513    case nir_intrinsic_load_deref:
1514    case nir_intrinsic_store_deref:
1515       return try_lower_direct_buffer_intrinsic(b, intrin, false, state);
1516 
1517    case nir_intrinsic_deref_atomic:
1518    case nir_intrinsic_deref_atomic_swap:
1519       return try_lower_direct_buffer_intrinsic(b, intrin, true, state);
1520 
1521    case nir_intrinsic_get_ssbo_size: {
1522       /* The get_ssbo_size intrinsic always just takes an
1523        * index/reindex intrinsic.
1524        */
1525       nir_intrinsic_instr *idx_intrin =
1526          find_descriptor_for_index_src(intrin->src[0], state);
1527       if (idx_intrin == NULL)
1528          return false;
1529 
1530       /* We just checked that this is a BTI descriptor */
1531       const nir_address_format addr_format =
1532          nir_address_format_32bit_index_offset;
1533 
1534       b->cursor = nir_before_instr(&intrin->instr);
1535 
1536       uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1537       nir_def *res_index =
1538          build_res_index_for_chain(b, idx_intrin, addr_format,
1539                                    &set, &binding, state);
1540 
1541       bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1542 
1543       nir_def *surface_index =
1544          build_surface_index_for_binding(b, set, binding,
1545                                          nir_channel(b, res_index, 3),
1546                                          0 /* plane */,
1547                                          non_uniform,
1548                                          state);
1549 
1550       nir_src_rewrite(&intrin->src[0], surface_index);
1551       _mesa_set_add(state->lowered_instrs, intrin);
1552       return true;
1553    }
1554 
1555    case nir_intrinsic_load_vulkan_descriptor:
1556       if (nir_intrinsic_desc_type(intrin) ==
1557           VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1558          return lower_load_accel_struct_desc(b, intrin, state);
1559       return false;
1560 
1561    default:
1562       return false;
1563    }
1564 }
1565 
1566 static bool
1567 lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1568                           struct apply_pipeline_layout_state *state)
1569 {
1570    b->cursor = nir_before_instr(&intrin->instr);
1571 
1572    nir_def *index =
1573       build_res_index(b, nir_intrinsic_desc_set(intrin),
1574                          nir_intrinsic_binding(intrin),
1575                          intrin->src[0].ssa,
1576                          state);
1577 
1578    assert(intrin->def.bit_size == index->bit_size);
1579    assert(intrin->def.num_components == index->num_components);
1580    nir_def_replace(&intrin->def, index);
1581 
1582    return true;
1583 }
1584 
1585 static bool
1586 lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1587                             struct apply_pipeline_layout_state *state)
1588 {
1589    b->cursor = nir_before_instr(&intrin->instr);
1590 
1591    nir_def *index =
1592       build_res_reindex(b, intrin->src[0].ssa,
1593                            intrin->src[1].ssa);
1594 
1595    assert(intrin->def.bit_size == index->bit_size);
1596    assert(intrin->def.num_components == index->num_components);
1597    nir_def_replace(&intrin->def, index);
1598 
1599    return true;
1600 }
1601 
1602 static bool
1603 lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1604                              struct apply_pipeline_layout_state *state)
1605 {
1606    b->cursor = nir_before_instr(&intrin->instr);
1607 
1608    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1609    nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
1610 
1611    nir_def *desc =
1612       build_buffer_addr_for_res_index(b,
1613                                       desc_type, intrin->src[0].ssa,
1614                                       addr_format, state);
1615 
1616    assert(intrin->def.bit_size == desc->bit_size);
1617    assert(intrin->def.num_components == desc->num_components);
1618    nir_def_replace(&intrin->def, desc);
1619 
1620    return true;
1621 }
1622 
1623 static bool
1624 lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
1625                     struct apply_pipeline_layout_state *state)
1626 {
1627    if (_mesa_set_search(state->lowered_instrs, intrin))
1628       return false;
1629 
1630    b->cursor = nir_before_instr(&intrin->instr);
1631 
1632    const nir_address_format addr_format =
1633       nir_address_format_64bit_bounded_global;
1634 
1635    nir_def *desc_addr =
1636       nir_build_addr_iadd_imm(
1637          b,
1638          build_desc_addr_for_res_index(b,
1639                                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1640                                        intrin->src[0].ssa,
1641                                        addr_format, state),
1642          addr_format,
1643          nir_var_mem_ssbo,
1644          state->pdevice->isl_dev.ss.size);
1645 
1646    nir_def *desc_range;
1647    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
1648       /* Load the anv_address_range_descriptor */
1649       desc_range =
1650          build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1651    } else {
1652       /* Build a vec4 similar to anv_address_range_descriptor using the
1653        * RENDER_SURFACE_STATE.
1654        */
1655       desc_range =
1656          build_load_render_surface_state_address(b, desc_addr, state);
1657    }
1658 
1659    nir_def *size = nir_channel(b, desc_range, 2);
1660    nir_def_replace(&intrin->def, size);
1661 
1662    return true;
1663 }
1664 
1665 static bool
1666 lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1667                       struct apply_pipeline_layout_state *state)
1668 {
1669    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1670 
1671    b->cursor = nir_before_instr(&intrin->instr);
1672 
1673    bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1674    bool is_bindless;
1675    nir_def *handle =
1676       build_load_var_deref_surface_handle(b, deref, non_uniform,
1677                                           &is_bindless, state);
1678    nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1679 
1680    return true;
1681 }
1682 
1683 static bool
1684 lower_image_size_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1685                            struct apply_pipeline_layout_state *state)
1686 {
1687    if (nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_3D)
1688       return lower_image_intrinsic(b, intrin, state);
1689 
1690    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1691 
1692    b->cursor = nir_before_instr(&intrin->instr);
1693 
1694    bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1695    bool is_bindless;
1696    nir_def *handle =
1697       build_load_var_deref_surface_handle(b, deref, non_uniform,
1698                                           &is_bindless, state);
1699    nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1700 
1701    nir_variable *var = nir_deref_instr_get_variable(deref);
1702    const uint32_t set = var->data.descriptor_set;
1703    const uint32_t binding = var->data.binding;
1704 
1705    nir_def *array_index;
1706    if (deref->deref_type != nir_deref_type_var) {
1707       assert(deref->deref_type == nir_deref_type_array);
1708       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1709       array_index = deref->arr.index.ssa;
1710    } else {
1711       array_index = nir_imm_int(b, 0);
1712    }
1713 
1714    nir_def *desc_addr = build_desc_addr_for_binding(
1715       b, set, binding, array_index, 0 /* plane */, state);
1716 
1717    b->cursor = nir_after_instr(&intrin->instr);
1718 
1719    nir_def *image_depth =
1720       build_load_storage_3d_image_depth(b, desc_addr,
1721                                         nir_channel(b, &intrin->def, 2),
1722                                         state);
1723 
1724    nir_def *comps[4] = {};
1725    for (unsigned c = 0; c < intrin->def.num_components; c++)
1726       comps[c] = c == 2 ? image_depth : nir_channel(b, &intrin->def, c);
1727 
1728    nir_def *vec = nir_vec(b, comps, intrin->def.num_components);
1729    nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
1730 
1731    return true;
1732 }
1733 
1734 static bool
1735 lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
1736                     struct apply_pipeline_layout_state *state)
1737 {
1738    b->cursor = nir_instr_remove(&intrin->instr);
1739 
1740    /* Any constant-offset load_constant instructions should have been removed
1741     * by constant folding.
1742     */
1743    assert(!nir_src_is_const(intrin->src[0]));
1744    nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
1745                                       nir_intrinsic_base(intrin));
1746 
1747    unsigned load_size = intrin->def.num_components *
1748                         intrin->def.bit_size / 8;
1749    unsigned load_align = intrin->def.bit_size / 8;
1750 
1751    assert(load_size < b->shader->constant_data_size);
1752    unsigned max_offset = b->shader->constant_data_size - load_size;
1753    offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
1754 
1755    nir_def *const_data_addr = nir_pack_64_2x32_split(b,
1756       nir_iadd(b,
1757          nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
1758          offset),
1759       nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
1760 
1761    nir_def *data =
1762       nir_load_global_constant(b, const_data_addr,
1763                                load_align,
1764                                intrin->def.num_components,
1765                                intrin->def.bit_size);
1766 
1767    nir_def_rewrite_uses(&intrin->def, data);
1768 
1769    return true;
1770 }
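
/* Worked example of the clamp above: with constant_data_size = 256 and a
 * 16-byte load (vec4 of 32-bit), max_offset = 256 - 16 = 240, so even an
 * out-of-range source offset cannot make the A64 load read past the
 * shader's constant data.
 */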
1771 
1772 static bool
1773 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
1774                         struct apply_pipeline_layout_state *state)
1775 {
1776    b->cursor = nir_instr_remove(&intrin->instr);
1777 
1778    nir_def *base_workgroup_id =
1779       anv_load_driver_uniform(b, 3, cs.base_work_group_id[0]);
1780    nir_def_rewrite_uses(&intrin->def, base_workgroup_id);
1781 
1782    return true;
1783 }
1784 
1785 static void
1786 lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
1787                 nir_tex_src_type deref_src_type,
1788                 unsigned base_index, unsigned plane,
1789                 struct apply_pipeline_layout_state *state)
1790 {
1791    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
1792    if (deref_src_idx < 0)
1793       return;
1794 
1795    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1796    nir_variable *var = nir_deref_instr_get_variable(deref);
1797 
1798    const bool is_sampler = deref_src_type == nir_tex_src_sampler_deref;
1799    const unsigned set = var->data.descriptor_set;
1800    const unsigned binding = var->data.binding;
1801    const bool bindless = is_binding_bindless(set, binding, is_sampler, state);
1802 
1803    nir_def *array_index = NULL;
1804    if (deref->deref_type != nir_deref_type_var) {
1805       assert(deref->deref_type == nir_deref_type_array);
1806 
1807       array_index = deref->arr.index.ssa;
1808    } else {
1809       array_index = nir_imm_int(b, 0);
1810    }
1811 
1812    nir_tex_src_type offset_src_type;
1813    nir_def *index;
1814    if (deref_src_type == nir_tex_src_texture_deref) {
1815       index = build_surface_index_for_binding(b, set, binding, array_index,
1816                                               plane,
1817                                               tex->texture_non_uniform,
1818                                               state);
1819       offset_src_type = bindless ?
1820                         nir_tex_src_texture_handle :
1821                         nir_tex_src_texture_offset;
1822    } else {
1823       assert(deref_src_type == nir_tex_src_sampler_deref);
1824 
1825       index = build_sampler_handle_for_binding(b, set, binding, array_index,
1826                                                plane,
1827                                                tex->sampler_non_uniform,
1828                                                state);
1829       offset_src_type = bindless ?
1830                         nir_tex_src_sampler_handle :
1831                         nir_tex_src_sampler_offset;
1832    }
1833 
1834    nir_src_rewrite(&tex->src[deref_src_idx].src, index);
1835    tex->src[deref_src_idx].src_type = offset_src_type;
1836 }
1837 
1838 static uint32_t
1839 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1840 {
1841    int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1842    if (plane_src_idx < 0)
1843       return 0;
1844 
1845    unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1846 
1847    nir_tex_instr_remove_src(tex, plane_src_idx);
1848 
1849    return plane;
1850 }
1851 
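/* Selects srcs[idx] out of srcs[start..end) by emitting a balanced bcsel
 * tree (a binary search on idx).  For four sources this builds roughly:
 *
 *    bcsel(idx < 2, bcsel(idx < 1, s0, s1),
 *                   bcsel(idx < 3, s2, s3))
 */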
1852 static nir_def *
1853 build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
1854                        unsigned start, unsigned end)
1855 {
1856    if (start == end - 1) {
1857       return srcs[start];
1858    } else {
1859       unsigned mid = start + (end - start) / 2;
1860       return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
1861                        build_def_array_select(b, srcs, idx, start, mid),
1862                        build_def_array_select(b, srcs, idx, mid, end));
1863    }
1864 }
1865 
1866 static bool
1867 lower_tex(nir_builder *b, nir_tex_instr *tex,
1868           struct apply_pipeline_layout_state *state)
1869 {
1870    unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1871 
1872    b->cursor = nir_before_instr(&tex->instr);
1873 
1874    lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1875                    tex->texture_index, plane, state);
1876    lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1877                    tex->sampler_index, plane, state);
1878 
1879    /* The whole lot will be embedded in the offset/handle source */
1880    tex->texture_index = 0;
1881    tex->sampler_index = 0;
1882 
1883    return true;
1884 }
1885 
1886 static bool
1887 lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
1888                         struct apply_pipeline_layout_state *state)
1889 {
1890    b->cursor = nir_instr_remove(&intrin->instr);
1891 
1892    nir_def *rq_globals = anv_load_driver_uniform(b, 1, ray_query_globals);
1893    nir_def_rewrite_uses(&intrin->def, rq_globals);
1894 
1895    return true;
1896 }
1897 
1898 static bool
1899 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1900 {
1901    struct apply_pipeline_layout_state *state = _state;
1902 
1903    switch (instr->type) {
1904    case nir_instr_type_intrinsic: {
1905       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1906       switch (intrin->intrinsic) {
1907       case nir_intrinsic_vulkan_resource_index:
1908          return lower_res_index_intrinsic(b, intrin, state);
1909       case nir_intrinsic_vulkan_resource_reindex:
1910          return lower_res_reindex_intrinsic(b, intrin, state);
1911       case nir_intrinsic_load_vulkan_descriptor:
1912          return lower_load_vulkan_descriptor(b, intrin, state);
1913       case nir_intrinsic_get_ssbo_size:
1914          return lower_get_ssbo_size(b, intrin, state);
1915       case nir_intrinsic_image_deref_load:
1916       case nir_intrinsic_image_deref_store:
1917       case nir_intrinsic_image_deref_atomic:
1918       case nir_intrinsic_image_deref_atomic_swap:
1919       case nir_intrinsic_image_deref_samples:
1920       case nir_intrinsic_image_deref_load_param_intel:
1921       case nir_intrinsic_image_deref_load_raw_intel:
1922       case nir_intrinsic_image_deref_store_raw_intel:
1923       case nir_intrinsic_image_deref_sparse_load:
1924          return lower_image_intrinsic(b, intrin, state);
1925       case nir_intrinsic_image_deref_size:
1926          return lower_image_size_intrinsic(b, intrin, state);
1927       case nir_intrinsic_load_constant:
1928          return lower_load_constant(b, intrin, state);
1929       case nir_intrinsic_load_base_workgroup_id:
1930          return lower_base_workgroup_id(b, intrin, state);
1931       case nir_intrinsic_load_ray_query_global_intel:
1932          return lower_ray_query_globals(b, intrin, state);
1933       default:
1934          return false;
1935       }
1936       break;
1937    }
1938    case nir_instr_type_tex:
1939       return lower_tex(b, nir_instr_as_tex(instr), state);
1940    default:
1941       return false;
1942    }
1943 }
1944 
1945 struct binding_info {
1946    uint32_t binding;
1947    uint8_t set;
1948    uint16_t score;
1949 };
1950 
1951 static int
1952 compare_binding_infos(const void *_a, const void *_b)
1953 {
1954    const struct binding_info *a = _a, *b = _b;
1955    if (a->score != b->score)
1956       return b->score - a->score;
1957 
1958    if (a->set != b->set)
1959       return a->set - b->set;
1960 
1961    return a->binding - b->binding;
1962 }
1963 
1964 #ifndef NDEBUG
1965 static void
1966 anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout,
1967                              nir_shader *shader)
1968 {
1969    nir_foreach_function_impl(impl, shader) {
1970       nir_foreach_block(block, impl) {
1971          nir_foreach_instr(instr, block) {
1972             if (instr->type != nir_instr_type_intrinsic)
1973                continue;
1974 
1975             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1976             if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
1977                continue;
1978 
1979             unsigned set = nir_intrinsic_desc_set(intrin);
1980             assert(layout->set[set].layout);
1981          }
1982       }
1983    }
1984 }
1985 #endif
1986 
1987 static bool
1988 binding_is_promotable_to_push(const struct anv_descriptor_set_layout *set_layout,
1989                               const struct anv_descriptor_set_binding_layout *bind_layout)
1990 {
1991    if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
1992       return true;
1993 
1994    if (set_layout->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
1995                             VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))
1996       return false;
1997 
1998    return (bind_layout->flags & non_pushable_binding_flags) == 0;
1999 }
2000 
2001 static void
2002 add_null_bti_entry(struct anv_pipeline_bind_map *map)
2003 {
2004    map->surface_to_descriptor[map->surface_count++] =
2005       (struct anv_pipeline_binding) {
2006          .set = ANV_DESCRIPTOR_SET_NULL,
2007    };
2008    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2009 }
2010 
2011 static void
2012 add_bti_entry(struct anv_pipeline_bind_map *map,
2013               uint32_t set,
2014               uint32_t binding,
2015               uint32_t element,
2016               uint32_t plane,
2017               const struct anv_descriptor_set_binding_layout *bind_layout)
2018 {
2019    map->surface_to_descriptor[map->surface_count++] =
2020       (struct anv_pipeline_binding) {
2021          .set = set,
2022          .binding = binding,
2023          .index = bind_layout->descriptor_index + element,
2024          .set_offset = bind_layout->descriptor_surface_offset +
2025                        element * bind_layout->descriptor_surface_stride +
2026                        plane * bind_layout->descriptor_data_surface_size,
2027          .plane = plane,
2028    };
2029    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2030 }
2031 
2032 static void
2033 add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
2034                       uint32_t set,
2035                       uint32_t binding,
2036                       uint32_t element,
2037                       const struct anv_pipeline_sets_layout *layout,
2038                       const struct anv_descriptor_set_binding_layout *bind_layout)
2039 {
2040    map->surface_to_descriptor[map->surface_count++] =
2041       (struct anv_pipeline_binding) {
2042          .set = set,
2043          .binding = binding,
2044          .index = bind_layout->descriptor_index + element,
2045          .set_offset = bind_layout->descriptor_surface_offset +
2046                        element * bind_layout->descriptor_surface_stride,
2047          .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
2048    };
2049    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2050 }
2051 
2052 static void
2053 add_sampler_entry(struct anv_pipeline_bind_map *map,
2054                   uint32_t set,
2055                   uint32_t binding,
2056                   uint32_t element,
2057                   uint32_t plane,
2058                   const struct anv_pipeline_sets_layout *layout,
2059                   const struct anv_descriptor_set_binding_layout *bind_layout)
2060 {
2061    assert((bind_layout->descriptor_index + element) < layout->set[set].layout->descriptor_count);
2062    map->sampler_to_descriptor[map->sampler_count++] =
2063       (struct anv_pipeline_binding) {
2064          .set = set,
2065          .binding = binding,
2066          .index = bind_layout->descriptor_index + element,
2067          .plane = plane,
2068    };
2069 }
2070 
2071 static void
2072 add_push_entry(struct anv_pipeline_push_map *push_map,
2073                uint32_t set,
2074                uint32_t binding,
2075                uint32_t element,
2076                const struct anv_pipeline_sets_layout *layout,
2077                const struct anv_descriptor_set_binding_layout *bind_layout)
2078 {
2079    push_map->block_to_descriptor[push_map->block_count++] =
2080       (struct anv_pipeline_binding) {
2081          .set = set,
2082          .binding = binding,
2083          .index = bind_layout->descriptor_index + element,
2084          .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
2085    };
2086 }
2087 
2088 static void
2089 add_embedded_sampler_entry(struct apply_pipeline_layout_state *state,
2090                            struct anv_pipeline_bind_map *map,
2091                            uint32_t set, uint32_t binding)
2092 {
2093    state->set[set].binding[binding].embedded_sampler_index =
2094       map->embedded_sampler_count;
2095    struct anv_pipeline_embedded_sampler_binding *sampler =
2096       &map->embedded_sampler_to_binding[map->embedded_sampler_count++];
2097    const struct anv_descriptor_set_layout *set_layout =
2098       state->layout->set[set].layout;
2099    const struct anv_descriptor_set_binding_layout *bind_layout =
2100       &set_layout->binding[binding];
2101 
2102    *sampler = (struct anv_pipeline_embedded_sampler_binding) {
2103       .set = set,
2104       .binding = binding,
2105    };
2106 
2107    assert(sizeof(sampler->key.sampler) ==
2108           sizeof(bind_layout->immutable_samplers[0]->state_no_bc[0]));
2109    memcpy(sampler->key.sampler,
2110           bind_layout->immutable_samplers[0]->state_no_bc[0],
2111           sizeof(sampler->key.sampler));
2112 
2113    assert(sizeof(sampler->key.color) ==
2114           sizeof(bind_layout->immutable_samplers[0]->vk.border_color_value.uint32));
2115    memcpy(sampler->key.color,
2116           bind_layout->immutable_samplers[0]->vk.border_color_value.uint32,
2117           sizeof(sampler->key.color));
2118 }
2119 
2120 static bool
2121 binding_should_use_surface_binding_table(const struct apply_pipeline_layout_state *state,
2122                                          const struct anv_descriptor_set_binding_layout *bind_layout,
2123                                          uint32_t set, uint32_t binding)
2124 {
2125    if ((bind_layout->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) == 0)
2126       return false;
2127 
2128    if (state->pdevice->always_use_bindless &&
2129        (bind_layout->data & ANV_DESCRIPTOR_SURFACE))
2130       return false;
2131 
2132    if (state->set[set].binding[binding].properties &
2133        BINDING_PROPERTY_NO_BINDING_TABLE)
2134       return false;
2135 
2136    return true;
2137 }
2138 
2139 static bool
2140 binding_should_use_sampler_binding_table(const struct apply_pipeline_layout_state *state,
2141                                          const struct anv_descriptor_set_binding_layout *binding)
2142 {
2143    if ((binding->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE) == 0)
2144       return false;
2145 
2146    if (state->pdevice->always_use_bindless &&
2147        (binding->data & ANV_DESCRIPTOR_SAMPLER))
2148       return false;
2149 
2150    return true;
2151 }
2152 
2153 void
2154 anv_nir_apply_pipeline_layout(nir_shader *shader,
2155                               const struct anv_physical_device *pdevice,
2156                               enum brw_robustness_flags robust_flags,
2157                               bool independent_sets,
2158                               const struct anv_pipeline_sets_layout *layout,
2159                               struct anv_pipeline_bind_map *map,
2160                               struct anv_pipeline_push_map *push_map,
2161                               void *push_map_mem_ctx)
2162 {
2163    void *mem_ctx = ralloc_context(NULL);
2164 
2165 #ifndef NDEBUG
2166    /* We should not have any reference to a descriptor set that is not
2167     * given through the pipeline layout (layout->set[set].layout == NULL).
2168     */
2169    anv_validate_pipeline_layout(layout, shader);
2170 #endif
2171 
2172    const bool bindless_stage =
2173       brw_shader_stage_requires_bindless_resources(shader->info.stage);
2174    struct apply_pipeline_layout_state state = {
2175       .pdevice = pdevice,
2176       .layout = layout,
2177       .desc_addr_format = bindless_stage ?
2178                           nir_address_format_64bit_global_32bit_offset :
2179                           nir_address_format_32bit_index_offset,
2180       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
2181       .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
2182       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
2183       .has_independent_sets = independent_sets,
2184    };
2185 
2186    /* Compute the number of push block items required. */
2187    unsigned push_block_count = 0;
2188    for (unsigned s = 0; s < layout->num_sets; s++) {
2189       if (!layout->set[s].layout)
2190          continue;
2191 
2192       const unsigned count = layout->set[s].layout->binding_count;
2193       state.set[s].binding = rzalloc_array_size(mem_ctx, sizeof(state.set[s].binding[0]), count);
2194 
2195       const struct anv_descriptor_set_layout *set_layout = layout->set[s].layout;
2196       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2197          if (set_layout->binding[b].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
2198             push_block_count += set_layout->binding[b].array_size;
2199       }
2200    }
2201 
2202    /* Find all used sets/bindings */
2203    nir_shader_instructions_pass(shader, get_used_bindings,
2204                                 nir_metadata_all, &state);
2205 
2206    /* Assign a BTI to each used descriptor set */
2207    for (unsigned s = 0; s < layout->num_sets; s++) {
2208       if (state.desc_addr_format != nir_address_format_32bit_index_offset) {
2209          state.set[s].desc_offset = BINDLESS_OFFSET;
2210       } else if (state.set[s].desc_buffer_used) {
2211          map->surface_to_descriptor[map->surface_count] =
2212             (struct anv_pipeline_binding) {
2213                .set = (layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) ?
2214                       ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER :
2215                       ANV_DESCRIPTOR_SET_DESCRIPTORS,
2216                .binding = UINT32_MAX,
2217                .index = s,
2218             };
2219          state.set[s].desc_offset = map->surface_count++;
2220       }
2221    }
2222 
2223    /* Assign a block index for each surface */
2224    push_map->block_to_descriptor =
2225       rzalloc_array(push_map_mem_ctx, struct anv_pipeline_binding,
2226                     map->surface_count + push_block_count);
2227 
2228    memcpy(push_map->block_to_descriptor,
2229           map->surface_to_descriptor,
2230           sizeof(push_map->block_to_descriptor[0]) * map->surface_count);
2231    push_map->block_count = map->surface_count;
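
   /* At this point the first map->surface_count push blocks mirror the
    * binding table entries one-to-one; blocks for pushable bindings are
    * appended after them below.
    */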
2232 
2233    /* Count used bindings, assign embedded sampler indices & add push blocks
2234     * for promotion to push constants
2235     */
2236    unsigned used_binding_count = 0;
2237    for (uint32_t set = 0; set < layout->num_sets; set++) {
2238       struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2239       if (!set_layout)
2240          continue;
2241 
2242       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2243          if (state.set[set].binding[b].use_count == 0)
2244             continue;
2245 
2246          used_binding_count++;
2247 
2248          const struct anv_descriptor_set_binding_layout *bind_layout =
2249             &set_layout->binding[b];
2250 
2251          if (state.set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
2252             add_embedded_sampler_entry(&state, map, set, b);
2253 
2254          if (binding_is_promotable_to_push(set_layout, bind_layout)) {
2255             if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
2256                state.set[set].binding[b].push_block = push_map->block_count;
2257                for (unsigned i = 0; i < bind_layout->array_size; i++)
2258                   add_push_entry(push_map, set, b, i, layout, bind_layout);
2259             } else {
2260                state.set[set].binding[b].push_block = state.set[set].desc_offset;
2261             }
2262          }
2263       }
2264    }
2265 
2266    struct binding_info *infos =
2267       rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
2268    used_binding_count = 0;
2269    for (uint32_t set = 0; set < layout->num_sets; set++) {
2270       const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2271       if (!set_layout)
2272          continue;
2273 
2274       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2275          if (state.set[set].binding[b].use_count == 0)
2276             continue;
2277 
2278          const struct anv_descriptor_set_binding_layout *binding =
2279                &layout->set[set].layout->binding[b];
2280 
2281          /* Do a fixed-point calculation to generate a score based on the
2282           * number of uses and the binding array size.  We shift by 7 instead
2283           * of 8 because we're going to use the top bit below to give
2284           * everything which does not support bindless a much higher
2285           * priority than things which do.
2286           */
2287          uint16_t score = ((uint16_t)state.set[set].binding[b].use_count << 7) /
2288                           binding->array_size;
2289 
2290          /* If the descriptor type doesn't support bindless then put it at the
2291           * beginning so we guarantee it gets a slot.
2292           */
2293          if (!anv_descriptor_supports_bindless(pdevice, set_layout, binding))
2294             score |= 1 << 15;
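
         /* Worked example: a binding used 3 times with array_size 4 scores
          * (3 << 7) / 4 = 96; if it cannot be bindless, the top bit turns
          * that into 0x8060, sorting it ahead of every bindless-capable
          * binding.
          */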
2295 
2296          infos[used_binding_count++] = (struct binding_info) {
2297             .set = set,
2298             .binding = b,
2299             .score = score,
2300          };
2301       }
2302    }
2303 
2304    /* Order the binding infos based on score with highest scores first.  If
2305     * scores are equal we then order by set and binding.
2306     */
2307    qsort(infos, used_binding_count, sizeof(struct binding_info),
2308          compare_binding_infos);
2309 
2310    for (unsigned i = 0; i < used_binding_count; i++) {
2311       unsigned set = infos[i].set, b = infos[i].binding;
2312       assert(layout->set[set].layout);
2313       const struct anv_descriptor_set_layout *set_layout =
2314          layout->set[set].layout;
2315       const struct anv_descriptor_set_binding_layout *binding =
2316             &set_layout->binding[b];
2317 
2318       const uint32_t array_size = binding->array_size;
2319 
2320       if (binding->dynamic_offset_index >= 0)
2321          state.has_dynamic_buffers = true;
2322 
2323       const unsigned array_multiplier = bti_multiplier(&state, set, b);
2324       assert(array_multiplier >= 1);
2325 
2326       /* Assume bindless by default */
2327       state.set[set].binding[b].surface_offset = BINDLESS_OFFSET;
2328       state.set[set].binding[b].sampler_offset = BINDLESS_OFFSET;
2329 
2330       if (binding_should_use_surface_binding_table(&state, binding, set, b)) {
2331          if (map->surface_count + array_size * array_multiplier > MAX_BINDING_TABLE_SIZE ||
2332              anv_descriptor_requires_bindless(pdevice, set_layout, binding) ||
2333              brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2334             /* If this descriptor doesn't fit in the binding table or if it
2335              * requires bindless for some reason, flag it as bindless.
2336              */
2337             assert(anv_descriptor_supports_bindless(pdevice, set_layout, binding));
2338          } else {
2339             state.set[set].binding[b].surface_offset = map->surface_count;
2340             if (binding->dynamic_offset_index < 0) {
2341                struct anv_sampler **samplers = binding->immutable_samplers;
2342                uint8_t max_planes = bti_multiplier(&state, set, b);
2343                for (unsigned i = 0; i < binding->array_size; i++) {
2344                   uint8_t planes = samplers ? samplers[i]->n_planes : 1;
2345                   for (uint8_t p = 0; p < max_planes; p++) {
2346                      if (p < planes) {
2347                         add_bti_entry(map, set, b, i, p, binding);
2348                      } else {
2349                         add_null_bti_entry(map);
2350                      }
2351                   }
2352                }
2353             } else {
2354                for (unsigned i = 0; i < binding->array_size; i++)
2355                   add_dynamic_bti_entry(map, set, b, i, layout, binding);
2356             }
2357          }
2358          assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2359       }
2360 
2361       if (binding_should_use_sampler_binding_table(&state, binding)) {
2362          if (map->sampler_count + array_size * array_multiplier > MAX_SAMPLER_TABLE_SIZE ||
2363              anv_descriptor_requires_bindless(pdevice, set_layout, binding) ||
2364              brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2365             /* If this descriptor doesn't fit in the binding table or if it
2366              * requires bindless for some reason, flag it as bindless.
2367              *
2368              * We also make large sampler arrays bindless because we can avoid
2369              * using indirect sends thanks to bindless samplers being packed
2370              * less tightly than the sampler table.
2371              */
2372             assert(anv_descriptor_supports_bindless(pdevice, set_layout, binding));
2373          } else {
2374             state.set[set].binding[b].sampler_offset = map->sampler_count;
2375             uint8_t max_planes = bti_multiplier(&state, set, b);
2376             for (unsigned i = 0; i < binding->array_size; i++) {
2377                for (uint8_t p = 0; p < max_planes; p++) {
2378                   add_sampler_entry(map, set, b, i, p, layout, binding);
2379                }
2380             }
2381          }
2382       }
2383 
2384       if (binding->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
2385          state.set[set].binding[b].surface_offset = state.set[set].desc_offset;
2386       }
2387 
2388 #if 0
2389       fprintf(stderr, "set=%u binding=%u surface_offset=0x%08x require_bindless=%u type=%s\n",
2390               set, b,
2391               state.set[set].binding[b].surface_offset,
2392               anv_descriptor_requires_bindless(pdevice, set_layout, binding),
2393               vk_DescriptorType_to_str(binding->type));
2394 #endif
2395    }
2396 
2397    /* Before we do the normal lowering, we look for any SSBO operations
2398     * that we can lower to the BTI model and lower them up-front.  The BTI
2399     * model can perform better than the A64 model for a couple reasons:
2400     *
2401     *  1. 48-bit address calculations are potentially expensive and using
2402     *     the BTI model lets us simply compute 32-bit offsets and the
2403     *     hardware adds the 64-bit surface base address.
2404     *
2405     *  2. The BTI messages, because they use surface states, do bounds
2406     *     checking for us.  With the A64 model, we have to do our own
2407     *     bounds checking and this means wider pointers and extra
2408     *     calculations and branching in the shader.
2409     *
2410     * The solution to both of these is to convert things to the BTI model
2411     * opportunistically.  We need to do this as a pre-pass for two
2412     * reasons:
2413     *
2414     *  1. The BTI model requires nir_address_format_32bit_index_offset
2415     *     pointers which are not the same type as the pointers needed for
2416     *     the A64 model.  Because all our derefs are set up for the A64
2417     *     model (in case we have variable pointers), we have to crawl all
2418     *     the way back to the vulkan_resource_index intrinsic and build a
2419     *     completely fresh index+offset calculation.
2420     *
2421     *  2. Because the variable-pointers-capable lowering that we do as part
2422     *     of apply_pipeline_layout_block is destructive (it really has to
2423     *     be to handle variable pointers properly), we've lost the deref
2424     *     information by the time we get to the load/store/atomic
2425     *     intrinsics in that pass.
2426     */
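   /* For illustration: given a shader doing
    *
    *    layout(set=0, binding=0) buffer Buf { uint data[]; };
    *    ... data[i] ...
    *
    * this pre-pass turns the access into a (binding table index, 32-bit
    * offset) style load/store where it can, while anything it cannot handle
    * keeps its deref and is later lowered to an A64 global access.
    */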
2427    nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
2428                                 nir_metadata_control_flow,
2429                                 &state);
2430 
2431    /* We just got rid of all the direct access.  DCE the now-dead code so
2432     * it's not in the way when we do our indirect lowering.
2433     */
2434    nir_opt_dce(shader);
2435 
2436    nir_shader_instructions_pass(shader, apply_pipeline_layout,
2437                                 nir_metadata_control_flow,
2438                                 &state);
2439 
2440    ralloc_free(mem_ctx);
2441 
2442    if (brw_shader_stage_is_bindless(shader->info.stage)) {
2443       assert(map->surface_count == 0);
2444       assert(map->sampler_count == 0);
2445    }
2446 
2447 #if 0
2448    fprintf(stderr, "bti:\n");
2449    for (unsigned i = 0; i < map->surface_count; i++) {
2450       fprintf(stderr, "  %03i: set=%03u binding=%06i index=%u plane=%u set_offset=0x%08x dyn_offset=0x%08x\n", i,
2451               map->surface_to_descriptor[i].set,
2452               map->surface_to_descriptor[i].binding,
2453               map->surface_to_descriptor[i].index,
2454               map->surface_to_descriptor[i].plane,
2455               map->surface_to_descriptor[i].set_offset,
2456               map->surface_to_descriptor[i].dynamic_offset_index);
2457    }
2458    fprintf(stderr, "sti:\n");
2459    for (unsigned i = 0; i < map->sampler_count; i++) {
2460       fprintf(stderr, "  %03i: set=%03u binding=%06i index=%u plane=%u\n", i,
2461               map->sampler_to_descriptor[i].set,
2462               map->sampler_to_descriptor[i].binding,
2463               map->sampler_to_descriptor[i].index,
2464               map->sampler_to_descriptor[i].plane);
2465    }
2466 #endif
2467 
2468    /* Now that we're done computing the surface and sampler portions of the
2469     * bind map, hash them.  This lets us quickly determine if the actual
2470     * mapping has changed and not just a no-op pipeline change.
2471     */
2472    _mesa_sha1_compute(map->surface_to_descriptor,
2473                       map->surface_count * sizeof(struct anv_pipeline_binding),
2474                       map->surface_sha1);
2475    _mesa_sha1_compute(map->sampler_to_descriptor,
2476                       map->sampler_count * sizeof(struct anv_pipeline_binding),
2477                       map->sampler_sha1);
2478 }
2479