/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "compiler/elk/elk_nir.h"
#include "util/mesa-sha1.h"
#include "util/set.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET        255

#define sizeof_field(type, field) sizeof(((type *)0)->field)

struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   const struct anv_pipeline_layout *layout;
   nir_address_format ssbo_addr_format;
   nir_address_format ubo_addr_format;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   bool uses_constants;
   bool has_dynamic_buffers;
   uint8_t constants_offset;
   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};

static nir_address_format
addr_format_for_desc_type(VkDescriptorType desc_type,
                          struct apply_pipeline_layout_state *state)
{
   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return state->ssbo_addr_format;

   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      return state->ubo_addr_format;

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
      return nir_address_format_32bit_index_offset;

   default:
      unreachable("Unsupported descriptor type");
   }
}

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding.  This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (bind_layout->descriptor_stride)
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

static bool
get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         add_binding(state, nir_intrinsic_desc_set(intrin),
                     nir_intrinsic_binding(intrin));
         break;

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel:
         add_deref_src_binding(state, intrin->src[0]);
         break;

      case nir_intrinsic_load_constant:
         state->uses_constants = true;
         break;

      default:
         break;
      }
      break;
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
      add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
      break;
   }
   default:
      break;
   }

   return false;
}

static nir_intrinsic_instr *
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return NULL;

   return intrin;
}
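
/* Illustrative sketch of the chain walked above (hypothetical SSA names,
 * not taken from any particular shader):
 *
 *    %10 = vulkan_resource_index ...          <- intrinsic returned
 *    %11 = vulkan_resource_reindex %10, %5
 *    %12 = vulkan_resource_reindex %11, %7    <- src passed in
 *
 * Starting from %12, the loop follows src[0] through each reindex until it
 * reaches the vulkan_resource_index, and returns NULL if the chain bottoms
 * out in anything else.
 */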

static bool
descriptor_has_bti(nir_intrinsic_instr *intrin,
                   struct apply_pipeline_layout_state *state)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index;
   if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
      surface_index = state->set[set].desc_offset;
   else
      surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index. */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}

static nir_address_format
descriptor_address_format(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
}

static nir_intrinsic_instr *
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return NULL;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

static nir_def *
build_load_descriptor_mem(nir_builder *b,
                          nir_def *desc_addr, unsigned desc_offset,
                          unsigned num_components, unsigned bit_size,
                          struct apply_pipeline_layout_state *state)
{
   nir_def *surface_index = nir_channel(b, desc_addr, 0);
   nir_def *offset32 =
      nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);

   return nir_load_ubo(b, num_components, bit_size,
                       surface_index, offset32,
                       .align_mul = 8,
                       .align_offset = desc_offset % 8,
                       .range_base = 0,
                       .range = ~0);
}
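
/* Rough shape of the load emitted above, assuming (purely for illustration)
 * a descriptor buffer that landed in binding table slot 3 and a descriptor
 * placed 64 bytes into it:
 *
 *    desc_addr = vec2(3, 64)
 *    value     = load_ubo(surface_index = 3, offset = 64 + desc_offset)
 *
 * i.e. descriptor memory is read as plain UBO data through the binding
 * table entry reserved for the descriptor set's buffer.
 */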

/** Build a Vulkan resource index
 *
 * A "resource index" is the term used by our SPIR-V parser and the relevant
 * NIR intrinsics for a reference into a descriptor set.  It acts much like a
 * deref in NIR except that it accesses opaque descriptors instead of memory.
 *
 * Coming out of SPIR-V, both the resource indices (in the form of
 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
 * of nir_deref_instr) use the same vector component/bit size.  The meaning
 * of those values for memory derefs (nir_deref_instr) is given by the
 * nir_address_format associated with the descriptor type.  For resource
 * indices, it's an encoding entirely internal to ANV which describes, in
 * some sense, the address of the descriptor.  Thanks to the NIR/SPIR-V
 * rules, it must be packed into the same size SSA values as a memory
 * address.  For this reason, the actual encoding may depend both on the
 * address format for memory derefs and the descriptor address format.
 *
 * The load_vulkan_descriptor intrinsic exists to provide a transition point
 * between these two forms of derefs: descriptor and memory.
 */
static nir_def *
build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
                nir_def *array_index, nir_address_format addr_format,
                struct apply_pipeline_layout_state *state)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t array_size = bind_layout->array_size;

   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
      uint32_t set_idx = state->set[set].desc_offset;

      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t packed = (bind_layout->descriptor_stride << 16) | (set_idx << 8) | dynamic_offset_index;

      return nir_vec4(b, nir_imm_int(b, packed),
                         nir_imm_int(b, bind_layout->descriptor_offset),
                         nir_imm_int(b, array_size - 1),
                         array_index);
   }

   case nir_address_format_32bit_index_offset: {
      if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
         uint32_t surface_index = state->set[set].desc_offset;
         return nir_imm_ivec2(b, surface_index,
                                 bind_layout->descriptor_offset);
      } else {
         uint32_t surface_index = state->set[set].surface_offsets[binding];
         assert(array_size > 0 && array_size <= UINT16_MAX);
         assert(surface_index <= UINT16_MAX);
         uint32_t packed = ((array_size - 1) << 16) | surface_index;
         return nir_vec2(b, array_index, nir_imm_int(b, packed));
      }
   }

   default:
      unreachable("Unsupported address format");
   }
}
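
/* For reference, the layout of the resource index built above (derived from
 * the packing code, components numbered comp0..comp3 / comp0..comp1):
 *
 *    64bit_global_32bit_offset / 64bit_bounded_global (vec4):
 *       comp0: descriptor_stride << 16 | desc buffer BTI << 8 | dyn offset idx
 *       comp1: descriptor_offset into the descriptor buffer
 *       comp2: array_size - 1 (used later to bound the array index)
 *       comp3: array_index
 *
 *    32bit_index_offset (vec2):
 *       comp0: array_index (or the desc buffer BTI for inline uniforms)
 *       comp1: (array_size - 1) << 16 | surface_index
 *              (or descriptor_offset for inline uniforms)
 *
 * unpack_res_index() below is the inverse of the vec4 form.
 */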

struct res_index_defs {
   nir_def *set_idx;
   nir_def *dyn_offset_base;
   nir_def *desc_offset_base;
   nir_def *array_index;
   nir_def *desc_stride;
};

static struct res_index_defs
unpack_res_index(nir_builder *b, nir_def *index)
{
   struct res_index_defs defs;

   nir_def *packed = nir_channel(b, index, 0);
   defs.desc_stride = nir_extract_u8(b, packed, nir_imm_int(b, 2));
   defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
   defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));

   defs.desc_offset_base = nir_channel(b, index, 1);
   defs.array_index = nir_umin(b, nir_channel(b, index, 2),
                                  nir_channel(b, index, 3));

   return defs;
}

/** Adjust a Vulkan resource index
 *
 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
 * For array descriptors, it allows us to adjust the array index.  Thanks to
 * variable pointers, we cannot always fold this re-index operation into the
 * vulkan_resource_index intrinsic and we have to do it based on nothing but
 * the address format.
 */
static nir_def *
build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
                  nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      return nir_vec4(b, nir_channel(b, orig, 0),
                         nir_channel(b, orig, 1),
                         nir_channel(b, orig, 2),
                         nir_iadd(b, nir_channel(b, orig, 3), delta));

   case nir_address_format_32bit_index_offset:
      return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta),
                         nir_channel(b, orig, 1));

   default:
      unreachable("Unhandled address format");
   }
}

/** Get the address for a descriptor given its resource index
 *
 * Because of the re-indexing operations, we can't bounds check descriptor
 * array access until we have the final index.  That means we end up doing the
 * bounds check here, if needed.  See unpack_res_index() for more details.
 *
 * This function takes both a bind_layout and a desc_type which are used to
 * determine the descriptor stride for array descriptors.  The bind_layout is
 * optional for buffer descriptor types.
 */
static nir_def *
build_desc_addr(nir_builder *b,
                const struct anv_descriptor_set_binding_layout *bind_layout,
                const VkDescriptorType desc_type,
                nir_def *index, nir_address_format addr_format,
                struct apply_pipeline_layout_state *state)
{
   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      struct res_index_defs res = unpack_res_index(b, index);

      nir_def *desc_offset = res.desc_offset_base;
      if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
         /* Compute the actual descriptor offset.  For inline uniform blocks,
          * the array index is ignored as they are only allowed to be a single
          * descriptor (not an array) and there is no concept of a "stride".
          */
         desc_offset =
            nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
      }

      return nir_vec2(b, res.set_idx, desc_offset);
   }

   case nir_address_format_32bit_index_offset:
      assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
      return index;

   default:
      unreachable("Unhandled address format");
   }
}
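
/* In other words, for the A64 formats the descriptor address produced above
 * is roughly:
 *
 *    vec2(desc buffer binding table index,
 *         desc_offset_base + array_index * desc_stride)
 *
 * with array_index already clamped to array_size - 1 by unpack_res_index(),
 * which is where the bounds check mentioned above actually happens.  Inline
 * uniform blocks skip the stride term since they are never arrayed.
 */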

/** Convert a Vulkan resource index into a buffer address
 *
 * In some cases, this does a memory load from the descriptor set and, in
 * others, it simply converts from one form to another.
 *
 * See build_res_index for details about each resource index format.
 */
static nir_def *
build_buffer_addr_for_res_index(nir_builder *b,
                                const VkDescriptorType desc_type,
                                nir_def *res_index,
                                nir_address_format addr_format,
                                struct apply_pipeline_layout_state *state)
{
   if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
      assert(addr_format == nir_address_format_32bit_index_offset);
      return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
   } else if (addr_format == nir_address_format_32bit_index_offset) {
      nir_def *array_index = nir_channel(b, res_index, 0);
      nir_def *packed = nir_channel(b, res_index, 1);
      nir_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));

      return nir_vec2(b, nir_iadd(b, surface_index, array_index),
                         nir_imm_int(b, 0));
   }

   nir_def *desc_addr =
      build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);

   nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);

   if (state->has_dynamic_buffers) {
      struct res_index_defs res = unpack_res_index(b, res_index);

      /* This shader has dynamic offsets and we have no way of knowing
       * (apart from the dynamic offset base index) if this buffer has a
       * dynamic offset.
       */
      nir_def *dyn_offset_idx =
         nir_iadd(b, res.dyn_offset_base, res.array_index);

      nir_def *dyn_load =
         nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
                                .base = offsetof(struct anv_push_constants, dynamic_offsets),
                                .range = MAX_DYNAMIC_BUFFERS * 4);

      nir_def *dynamic_offset =
         nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
                      nir_imm_int(b, 0), dyn_load);

      /* The dynamic offset gets added to the base pointer so that we
       * have a sliding window range.
       */
      nir_def *base_ptr =
         nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
      base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
      desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                         nir_unpack_64_2x32_split_y(b, base_ptr),
                         nir_channel(b, desc, 2),
                         nir_channel(b, desc, 3));
   }

   /* The last element of the vec4 is always zero.
    *
    * See also struct anv_address_range_descriptor
    */
   return nir_vec4(b, nir_channel(b, desc, 0),
                      nir_channel(b, desc, 1),
                      nir_channel(b, desc, 2),
                      nir_imm_int(b, 0));
}
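
/* Rough shape of the A64 buffer address returned above, matching how the
 * channels are consumed elsewhere in this file (e.g. lower_get_ssbo_size()
 * reads channel 2 as the buffer size):
 *
 *    comp0: buffer base address, low 32 bits  (+ dynamic offset, if any)
 *    comp1: buffer base address, high 32 bits
 *    comp2: buffer size in bytes
 *    comp3: 0
 */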

/** Loads descriptor memory for a variable-based deref chain
 *
 * The deref chain has to terminate at a variable with a descriptor_set and
 * binding set.  This is used for images, textures, and samplers.
 */
static nir_def *
build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref,
                                    unsigned desc_offset,
                                    unsigned num_components, unsigned bit_size,
                                    struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   const uint32_t set = var->data.descriptor_set;
   const uint32_t binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
         &state->layout->set[set].layout->binding[binding];

   nir_def *array_index;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
      array_index = deref->arr.index.ssa;
   } else {
      array_index = nir_imm_int(b, 0);
   }

   /* It doesn't really matter what address format we choose as everything
    * will constant-fold nicely.  Choose one that uses the actual descriptor
    * buffer so we don't run into issues with index/offset assumptions.
    */
   const nir_address_format addr_format =
      nir_address_format_64bit_bounded_global;

   nir_def *res_index =
      build_res_index(b, set, binding, array_index, addr_format, state);

   nir_def *desc_addr =
      build_desc_addr(b, bind_layout, bind_layout->type,
                      res_index, addr_format, state);

   return build_load_descriptor_mem(b, desc_addr, desc_offset,
                                    num_components, bit_size, state);
}

/** A recursive form of build_res_index()
 *
 * This recursively walks a resource [re]index chain and builds the resource
 * index.  It places the new code with the resource [re]index operation in the
 * hopes of better CSE.  This means the cursor is not where you left it when
 * this function returns.
 */
static nir_def *
build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
                          nir_address_format addr_format,
                          uint32_t *set, uint32_t *binding,
                          struct apply_pipeline_layout_state *state)
{
   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
      b->cursor = nir_before_instr(&intrin->instr);
      *set = nir_intrinsic_desc_set(intrin);
      *binding = nir_intrinsic_binding(intrin);
      return build_res_index(b, *set, *binding, intrin->src[0].ssa,
                             addr_format, state);
   } else {
      assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
      nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
      nir_def *index =
         build_res_index_for_chain(b, parent, addr_format,
                                   set, binding, state);

      b->cursor = nir_before_instr(&intrin->instr);

      return build_res_reindex(b, index, intrin->src[1].ssa, addr_format);
   }
}

/** Builds a buffer address for a given vulkan [re]index intrinsic
 *
 * The cursor is not where you left it when this function returns.
 */
static nir_def *
build_buffer_addr_for_idx_intrin(nir_builder *b,
                                 nir_intrinsic_instr *idx_intrin,
                                 nir_address_format addr_format,
                                 struct apply_pipeline_layout_state *state)
{
   uint32_t set = UINT32_MAX, binding = UINT32_MAX;
   nir_def *res_index =
      build_res_index_for_chain(b, idx_intrin, addr_format,
                                &set, &binding, state);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   return build_buffer_addr_for_res_index(b, bind_layout->type,
                                          res_index, addr_format, state);
}

/** Builds a buffer address for a deref chain
 *
 * This assumes that you can chase the chain all the way back to the original
 * vulkan_resource_index intrinsic.
 *
 * The cursor is not where you left it when this function returns.
 */
static nir_def *
build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
                            nir_address_format addr_format,
                            struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_def *addr =
         build_buffer_addr_for_deref(b, parent, addr_format, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);

   b->cursor = nir_before_instr(&deref->instr);

   return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
}

static bool
try_lower_direct_buffer_intrinsic(nir_builder *b,
                                  nir_intrinsic_instr *intrin,
                                  struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
      return false;

   nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
   if (desc == NULL) {
      /* We should always be able to find the descriptor for UBO access. */
      assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
      return false;
   }

   nir_address_format addr_format = descriptor_address_format(desc, state);

   if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
      /* Normal binding table-based messages can't handle non-uniform access
       * so we have to fall back to A64.
       */
      if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
         return false;

      if (!descriptor_has_bti(desc, state))
         return false;

      /* Rewrite to 32bit_index_offset whenever we can */
      addr_format = nir_address_format_32bit_index_offset;
   } else {
      assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

      /* Rewrite to 32bit_index_offset whenever we can */
      if (descriptor_has_bti(desc, state))
         addr_format = nir_address_format_32bit_index_offset;
   }

   nir_def *addr =
      build_buffer_addr_for_deref(b, deref, addr_format, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr, addr_format);

   return true;
}
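
/* Recap of the logic above: SSBO access is only lowered here when it is
 * uniform and the descriptor has a binding table entry, in which case it is
 * rewritten to 32bit_index_offset; otherwise it is left for the generic A64
 * lowering.  UBO access is always lowered here, using 32bit_index_offset
 * when a binding table entry exists and the descriptor's own address format
 * otherwise.
 */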

static bool
lower_load_accel_struct_desc(nir_builder *b,
                             nir_intrinsic_instr *load_desc,
                             struct apply_pipeline_layout_state *state)
{
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);

   /* It doesn't really matter what address format we choose as
    * everything will constant-fold nicely.  Choose one that uses the
    * actual descriptor buffer.
    */
   const nir_address_format addr_format =
      nir_address_format_64bit_bounded_global;

   uint32_t set = UINT32_MAX, binding = UINT32_MAX;
   nir_def *res_index =
      build_res_index_for_chain(b, idx_intrin, addr_format,
                                &set, &binding, state);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   b->cursor = nir_before_instr(&load_desc->instr);

   nir_def *desc_addr =
      build_desc_addr(b, bind_layout, bind_layout->type,
                      res_index, addr_format, state);

   /* Acceleration structure descriptors are always uint64_t */
   nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);

   assert(load_desc->def.bit_size == 64);
   assert(load_desc->def.num_components == 1);
   nir_def_replace(&load_desc->def, desc);

   return true;
}

static bool
lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_deref_atomic_swap:
      return try_lower_direct_buffer_intrinsic(b, intrin, state);

   case nir_intrinsic_load_vulkan_descriptor:
      if (nir_intrinsic_desc_type(intrin) ==
          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
         return lower_load_accel_struct_desc(b, intrin, state);
      return false;

   default:
      return false;
   }
}

static bool
lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);

   nir_def *index =
      build_res_index(b, nir_intrinsic_desc_set(intrin),
                         nir_intrinsic_binding(intrin),
                         intrin->src[0].ssa,
                         addr_format, state);

   assert(intrin->def.bit_size == index->bit_size);
   assert(intrin->def.num_components == index->num_components);
   nir_def_replace(&intrin->def, index);

   return true;
}

static bool
lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);

   nir_def *index =
      build_res_reindex(b, intrin->src[0].ssa,
                           intrin->src[1].ssa,
                           addr_format);

   assert(intrin->def.bit_size == index->bit_size);
   assert(intrin->def.num_components == index->num_components);
   nir_def_replace(&intrin->def, index);

   return true;
}

static bool
lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);

   nir_def *desc =
      build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa,
                                      addr_format, state);

   assert(intrin->def.bit_size == desc->bit_size);
   assert(intrin->def.num_components == desc->num_components);
   nir_def_replace(&intrin->def, desc);

   return true;
}

static bool
lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state);

   nir_def *desc =
      build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                      intrin->src[0].ssa, addr_format, state);

   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      nir_def *size = nir_channel(b, desc, 2);
      nir_def_replace(&intrin->def, size);
      break;
   }

   case nir_address_format_32bit_index_offset:
      /* The binding table index is the first component of the address.  The
       * back-end wants a scalar binding table index source.
       */
      nir_src_rewrite(&intrin->src[0], nir_channel(b, desc, 0));
      break;

   default:
      unreachable("Unsupported address format");
   }

   return true;
}

static bool
image_binding_needs_lowered_surface(nir_variable *var)
{
   return !(var->data.access & ACCESS_NON_READABLE) &&
          var->data.image.format != PIPE_FORMAT_NONE;
}

static bool
lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned binding_offset = state->set[set].surface_offsets[binding];

   b->cursor = nir_before_instr(&intrin->instr);

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      const unsigned param = nir_intrinsic_base(intrin);

      nir_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, param * 16,
                                             intrin->def.num_components,
                                             intrin->def.bit_size, state);

      nir_def_rewrite_uses(&intrin->def, desc);
   } else {
      nir_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = deref->arr.index.ssa;
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }

   return true;
}

static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   /* Any constant-offset load_constant instructions should have been removed
    * by constant folding.
    */
   assert(!nir_src_is_const(intrin->src[0]));
   nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
                                      nir_intrinsic_base(intrin));

   nir_def *data;
   if (!anv_use_relocations(state->pdevice)) {
      unsigned load_size = intrin->def.num_components *
                           intrin->def.bit_size / 8;
      unsigned load_align = intrin->def.bit_size / 8;

      assert(load_size < b->shader->constant_data_size);
      unsigned max_offset = b->shader->constant_data_size - load_size;
      offset = nir_umin(b, offset, nir_imm_int(b, max_offset));

      nir_def *const_data_base_addr = nir_pack_64_2x32_split(b,
         nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_LOW),
         nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_HIGH));

      data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr,
                                                     nir_u2u64(b, offset)),
                                      load_align,
                                      intrin->def.num_components,
                                      intrin->def.bit_size);
   } else {
      nir_def *index = nir_imm_int(b, state->constants_offset);

      data = nir_load_ubo(b, intrin->num_components, intrin->def.bit_size,
                          index, offset,
                          .align_mul = intrin->def.bit_size / 8,
                          .align_offset = 0,
                          .range_base = nir_intrinsic_base(intrin),
                          .range = nir_intrinsic_range(intrin));
   }

   nir_def_rewrite_uses(&intrin->def, data);

   return true;
}
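
/* Two strategies are used above: without relocations, the shader's constant
 * data block is reached through a 64-bit address supplied via the
 * ELK_SHADER_RELOC_CONST_DATA_ADDR_{LOW,HIGH} relocations and read with
 * load_global_constant; with relocations, it is read as a UBO through the
 * binding table slot recorded in state->constants_offset.
 */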

static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
                        struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *base_workgroup_id =
      nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
                             .base = offsetof(struct anv_push_constants, cs.base_work_group_id),
                             .range = 3 * sizeof(uint32_t));
   nir_def_rewrite_uses(&intrin->def, base_workgroup_id);

   return true;
}

static void
lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
                nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_tex_src_type offset_src_type;
   nir_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, plane_offset,
                                             2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
            struct anv_sampler **immutable_samplers =
               state->layout->set[set].layout->binding[binding].immutable_samplers;
            if (immutable_samplers) {
               /* Arrays of YCbCr samplers are tightly packed in the binding
                * tables; compute the offset of an element in the array by
                * adding the number of planes of all preceding elements.
                */
               unsigned desc_arr_index = 0;
               for (int i = 0; i < arr_index; i++)
                  desc_arr_index += immutable_samplers[i]->n_planes;
               *base_index += desc_arr_index;
            } else {
               *base_index += arr_index;
            }
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             * If sampler Y’CBCR conversion is enabled, the combined image
             * sampler must be indexed only by constant integral expressions
             * when aggregated into arrays in shader code, irrespective of
             * the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = deref->arr.index.ssa;
         }
      }
   }

   if (index) {
      nir_src_rewrite(&tex->src[deref_src_idx].src, index);
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);

   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

static nir_def *
build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
                       unsigned start, unsigned end)
{
   if (start == end - 1) {
      return srcs[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
                       build_def_array_select(b, srcs, idx, start, mid),
                       build_def_array_select(b, srcs, idx, mid, end));
   }
}
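
/* The helper above builds a balanced tree of bcsel instructions that picks
 * srcs[idx] at run time.  For the eight-entry comps[] array used by the
 * gfx7 swizzle lowering below, that works out to seven bcsels per output
 * component (a rough count from the recursion, not something the code
 * asserts).
 */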

static void
lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
                       struct apply_pipeline_layout_state *state)
{
   assert(state->pdevice->info.verx10 == 70);
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
       nir_tex_instr_is_query(tex) ||
       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
       (tex->is_shadow && tex->is_new_style_shadow))
      return;

   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   const unsigned plane_offset =
      plane * sizeof(struct anv_texture_swizzle_descriptor);
   nir_def *swiz =
      build_load_var_deref_descriptor_mem(b, deref, plane_offset,
                                          1, 32, state);

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->def.bit_size == 32);
   assert(tex->def.num_components == 4);

   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
   nir_def *undef = nir_undef(b, 1, 32);
   nir_def *comps[8];
   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
      comps[i] = undef;

   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
   else
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->def, 0);
   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->def, 1);
   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->def, 2);
   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->def, 3);

   nir_def *swiz_comps[4];
   for (unsigned i = 0; i < 4; i++) {
      nir_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
   }
   nir_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);

   /* Rewrite uses before we insert so we don't rewrite this use */
   nir_def_rewrite_uses_after(&tex->def,
                                  swiz_tex_res,
                                  swiz_tex_res->parent_instr);
}
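
/* The swizzle value consumed above is a single 32-bit word holding one
 * ISL_CHANNEL_SELECT_* selector per byte, byte i driving result component i,
 * which is why the loop extracts byte i and feeds it to
 * build_def_array_select() over the eight comps[] entries.
 */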

static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct apply_pipeline_layout_state *state)
{
   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader.  Do this
    * before we lower the derefs away so we can still find the descriptor.
    */
   if (state->pdevice->info.verx10 == 70)
      lower_gfx7_tex_swizzle(b, tex, plane, state);

   b->cursor = nir_before_instr(&tex->instr);

   lower_tex_deref(b, tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   return true;
}

static bool
apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         return lower_res_index_intrinsic(b, intrin, state);
      case nir_intrinsic_vulkan_resource_reindex:
         return lower_res_reindex_intrinsic(b, intrin, state);
      case nir_intrinsic_load_vulkan_descriptor:
         return lower_load_vulkan_descriptor(b, intrin, state);
      case nir_intrinsic_get_ssbo_size:
         return lower_get_ssbo_size(b, intrin, state);
      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_deref_atomic_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel:
         return lower_image_intrinsic(b, intrin, state);
      case nir_intrinsic_load_constant:
         return lower_load_constant(b, intrin, state);
      case nir_intrinsic_load_base_workgroup_id:
         return lower_base_workgroup_id(b, intrin, state);
      default:
         return false;
      }
      break;
   }
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), state);
   default:
      return false;
   }
}

struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

void
anv_nir_apply_pipeline_layout(nir_shader *shader,
                              const struct anv_physical_device *pdevice,
                              enum elk_robustness_flags robust_flags,
                              const struct anv_pipeline_layout *layout,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .layout = layout,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
      .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_shader_instructions_pass(shader, get_used_bindings,
                                nir_metadata_all, &state);

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .index = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants && anv_use_relocations(pdevice)) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         const struct anv_descriptor_set_binding_layout *binding =
               &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size.  We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless strictly higher
          * priority than things which do.
          */
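         /* Worked example with illustrative numbers only: a binding used 3
          * times with array_size 1 scores (3 << 7) / 1 = 384, while one used
          * 3 times with array_size 8 scores (3 << 7) / 8 = 48.  Bindings
          * that cannot go bindless additionally get bit 15 set below, so
          * they always sort ahead of those that can.
          */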
1302          uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
1303                           binding->array_size;
1304 
1305          /* If the descriptor type doesn't support bindless then put it at the
1306           * beginning so we guarantee it gets a slot.
1307           */
1308          if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
1309              !anv_descriptor_supports_bindless(pdevice, binding, false))
1310             score |= 1 << 15;
1311 
1312          infos[used_binding_count++] = (struct binding_info) {
1313             .set = set,
1314             .binding = b,
1315             .score = score,
1316          };
1317       }
1318    }
1319 
1320    /* Order the binding infos based on score with highest scores first.  If
1321     * scores are equal we then order by set and binding.
1322     */
1323    qsort(infos, used_binding_count, sizeof(struct binding_info),
1324          compare_binding_infos);
1325 
1326    for (unsigned i = 0; i < used_binding_count; i++) {
1327       unsigned set = infos[i].set, b = infos[i].binding;
1328       const struct anv_descriptor_set_binding_layout *binding =
1329             &layout->set[set].layout->binding[b];
1330 
1331       const uint32_t array_size = binding->array_size;
1332 
1333       if (binding->dynamic_offset_index >= 0)
1334          state.has_dynamic_buffers = true;
1335 
1336       if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
1337          assert(map->surface_count + array_size <= MAX_BINDING_TABLE_SIZE);
1338          assert(!anv_descriptor_requires_bindless(pdevice, binding, false));
1339          state.set[set].surface_offsets[b] = map->surface_count;
1340          if (binding->dynamic_offset_index < 0) {
1341             struct anv_sampler **samplers = binding->immutable_samplers;
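            /* One binding-table entry is emitted per array element and per
             * plane; n_planes > 1 presumably only happens for immutable
             * samplers bound to multi-planar (e.g. YCbCr) formats.
             */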
1342             for (unsigned i = 0; i < binding->array_size; i++) {
1343                uint8_t planes = samplers ? samplers[i]->n_planes : 1;
1344                for (uint8_t p = 0; p < planes; p++) {
1345                   map->surface_to_descriptor[map->surface_count++] =
1346                      (struct anv_pipeline_binding) {
1347                         .set = set,
1348                         .index = binding->descriptor_index + i,
1349                         .plane = p,
1350                      };
1351                }
1352             }
1353          } else {
1354             for (unsigned i = 0; i < binding->array_size; i++) {
1355                map->surface_to_descriptor[map->surface_count++] =
1356                   (struct anv_pipeline_binding) {
1357                      .set = set,
1358                      .index = binding->descriptor_index + i,
1359                      .dynamic_offset_index =
1360                         layout->set[set].dynamic_offset_start +
1361                         binding->dynamic_offset_index + i,
1362                   };
1363             }
1364          }
1365          assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1366       }
1367 
1368       if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
1369          if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
1370              anv_descriptor_requires_bindless(pdevice, binding, true)) {
1371             /* If this descriptor doesn't fit in the binding table or if it
1372              * requires bindless for some reason, flag it as bindless.
1373              *
1374              * We also make large sampler arrays bindless because we can avoid
1375              * using indirect sends thanks to bindless samplers being packed
1376              * less tightly than the sampler table.
1377              */
1378             assert(anv_descriptor_supports_bindless(pdevice, binding, true));
1379             state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
1380          } else {
1381             state.set[set].sampler_offsets[b] = map->sampler_count;
1382             struct anv_sampler **samplers = binding->immutable_samplers;
1383             for (unsigned i = 0; i < binding->array_size; i++) {
1384                uint8_t planes = samplers ? samplers[i]->n_planes : 1;
1385                for (uint8_t p = 0; p < planes; p++) {
1386                   map->sampler_to_descriptor[map->sampler_count++] =
1387                      (struct anv_pipeline_binding) {
1388                         .set = set,
1389                         .index = binding->descriptor_index + i,
1390                         .plane = p,
1391                      };
1392                }
1393             }
1394          }
1395       }
1396    }
1397 
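   /* Propagate storage-image information from the shader variables onto
    * the binding-table entries assigned above; lowered_storage_surface
    * presumably tells later state setup to also emit the lowered surface
    * used when the image format needs lowering for storage access.
    */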
1398    nir_foreach_image_variable(var, shader) {
1399       const uint32_t set = var->data.descriptor_set;
1400       const uint32_t binding = var->data.binding;
1401       const struct anv_descriptor_set_binding_layout *bind_layout =
1402             &layout->set[set].layout->binding[binding];
1403       const uint32_t array_size = bind_layout->array_size;
1404 
1405       if (state.set[set].use_count[binding] == 0)
1406          continue;
1407 
1408       if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
1409          continue;
1410 
1411       struct anv_pipeline_binding *pipe_binding =
1412          &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
1413       for (unsigned i = 0; i < array_size; i++) {
1414          assert(pipe_binding[i].set == set);
1415          assert(pipe_binding[i].index == bind_layout->descriptor_index + i);
1416 
1417          pipe_binding[i].lowered_storage_surface =
1418             image_binding_needs_lowered_surface(var);
1419       }
1420    }
1421 
1422    /* Before we do the normal lowering, we look for any SSBO operations
1423     * that we can lower to the BTI model and lower them up-front.  The BTI
1424     * model can perform better than the A64 model for a couple reasons:
1425     *
1426     *  1. 48-bit address calculations are potentially expensive and using
1427     *     the BTI model lets us simply compute 32-bit offsets and the
1428     *     hardware adds the 64-bit surface base address.
1429     *
1430     *  2. The BTI messages, because they use surface states, do bounds
1431     *     checking for us.  With the A64 model, we have to do our own
1432     *     bounds checking and this means wider pointers and extra
1433     *     calculations and branching in the shader.
1434     *
1435     * The solution to both of these is to convert things to the BTI model
1436     * opportunistically.  We need to do this as a pre-pass for two
1437     * reasons:
1438     *
1439     *  1. The BTI model requires nir_address_format_32bit_index_offset
1440     *     pointers which are not the same type as the pointers needed for
1441     *     the A64 model.  Because all our derefs are set up for the A64
1442     *     model (in case we have variable pointers), we have to crawl all
1443     *     the way back to the vulkan_resource_index intrinsic and build a
1444     *     completely fresh index+offset calculation.
1445     *
1446     *  2. Because the variable-pointers-capable lowering that we do as part
1447     *     of apply_pipeline_layout is destructive (it really has to
1448     *     be to handle variable pointers properly), we've lost the deref
1449     *     information by the time we get to the load/store/atomic
1450     *     intrinsics in that pass.
1451     */
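   /* Roughly: any SSBO access that lower_direct_buffer_instr can trace back
    * to a fixed (set, binding) is rewritten into a 32-bit index+offset
    * access against a binding-table surface here, rather than taking the
    * A64 path in the later pass.
    */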
1452    nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
1453                                 nir_metadata_control_flow,
1454                                 &state);
1455 
1456    /* We just got rid of all the direct access.  Delete it so it's not in the
1457     * way when we do our indirect lowering.
1458     */
1459    nir_opt_dce(shader);
1460 
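   /* Main lowering pass: rewrite the remaining resource intrinsics (and,
    * presumably, texture/sampler sources as well) in terms of the surface
    * and sampler offsets computed above.
    */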
1461    nir_shader_instructions_pass(shader, apply_pipeline_layout,
1462                                 nir_metadata_control_flow,
1463                                 &state);
1464 
1465    ralloc_free(mem_ctx);
1466 
1467    /* Now that we're done computing the surface and sampler portions of the
1468     * bind map, hash them.  This lets us quickly determine if the actual
1469     * mapping has changed and not just a no-op pipeline change.
1470     */
1471    _mesa_sha1_compute(map->surface_to_descriptor,
1472                       map->surface_count * sizeof(struct anv_pipeline_binding),
1473                       map->surface_sha1);
1474    _mesa_sha1_compute(map->sampler_to_descriptor,
1475                       map->sampler_count * sizeof(struct anv_pipeline_binding),
1476                       map->sampler_sha1);
1477 }
1478