/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding offset-based input/output intrinsics.
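 *
 * For example (illustrative NIR only; the exact base/offset values depend on
 * the driver's type_size callback), a fragment-shader input load such as
 *
 *    %d = deref_var &color (shader_in vec4)
 *    %v = load_deref %d
 *
 * is rewritten into something along the lines of
 *
 *    %b = load_barycentric_pixel
 *    %v = load_interpolated_input %b, %offset (base=N, component=0)
 *
 * or, for flat and non-fragment inputs, a plain load_input.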
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_xfb_info.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
   struct set variable_names;
};

static const char *
add_variable_name(struct lower_io_state *state, const char *name)
{
   if (!name)
      return NULL;

   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add(&state->variable_names, name, &found);
   if (!found)
      entry->key = (void*)ralloc_strdup(state->builder.shader, name);
   return entry->key;
}

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_ssbo_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_ssbo_atomic_swap;
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_address_format addr_format,
                        nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      if (addr_format != nir_address_format_2x32bit_global)
         return nir_intrinsic_global_atomic;
      else
         return nir_intrinsic_global_atomic_2x32;

   case nir_intrinsic_deref_atomic_swap:
      if (addr_format != nir_address_format_2x32bit_global)
         return nir_intrinsic_global_atomic_swap;
      else
         return nir_intrinsic_global_atomic_swap_2x32;

   default:
      unreachable("Invalid global atomic");
   }
}

static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_shared_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_shared_atomic_swap;
   default:
      unreachable("Invalid shared atomic");
   }
}

static nir_intrinsic_op
task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
   case nir_intrinsic_deref_atomic:
      return nir_intrinsic_task_payload_atomic;
   case nir_intrinsic_deref_atomic_swap:
      return nir_intrinsic_task_payload_atomic_swap;
   default:
      unreachable("Invalid task payload atomic");
   }
}

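/**
 * Assign var->data.driver_location for every variable of the given mode,
 * walking the variables in declaration order and advancing by the
 * driver-provided type_size callback.  The total size (in type_size units)
 * is returned in *size.
 */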
void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                         unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable_with_modes(var, shader, mode) {
      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}

/**
 * Some inputs and outputs are arrayed, meaning that there is an extra level
 * of array indexing to handle mismatches between the shader interface and the
 * dispatch pattern of the shader.  For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
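 *
 * For example, a geometry-shader input declared in GLSL as "in vec4 color[]"
 * is arrayed: every access carries an extra per-vertex index on top of the
 * usual location/offset addressing.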
 */
bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (stage == MESA_SHADER_MESH) {
      /* NV_mesh_shader: this is a flat array for the whole workgroup. */
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
         return var->data.per_primitive;
   }

   if (var->data.mode == nir_var_shader_in) {
      if (var->data.per_vertex) {
         assert(stage == MESA_SHADER_FRAGMENT);
         return true;
      }

      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;
   }

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}

static bool
uses_high_dvec2_semantic(struct lower_io_state *state,
                         const nir_variable *var)
{
   return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
          state->options & nir_lower_io_lower_64bit_to_32_new &&
          var->data.mode == nir_var_shader_in &&
          glsl_type_is_dual_slot(glsl_without_array(var->type));
}

static unsigned
get_number_of_slots(struct lower_io_state *state,
                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* NV_mesh_shader:
    * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
    * as opposed to D3D-style mesh shaders where it's addressed by
    * the primitive index.
    * Prevent assigning several slots to primitive indices,
    * to avoid some issues.
    */
   if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
       var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
       !nir_is_arrayed_io(var, state->builder.shader->info.stage))
      return 1;

   return state->type_size(type, var->data.bindless) /
          (uses_high_dvec2_semantic(state, var) ? 2 : 1);
}

static nir_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
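    *
    * For example, for a deref chain like input[vertex].field[i], the vertex
    * index is returned separately through *array_index and the offset
    * computed below covers only .field[i].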
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = (*p)->arr.index.ssa;
      p++;
   }

   if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_def *mul =
            nir_amul_imm(b, (*p)->arr.index.ssa, size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

static bool
is_medium_precision(const nir_shader *shader, const nir_variable *var)
{
   if (shader->options->io_options & nir_io_mediump_is_32bit)
      return false;

   return var->data.precision == GLSL_PRECISION_MEDIUM ||
          var->data.precision == GLSL_PRECISION_LOW;
}

static nir_def *
emit_load(struct lower_io_state *state,
          nir_def *array_index, nir_variable *var, nir_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type, bool high_dvec2)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
             var->data.per_vertex) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample)
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         if (var->data.per_primitive)
            op = nir_intrinsic_load_per_primitive_input;
         else if (array_index)
            op = nir_intrinsic_load_per_vertex_input;
         else
            op = nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = !array_index ? nir_intrinsic_load_output :
           var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
                                     nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;
   load->name = add_variable_name(state, var->name);

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (nir_intrinsic_has_range(load)) {
      const struct glsl_type *type = var->type;
      if (array_index)
         type = glsl_get_array_element(type);
      unsigned var_size = state->type_size(type, var->data.bindless);
      nir_intrinsic_set_range(load, var_size);
   }

   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = { 0 };
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision = is_medium_precision(b->shader, var);
      semantics.high_dvec2 = high_dvec2;
      /* "per_vertex" is misnamed. It means "explicit interpolation with
       * the original vertex order", which is a stricter version of
       * INTERP_MODE_EXPLICIT.
       */
      semantics.interp_explicit_strict = var->data.per_vertex;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_def_init(&load->instr, &load->def, num_components, bit_size);
   nir_builder_instr_insert(b, &load->instr);

   return &load->def;
}

static nir_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
   if (intrin->def.bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
                                           nir_lower_io_lower_64bit_to_32)))) {
      nir_builder *b = &state->builder;
      bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);

      /* Each slot is a dual slot, so divide the offset within the variable
       * by 2.
       */
      if (use_high_dvec2_semantic)
         offset = nir_ushr_imm(b, offset, 1);

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      bool high_dvec2 = false;
      while (dest_comp < intrin->def.num_components) {
         const unsigned num_comps =
            MIN2(intrin->def.num_components - dest_comp,
                 (4 - component) / 2);

         nir_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32, high_dvec2);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;

         if (use_high_dvec2_semantic) {
            /* Increment the offset when we wrap around the dual slot. */
            if (high_dvec2)
               offset = nir_iadd_imm(b, offset, slot_size);
            high_dvec2 = !high_dvec2;
         } else {
            offset = nir_iadd_imm(b, offset, slot_size);
         }
      }

      return nir_vec(b, comp64, intrin->def.num_components);
   } else if (intrin->def.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->def.num_components, 32,
                                nir_type_bool32, false));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->def.num_components,
                       intrin->def.bit_size,
                       nir_get_nir_type_for_glsl_type(type), false);
   }
}

static void
emit_store(struct lower_io_state *state, nir_def *data,
           nir_def *array_index, nir_variable *var, nir_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op =
      !array_index ? nir_intrinsic_store_output :
      var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
                                nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;
   store->name = add_variable_name(state, var->name);

   store->src[0] = nir_src_for_ssa(data);

   const struct glsl_type *type = var->type;
   if (array_index)
      type = glsl_get_array_element(type);
   unsigned var_size = state->type_size(type, var->data.bindless);
   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_range(store, var_size);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
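   /* For geometry shaders, pack a 2-bit stream index for each written
    * component.  If NIR_STREAM_PACKED is set, var->data.stream already holds
    * the packed per-component form; otherwise replicate the single stream ID
    * across all components.
    */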
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision = is_medium_precision(b->shader, var);
   semantics.per_view = var->data.per_view;
   semantics.invariant = var->data.invariant;

   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_def *array_index, nir_variable *var, nir_def *offset,
            unsigned component, const struct glsl_type *type)
{
   const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
   if (intrin->src[1].ssa->bit_size == 64 &&
       (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
                                           nir_lower_io_lower_64bit_to_32_new)))) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_def *data32 = nir_bitcast_vector(b, data, 32);

            uint32_t write_mask32 = 0;
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, array_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}

static nir_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->def.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = { 0 };
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision = is_medium_precision(b->shader, var);

   nir_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->def.num_components,
                                  intrin->def.bit_size,
                                  &bary_setup->def,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics,
                                  .dest_type = nir_type_float | intrin->def.bit_size);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);

      nir_def *offset;
      nir_def *array_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise get_io_offset would return an out-of-bounds offset,
          * which may result in out-of-bounds loads/stores of inputs/outputs
          * and could cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_def *zero =
               nir_imm_zero(b, intrin->def.num_components,
                            intrin->def.bit_size);
            nir_def_rewrite_uses(&intrin->def, zero);
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      nir_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, array_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, array_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(array_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_def_rewrite_uses(&intrin->def, replacement);
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   state.builder = nir_builder_create(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;
   _mesa_set_init(&state.variable_names, state.dead_ctx,
                  _mesa_hash_string, _mesa_key_string_equal);

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets.  For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics.  This pass requires that all
 * deref chains are complete and contain no casts.
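 *
 * A typical driver call might look like the following (illustrative only;
 * "type_size_vec4_slots" stands in for whatever driver-specific callback
 * measures a type in the driver's location units):
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 type_size_vec4_slots, (nir_lower_io_options)0);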
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_io_impl(impl, modes, type_size, options);
   }

   return progress;
}

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

nir_def *
nir_build_addr_iadd(nir_builder *b, nir_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    nir_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_2x32bit_global: {
      assert(addr->num_components == 2);
      nir_def *lo = nir_channel(b, addr, 0);
      nir_def *hi = nir_channel(b, addr, 1);
      nir_def *res_lo = nir_iadd(b, lo, offset);
      nir_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
      nir_def *res_hi = nir_iadd(b, hi, carry);
      return nir_vec2(b, res_lo, res_hi);
   }

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
                                    nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
          */
         nir_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

static unsigned
addr_get_offset_bit_size(nir_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;
   return addr->bit_size;
}

nir_def *
nir_build_addr_iadd_imm(nir_builder *b, nir_def *addr,
                        nir_address_format addr_format,
                        nir_variable_mode modes,
                        int64_t offset)
{
   if (!offset)
      return addr;

   return nir_build_addr_iadd(
      b, addr, addr_format, modes,
      nir_imm_intN_t(b, offset,
                     addr_get_offset_bit_size(addr, addr_format)));
}

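/* Build the base address of a variable in the given address format: start
 * from the mode's base pointer (scratch, shared, constant, global, ...) where
 * one exists and add the variable's driver_location.
 */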
static nir_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_mem_task_payload |
                            nir_var_mem_global |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size = nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_2x32bit_global:
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      nir_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_global:
         base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return nir_build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                     var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      case nir_var_mem_global:
         return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
                             var->data.driver_location);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}

static nir_def *
build_runtime_addr_mode_check(nir_builder *b, nir_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
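      /* Bits 62-63 of a 62bit_generic address hold the mode tag written by
       * build_addr_for_var(): 0x2 means shader/function temp, 0x1 means
       * shared, and 0x0 or 0x3 mean global.
       */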
      nir_def *mode_enum = nir_ushr_imm(b, addr, 62);
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                        nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}

unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
      return 32;
   case nir_address_format_2x32bit_global:
      return 32;
   case nir_address_format_64bit_global:
      return 64;
   case nir_address_format_64bit_global_32bit_offset:
      return 32;
   case nir_address_format_64bit_bounded_global:
      return 32;
   case nir_address_format_32bit_index_offset:
      return 32;
   case nir_address_format_32bit_index_offset_pack64:
      return 64;
   case nir_address_format_vec2_index_32bit_offset:
      return 32;
   case nir_address_format_62bit_generic:
      return 64;
   case nir_address_format_32bit_offset:
      return 32;
   case nir_address_format_32bit_offset_as_64bit:
      return 64;
   case nir_address_format_logical:
      return 32;
   }
   unreachable("Invalid address format");
}

unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
      return 1;
   case nir_address_format_2x32bit_global:
      return 2;
   case nir_address_format_64bit_global:
      return 1;
   case nir_address_format_64bit_global_32bit_offset:
      return 4;
   case nir_address_format_64bit_bounded_global:
      return 4;
   case nir_address_format_32bit_index_offset:
      return 2;
   case nir_address_format_32bit_index_offset_pack64:
      return 1;
   case nir_address_format_vec2_index_32bit_offset:
      return 3;
   case nir_address_format_62bit_generic:
      return 1;
   case nir_address_format_32bit_offset:
      return 1;
   case nir_address_format_32bit_offset_as_64bit:
      return 1;
   case nir_address_format_logical:
      return 1;
   }
   unreachable("Invalid address format");
}

static nir_def *
addr_to_index(nir_builder *b, nir_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_trim_vector(b, addr, 2);
   default:
      unreachable("Invalid address format");
   }
}

static nir_def *
addr_to_offset(nir_builder *b, nir_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 1);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_x(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channel(b, addr, 2);
   case nir_address_format_32bit_offset:
      return addr;
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_62bit_generic:
      return nir_u2u32(b, addr);
   default:
      unreachable("Invalid address format");
   }
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_2x32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_global_32bit_offset ||
          addr_format == nir_address_format_64bit_bounded_global;
}

static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

static nir_def *
addr_to_global(nir_builder *b, nir_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_2x32bit_global:
      assert(addr->num_components == 2);
      return addr;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)),
                      nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

static nir_def *
addr_is_in_bounds(nir_builder *b, nir_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   assert(size > 0);
   return nir_ult(b, nir_iadd_imm(b, nir_channel(b, addr, 3), size - 1),
                  nir_channel(b, addr, 2));
}

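/* Compute a conservative [base, base + range) interval that a deref chain may
 * access, walking from the deref up towards its root.  If the walk hits
 * something it cannot reason about, fall back to base 0 and an unbounded
 * range.
 */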
1240 static void
nir_get_explicit_deref_range(nir_deref_instr * deref,nir_address_format addr_format,uint32_t * out_base,uint32_t * out_range)1241 nir_get_explicit_deref_range(nir_deref_instr *deref,
1242                              nir_address_format addr_format,
1243                              uint32_t *out_base,
1244                              uint32_t *out_range)
1245 {
1246    uint32_t base = 0;
1247    uint32_t range = glsl_get_explicit_size(deref->type, false);
1248 
1249    while (true) {
1250       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1251 
1252       switch (deref->deref_type) {
1253       case nir_deref_type_array:
1254       case nir_deref_type_array_wildcard:
1255       case nir_deref_type_ptr_as_array: {
1256          const unsigned stride = nir_deref_instr_array_stride(deref);
1257          if (stride == 0)
1258             goto fail;
1259 
1260          if (!parent)
1261             goto fail;
1262 
1263          if (deref->deref_type != nir_deref_type_array_wildcard &&
1264              nir_src_is_const(deref->arr.index)) {
1265             base += stride * nir_src_as_uint(deref->arr.index);
1266          } else {
1267             if (glsl_get_length(parent->type) == 0)
1268                goto fail;
1269             range += stride * (glsl_get_length(parent->type) - 1);
1270          }
1271          break;
1272       }
1273 
1274       case nir_deref_type_struct: {
1275          if (!parent)
1276             goto fail;
1277 
1278          base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1279          break;
1280       }
1281 
1282       case nir_deref_type_cast: {
1283          nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1284 
1285          switch (parent_instr->type) {
1286          case nir_instr_type_load_const: {
1287             nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1288 
1289             switch (addr_format) {
1290             case nir_address_format_32bit_offset:
1291                base += load->value[1].u32;
1292                break;
1293             case nir_address_format_32bit_index_offset:
1294                base += load->value[1].u32;
1295                break;
1296             case nir_address_format_vec2_index_32bit_offset:
1297                base += load->value[2].u32;
1298                break;
1299             default:
1300                goto fail;
1301             }
1302 
1303             *out_base = base;
1304             *out_range = range;
1305             return;
1306          }
1307 
1308          case nir_instr_type_intrinsic: {
1309             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1310             switch (intr->intrinsic) {
1311             case nir_intrinsic_load_vulkan_descriptor:
1312                /* Assume that a load_vulkan_descriptor won't contribute to an
1313                 * offset within the resource.
1314                 */
1315                break;
1316             default:
1317                goto fail;
1318             }
1319 
1320             *out_base = base;
1321             *out_range = range;
1322             return;
1323          }
1324 
1325          default:
1326             goto fail;
1327          }
1328       }
1329 
1330       default:
1331          goto fail;
1332       }
1333 
1334       deref = parent;
1335    }
1336 
1337 fail:
1338    *out_base = 0;
1339    *out_range = ~0;
1340 }
1341 
1342 static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)1343 canonicalize_generic_modes(nir_variable_mode modes)
1344 {
1345    assert(modes != 0);
1346    if (util_bitcount(modes) == 1)
1347       return modes;
1348 
1349    assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1350                       nir_var_mem_shared | nir_var_mem_global)));
1351 
1352    /* Canonicalize by converting shader_temp to function_temp */
1353    if (modes & nir_var_shader_temp) {
1354       modes &= ~nir_var_shader_temp;
1355       modes |= nir_var_function_temp;
1356    }
1357 
1358    return modes;
1359 }
1360 
1361 static nir_intrinsic_op
get_store_global_op_from_addr_format(nir_address_format addr_format)1362 get_store_global_op_from_addr_format(nir_address_format addr_format)
1363 {
1364    if (addr_format != nir_address_format_2x32bit_global)
1365       return nir_intrinsic_store_global;
1366    else
1367       return nir_intrinsic_store_global_2x32;
1368 }
1369 
1370 static nir_intrinsic_op
get_load_global_op_from_addr_format(nir_address_format addr_format)1371 get_load_global_op_from_addr_format(nir_address_format addr_format)
1372 {
1373    if (addr_format != nir_address_format_2x32bit_global)
1374       return nir_intrinsic_load_global;
1375    else
1376       return nir_intrinsic_load_global_2x32;
1377 }
1378 
1379 static nir_intrinsic_op
get_load_global_constant_op_from_addr_format(nir_address_format addr_format)1380 get_load_global_constant_op_from_addr_format(nir_address_format addr_format)
1381 {
1382    if (addr_format != nir_address_format_2x32bit_global)
1383       return nir_intrinsic_load_global_constant;
1384    else
1385       return nir_intrinsic_load_global_2x32; /* no dedicated op, fallback */
1386 }
1387 
1388 static nir_def *
1389 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1390                        nir_def *addr, nir_address_format addr_format,
1391                        nir_variable_mode modes,
1392                        uint32_t align_mul, uint32_t align_offset,
1393                        unsigned num_components)
1394 {
1395    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1396    modes = canonicalize_generic_modes(modes);
1397 
1398    if (util_bitcount(modes) > 1) {
1399       if (addr_format_is_global(addr_format, modes)) {
1400          return build_explicit_io_load(b, intrin, addr, addr_format,
1401                                        nir_var_mem_global,
1402                                        align_mul, align_offset,
1403                                        num_components);
1404       } else if (modes & nir_var_function_temp) {
1405          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1406                                                       nir_var_function_temp));
1407          nir_def *res1 =
1408             build_explicit_io_load(b, intrin, addr, addr_format,
1409                                    nir_var_function_temp,
1410                                    align_mul, align_offset,
1411                                    num_components);
1412          nir_push_else(b, NULL);
1413          nir_def *res2 =
1414             build_explicit_io_load(b, intrin, addr, addr_format,
1415                                    modes & ~nir_var_function_temp,
1416                                    align_mul, align_offset,
1417                                    num_components);
1418          nir_pop_if(b, NULL);
1419          return nir_if_phi(b, res1, res2);
1420       } else {
1421          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1422                                                       nir_var_mem_shared));
1423          assert(modes & nir_var_mem_shared);
1424          nir_def *res1 =
1425             build_explicit_io_load(b, intrin, addr, addr_format,
1426                                    nir_var_mem_shared,
1427                                    align_mul, align_offset,
1428                                    num_components);
1429          nir_push_else(b, NULL);
1430          assert(modes & nir_var_mem_global);
1431          nir_def *res2 =
1432             build_explicit_io_load(b, intrin, addr, addr_format,
1433                                    nir_var_mem_global,
1434                                    align_mul, align_offset,
1435                                    num_components);
1436          nir_pop_if(b, NULL);
1437          return nir_if_phi(b, res1, res2);
1438       }
1439    }
1440 
1441    assert(util_bitcount(modes) == 1);
1442    const nir_variable_mode mode = modes;
1443 
1444    nir_intrinsic_op op;
1445    switch (intrin->intrinsic) {
1446    case nir_intrinsic_load_deref:
1447       switch (mode) {
1448       case nir_var_mem_ubo:
1449          if (addr_format == nir_address_format_64bit_global_32bit_offset)
1450             op = nir_intrinsic_load_global_constant_offset;
1451          else if (addr_format == nir_address_format_64bit_bounded_global)
1452             op = nir_intrinsic_load_global_constant_bounded;
1453          else if (addr_format_is_global(addr_format, mode))
1454             op = nir_intrinsic_load_global_constant;
1455          else
1456             op = nir_intrinsic_load_ubo;
1457          break;
1458       case nir_var_mem_ssbo:
1459          if (addr_format_is_global(addr_format, mode))
1460             op = nir_intrinsic_load_global;
1461          else
1462             op = nir_intrinsic_load_ssbo;
1463          break;
1464       case nir_var_mem_global:
1465          assert(addr_format_is_global(addr_format, mode));
1466          op = get_load_global_op_from_addr_format(addr_format);
1467          break;
1468       case nir_var_uniform:
1469          assert(addr_format_is_offset(addr_format, mode));
1470          assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1471          op = nir_intrinsic_load_kernel_input;
1472          break;
1473       case nir_var_mem_shared:
1474          assert(addr_format_is_offset(addr_format, mode));
1475          op = nir_intrinsic_load_shared;
1476          break;
1477       case nir_var_mem_task_payload:
1478          assert(addr_format_is_offset(addr_format, mode));
1479          op = nir_intrinsic_load_task_payload;
1480          break;
1481       case nir_var_shader_temp:
1482       case nir_var_function_temp:
1483          if (addr_format_is_offset(addr_format, mode)) {
1484             op = nir_intrinsic_load_scratch;
1485          } else {
1486             assert(addr_format_is_global(addr_format, mode));
1487             op = get_load_global_op_from_addr_format(addr_format);
1488          }
1489          break;
1490       case nir_var_mem_push_const:
1491          assert(addr_format == nir_address_format_32bit_offset);
1492          op = nir_intrinsic_load_push_constant;
1493          break;
1494       case nir_var_mem_constant:
1495          if (addr_format_is_offset(addr_format, mode)) {
1496             op = nir_intrinsic_load_constant;
1497          } else {
1498             assert(addr_format_is_global(addr_format, mode));
1499             op = get_load_global_constant_op_from_addr_format(addr_format);
1500          }
1501          break;
1502       default:
1503          unreachable("Unsupported explicit IO variable mode");
1504       }
1505       break;
1506 
1507    case nir_intrinsic_load_deref_block_intel:
1508       switch (mode) {
1509       case nir_var_mem_ssbo:
1510          if (addr_format_is_global(addr_format, mode))
1511             op = nir_intrinsic_load_global_block_intel;
1512          else
1513             op = nir_intrinsic_load_ssbo_block_intel;
1514          break;
1515       case nir_var_mem_global:
1516          op = nir_intrinsic_load_global_block_intel;
1517          break;
1518       case nir_var_mem_shared:
1519          op = nir_intrinsic_load_shared_block_intel;
1520          break;
1521       default:
1522          unreachable("Unsupported explicit IO variable mode");
1523       }
1524       break;
1525 
1526    default:
1527       unreachable("Invalid intrinsic");
1528    }
1529 
1530    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1531 
1532    if (op == nir_intrinsic_load_global_constant_offset) {
1533       assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1534       load->src[0] = nir_src_for_ssa(
1535          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1536       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1537    } else if (op == nir_intrinsic_load_global_constant_bounded) {
1538       assert(addr_format == nir_address_format_64bit_bounded_global);
1539       load->src[0] = nir_src_for_ssa(
1540          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1541       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1542       load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1543    } else if (addr_format_is_global(addr_format, mode)) {
1544       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1545    } else if (addr_format_is_offset(addr_format, mode)) {
1546       assert(addr->num_components == 1);
1547       load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1548    } else {
1549       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1550       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1551    }
1552 
1553    if (nir_intrinsic_has_access(load))
1554       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1555 
1556    if (op == nir_intrinsic_load_constant) {
1557       nir_intrinsic_set_base(load, 0);
1558       nir_intrinsic_set_range(load, b->shader->constant_data_size);
1559    } else if (op == nir_intrinsic_load_kernel_input) {
1560       nir_intrinsic_set_base(load, 0);
1561       nir_intrinsic_set_range(load, b->shader->num_uniforms);
1562    } else if (mode == nir_var_mem_push_const) {
1563       /* Push constants must be chaseable back to the variable so we can
1564        * provide a base/range.
1565        */
1566       nir_variable *var = nir_deref_instr_get_variable(deref);
1567       nir_intrinsic_set_base(load, 0);
1568       nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1569    }
1570 
1571    unsigned bit_size = intrin->def.bit_size;
1572    if (bit_size == 1) {
1573       /* TODO: Make the native bool bit_size an option. */
1574       bit_size = 32;
1575    }
1576 
1577    if (nir_intrinsic_has_align(load))
1578       nir_intrinsic_set_align(load, align_mul, align_offset);
1579 
1580    if (nir_intrinsic_has_range_base(load)) {
1581       unsigned base, range;
1582       nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1583       nir_intrinsic_set_range_base(load, base);
1584       nir_intrinsic_set_range(load, range);
1585    }
1586 
1587    load->num_components = num_components;
1588    nir_def_init(&load->instr, &load->def, num_components, bit_size);
1589 
1590    assert(bit_size % 8 == 0);
1591 
1592    nir_def *result;
1593    if (addr_format_needs_bounds_check(addr_format) &&
1594        op != nir_intrinsic_load_global_constant_bounded) {
1595       /* We don't need to bounds-check global_constant_bounded because bounds
1596        * checking is handled by the intrinsic itself.
1597        *
1598        * The Vulkan spec for robustBufferAccess gives us quite a few options
1599        * as to what we can do with an OOB read.  Unfortunately, returning
1600        * undefined values isn't one of them so we return an actual zero.
1601        */
1602       nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1603 
1604       /* TODO: Better handle block_intel. */
1605       assert(load->num_components == 1);
1606       const unsigned load_size = bit_size / 8;
1607       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1608 
1609       nir_builder_instr_insert(b, &load->instr);
1610 
1611       nir_pop_if(b, NULL);
1612 
1613       result = nir_if_phi(b, &load->def, zero);
1614    } else {
1615       nir_builder_instr_insert(b, &load->instr);
1616       result = &load->def;
1617    }
1618 
1619    if (intrin->def.bit_size == 1) {
1620       /* For shared, we can go ahead and use NIR's and/or the back-end's
1621        * standard encoding for booleans rather than forcing a 0/1 boolean.
1622        * This should save an instruction or two.
1623        */
1624       if (mode == nir_var_mem_shared ||
1625           mode == nir_var_shader_temp ||
1626           mode == nir_var_function_temp)
1627          result = nir_b2b1(b, result);
1628       else
1629          result = nir_i2b(b, result);
1630    }
1631 
1632    return result;
1633 }
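
/* Schematic shape of the code emitted above for a single scalar load with a
 * bounds-checking address format (illustrative pseudo-NIR, not exact syntax;
 * the concrete load op depends on the mode and address format):
 *
 *    if (addr_is_in_bounds(addr, bit_size / 8)) {
 *       %val = <lowered load op>(addr, ...)
 *    }
 *    %result = phi(%val, 0x0)
 *
 * so an out-of-bounds read yields zero rather than an undefined value, as
 * noted in the robustBufferAccess comment above.
 */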
1634 
1635 static void
1636 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1637                         nir_def *addr, nir_address_format addr_format,
1638                         nir_variable_mode modes,
1639                         uint32_t align_mul, uint32_t align_offset,
1640                         nir_def *value, nir_component_mask_t write_mask)
1641 {
1642    modes = canonicalize_generic_modes(modes);
1643 
1644    if (util_bitcount(modes) > 1) {
1645       if (addr_format_is_global(addr_format, modes)) {
1646          build_explicit_io_store(b, intrin, addr, addr_format,
1647                                  nir_var_mem_global,
1648                                  align_mul, align_offset,
1649                                  value, write_mask);
1650       } else if (modes & nir_var_function_temp) {
1651          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1652                                                       nir_var_function_temp));
1653          build_explicit_io_store(b, intrin, addr, addr_format,
1654                                  nir_var_function_temp,
1655                                  align_mul, align_offset,
1656                                  value, write_mask);
1657          nir_push_else(b, NULL);
1658          build_explicit_io_store(b, intrin, addr, addr_format,
1659                                  modes & ~nir_var_function_temp,
1660                                  align_mul, align_offset,
1661                                  value, write_mask);
1662          nir_pop_if(b, NULL);
1663       } else {
1664          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1665                                                       nir_var_mem_shared));
1666          assert(modes & nir_var_mem_shared);
1667          build_explicit_io_store(b, intrin, addr, addr_format,
1668                                  nir_var_mem_shared,
1669                                  align_mul, align_offset,
1670                                  value, write_mask);
1671          nir_push_else(b, NULL);
1672          assert(modes & nir_var_mem_global);
1673          build_explicit_io_store(b, intrin, addr, addr_format,
1674                                  nir_var_mem_global,
1675                                  align_mul, align_offset,
1676                                  value, write_mask);
1677          nir_pop_if(b, NULL);
1678       }
1679       return;
1680    }
1681 
1682    assert(util_bitcount(modes) == 1);
1683    const nir_variable_mode mode = modes;
1684 
1685    nir_intrinsic_op op;
1686    switch (intrin->intrinsic) {
1687    case nir_intrinsic_store_deref:
1688       assert(write_mask != 0);
1689 
1690       switch (mode) {
1691       case nir_var_mem_ssbo:
1692          if (addr_format_is_global(addr_format, mode))
1693             op = get_store_global_op_from_addr_format(addr_format);
1694          else
1695             op = nir_intrinsic_store_ssbo;
1696          break;
1697       case nir_var_mem_global:
1698          assert(addr_format_is_global(addr_format, mode));
1699          op = get_store_global_op_from_addr_format(addr_format);
1700          break;
1701       case nir_var_mem_shared:
1702          assert(addr_format_is_offset(addr_format, mode));
1703          op = nir_intrinsic_store_shared;
1704          break;
1705       case nir_var_mem_task_payload:
1706          assert(addr_format_is_offset(addr_format, mode));
1707          op = nir_intrinsic_store_task_payload;
1708          break;
1709       case nir_var_shader_temp:
1710       case nir_var_function_temp:
1711          if (addr_format_is_offset(addr_format, mode)) {
1712             op = nir_intrinsic_store_scratch;
1713          } else {
1714             assert(addr_format_is_global(addr_format, mode));
1715             op = get_store_global_op_from_addr_format(addr_format);
1716          }
1717          break;
1718       default:
1719          unreachable("Unsupported explicit IO variable mode");
1720       }
1721       break;
1722 
1723    case nir_intrinsic_store_deref_block_intel:
1724       assert(write_mask == 0);
1725 
1726       switch (mode) {
1727       case nir_var_mem_ssbo:
1728          if (addr_format_is_global(addr_format, mode))
1729             op = nir_intrinsic_store_global_block_intel;
1730          else
1731             op = nir_intrinsic_store_ssbo_block_intel;
1732          break;
1733       case nir_var_mem_global:
1734          op = nir_intrinsic_store_global_block_intel;
1735          break;
1736       case nir_var_mem_shared:
1737          op = nir_intrinsic_store_shared_block_intel;
1738          break;
1739       default:
1740          unreachable("Unsupported explicit IO variable mode");
1741       }
1742       break;
1743 
1744    default:
1745       unreachable("Invalid intrinsic");
1746    }
1747 
1748    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1749 
1750    if (value->bit_size == 1) {
1751       /* For shared, we can go ahead and use NIR's and/or the back-end's
1752        * standard encoding for booleans rather than forcing a 0/1 boolean.
1753        * This should save an instruction or two.
1754        *
1755        * TODO: Make the native bool bit_size an option.
1756        */
1757       if (mode == nir_var_mem_shared ||
1758           mode == nir_var_shader_temp ||
1759           mode == nir_var_function_temp)
1760          value = nir_b2b32(b, value);
1761       else
1762          value = nir_b2iN(b, value, 32);
1763    }
1764 
1765    store->src[0] = nir_src_for_ssa(value);
1766    if (addr_format_is_global(addr_format, mode)) {
1767       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1768    } else if (addr_format_is_offset(addr_format, mode)) {
1769       assert(addr->num_components == 1);
1770       store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1771    } else {
1772       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1773       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1774    }
1775 
1776    nir_intrinsic_set_write_mask(store, write_mask);
1777 
1778    if (nir_intrinsic_has_access(store))
1779       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1780 
1781    nir_intrinsic_set_align(store, align_mul, align_offset);
1782 
1783    assert(value->num_components == 1 ||
1784           value->num_components == intrin->num_components);
1785    store->num_components = value->num_components;
1786 
1787    assert(value->bit_size % 8 == 0);
1788 
1789    if (addr_format_needs_bounds_check(addr_format)) {
1790       /* TODO: Better handle block_intel. */
1791       assert(store->num_components == 1);
1792       const unsigned store_size = value->bit_size / 8;
1793       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1794 
1795       nir_builder_instr_insert(b, &store->instr);
1796 
1797       nir_pop_if(b, NULL);
1798    } else {
1799       nir_builder_instr_insert(b, &store->instr);
1800    }
1801 }
1802 
1803 static nir_def *
1804 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1805                          nir_def *addr, nir_address_format addr_format,
1806                          nir_variable_mode modes)
1807 {
1808    modes = canonicalize_generic_modes(modes);
1809 
1810    if (util_bitcount(modes) > 1) {
1811       if (addr_format_is_global(addr_format, modes)) {
1812          return build_explicit_io_atomic(b, intrin, addr, addr_format,
1813                                          nir_var_mem_global);
1814       } else if (modes & nir_var_function_temp) {
1815          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1816                                                       nir_var_function_temp));
1817          nir_def *res1 =
1818             build_explicit_io_atomic(b, intrin, addr, addr_format,
1819                                      nir_var_function_temp);
1820          nir_push_else(b, NULL);
1821          nir_def *res2 =
1822             build_explicit_io_atomic(b, intrin, addr, addr_format,
1823                                      modes & ~nir_var_function_temp);
1824          nir_pop_if(b, NULL);
1825          return nir_if_phi(b, res1, res2);
1826       } else {
1827          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1828                                                       nir_var_mem_shared));
1829          assert(modes & nir_var_mem_shared);
1830          nir_def *res1 =
1831             build_explicit_io_atomic(b, intrin, addr, addr_format,
1832                                      nir_var_mem_shared);
1833          nir_push_else(b, NULL);
1834          assert(modes & nir_var_mem_global);
1835          nir_def *res2 =
1836             build_explicit_io_atomic(b, intrin, addr, addr_format,
1837                                      nir_var_mem_global);
1838          nir_pop_if(b, NULL);
1839          return nir_if_phi(b, res1, res2);
1840       }
1841    }
1842 
1843    assert(util_bitcount(modes) == 1);
1844    const nir_variable_mode mode = modes;
1845 
1846    const unsigned num_data_srcs =
1847       nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1848 
1849    nir_intrinsic_op op;
1850    switch (mode) {
1851    case nir_var_mem_ssbo:
1852       if (addr_format_is_global(addr_format, mode))
1853          op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1854       else
1855          op = ssbo_atomic_for_deref(intrin->intrinsic);
1856       break;
1857    case nir_var_mem_global:
1858       assert(addr_format_is_global(addr_format, mode));
1859       op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1860       break;
1861    case nir_var_mem_shared:
1862       assert(addr_format_is_offset(addr_format, mode));
1863       op = shared_atomic_for_deref(intrin->intrinsic);
1864       break;
1865    case nir_var_mem_task_payload:
1866       assert(addr_format_is_offset(addr_format, mode));
1867       op = task_payload_atomic_for_deref(intrin->intrinsic);
1868       break;
1869    default:
1870       unreachable("Unsupported explicit IO variable mode");
1871    }
1872 
1873    nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1874    nir_intrinsic_set_atomic_op(atomic, nir_intrinsic_atomic_op(intrin));
1875 
1876    unsigned src = 0;
1877    if (addr_format_is_global(addr_format, mode)) {
1878       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1879    } else if (addr_format_is_offset(addr_format, mode)) {
1880       assert(addr->num_components == 1);
1881       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1882    } else {
1883       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1884       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1885    }
1886    for (unsigned i = 0; i < num_data_srcs; i++) {
1887       atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1888    }
1889 
1890    /* Global atomics don't have access flags because they assume that the
1891     * address may be non-uniform.
1892     */
1893    if (nir_intrinsic_has_access(atomic))
1894       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1895 
1896    assert(intrin->def.num_components == 1);
1897    nir_def_init(&atomic->instr, &atomic->def, 1,
1898                 intrin->def.bit_size);
1899 
1900    assert(atomic->def.bit_size % 8 == 0);
1901 
1902    if (addr_format_needs_bounds_check(addr_format)) {
1903       const unsigned atomic_size = atomic->def.bit_size / 8;
1904       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1905 
1906       nir_builder_instr_insert(b, &atomic->instr);
1907 
1908       nir_pop_if(b, NULL);
1909       return nir_if_phi(b, &atomic->def,
1910                         nir_undef(b, 1, atomic->def.bit_size));
1911    } else {
1912       nir_builder_instr_insert(b, &atomic->instr);
1913       return &atomic->def;
1914    }
1915 }
1916 
1917 nir_def *
1918 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1919                                    nir_def *base_addr,
1920                                    nir_address_format addr_format)
1921 {
1922    switch (deref->deref_type) {
1923    case nir_deref_type_var:
1924       return build_addr_for_var(b, deref->var, addr_format);
1925 
1926    case nir_deref_type_ptr_as_array:
1927    case nir_deref_type_array: {
1928       unsigned stride = nir_deref_instr_array_stride(deref);
1929       assert(stride > 0);
1930 
1931       unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
1932       nir_def *index = deref->arr.index.ssa;
1933       nir_def *offset;
1934 
1935       /* If the access chain has been declared in-bounds, then we know it doesn't
1936        * overflow the type.  For nir_deref_type_array, this implies it cannot be
1937        * negative. Also, since types in NIR have a maximum 32-bit size, we know the
1938        * final result will fit in a 32-bit value so we can convert the index to
1939        * 32-bit before multiplying and save ourselves from a 64-bit multiply.
1940        */
1941       if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
1942          index = nir_u2u32(b, index);
1943          offset = nir_u2uN(b, nir_amul_imm(b, index, stride), offset_bit_size);
1944       } else {
1945          index = nir_i2iN(b, index, offset_bit_size);
1946          offset = nir_amul_imm(b, index, stride);
1947       }
1948 
1949       return nir_build_addr_iadd(b, base_addr, addr_format,
1950                                  deref->modes, offset);
1951    }
1952 
1953    case nir_deref_type_array_wildcard:
1954       unreachable("Wildcards should be lowered by now");
1955       break;
1956 
1957    case nir_deref_type_struct: {
1958       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1959       int offset = glsl_get_struct_field_offset(parent->type,
1960                                                 deref->strct.index);
1961       assert(offset >= 0);
1962       return nir_build_addr_iadd_imm(b, base_addr, addr_format,
1963                                      deref->modes, offset);
1964    }
1965 
1966    case nir_deref_type_cast:
1967       /* Nothing to do here */
1968       return base_addr;
1969    }
1970 
1971    unreachable("Invalid NIR deref type");
1972 }
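
/* Worked example with made-up layout numbers: for a shared-memory deref chain
 * v[i].member, where the array stride is 16, the struct field offset of
 * `member` is 8, and the address format is a plain 32-bit offset, the address
 * accumulates one deref at a time:
 *
 *    addr0 = <offset of v>             (deref_var, via build_addr_for_var)
 *    addr1 = addr0 + i * 16            (deref_array)
 *    addr2 = addr1 + 8                 (deref_struct)
 *
 * Casts simply pass the address through unchanged.
 */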
1973 
1974 void
1975 nir_lower_explicit_io_instr(nir_builder *b,
1976                             nir_intrinsic_instr *intrin,
1977                             nir_def *addr,
1978                             nir_address_format addr_format)
1979 {
1980    b->cursor = nir_after_instr(&intrin->instr);
1981 
1982    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1983    unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1984    unsigned scalar_size = type_scalar_size_bytes(deref->type);
1985    if (vec_stride == 0) {
1986       vec_stride = scalar_size;
1987    } else {
1988       assert(glsl_type_is_vector(deref->type));
1989       assert(vec_stride >= scalar_size);
1990    }
1991 
1992    uint32_t align_mul, align_offset;
1993    if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1994       /* If we don't have an alignment from the deref, assume scalar */
1995       align_mul = scalar_size;
1996       align_offset = 0;
1997    }
1998 
1999    /* In order for bounds checking to be correct as per the Vulkan spec,
2000     * we need to check at the individual component granularity.  Prior to
2001     * robustness2, we're technically allowed to be sloppy by 16B.  Even with
2002     * robustness2, UBO loads are allowed to have a granularity as high as 256B
2003     * depending on hardware limits.  However, we have none of that information
2004     * here.  Short of adding new address formats, the easiest way to do that
2005     * is to just split any loads and stores into individual components here.
2006     *
2007     * TODO: At some point in the future we may want to add more ops similar to
2008  * nir_intrinsic_load_global_constant_bounded and make bounds checking the
2009     * back-end's problem.  Another option would be to somehow plumb more of
2010     * that information through to nir_lower_explicit_io.  For now, however,
2011     * scalarizing is at least correct.
2012     */
2013    bool scalarize = vec_stride > scalar_size ||
2014                     addr_format_needs_bounds_check(addr_format);
2015 
2016    switch (intrin->intrinsic) {
2017    case nir_intrinsic_load_deref: {
2018       nir_def *value;
2019       if (scalarize) {
2020          nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
2021             NULL,
2022          };
2023          for (unsigned i = 0; i < intrin->num_components; i++) {
2024             unsigned comp_offset = i * vec_stride;
2025             nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2026                                                          deref->modes,
2027                                                          comp_offset);
2028             comps[i] = build_explicit_io_load(b, intrin, comp_addr,
2029                                               addr_format, deref->modes,
2030                                               align_mul,
2031                                               (align_offset + comp_offset) %
2032                                                  align_mul,
2033                                               1);
2034          }
2035          value = nir_vec(b, comps, intrin->num_components);
2036       } else {
2037          value = build_explicit_io_load(b, intrin, addr, addr_format,
2038                                         deref->modes, align_mul, align_offset,
2039                                         intrin->num_components);
2040       }
2041       nir_def_rewrite_uses(&intrin->def, value);
2042       break;
2043    }
2044 
2045    case nir_intrinsic_store_deref: {
2046       nir_def *value = intrin->src[1].ssa;
2047       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
2048       if (scalarize) {
2049          for (unsigned i = 0; i < intrin->num_components; i++) {
2050             if (!(write_mask & (1 << i)))
2051                continue;
2052 
2053             unsigned comp_offset = i * vec_stride;
2054             nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2055                                                          deref->modes,
2056                                                          comp_offset);
2057             build_explicit_io_store(b, intrin, comp_addr, addr_format,
2058                                     deref->modes, align_mul,
2059                                     (align_offset + comp_offset) % align_mul,
2060                                     nir_channel(b, value, i), 1);
2061          }
2062       } else {
2063          build_explicit_io_store(b, intrin, addr, addr_format,
2064                                  deref->modes, align_mul, align_offset,
2065                                  value, write_mask);
2066       }
2067       break;
2068    }
2069 
2070    case nir_intrinsic_load_deref_block_intel: {
2071       nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
2072                                               deref->modes,
2073                                               align_mul, align_offset,
2074                                               intrin->num_components);
2075       nir_def_rewrite_uses(&intrin->def, value);
2076       break;
2077    }
2078 
2079    case nir_intrinsic_store_deref_block_intel: {
2080       nir_def *value = intrin->src[1].ssa;
2081       const nir_component_mask_t write_mask = 0;
2082       build_explicit_io_store(b, intrin, addr, addr_format,
2083                               deref->modes, align_mul, align_offset,
2084                               value, write_mask);
2085       break;
2086    }
2087 
2088    default: {
2089       nir_def *value =
2090          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2091       nir_def_rewrite_uses(&intrin->def, value);
2092       break;
2093    }
2094    }
2095 
2096    nir_instr_remove(&intrin->instr);
2097 }
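
/* Sketch of the scalarization path above: with a bounds-checking address
 * format (e.g. nir_address_format_64bit_bounded_global), a vec4 load_deref
 * with scalar_size 4 and vec_stride 4 is split into four scalar loads, each
 * at its own component address and each individually range-checked, then
 * recombined (illustrative):
 *
 *    c0 = load(addr + 0);   c1 = load(addr + 4);
 *    c2 = load(addr + 8);   c3 = load(addr + 12);
 *    value = vec4(c0, c1, c2, c3);
 */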
2098 
2099 bool
2100 nir_get_explicit_deref_align(nir_deref_instr *deref,
2101                              bool default_to_type_align,
2102                              uint32_t *align_mul,
2103                              uint32_t *align_offset)
2104 {
2105    if (deref->deref_type == nir_deref_type_var) {
2106       /* If we see a variable, align_mul is effectively infinite because we
2107        * know the offset exactly (up to the offset of the base pointer for the
2108        * given variable mode).   We have to pick something so we choose 256B
2109        * as an arbitrary alignment which seems high enough for any reasonable
2110        * wide-load use-case.  Back-ends should clamp alignments down if 256B
2111        * is too large for some reason.
2112        */
2113       *align_mul = 256;
2114       *align_offset = deref->var->data.driver_location % 256;
2115       return true;
2116    }
2117 
2118    /* If we're a cast deref that has an alignment, use that. */
2119    if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2120       *align_mul = deref->cast.align_mul;
2121       *align_offset = deref->cast.align_offset;
2122       return true;
2123    }
2124 
2125    /* Otherwise, we need to compute the alignment based on the parent */
2126    nir_deref_instr *parent = nir_deref_instr_parent(deref);
2127    if (parent == NULL) {
2128       assert(deref->deref_type == nir_deref_type_cast);
2129       if (default_to_type_align) {
2130          /* If we don't have a parent, assume the type's alignment, if any. */
2131          unsigned type_align = glsl_get_explicit_alignment(deref->type);
2132          if (type_align == 0)
2133             return false;
2134 
2135          *align_mul = type_align;
2136          *align_offset = 0;
2137          return true;
2138       } else {
2139          return false;
2140       }
2141    }
2142 
2143    uint32_t parent_mul, parent_offset;
2144    if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2145                                      &parent_mul, &parent_offset))
2146       return false;
2147 
2148    switch (deref->deref_type) {
2149    case nir_deref_type_var:
2150       unreachable("Handled above");
2151 
2152    case nir_deref_type_array:
2153    case nir_deref_type_array_wildcard:
2154    case nir_deref_type_ptr_as_array: {
2155       const unsigned stride = nir_deref_instr_array_stride(deref);
2156       if (stride == 0)
2157          return false;
2158 
2159       if (deref->deref_type != nir_deref_type_array_wildcard &&
2160           nir_src_is_const(deref->arr.index)) {
2161          unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2162          *align_mul = parent_mul;
2163          *align_offset = (parent_offset + offset) % parent_mul;
2164       } else {
2165          /* If this is a wildcard or an indirect deref, we have to go with the
2166           * power-of-two gcd.
2167           */
2168          *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2169          *align_offset = parent_offset % *align_mul;
2170       }
2171       return true;
2172    }
2173 
2174    case nir_deref_type_struct: {
2175       const int offset = glsl_get_struct_field_offset(parent->type,
2176                                                       deref->strct.index);
2177       if (offset < 0)
2178          return false;
2179 
2180       *align_mul = parent_mul;
2181       *align_offset = (parent_offset + offset) % parent_mul;
2182       return true;
2183    }
2184 
2185    case nir_deref_type_cast:
2186       /* We handled the explicit alignment case above. */
2187       assert(deref->cast.align_mul == 0);
2188       *align_mul = parent_mul;
2189       *align_offset = parent_offset;
2190       return true;
2191    }
2192 
2193    unreachable("Invalid deref_instr_type");
2194 }
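
/* Worked example with illustrative numbers: an indirectly indexed array deref
 * with stride 12 on top of a parent with align_mul = 16, align_offset = 4
 * falls back to the power-of-two factor of the stride:
 *
 *    align_mul    = MIN2(16, 1 << (ffs(12) - 1)) = MIN2(16, 4) = 4
 *    align_offset = 4 % 4 = 0
 *
 * With a constant index of 2 instead, the byte offset 2 * 12 = 24 is folded
 * directly into the parent alignment:
 *
 *    align_mul = 16, align_offset = (4 + 24) % 16 = 12
 */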
2195 
2196 static void
2197 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2198                         nir_address_format addr_format)
2199 {
2200    /* Ignore samplers/textures, because they are handled by other passes like `nir_lower_samplers`.
2201     * Only skip them when they are uniforms; otherwise this would break GL bindless texture handles
2202     * stored in UBOs.
2203     */
2204    if (nir_deref_mode_is_in_set(deref, nir_var_uniform) &&
2205        (glsl_type_is_sampler(deref->type) ||
2206         glsl_type_is_texture(deref->type)))
2207       return;
2208 
2209    /* Just delete the deref if it's not used.  We can't use
2210     * nir_deref_instr_remove_if_unused here because it may remove more than
2211     * one deref which could break our list walking since we walk the list
2212     * backwards.
2213     */
2214    if (nir_def_is_unused(&deref->def)) {
2215       nir_instr_remove(&deref->instr);
2216       return;
2217    }
2218 
2219    b->cursor = nir_after_instr(&deref->instr);
2220 
2221    nir_def *base_addr = NULL;
2222    if (deref->deref_type != nir_deref_type_var) {
2223       base_addr = deref->parent.ssa;
2224    }
2225 
2226    nir_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2227                                                       addr_format);
2228    assert(addr->bit_size == deref->def.bit_size);
2229    assert(addr->num_components == deref->def.num_components);
2230 
2231    nir_instr_remove(&deref->instr);
2232    nir_def_rewrite_uses(&deref->def, addr);
2233 }
2234 
2235 static void
2236 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2237                          nir_address_format addr_format)
2238 {
2239    nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2240 }
2241 
2242 static void
2243 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2244                                nir_address_format addr_format)
2245 {
2246    b->cursor = nir_after_instr(&intrin->instr);
2247 
2248    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2249 
2250    assert(glsl_type_is_array(deref->type));
2251    assert(glsl_get_length(deref->type) == 0);
2252    assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2253    unsigned stride = glsl_get_explicit_stride(deref->type);
2254    assert(stride > 0);
2255 
2256    nir_def *addr = &deref->def;
2257 
2258    nir_def *offset, *size;
2259    switch (addr_format) {
2260    case nir_address_format_64bit_global_32bit_offset:
2261    case nir_address_format_64bit_bounded_global:
2262       offset = nir_channel(b, addr, 3);
2263       size = nir_channel(b, addr, 2);
2264       break;
2265 
2266    case nir_address_format_32bit_index_offset:
2267    case nir_address_format_32bit_index_offset_pack64:
2268    case nir_address_format_vec2_index_32bit_offset: {
2269       offset = addr_to_offset(b, addr, addr_format);
2270       nir_def *index = addr_to_index(b, addr, addr_format);
2271       unsigned access = nir_intrinsic_access(intrin);
2272       size = nir_get_ssbo_size(b, index, .access = access);
2273       break;
2274    }
2275 
2276    default:
2277       unreachable("Cannot determine SSBO size");
2278    }
2279 
2280    nir_def *remaining = nir_usub_sat(b, size, offset);
2281    nir_def *arr_size = nir_udiv_imm(b, remaining, stride);
2282 
2283    nir_def_replace(&intrin->def, arr_size);
2284 }
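
/* Worked example with illustrative numbers: an unsized array of 16-byte
 * elements starting at offset 32 in an SSBO whose reported size is 112 bytes
 * lowers to
 *
 *    remaining = usub_sat(112, 32) = 80
 *    arr_size  = 80 / 16           = 5 elements
 *
 * where usub_sat clamps to zero if the offset ever exceeds the buffer size.
 */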
2285 
2286 static void
2287 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2288                              nir_address_format addr_format)
2289 {
2290    if (addr_format_is_global(addr_format, 0)) {
2291       /* If the address format is always global, then the driver can use
2292        * global addresses regardless of the mode.  In that case, don't create
2293        * a check, just whack the intrinsic to addr_mode_is and delegate to the
2294        * driver lowering.
2295        */
2296       intrin->intrinsic = nir_intrinsic_addr_mode_is;
2297       return;
2298    }
2299 
2300    nir_def *addr = intrin->src[0].ssa;
2301 
2302    b->cursor = nir_instr_remove(&intrin->instr);
2303 
2304    nir_def *is_mode =
2305       build_runtime_addr_mode_check(b, addr, addr_format,
2306                                     nir_intrinsic_memory_modes(intrin));
2307 
2308    nir_def_rewrite_uses(&intrin->def, is_mode);
2309 }
2310 
2311 static bool
2312 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2313                            nir_address_format addr_format)
2314 {
2315    bool progress = false;
2316 
2317    nir_builder b = nir_builder_create(impl);
2318 
2319    /* Walk in reverse order so that we can see the full deref chain when we
2320     * lower the access operations.  We lower them assuming that the derefs
2321     * will be turned into address calculations later.
2322     */
2323    nir_foreach_block_reverse(block, impl) {
2324       nir_foreach_instr_reverse_safe(instr, block) {
2325          switch (instr->type) {
2326          case nir_instr_type_deref: {
2327             nir_deref_instr *deref = nir_instr_as_deref(instr);
2328             if (nir_deref_mode_is_in_set(deref, modes)) {
2329                lower_explicit_io_deref(&b, deref, addr_format);
2330                progress = true;
2331             }
2332             break;
2333          }
2334 
2335          case nir_instr_type_intrinsic: {
2336             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2337             switch (intrin->intrinsic) {
2338             case nir_intrinsic_load_deref:
2339             case nir_intrinsic_store_deref:
2340             case nir_intrinsic_load_deref_block_intel:
2341             case nir_intrinsic_store_deref_block_intel:
2342             case nir_intrinsic_deref_atomic:
2343             case nir_intrinsic_deref_atomic_swap: {
2344                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2345                if (nir_deref_mode_is_in_set(deref, modes)) {
2346                   lower_explicit_io_access(&b, intrin, addr_format);
2347                   progress = true;
2348                }
2349                break;
2350             }
2351 
2352             case nir_intrinsic_deref_buffer_array_length: {
2353                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2354                if (nir_deref_mode_is_in_set(deref, modes)) {
2355                   lower_explicit_io_array_length(&b, intrin, addr_format);
2356                   progress = true;
2357                }
2358                break;
2359             }
2360 
2361             case nir_intrinsic_deref_mode_is: {
2362                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2363                if (nir_deref_mode_is_in_set(deref, modes)) {
2364                   lower_explicit_io_mode_check(&b, intrin, addr_format);
2365                   progress = true;
2366                }
2367                break;
2368             }
2369 
2370             case nir_intrinsic_launch_mesh_workgroups_with_payload_deref: {
2371                if (modes & nir_var_mem_task_payload) {
2372                   /* Get address and size of the payload variable. */
2373                   nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
2374                   assert(deref->deref_type == nir_deref_type_var);
2375                   unsigned base = deref->var->data.explicit_location;
2376                   unsigned size = glsl_get_explicit_size(deref->var->type, false);
2377 
2378                   /* Replace the current instruction with the explicit intrinsic. */
2379                   nir_def *dispatch_3d = intrin->src[0].ssa;
2380                   b.cursor = nir_instr_remove(instr);
2381                   nir_launch_mesh_workgroups(&b, dispatch_3d, .base = base, .range = size);
2382                   progress = true;
2383                }
2384 
2385                break;
2386             }
2387 
2388             default:
2389                break;
2390             }
2391             break;
2392          }
2393 
2394          default:
2395             /* Nothing to do */
2396             break;
2397          }
2398       }
2399    }
2400 
2401    if (progress) {
2402       nir_metadata_preserve(impl, nir_metadata_none);
2403    } else {
2404       nir_metadata_preserve(impl, nir_metadata_all);
2405    }
2406 
2407    return progress;
2408 }
2409 
2410 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2411  *
2412  * This pass is intended to be used for any I/O which touches memory external
2413  * to the shader or which is directly visible to the client.  It requires that
2414  * all data types in the given modes have explicit stride/offset decorations
2415  * to tell it exactly how to calculate the offset/address for the given load,
2416  * store, or atomic operation.  If the offset/stride information does not come
2417  * from the client explicitly (as with shared variables in GL or Vulkan),
2418  * nir_lower_vars_to_explicit_types() can be used to add them.
2419  *
2420  * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2421  * pointer chains which may contain cast derefs.  It does so by walking the
2422  * deref chain backwards and simply replacing each deref, one at a time, with
2423  * the appropriate address calculation.  The pass takes a nir_address_format
2424  * parameter which describes how the offset or address is to be represented
2425  * during calculations.  By ensuring that the address is always in a
2426  * consistent format, pointers can safely be conjured from thin air by the
2427  * driver, stored to variables, passed through phis, etc.
2428  *
2429  * The one exception to the simple algorithm described above is for handling
2430  * row-major matrices in which case we may look down one additional level of
2431  * the deref chain.
2432  *
2433  * This pass is also capable of handling OpenCL generic pointers.  If the
2434  * address mode is global, it will lower any ambiguous (more than one mode)
2435  * access to global and pass through the deref_mode_is run-time checks as
2436  * addr_mode_is.  This assumes the driver has somehow mapped shared and
2437  * scratch memory to the global address space.  For other modes such as
2438  * 62bit_generic, there is an enum embedded in the address and we lower
2439  * ambiguous access to an if-ladder and deref_mode_is to a check against the
2440  * embedded enum.  If nir_lower_explicit_io is called on any shader that
2441  * contains generic pointers, it must either be used on all of the generic
2442  * modes or none.
2443  */
2444 bool
2445 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2446                       nir_address_format addr_format)
2447 {
2448    bool progress = false;
2449 
2450    nir_foreach_function_impl(impl, shader) {
2451       if (impl && nir_lower_explicit_io_impl(impl, modes, addr_format))
2452          progress = true;
2453    }
2454 
2455    return progress;
2456 }
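
/* A minimal usage sketch (driver-side; the mode/format pairing here is an
 * assumption and varies by driver):
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 *
 * Modes without client-provided layout (shared, scratch, ...) normally go
 * through nir_lower_vars_to_explicit_types() first and then use an
 * offset-style format, e.g.:
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);
 */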
2457 
2458 static bool
2459 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2460                                       nir_variable_mode modes,
2461                                       glsl_type_size_align_func type_info)
2462 {
2463    bool progress = false;
2464 
2465    nir_foreach_block(block, impl) {
2466       nir_foreach_instr(instr, block) {
2467          if (instr->type != nir_instr_type_deref)
2468             continue;
2469 
2470          nir_deref_instr *deref = nir_instr_as_deref(instr);
2471          if (!nir_deref_mode_is_in_set(deref, modes))
2472             continue;
2473 
2474          unsigned size, alignment;
2475          const struct glsl_type *new_type =
2476             glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2477          if (new_type != deref->type) {
2478             progress = true;
2479             deref->type = new_type;
2480          }
2481          if (deref->deref_type == nir_deref_type_cast) {
2482             /* See also glsl_type::get_explicit_type_for_size_align() */
2483             unsigned new_stride = align(size, alignment);
2484             if (new_stride != deref->cast.ptr_stride) {
2485                deref->cast.ptr_stride = new_stride;
2486                progress = true;
2487             }
2488          }
2489       }
2490    }
2491 
2492    if (progress) {
2493       nir_metadata_preserve(impl, nir_metadata_control_flow |
2494                                      nir_metadata_live_defs |
2495                                      nir_metadata_loop_analysis);
2496    } else {
2497       nir_metadata_preserve(impl, nir_metadata_all);
2498    }
2499 
2500    return progress;
2501 }
2502 
2503 static bool
2504 lower_vars_to_explicit(nir_shader *shader,
2505                        struct exec_list *vars, nir_variable_mode mode,
2506                        glsl_type_size_align_func type_info)
2507 {
2508    bool progress = false;
2509    unsigned offset;
2510    switch (mode) {
2511    case nir_var_uniform:
2512       assert(shader->info.stage == MESA_SHADER_KERNEL);
2513       offset = 0;
2514       break;
2515    case nir_var_function_temp:
2516    case nir_var_shader_temp:
2517       offset = shader->scratch_size;
2518       break;
2519    case nir_var_mem_shared:
2520       offset = shader->info.shared_size;
2521       break;
2522    case nir_var_mem_task_payload:
2523       offset = shader->info.task_payload_size;
2524       break;
2525    case nir_var_mem_node_payload:
2526       assert(!shader->info.cs.node_payloads_size);
2527       offset = 0;
2528       break;
2529    case nir_var_mem_global:
2530       offset = shader->global_mem_size;
2531       break;
2532    case nir_var_mem_constant:
2533       offset = shader->constant_data_size;
2534       break;
2535    case nir_var_shader_call_data:
2536    case nir_var_ray_hit_attrib:
2537    case nir_var_mem_node_payload_in:
2538       offset = 0;
2539       break;
2540    default:
2541       unreachable("Unsupported mode");
2542    }
2543    nir_foreach_variable_in_list(var, vars) {
2544       if (var->data.mode != mode)
2545          continue;
2546 
2547       unsigned size, alignment;
2548       const struct glsl_type *explicit_type =
2549          glsl_get_explicit_type_for_size_align(var->type, type_info,
2550                                                &size, &alignment);
2551 
2552       if (explicit_type != var->type)
2553          var->type = explicit_type;
2554 
2555       UNUSED bool is_empty_struct =
2556          glsl_type_is_struct_or_ifc(explicit_type) &&
2557          glsl_get_length(explicit_type) == 0;
2558 
2559       assert(util_is_power_of_two_nonzero(alignment) || is_empty_struct ||
2560              glsl_type_is_cmat(glsl_without_array(explicit_type)));
2561       assert(util_is_power_of_two_or_zero(var->data.alignment));
2562       alignment = MAX2(alignment, var->data.alignment);
2563 
2564       var->data.driver_location = ALIGN_POT(offset, alignment);
2565       offset = var->data.driver_location + size;
2566       progress = true;
2567    }
2568 
2569    switch (mode) {
2570    case nir_var_uniform:
2571       assert(shader->info.stage == MESA_SHADER_KERNEL);
2572       shader->num_uniforms = offset;
2573       break;
2574    case nir_var_shader_temp:
2575    case nir_var_function_temp:
2576       shader->scratch_size = offset;
2577       break;
2578    case nir_var_mem_shared:
2579       shader->info.shared_size = offset;
2580       break;
2581    case nir_var_mem_task_payload:
2582       shader->info.task_payload_size = offset;
2583       break;
2584    case nir_var_mem_node_payload:
2585       shader->info.cs.node_payloads_size = offset;
2586       break;
2587    case nir_var_mem_global:
2588       shader->global_mem_size = offset;
2589       break;
2590    case nir_var_mem_constant:
2591       shader->constant_data_size = offset;
2592       break;
2593    case nir_var_shader_call_data:
2594    case nir_var_ray_hit_attrib:
2595    case nir_var_mem_node_payload_in:
2596       break;
2597    default:
2598       unreachable("Unsupported mode");
2599    }
2600 
2601    return progress;
2602 }
2603 
2604 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2605  * generic pointers, it must either be used on all of the generic modes or
2606  * none.
2607  */
2608 bool
2609 nir_lower_vars_to_explicit_types(nir_shader *shader,
2610                                  nir_variable_mode modes,
2611                                  glsl_type_size_align_func type_info)
2612 {
2613    /* TODO: Situations which need to be handled to support more modes:
2614     * - row-major matrices
2615     * - compact shader inputs/outputs
2616     * - interface types
2617     */
2618    ASSERTED nir_variable_mode supported =
2619       nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2620       nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2621       nir_var_shader_call_data | nir_var_ray_hit_attrib |
2622       nir_var_mem_task_payload | nir_var_mem_node_payload |
2623       nir_var_mem_node_payload_in;
2624    assert(!(modes & ~supported) && "unsupported");
2625 
2626    bool progress = false;
2627 
2628    if (modes & nir_var_uniform)
2629       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2630    if (modes & nir_var_mem_global)
2631       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2632 
2633    if (modes & nir_var_mem_shared) {
2634       assert(!shader->info.shared_memory_explicit_layout);
2635       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2636    }
2637 
2638    if (modes & nir_var_shader_temp)
2639       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2640    if (modes & nir_var_mem_constant)
2641       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2642    if (modes & nir_var_shader_call_data)
2643       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2644    if (modes & nir_var_ray_hit_attrib)
2645       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2646    if (modes & nir_var_mem_task_payload)
2647       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2648    if (modes & nir_var_mem_node_payload)
2649       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload, type_info);
2650    if (modes & nir_var_mem_node_payload_in)
2651       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
2652 
2653    nir_foreach_function_impl(impl, shader) {
2654       if (modes & nir_var_function_temp)
2655          progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
2656 
2657       progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
2658    }
2659 
2660    return progress;
2661 }
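
/* Typical pairing with the explicit-I/O lowering above (a sketch;
 * glsl_get_natural_size_align_bytes is assumed here as the size/align
 * callback, giving a natural C-like layout):
 *
 *    nir_lower_vars_to_explicit_types(shader, nir_var_mem_shared,
 *                                     glsl_get_natural_size_align_bytes);
 *    nir_lower_explicit_io(shader, nir_var_mem_shared,
 *                          nir_address_format_32bit_offset);
 *
 * The first call assigns driver_locations and explicit strides/offsets and
 * bumps info.shared_size; the second turns the derefs into
 * load_shared/store_shared at byte offsets.
 */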
2662 
2663 static void
2664 write_constant(void *dst, size_t dst_size,
2665                const nir_constant *c, const struct glsl_type *type)
2666 {
2667    if (c->is_null_constant) {
2668       memset(dst, 0, dst_size);
2669       return;
2670    }
2671 
2672    if (glsl_type_is_vector_or_scalar(type)) {
2673       const unsigned num_components = glsl_get_vector_elements(type);
2674       const unsigned bit_size = glsl_get_bit_size(type);
2675       if (bit_size == 1) {
2676          /* Booleans are special-cased to be 32-bit
2677           *
2678           * TODO: Make the native bool bit_size an option.
2679           */
2680          assert(num_components * 4 <= dst_size);
2681          for (unsigned i = 0; i < num_components; i++) {
2682             int32_t b32 = -(int)c->values[i].b;
2683             memcpy((char *)dst + i * 4, &b32, 4);
2684          }
2685       } else {
2686          assert(bit_size >= 8 && bit_size % 8 == 0);
2687          const unsigned byte_size = bit_size / 8;
2688          assert(num_components * byte_size <= dst_size);
2689          for (unsigned i = 0; i < num_components; i++) {
2690             /* Annoyingly, thanks to packed structs, we can't make any
2691              * assumptions about the alignment of dst.  To avoid any strange
2692              * issues with unaligned writes, we always use memcpy.
2693              */
2694             memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2695          }
2696       }
2697    } else if (glsl_type_is_array_or_matrix(type)) {
2698       const unsigned array_len = glsl_get_length(type);
2699       const unsigned stride = glsl_get_explicit_stride(type);
2700       assert(stride > 0);
2701       const struct glsl_type *elem_type = glsl_get_array_element(type);
2702       for (unsigned i = 0; i < array_len; i++) {
2703          unsigned elem_offset = i * stride;
2704          assert(elem_offset < dst_size);
2705          write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2706                         c->elements[i], elem_type);
2707       }
2708    } else {
2709       assert(glsl_type_is_struct_or_ifc(type));
2710       const unsigned num_fields = glsl_get_length(type);
2711       for (unsigned i = 0; i < num_fields; i++) {
2712          const int field_offset = glsl_get_struct_field_offset(type, i);
2713          assert(field_offset >= 0 && field_offset < dst_size);
2714          const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2715          write_constant((char *)dst + field_offset, dst_size - field_offset,
2716                         c->elements[i], field_type);
2717       }
2718    }
2719 }
2720 
2721 void
2722 nir_gather_explicit_io_initializers(nir_shader *shader,
2723                                     void *dst, size_t dst_size,
2724                                     nir_variable_mode mode)
2725 {
2726    /* It doesn't really make sense to gather initializers for more than one
2727     * mode at a time.  If this ever becomes well-defined, we can drop the
2728     * assert then.
2729     */
2730    assert(util_bitcount(mode) == 1);
2731 
2732    nir_foreach_variable_with_modes(var, shader, mode) {
2733       assert(var->data.driver_location < dst_size);
2734       write_constant((char *)dst + var->data.driver_location,
2735                      dst_size - var->data.driver_location,
2736                      var->constant_initializer, var->type);
2737    }
2738 }
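
/* Example (illustrative sketch): once nir_lower_vars_to_explicit_types has
 * assigned driver_locations for nir_var_mem_constant and set
 * shader->constant_data_size, a driver could serialize the initializers with
 *
 *    void *data = calloc(1, shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 *
 * and upload the resulting buffer wherever its constant loads read from.
 */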
2739 
2740 /**
2741  * Return the offset source number for a load/store intrinsic or -1 if there's no offset.
2742  */
2743 int
2744 nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
2745 {
2746    switch (instr->intrinsic) {
2747    case nir_intrinsic_load_input:
2748    case nir_intrinsic_load_per_primitive_input:
2749    case nir_intrinsic_load_output:
2750    case nir_intrinsic_load_shared:
2751    case nir_intrinsic_load_task_payload:
2752    case nir_intrinsic_load_uniform:
2753    case nir_intrinsic_load_push_constant:
2754    case nir_intrinsic_load_kernel_input:
2755    case nir_intrinsic_load_global:
2756    case nir_intrinsic_load_global_2x32:
2757    case nir_intrinsic_load_global_constant:
2758    case nir_intrinsic_load_global_etna:
2759    case nir_intrinsic_load_scratch:
2760    case nir_intrinsic_load_fs_input_interp_deltas:
2761    case nir_intrinsic_shared_atomic:
2762    case nir_intrinsic_shared_atomic_swap:
2763    case nir_intrinsic_task_payload_atomic:
2764    case nir_intrinsic_task_payload_atomic_swap:
2765    case nir_intrinsic_global_atomic:
2766    case nir_intrinsic_global_atomic_2x32:
2767    case nir_intrinsic_global_atomic_swap:
2768    case nir_intrinsic_global_atomic_swap_2x32:
2769    case nir_intrinsic_load_coefficients_agx:
2770       return 0;
2771    case nir_intrinsic_load_ubo:
2772    case nir_intrinsic_load_ssbo:
2773    case nir_intrinsic_load_input_vertex:
2774    case nir_intrinsic_load_per_vertex_input:
2775    case nir_intrinsic_load_per_vertex_output:
2776    case nir_intrinsic_load_per_primitive_output:
2777    case nir_intrinsic_load_interpolated_input:
2778    case nir_intrinsic_store_output:
2779    case nir_intrinsic_store_shared:
2780    case nir_intrinsic_store_task_payload:
2781    case nir_intrinsic_store_global:
2782    case nir_intrinsic_store_global_2x32:
2783    case nir_intrinsic_store_global_etna:
2784    case nir_intrinsic_store_scratch:
2785    case nir_intrinsic_ssbo_atomic:
2786    case nir_intrinsic_ssbo_atomic_swap:
2787    case nir_intrinsic_ldc_nv:
2788    case nir_intrinsic_ldcx_nv:
2789       return 1;
2790    case nir_intrinsic_store_ssbo:
2791    case nir_intrinsic_store_per_vertex_output:
2792    case nir_intrinsic_store_per_primitive_output:
2793       return 2;
2794    default:
2795       return -1;
2796    }
2797 }
2798 
2799 /**
2800  * Return the offset source for a load/store intrinsic, or NULL if there is no offset.
2801  */
2802 nir_src *
2803 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2804 {
2805    const int idx = nir_get_io_offset_src_number(instr);
2806    return idx >= 0 ? &instr->src[idx] : NULL;
2807 }
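
/* Example (illustrative sketch): a backend walking lowered shader-IO
 * intrinsics can use the helpers above to treat direct and indirect
 * addressing uniformly:
 *
 *    nir_src *offset = nir_get_io_offset_src(intrin);
 *    if (offset && nir_src_is_const(*offset)) {
 *       unsigned slot = nir_intrinsic_base(intrin) + nir_src_as_uint(*offset);
 *       ... direct access to a known slot ...
 *    } else if (offset) {
 *       ... indirect access: emit address math from offset->ssa ...
 *    }
 */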
2808 
2809 /**
2810  * Return the vertex index source number for a load/store per_vertex intrinsic, or -1 if there's no vertex index.
2811  */
2812 int
2813 nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
2814 {
2815    switch (instr->intrinsic) {
2816    case nir_intrinsic_load_per_vertex_input:
2817    case nir_intrinsic_load_per_vertex_output:
2818    case nir_intrinsic_load_per_primitive_output:
2819       return 0;
2820    case nir_intrinsic_store_per_vertex_output:
2821    case nir_intrinsic_store_per_primitive_output:
2822       return 1;
2823    default:
2824       return -1;
2825    }
2826 }
2827 
2828 /**
2829  * Return the vertex index source for a load/store per_vertex intrinsic.
2830  */
2831 nir_src *
2832 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2833 {
2834    const int idx = nir_get_io_arrayed_index_src_number(instr);
2835    return idx >= 0 ? &instr->src[idx] : NULL;
2836 }
2837 
2838 /**
2839  * Return the numeric constant that identifies a NULL pointer for each address
2840  * format.
2841  */
2842 const nir_const_value *
2843 nir_address_format_null_value(nir_address_format addr_format)
2844 {
2845    static const nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2846       [nir_address_format_32bit_global] = { { 0 } },
2847       [nir_address_format_2x32bit_global] = { { 0 } },
2848       [nir_address_format_64bit_global] = { { 0 } },
2849       [nir_address_format_64bit_global_32bit_offset] = { { 0 } },
2850       [nir_address_format_64bit_bounded_global] = { { 0 } },
2851       [nir_address_format_32bit_index_offset] = { { .u32 = ~0 }, { .u32 = ~0 } },
2852       [nir_address_format_32bit_index_offset_pack64] = { { .u64 = ~0ull } },
2853       [nir_address_format_vec2_index_32bit_offset] = { { .u32 = ~0 }, { .u32 = ~0 }, { .u32 = ~0 } },
2854       [nir_address_format_32bit_offset] = { { .u32 = ~0 } },
2855       [nir_address_format_32bit_offset_as_64bit] = { { .u64 = ~0ull } },
2856       [nir_address_format_62bit_generic] = { { .u64 = 0 } },
2857       [nir_address_format_logical] = { { .u32 = ~0 } },
2858    };
2859 
2860    assert(addr_format < ARRAY_SIZE(null_values));
2861    return null_values[addr_format];
2862 }
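
/* Example (illustrative sketch): the NULL pointer can be materialized as an
 * SSA value for a given address format, e.g. when lowering a casted null
 * deref:
 *
 *    nir_def *null_ptr =
 *       nir_build_imm(b, nir_address_format_num_components(addr_format),
 *                     nir_address_format_bit_size(addr_format),
 *                     nir_address_format_null_value(addr_format));
 */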
2863 
2864 nir_def *
2865 nir_build_addr_ieq(nir_builder *b, nir_def *addr0, nir_def *addr1,
2866                    nir_address_format addr_format)
2867 {
2868    switch (addr_format) {
2869    case nir_address_format_32bit_global:
2870    case nir_address_format_2x32bit_global:
2871    case nir_address_format_64bit_global:
2872    case nir_address_format_64bit_bounded_global:
2873    case nir_address_format_32bit_index_offset:
2874    case nir_address_format_vec2_index_32bit_offset:
2875    case nir_address_format_32bit_offset:
2876    case nir_address_format_62bit_generic:
2877       return nir_ball_iequal(b, addr0, addr1);
2878 
2879    case nir_address_format_64bit_global_32bit_offset:
2880       return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2881                              nir_channels(b, addr1, 0xb));
2882 
2883    case nir_address_format_32bit_offset_as_64bit:
2884       assert(addr0->num_components == 1 && addr1->num_components == 1);
2885       return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2886 
2887    case nir_address_format_32bit_index_offset_pack64:
2888       assert(addr0->num_components == 1 && addr1->num_components == 1);
2889       return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2890 
2891    case nir_address_format_logical:
2892       unreachable("Unsupported address format");
2893    }
2894 
2895    unreachable("Invalid address format");
2896 }
2897 
2898 nir_def *
2899 nir_build_addr_isub(nir_builder *b, nir_def *addr0, nir_def *addr1,
2900                     nir_address_format addr_format)
2901 {
2902    switch (addr_format) {
2903    case nir_address_format_32bit_global:
2904    case nir_address_format_64bit_global:
2905    case nir_address_format_32bit_offset:
2906    case nir_address_format_32bit_index_offset_pack64:
2907    case nir_address_format_62bit_generic:
2908       assert(addr0->num_components == 1);
2909       assert(addr1->num_components == 1);
2910       return nir_isub(b, addr0, addr1);
2911 
2912    case nir_address_format_2x32bit_global:
2913       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2914                       addr_to_global(b, addr1, addr_format));
2915 
2916    case nir_address_format_32bit_offset_as_64bit:
2917       assert(addr0->num_components == 1);
2918       assert(addr1->num_components == 1);
2919       return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2920 
2921    case nir_address_format_64bit_global_32bit_offset:
2922    case nir_address_format_64bit_bounded_global:
2923       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2924                       addr_to_global(b, addr1, addr_format));
2925 
2926    case nir_address_format_32bit_index_offset:
2927       assert(addr0->num_components == 2);
2928       assert(addr1->num_components == 2);
2929       /* Assume the same buffer index. */
2930       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2931 
2932    case nir_address_format_vec2_index_32bit_offset:
2933       assert(addr0->num_components == 3);
2934       assert(addr1->num_components == 3);
2935       /* Assume the same buffer index. */
2936       return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2937 
2938    case nir_address_format_logical:
2939       unreachable("Unsupported address format");
2940    }
2941 
2942    unreachable("Invalid address format");
2943 }
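
/* Example (illustrative sketch): a pointer difference in elements can be
 * built on top of nir_build_addr_isub, assuming a known element stride in
 * bytes:
 *
 *    nir_def *byte_diff = nir_build_addr_isub(b, addr0, addr1, addr_format);
 *    nir_def *elem_diff =
 *       nir_idiv(b, byte_diff, nir_imm_intN_t(b, stride, byte_diff->bit_size));
 *
 * Note that the index/offset formats only subtract the offset component, so
 * both addresses must point into the same buffer.
 */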
2944 
2945 static bool
2946 is_input(nir_intrinsic_instr *intrin)
2947 {
2948    return intrin->intrinsic == nir_intrinsic_load_input ||
2949           intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
2950           intrin->intrinsic == nir_intrinsic_load_input_vertex ||
2951           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2952           intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2953           intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2954 }
2955 
2956 static bool
2957 is_output(nir_intrinsic_instr *intrin)
2958 {
2959    return intrin->intrinsic == nir_intrinsic_load_output ||
2960           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2961           intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2962           intrin->intrinsic == nir_intrinsic_store_output ||
2963           intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2964           intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2965 }
2966 
2967 static bool
2968 is_dual_slot(nir_intrinsic_instr *intrin)
2969 {
2970    if (intrin->intrinsic == nir_intrinsic_store_output ||
2971        intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2972        intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2973       return nir_src_bit_size(intrin->src[0]) == 64 &&
2974              nir_src_num_components(intrin->src[0]) >= 3;
2975    }
2976 
2977    return intrin->def.bit_size == 64 &&
2978           intrin->def.num_components >= 3;
2979 }
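
/* For example, a store_output of a dvec3 or dvec4 (64-bit, 3-4 components)
 * needs two vec4 slots, while a dvec2 or any 32-bit value fits in one.
 */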
2980 
2981 /**
2982  * This pass adds constant offsets to instr->const_index[0] for input/output
2983  * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2984  * unchanged - since we don't know what part of a compound variable is
2985  * accessed, we allocate storage for the entire thing. For drivers that use
2986  * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2987  * the offset source will be 0, so that they don't have to add it in manually.
2988  */
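
/* Illustrative before/after (exact printed form varies): a store with a
 * constant offset of 2,
 *
 *    store_output ssa_5, ssa_2    (base=4, location=L, ssa_2 = const 2)
 *
 * becomes
 *
 *    store_output ssa_5, ssa_0    (base=6, location=L+2, ssa_0 = const 0)
 *
 * with num_slots reduced to 1 (or 2 for dual-slot 64-bit outputs).
 */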
2989 
2990 static bool
2991 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2992                                nir_variable_mode modes)
2993 {
2994    bool progress = false;
2995    nir_foreach_instr_safe(instr, block) {
2996       if (instr->type != nir_instr_type_intrinsic)
2997          continue;
2998 
2999       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3000 
3001       if (((modes & nir_var_shader_in) && is_input(intrin)) ||
3002           ((modes & nir_var_shader_out) && is_output(intrin))) {
3003          nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3004 
3005          /* NV_mesh_shader: ignore MS primitive indices. */
3006          if (b->shader->info.stage == MESA_SHADER_MESH &&
3007              sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
3008              !(b->shader->info.per_primitive_outputs &
3009                BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
3010             continue;
3011 
3012          nir_src *offset = nir_get_io_offset_src(intrin);
3013 
3014          /* TODO: Better handling of per-view variables here */
3015          if (nir_src_is_const(*offset) &&
3016              !nir_intrinsic_io_semantics(intrin).per_view) {
3017             unsigned off = nir_src_as_uint(*offset);
3018 
3019             nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
3020 
3021             sem.location += off;
3022             /* non-indirect indexing should reduce num_slots */
3023             sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
3024             nir_intrinsic_set_io_semantics(intrin, sem);
3025 
3026             b->cursor = nir_before_instr(&intrin->instr);
3027             nir_src_rewrite(offset, nir_imm_int(b, 0));
3028             progress = true;
3029          }
3030       }
3031    }
3032 
3033    return progress;
3034 }
3035 
3036 bool
3037 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
3038 {
3039    bool progress = false;
3040 
3041    nir_foreach_function_impl(impl, nir) {
3042       bool impl_progress = false;
3043       nir_builder b = nir_builder_create(impl);
3044       nir_foreach_block(block, impl) {
3045          impl_progress |= add_const_offset_to_base_block(block, &b, modes);
3046       }
3047       progress |= impl_progress;
3048       if (impl_progress)
3049          nir_metadata_preserve(impl, nir_metadata_control_flow);
3050       else
3051          nir_metadata_preserve(impl, nir_metadata_all);
3052    }
3053 
3054    return progress;
3055 }
3056 
3057 bool
3058 nir_lower_color_inputs(nir_shader *nir)
3059 {
3060    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3061    bool progress = false;
3062 
3063    nir_builder b = nir_builder_create(impl);
3064 
3065    nir_foreach_block(block, impl) {
3066       nir_foreach_instr_safe(instr, block) {
3067          if (instr->type != nir_instr_type_intrinsic)
3068             continue;
3069 
3070          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3071 
3072          if (intrin->intrinsic != nir_intrinsic_load_input &&
3073              intrin->intrinsic != nir_intrinsic_load_interpolated_input)
3074             continue;
3075 
3076          nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3077 
3078          if (sem.location != VARYING_SLOT_COL0 &&
3079              sem.location != VARYING_SLOT_COL1)
3080             continue;
3081 
3082          /* Default to FLAT (for load_input) */
3083          enum glsl_interp_mode interp = INTERP_MODE_FLAT;
3084          bool sample = false;
3085          bool centroid = false;
3086 
3087          if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) {
3088             nir_intrinsic_instr *baryc =
3089                nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
3090 
3091             centroid =
3092                baryc->intrinsic == nir_intrinsic_load_barycentric_centroid;
3093             sample =
3094                baryc->intrinsic == nir_intrinsic_load_barycentric_sample;
3095             assert(centroid || sample ||
3096                    baryc->intrinsic == nir_intrinsic_load_barycentric_pixel);
3097 
3098             interp = nir_intrinsic_interp_mode(baryc);
3099          }
3100 
3101          b.cursor = nir_before_instr(instr);
3102          nir_def *load = NULL;
3103 
3104          if (sem.location == VARYING_SLOT_COL0) {
3105             load = nir_load_color0(&b);
3106             nir->info.fs.color0_interp = interp;
3107             nir->info.fs.color0_sample = sample;
3108             nir->info.fs.color0_centroid = centroid;
3109          } else {
3110             assert(sem.location == VARYING_SLOT_COL1);
3111             load = nir_load_color1(&b);
3112             nir->info.fs.color1_interp = interp;
3113             nir->info.fs.color1_sample = sample;
3114             nir->info.fs.color1_centroid = centroid;
3115          }
3116 
3117          if (intrin->num_components != 4) {
3118             unsigned start = nir_intrinsic_component(intrin);
3119             unsigned count = intrin->num_components;
3120             load = nir_channels(&b, load, BITFIELD_RANGE(start, count));
3121          }
3122 
3123          nir_def_replace(&intrin->def, load);
3124          progress = true;
3125       }
3126    }
3127 
3128    if (progress) {
3129       nir_metadata_preserve(impl, nir_metadata_control_flow);
3130    } else {
3131       nir_metadata_preserve(impl, nir_metadata_all);
3132    }
3133    return progress;
3134 }
3135 
3136 bool
3137 nir_io_add_intrinsic_xfb_info(nir_shader *nir)
3138 {
3139    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3140    bool progress = false;
3141 
3142    for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3143       nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3144 
3145    nir_foreach_block(block, impl) {
3146       nir_foreach_instr_safe(instr, block) {
3147          if (instr->type != nir_instr_type_intrinsic)
3148             continue;
3149 
3150          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3151 
3152          if (!nir_intrinsic_has_io_xfb(intr))
3153             continue;
3154 
3155          /* No indirect indexing allowed. The index is implied to be 0. */
3156          ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3157          assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3158 
3159          /* Calling this pass for the second time shouldn't do anything. */
3160          if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3161              nir_intrinsic_io_xfb(intr).out[1].num_components ||
3162              nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3163              nir_intrinsic_io_xfb2(intr).out[1].num_components)
3164             continue;
3165 
3166          nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3167          unsigned writemask = nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
3168 
3169          nir_io_xfb xfb[2];
3170          memset(xfb, 0, sizeof(xfb));
3171 
3172          for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3173             nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3174             if (out->location == sem.location) {
3175                unsigned xfb_mask = writemask & out->component_mask;
3176 
3177                /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3178                            "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3179                        i, out->buffer,
3180                        out->offset,
3181                        out->location,
3182                        out->component_offset,
3183                        out->component_mask,
3184                        xfb_mask, sem.num_slots);*/
3185 
3186                while (xfb_mask) {
3187                   int start, count;
3188                   u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3189 
3190                   xfb[start / 2].out[start % 2].num_components = count;
3191                   xfb[start / 2].out[start % 2].buffer = out->buffer;
3192                   /* out->offset is relative to the first stored xfb component */
3193                   /* start is relative to component 0 */
3194                   xfb[start / 2].out[start % 2].offset =
3195                      out->offset / 4 - out->component_offset + start;
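                  /* Worked example (illustrative): with out->offset = 16
                   * bytes, out->component_offset = 1 and start = 2, the
                   * stored offset is 16 / 4 - 1 + 2 = 5 dwords, i.e. the
                   * dword where component 2 of this slot lands in the
                   * buffer.
                   */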
3196 
3197                   progress = true;
3198                }
3199             }
3200          }
3201 
3202          nir_intrinsic_set_io_xfb(intr, xfb[0]);
3203          nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3204       }
3205    }
3206 
3207    nir_metadata_preserve(impl, nir_metadata_all);
3208    return progress;
3209 }
3210 
3211 static int
3212 type_size_vec4(const struct glsl_type *type, bool bindless)
3213 {
3214    return glsl_count_attribute_slots(type, false);
3215 }
3216 
3217 /**
3218  * This runs all compiler passes needed to lower IO, lower indirect IO access,
3219  * set transform feedback info in IO intrinsics, and clean up the IR.
3220  *
3221  * \param renumber_vs_inputs
3222  *    Set to true if holes between VS inputs should be removed, which is safe
3223  *    to do in any shader linker that can handle that. Set to false if you want
3224  *    to keep holes between VS inputs, which is recommended to do in gallium
3225  *    drivers so as not to break the mapping of vertex elements to VS inputs
3226  *    expected by gallium frontends.
3227  */
3228 void
3229 nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
3230 {
3231    if (nir->info.stage == MESA_SHADER_COMPUTE)
3232       return;
3233 
3234    bool has_indirect_inputs =
3235       (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3236 
3237    /* Transform feedback requires that indirect outputs are lowered. */
3238    bool has_indirect_outputs =
3239       (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3240       nir->xfb_info == NULL;
3241 
3242    /* TODO: Sorting variables by location is required due to some bug
3243     * in nir_lower_io_to_temporaries. If variables are not sorted,
3244     * dEQP-GLES31.functional.separate_shader.random.0 fails.
3245     *
3246     * This isn't needed if nir_assign_io_var_locations is called because it
3247     * also sorts variables. However, if IO is lowered before that point, we
3248     * must sort explicitly here to match what nir_assign_io_var_locations does.
3249     */
3250    unsigned varying_var_mask =
3251       (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) |
3252       (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
3253    nir_sort_variables_by_location(nir, varying_var_mask);
3254 
3255    if (!has_indirect_inputs || !has_indirect_outputs) {
3256       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3257                  nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3258                  !has_indirect_inputs);
3259 
3260       /* We need to lower all the copy_deref's introduced by
3261        * nir_lower_io_to_temporaries before calling nir_lower_io.
3262        */
3263       NIR_PASS_V(nir, nir_split_var_copies);
3264       NIR_PASS_V(nir, nir_lower_var_copies);
3265       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3266    }
3267 
3268    /* The correct lower_64bit_to_32 flag is required by st/mesa depending
3269     * on whether the GLSL linker lowers IO or not. Setting the wrong flag
3270     * would break 64-bit vertex attribs for GLSL.
3271     */
3272    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3273               type_size_vec4,
3274               renumber_vs_inputs ? nir_lower_io_lower_64bit_to_32_new :
3275                                    nir_lower_io_lower_64bit_to_32);
3276 
3277    /* nir_io_add_const_offset_to_base needs actual constants. */
3278    NIR_PASS_V(nir, nir_opt_constant_folding);
3279    NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
3280 
3281    /* Lower and remove dead derefs and variables to clean up the IR. */
3282    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3283    NIR_PASS_V(nir, nir_opt_dce);
3284    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3285 
3286    /* If IO is lowered before var->data.driver_location is assigned, driver
3287     * locations are all 0, which means IO bases are all 0. It's not necessary
3288     * to set driver_location before lowering IO because the only thing that
3289     * identifies outputs is their semantic, and IO bases can always be
3290     * computed from the semantics.
3291     *
3292     * This assigns IO bases from scratch, using IO semantics to tell which
3293     * intrinsics refer to the same IO. If the bases already exist, they
3294     * will be reassigned, sorted by the semantic, and all holes removed.
3295     * This kind of canonicalizes all bases.
3296     *
3297     * This must be done after DCE to remove dead load_input intrinsics.
3298     */
3299    NIR_PASS_V(nir, nir_recompute_io_bases,
3300               (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs ?
3301                nir_var_shader_in : 0) | nir_var_shader_out);
3302 
3303    if (nir->xfb_info)
3304       NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3305 
3306    if (nir->options->lower_mediump_io)
3307       nir->options->lower_mediump_io(nir);
3308 
3309    nir->info.io_lowered = true;
3310 }
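
/* Example (illustrative sketch): a gallium driver that must preserve the
 * vertex-element-to-VS-input mapping would typically call
 *
 *    nir_lower_io_passes(nir, false);
 *
 * once per shader before backend compilation, while a compiler that links
 * VS inputs itself can pass true to also compact vertex shader inputs.
 */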
3311