1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25 * This lowering pass converts references to input/output variables with
26 * loads/stores to actual input/output intrinsics.
27 */
28
29 #include "nir.h"
30 #include "nir_builder.h"
31 #include "nir_deref.h"
32 #include "nir_xfb_info.h"
33
34 #include "util/u_math.h"
35
36 struct lower_io_state {
37 void *dead_ctx;
38 nir_builder builder;
39 int (*type_size)(const struct glsl_type *type, bool);
40 nir_variable_mode modes;
41 nir_lower_io_options options;
42 struct set variable_names;
43 };
44
45 static const char *
add_variable_name(struct lower_io_state * state,const char * name)46 add_variable_name(struct lower_io_state *state, const char *name)
47 {
48 if (!name)
49 return NULL;
50
51 bool found = false;
52 struct set_entry *entry = _mesa_set_search_or_add(&state->variable_names, name, &found);
53 if (!found)
54 entry->key = (void*)ralloc_strdup(state->builder.shader, name);
55 return entry->key;
56 }
57
58 static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)59 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
60 {
61 switch (deref_op) {
62 case nir_intrinsic_deref_atomic:
63 return nir_intrinsic_ssbo_atomic;
64 case nir_intrinsic_deref_atomic_swap:
65 return nir_intrinsic_ssbo_atomic_swap;
66 default:
67 unreachable("Invalid SSBO atomic");
68 }
69 }
70
71 static nir_intrinsic_op
global_atomic_for_deref(nir_address_format addr_format,nir_intrinsic_op deref_op)72 global_atomic_for_deref(nir_address_format addr_format,
73 nir_intrinsic_op deref_op)
74 {
75 switch (deref_op) {
76 case nir_intrinsic_deref_atomic:
77 if (addr_format != nir_address_format_2x32bit_global)
78 return nir_intrinsic_global_atomic;
79 else
80 return nir_intrinsic_global_atomic_2x32;
81
82 case nir_intrinsic_deref_atomic_swap:
83 if (addr_format != nir_address_format_2x32bit_global)
84 return nir_intrinsic_global_atomic_swap;
85 else
86 return nir_intrinsic_global_atomic_swap_2x32;
87
88 default:
89 unreachable("Invalid SSBO atomic");
90 }
91 }
92
93 static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)94 shared_atomic_for_deref(nir_intrinsic_op deref_op)
95 {
96 switch (deref_op) {
97 case nir_intrinsic_deref_atomic:
98 return nir_intrinsic_shared_atomic;
99 case nir_intrinsic_deref_atomic_swap:
100 return nir_intrinsic_shared_atomic_swap;
101 default:
102 unreachable("Invalid shared atomic");
103 }
104 }
105
106 static nir_intrinsic_op
task_payload_atomic_for_deref(nir_intrinsic_op deref_op)107 task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
108 {
109 switch (deref_op) {
110 case nir_intrinsic_deref_atomic:
111 return nir_intrinsic_task_payload_atomic;
112 case nir_intrinsic_deref_atomic_swap:
113 return nir_intrinsic_task_payload_atomic_swap;
114 default:
115 unreachable("Invalid task payload atomic");
116 }
117 }
118
119 void
nir_assign_var_locations(nir_shader * shader,nir_variable_mode mode,unsigned * size,int (* type_size)(const struct glsl_type *,bool))120 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
121 unsigned *size,
122 int (*type_size)(const struct glsl_type *, bool))
123 {
124 unsigned location = 0;
125
126 nir_foreach_variable_with_modes(var, shader, mode) {
127 var->data.driver_location = location;
128 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
129 var->data.mode == nir_var_shader_out ||
130 var->data.bindless;
131 location += type_size(var->type, bindless_type_size);
132 }
133
134 *size = location;
135 }
136
137 /**
138 * Some inputs and outputs are arrayed, meaning that there is an extra level
139 * of array indexing to handle mismatches between the shader interface and the
140 * dispatch pattern of the shader. For instance, geometry shaders are
141 * executed per-primitive while their inputs and outputs are specified
142 * per-vertex so all inputs and outputs have to be additionally indexed with
143 * the vertex index within the primitive.
144 */
145 bool
nir_is_arrayed_io(const nir_variable * var,gl_shader_stage stage)146 nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
147 {
148 if (var->data.patch || !glsl_type_is_array(var->type))
149 return false;
150
151 if (stage == MESA_SHADER_MESH) {
152 /* NV_mesh_shader: this is flat array for the whole workgroup. */
153 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
154 return var->data.per_primitive;
155 }
156
157 if (var->data.mode == nir_var_shader_in) {
158 if (var->data.per_vertex) {
159 assert(stage == MESA_SHADER_FRAGMENT);
160 return true;
161 }
162
163 return stage == MESA_SHADER_GEOMETRY ||
164 stage == MESA_SHADER_TESS_CTRL ||
165 stage == MESA_SHADER_TESS_EVAL;
166 }
167
168 if (var->data.mode == nir_var_shader_out)
169 return stage == MESA_SHADER_TESS_CTRL ||
170 stage == MESA_SHADER_MESH;
171
172 return false;
173 }
174
175 static bool
uses_high_dvec2_semantic(struct lower_io_state * state,const nir_variable * var)176 uses_high_dvec2_semantic(struct lower_io_state *state,
177 const nir_variable *var)
178 {
179 return state->builder.shader->info.stage == MESA_SHADER_VERTEX &&
180 state->options & nir_lower_io_lower_64bit_to_32_new &&
181 var->data.mode == nir_var_shader_in &&
182 glsl_type_is_dual_slot(glsl_without_array(var->type));
183 }
184
185 static unsigned
get_number_of_slots(struct lower_io_state * state,const nir_variable * var)186 get_number_of_slots(struct lower_io_state *state,
187 const nir_variable *var)
188 {
189 const struct glsl_type *type = var->type;
190
191 if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
192 assert(glsl_type_is_array(type));
193 type = glsl_get_array_element(type);
194 }
195
196 /* NV_mesh_shader:
197 * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
198 * as opposed to D3D-style mesh shaders where it's addressed by
199 * the primitive index.
200 * Prevent assigning several slots to primitive indices,
201 * to avoid some issues.
202 */
203 if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
204 var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
205 !nir_is_arrayed_io(var, state->builder.shader->info.stage))
206 return 1;
207
208 return state->type_size(type, var->data.bindless) /
209 (uses_high_dvec2_semantic(state, var) ? 2 : 1);
210 }
211
212 static nir_def *
get_io_offset(nir_builder * b,nir_deref_instr * deref,nir_def ** array_index,int (* type_size)(const struct glsl_type *,bool),unsigned * component,bool bts)213 get_io_offset(nir_builder *b, nir_deref_instr *deref,
214 nir_def **array_index,
215 int (*type_size)(const struct glsl_type *, bool),
216 unsigned *component, bool bts)
217 {
218 nir_deref_path path;
219 nir_deref_path_init(&path, deref, NULL);
220
221 assert(path.path[0]->deref_type == nir_deref_type_var);
222 nir_deref_instr **p = &path.path[1];
223
224 /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
225 * inputs), skip the outermost array index. Process the rest normally.
226 */
227 if (array_index != NULL) {
228 assert((*p)->deref_type == nir_deref_type_array);
229 *array_index = (*p)->arr.index.ssa;
230 p++;
231 }
232
233 if (path.path[0]->var->data.compact && nir_src_is_const((*p)->arr.index)) {
234 assert((*p)->deref_type == nir_deref_type_array);
235 assert(glsl_type_is_scalar((*p)->type));
236
237 /* We always lower indirect dereferences for "compact" array vars. */
238 const unsigned index = nir_src_as_uint((*p)->arr.index);
239 const unsigned total_offset = *component + index;
240 const unsigned slot_offset = total_offset / 4;
241 *component = total_offset % 4;
242 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
243 }
244
245 /* Just emit code and let constant-folding go to town */
246 nir_def *offset = nir_imm_int(b, 0);
247
248 for (; *p; p++) {
249 if ((*p)->deref_type == nir_deref_type_array) {
250 unsigned size = type_size((*p)->type, bts);
251
252 nir_def *mul =
253 nir_amul_imm(b, (*p)->arr.index.ssa, size);
254
255 offset = nir_iadd(b, offset, mul);
256 } else if ((*p)->deref_type == nir_deref_type_struct) {
257 /* p starts at path[1], so this is safe */
258 nir_deref_instr *parent = *(p - 1);
259
260 unsigned field_offset = 0;
261 for (unsigned i = 0; i < (*p)->strct.index; i++) {
262 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
263 }
264 offset = nir_iadd_imm(b, offset, field_offset);
265 } else {
266 unreachable("Unsupported deref type");
267 }
268 }
269
270 nir_deref_path_finish(&path);
271
272 return offset;
273 }
274
275 static bool
is_medium_precision(const nir_shader * shader,const nir_variable * var)276 is_medium_precision(const nir_shader *shader, const nir_variable *var)
277 {
278 if (shader->options->io_options & nir_io_mediump_is_32bit)
279 return false;
280
281 return var->data.precision == GLSL_PRECISION_MEDIUM ||
282 var->data.precision == GLSL_PRECISION_LOW;
283 }
284
285 static nir_def *
emit_load(struct lower_io_state * state,nir_def * array_index,nir_variable * var,nir_def * offset,unsigned component,unsigned num_components,unsigned bit_size,nir_alu_type dest_type,bool high_dvec2)286 emit_load(struct lower_io_state *state,
287 nir_def *array_index, nir_variable *var, nir_def *offset,
288 unsigned component, unsigned num_components, unsigned bit_size,
289 nir_alu_type dest_type, bool high_dvec2)
290 {
291 nir_builder *b = &state->builder;
292 const nir_shader *nir = b->shader;
293 nir_variable_mode mode = var->data.mode;
294 nir_def *barycentric = NULL;
295
296 nir_intrinsic_op op;
297 switch (mode) {
298 case nir_var_shader_in:
299 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
300 nir->options->use_interpolated_input_intrinsics &&
301 var->data.interpolation != INTERP_MODE_FLAT &&
302 !var->data.per_primitive) {
303 if (var->data.interpolation == INTERP_MODE_EXPLICIT ||
304 var->data.per_vertex) {
305 assert(array_index != NULL);
306 op = nir_intrinsic_load_input_vertex;
307 } else {
308 assert(array_index == NULL);
309
310 nir_intrinsic_op bary_op;
311 if (var->data.sample)
312 bary_op = nir_intrinsic_load_barycentric_sample;
313 else if (var->data.centroid)
314 bary_op = nir_intrinsic_load_barycentric_centroid;
315 else
316 bary_op = nir_intrinsic_load_barycentric_pixel;
317
318 barycentric = nir_load_barycentric(&state->builder, bary_op,
319 var->data.interpolation);
320 op = nir_intrinsic_load_interpolated_input;
321 }
322 } else {
323 if (var->data.per_primitive)
324 op = nir_intrinsic_load_per_primitive_input;
325 else if (array_index)
326 op = nir_intrinsic_load_per_vertex_input;
327 else
328 op = nir_intrinsic_load_input;
329 }
330 break;
331 case nir_var_shader_out:
332 op = !array_index ? nir_intrinsic_load_output : var->data.per_primitive ? nir_intrinsic_load_per_primitive_output
333 : nir_intrinsic_load_per_vertex_output;
334 break;
335 case nir_var_uniform:
336 op = nir_intrinsic_load_uniform;
337 break;
338 default:
339 unreachable("Unknown variable mode");
340 }
341
342 nir_intrinsic_instr *load =
343 nir_intrinsic_instr_create(state->builder.shader, op);
344 load->num_components = num_components;
345 load->name = add_variable_name(state, var->name);
346
347 nir_intrinsic_set_base(load, var->data.driver_location);
348 if (nir_intrinsic_has_range(load)) {
349 const struct glsl_type *type = var->type;
350 if (array_index)
351 type = glsl_get_array_element(type);
352 unsigned var_size = state->type_size(type, var->data.bindless);
353 nir_intrinsic_set_range(load, var_size);
354 }
355
356 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
357 nir_intrinsic_set_component(load, component);
358
359 if (nir_intrinsic_has_access(load))
360 nir_intrinsic_set_access(load, var->data.access);
361
362 nir_intrinsic_set_dest_type(load, dest_type);
363
364 if (load->intrinsic != nir_intrinsic_load_uniform) {
365 nir_io_semantics semantics = { 0 };
366 semantics.location = var->data.location;
367 semantics.num_slots = get_number_of_slots(state, var);
368 semantics.fb_fetch_output = var->data.fb_fetch_output;
369 semantics.medium_precision = is_medium_precision(b->shader, var);
370 semantics.high_dvec2 = high_dvec2;
371 /* "per_vertex" is misnamed. It means "explicit interpolation with
372 * the original vertex order", which is a stricter version of
373 * INTERP_MODE_EXPLICIT.
374 */
375 semantics.interp_explicit_strict = var->data.per_vertex;
376 nir_intrinsic_set_io_semantics(load, semantics);
377 }
378
379 if (array_index) {
380 load->src[0] = nir_src_for_ssa(array_index);
381 load->src[1] = nir_src_for_ssa(offset);
382 } else if (barycentric) {
383 load->src[0] = nir_src_for_ssa(barycentric);
384 load->src[1] = nir_src_for_ssa(offset);
385 } else {
386 load->src[0] = nir_src_for_ssa(offset);
387 }
388
389 nir_def_init(&load->instr, &load->def, num_components, bit_size);
390 nir_builder_instr_insert(b, &load->instr);
391
392 return &load->def;
393 }
394
395 static nir_def *
lower_load(nir_intrinsic_instr * intrin,struct lower_io_state * state,nir_def * array_index,nir_variable * var,nir_def * offset,unsigned component,const struct glsl_type * type)396 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
397 nir_def *array_index, nir_variable *var, nir_def *offset,
398 unsigned component, const struct glsl_type *type)
399 {
400 const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
401 if (intrin->def.bit_size == 64 &&
402 (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32_new |
403 nir_lower_io_lower_64bit_to_32)))) {
404 nir_builder *b = &state->builder;
405 bool use_high_dvec2_semantic = uses_high_dvec2_semantic(state, var);
406
407 /* Each slot is a dual slot, so divide the offset within the variable
408 * by 2.
409 */
410 if (use_high_dvec2_semantic)
411 offset = nir_ushr_imm(b, offset, 1);
412
413 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
414
415 nir_def *comp64[4];
416 assert(component == 0 || component == 2);
417 unsigned dest_comp = 0;
418 bool high_dvec2 = false;
419 while (dest_comp < intrin->def.num_components) {
420 const unsigned num_comps =
421 MIN2(intrin->def.num_components - dest_comp,
422 (4 - component) / 2);
423
424 nir_def *data32 =
425 emit_load(state, array_index, var, offset, component,
426 num_comps * 2, 32, nir_type_uint32, high_dvec2);
427 for (unsigned i = 0; i < num_comps; i++) {
428 comp64[dest_comp + i] =
429 nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
430 }
431
432 /* Only the first store has a component offset */
433 component = 0;
434 dest_comp += num_comps;
435
436 if (use_high_dvec2_semantic) {
437 /* Increment the offset when we wrap around the dual slot. */
438 if (high_dvec2)
439 offset = nir_iadd_imm(b, offset, slot_size);
440 high_dvec2 = !high_dvec2;
441 } else {
442 offset = nir_iadd_imm(b, offset, slot_size);
443 }
444 }
445
446 return nir_vec(b, comp64, intrin->def.num_components);
447 } else if (intrin->def.bit_size == 1) {
448 /* Booleans are 32-bit */
449 assert(glsl_type_is_boolean(type));
450 return nir_b2b1(&state->builder,
451 emit_load(state, array_index, var, offset, component,
452 intrin->def.num_components, 32,
453 nir_type_bool32, false));
454 } else {
455 return emit_load(state, array_index, var, offset, component,
456 intrin->def.num_components,
457 intrin->def.bit_size,
458 nir_get_nir_type_for_glsl_type(type), false);
459 }
460 }
461
462 static void
emit_store(struct lower_io_state * state,nir_def * data,nir_def * array_index,nir_variable * var,nir_def * offset,unsigned component,unsigned num_components,nir_component_mask_t write_mask,nir_alu_type src_type)463 emit_store(struct lower_io_state *state, nir_def *data,
464 nir_def *array_index, nir_variable *var, nir_def *offset,
465 unsigned component, unsigned num_components,
466 nir_component_mask_t write_mask, nir_alu_type src_type)
467 {
468 nir_builder *b = &state->builder;
469
470 assert(var->data.mode == nir_var_shader_out);
471 nir_intrinsic_op op =
472 !array_index ? nir_intrinsic_store_output : var->data.per_primitive ? nir_intrinsic_store_per_primitive_output
473 : nir_intrinsic_store_per_vertex_output;
474
475 nir_intrinsic_instr *store =
476 nir_intrinsic_instr_create(state->builder.shader, op);
477 store->num_components = num_components;
478 store->name = add_variable_name(state, var->name);
479
480 store->src[0] = nir_src_for_ssa(data);
481
482 const struct glsl_type *type = var->type;
483 if (array_index)
484 type = glsl_get_array_element(type);
485 unsigned var_size = state->type_size(type, var->data.bindless);
486 nir_intrinsic_set_base(store, var->data.driver_location);
487 nir_intrinsic_set_range(store, var_size);
488 nir_intrinsic_set_component(store, component);
489 nir_intrinsic_set_src_type(store, src_type);
490
491 nir_intrinsic_set_write_mask(store, write_mask);
492
493 if (nir_intrinsic_has_access(store))
494 nir_intrinsic_set_access(store, var->data.access);
495
496 if (array_index)
497 store->src[1] = nir_src_for_ssa(array_index);
498
499 store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
500
501 unsigned gs_streams = 0;
502 if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
503 if (var->data.stream & NIR_STREAM_PACKED) {
504 gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
505 } else {
506 assert(var->data.stream < 4);
507 gs_streams = 0;
508 for (unsigned i = 0; i < num_components; ++i)
509 gs_streams |= var->data.stream << (2 * i);
510 }
511 }
512
513 nir_io_semantics semantics = { 0 };
514 semantics.location = var->data.location;
515 semantics.num_slots = get_number_of_slots(state, var);
516 semantics.dual_source_blend_index = var->data.index;
517 semantics.gs_streams = gs_streams;
518 semantics.medium_precision = is_medium_precision(b->shader, var);
519 semantics.per_view = var->data.per_view;
520 semantics.invariant = var->data.invariant;
521
522 nir_intrinsic_set_io_semantics(store, semantics);
523
524 nir_builder_instr_insert(b, &store->instr);
525 }
526
527 static void
lower_store(nir_intrinsic_instr * intrin,struct lower_io_state * state,nir_def * array_index,nir_variable * var,nir_def * offset,unsigned component,const struct glsl_type * type)528 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
529 nir_def *array_index, nir_variable *var, nir_def *offset,
530 unsigned component, const struct glsl_type *type)
531 {
532 const bool lower_double = !glsl_type_is_integer(type) && state->options & nir_lower_io_lower_64bit_float_to_32;
533 if (intrin->src[1].ssa->bit_size == 64 &&
534 (lower_double || (state->options & (nir_lower_io_lower_64bit_to_32 |
535 nir_lower_io_lower_64bit_to_32_new)))) {
536 nir_builder *b = &state->builder;
537
538 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
539
540 assert(component == 0 || component == 2);
541 unsigned src_comp = 0;
542 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
543 while (src_comp < intrin->num_components) {
544 const unsigned num_comps =
545 MIN2(intrin->num_components - src_comp,
546 (4 - component) / 2);
547
548 if (write_mask & BITFIELD_MASK(num_comps)) {
549 nir_def *data =
550 nir_channels(b, intrin->src[1].ssa,
551 BITFIELD_RANGE(src_comp, num_comps));
552 nir_def *data32 = nir_bitcast_vector(b, data, 32);
553
554 uint32_t write_mask32 = 0;
555 for (unsigned i = 0; i < num_comps; i++) {
556 if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
557 write_mask32 |= 3 << (i * 2);
558 }
559
560 emit_store(state, data32, array_index, var, offset,
561 component, data32->num_components, write_mask32,
562 nir_type_uint32);
563 }
564
565 /* Only the first store has a component offset */
566 component = 0;
567 src_comp += num_comps;
568 write_mask >>= num_comps;
569 offset = nir_iadd_imm(b, offset, slot_size);
570 }
571 } else if (intrin->def.bit_size == 1) {
572 /* Booleans are 32-bit */
573 assert(glsl_type_is_boolean(type));
574 nir_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
575 emit_store(state, b32_val, array_index, var, offset,
576 component, intrin->num_components,
577 nir_intrinsic_write_mask(intrin),
578 nir_type_bool32);
579 } else {
580 emit_store(state, intrin->src[1].ssa, array_index, var, offset,
581 component, intrin->num_components,
582 nir_intrinsic_write_mask(intrin),
583 nir_get_nir_type_for_glsl_type(type));
584 }
585 }
586
587 static nir_def *
lower_interpolate_at(nir_intrinsic_instr * intrin,struct lower_io_state * state,nir_variable * var,nir_def * offset,unsigned component,const struct glsl_type * type)588 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
589 nir_variable *var, nir_def *offset, unsigned component,
590 const struct glsl_type *type)
591 {
592 nir_builder *b = &state->builder;
593 assert(var->data.mode == nir_var_shader_in);
594
595 /* Ignore interpolateAt() for flat variables - flat is flat. Lower
596 * interpolateAtVertex() for explicit variables.
597 */
598 if (var->data.interpolation == INTERP_MODE_FLAT ||
599 var->data.interpolation == INTERP_MODE_EXPLICIT) {
600 nir_def *vertex_index = NULL;
601
602 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
603 assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
604 vertex_index = intrin->src[1].ssa;
605 }
606
607 return lower_load(intrin, state, vertex_index, var, offset, component, type);
608 }
609
610 /* None of the supported APIs allow interpolation on 64-bit things */
611 assert(intrin->def.bit_size <= 32);
612
613 nir_intrinsic_op bary_op;
614 switch (intrin->intrinsic) {
615 case nir_intrinsic_interp_deref_at_centroid:
616 bary_op = nir_intrinsic_load_barycentric_centroid;
617 break;
618 case nir_intrinsic_interp_deref_at_sample:
619 bary_op = nir_intrinsic_load_barycentric_at_sample;
620 break;
621 case nir_intrinsic_interp_deref_at_offset:
622 bary_op = nir_intrinsic_load_barycentric_at_offset;
623 break;
624 default:
625 unreachable("Bogus interpolateAt() intrinsic.");
626 }
627
628 nir_intrinsic_instr *bary_setup =
629 nir_intrinsic_instr_create(state->builder.shader, bary_op);
630
631 nir_def_init(&bary_setup->instr, &bary_setup->def, 2, 32);
632 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
633
634 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
635 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
636 intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
637 bary_setup->src[0] = nir_src_for_ssa(intrin->src[1].ssa);
638
639 nir_builder_instr_insert(b, &bary_setup->instr);
640
641 nir_io_semantics semantics = { 0 };
642 semantics.location = var->data.location;
643 semantics.num_slots = get_number_of_slots(state, var);
644 semantics.medium_precision = is_medium_precision(b->shader, var);
645
646 nir_def *load =
647 nir_load_interpolated_input(&state->builder,
648 intrin->def.num_components,
649 intrin->def.bit_size,
650 &bary_setup->def,
651 offset,
652 .base = var->data.driver_location,
653 .component = component,
654 .io_semantics = semantics,
655 .dest_type = nir_type_float | intrin->def.bit_size);
656
657 return load;
658 }
659
660 static bool
nir_lower_io_block(nir_block * block,struct lower_io_state * state)661 nir_lower_io_block(nir_block *block,
662 struct lower_io_state *state)
663 {
664 nir_builder *b = &state->builder;
665 const nir_shader_compiler_options *options = b->shader->options;
666 bool progress = false;
667
668 nir_foreach_instr_safe(instr, block) {
669 if (instr->type != nir_instr_type_intrinsic)
670 continue;
671
672 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
673
674 switch (intrin->intrinsic) {
675 case nir_intrinsic_load_deref:
676 case nir_intrinsic_store_deref:
677 /* We can lower the io for this nir instrinsic */
678 break;
679 case nir_intrinsic_interp_deref_at_centroid:
680 case nir_intrinsic_interp_deref_at_sample:
681 case nir_intrinsic_interp_deref_at_offset:
682 case nir_intrinsic_interp_deref_at_vertex:
683 /* We can optionally lower these to load_interpolated_input */
684 if (options->use_interpolated_input_intrinsics ||
685 options->lower_interpolate_at)
686 break;
687 FALLTHROUGH;
688 default:
689 /* We can't lower the io for this nir instrinsic, so skip it */
690 continue;
691 }
692
693 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
694 if (!nir_deref_mode_is_one_of(deref, state->modes))
695 continue;
696
697 nir_variable *var = nir_deref_instr_get_variable(deref);
698
699 b->cursor = nir_before_instr(instr);
700
701 const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
702
703 nir_def *offset;
704 nir_def *array_index = NULL;
705 unsigned component_offset = var->data.location_frac;
706 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
707 var->data.mode == nir_var_shader_out ||
708 var->data.bindless;
709
710 if (nir_deref_instr_is_known_out_of_bounds(deref)) {
711 /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
712 *
713 * In the subsections described above for array, vector, matrix and
714 * structure accesses, any out-of-bounds access produced undefined
715 * behavior....
716 * Out-of-bounds reads return undefined values, which
717 * include values from other variables of the active program or zero.
718 * Out-of-bounds writes may be discarded or overwrite
719 * other variables of the active program.
720 *
721 * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
722 * for reads.
723 *
724 * Otherwise get_io_offset would return out-of-bound offset which may
725 * result in out-of-bound loading/storing of inputs/outputs,
726 * that could cause issues in drivers down the line.
727 */
728 if (intrin->intrinsic != nir_intrinsic_store_deref) {
729 nir_def *zero =
730 nir_imm_zero(b, intrin->def.num_components,
731 intrin->def.bit_size);
732 nir_def_rewrite_uses(&intrin->def,
733 zero);
734 }
735
736 nir_instr_remove(&intrin->instr);
737 progress = true;
738 continue;
739 }
740
741 offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
742 state->type_size, &component_offset,
743 bindless_type_size);
744
745 nir_def *replacement = NULL;
746
747 switch (intrin->intrinsic) {
748 case nir_intrinsic_load_deref:
749 replacement = lower_load(intrin, state, array_index, var, offset,
750 component_offset, deref->type);
751 break;
752
753 case nir_intrinsic_store_deref:
754 lower_store(intrin, state, array_index, var, offset,
755 component_offset, deref->type);
756 break;
757
758 case nir_intrinsic_interp_deref_at_centroid:
759 case nir_intrinsic_interp_deref_at_sample:
760 case nir_intrinsic_interp_deref_at_offset:
761 case nir_intrinsic_interp_deref_at_vertex:
762 assert(array_index == NULL);
763 replacement = lower_interpolate_at(intrin, state, var, offset,
764 component_offset, deref->type);
765 break;
766
767 default:
768 continue;
769 }
770
771 if (replacement) {
772 nir_def_rewrite_uses(&intrin->def,
773 replacement);
774 }
775 nir_instr_remove(&intrin->instr);
776 progress = true;
777 }
778
779 return progress;
780 }
781
782 static bool
nir_lower_io_impl(nir_function_impl * impl,nir_variable_mode modes,int (* type_size)(const struct glsl_type *,bool),nir_lower_io_options options)783 nir_lower_io_impl(nir_function_impl *impl,
784 nir_variable_mode modes,
785 int (*type_size)(const struct glsl_type *, bool),
786 nir_lower_io_options options)
787 {
788 struct lower_io_state state;
789 bool progress = false;
790
791 state.builder = nir_builder_create(impl);
792 state.dead_ctx = ralloc_context(NULL);
793 state.modes = modes;
794 state.type_size = type_size;
795 state.options = options;
796 _mesa_set_init(&state.variable_names, state.dead_ctx,
797 _mesa_hash_string, _mesa_key_string_equal);
798
799 ASSERTED nir_variable_mode supported_modes =
800 nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
801 assert(!(modes & ~supported_modes));
802
803 nir_foreach_block(block, impl) {
804 progress |= nir_lower_io_block(block, &state);
805 }
806
807 ralloc_free(state.dead_ctx);
808
809 nir_metadata_preserve(impl, nir_metadata_none);
810
811 return progress;
812 }
813
814 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
815 *
816 * This pass is intended to be used for cross-stage shader I/O and driver-
817 * managed uniforms to turn deref-based access into a simpler model using
818 * locations or offsets. For fragment shader inputs, it can optionally turn
819 * load_deref into an explicit interpolation using barycentrics coming from
820 * one of the load_barycentric_* intrinsics. This pass requires that all
821 * deref chains are complete and contain no casts.
822 */
823 bool
nir_lower_io(nir_shader * shader,nir_variable_mode modes,int (* type_size)(const struct glsl_type *,bool),nir_lower_io_options options)824 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
825 int (*type_size)(const struct glsl_type *, bool),
826 nir_lower_io_options options)
827 {
828 bool progress = false;
829
830 nir_foreach_function_impl(impl, shader) {
831 progress |= nir_lower_io_impl(impl, modes, type_size, options);
832 }
833
834 return progress;
835 }
836
837 static unsigned
type_scalar_size_bytes(const struct glsl_type * type)838 type_scalar_size_bytes(const struct glsl_type *type)
839 {
840 assert(glsl_type_is_vector_or_scalar(type) ||
841 glsl_type_is_matrix(type));
842 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
843 }
844
845 nir_def *
nir_build_addr_iadd(nir_builder * b,nir_def * addr,nir_address_format addr_format,nir_variable_mode modes,nir_def * offset)846 nir_build_addr_iadd(nir_builder *b, nir_def *addr,
847 nir_address_format addr_format,
848 nir_variable_mode modes,
849 nir_def *offset)
850 {
851 assert(offset->num_components == 1);
852
853 switch (addr_format) {
854 case nir_address_format_32bit_global:
855 case nir_address_format_64bit_global:
856 case nir_address_format_32bit_offset:
857 assert(addr->bit_size == offset->bit_size);
858 assert(addr->num_components == 1);
859 return nir_iadd(b, addr, offset);
860
861 case nir_address_format_2x32bit_global: {
862 assert(addr->num_components == 2);
863 nir_def *lo = nir_channel(b, addr, 0);
864 nir_def *hi = nir_channel(b, addr, 1);
865 nir_def *res_lo = nir_iadd(b, lo, offset);
866 nir_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
867 nir_def *res_hi = nir_iadd(b, hi, carry);
868 return nir_vec2(b, res_lo, res_hi);
869 }
870
871 case nir_address_format_32bit_offset_as_64bit:
872 assert(addr->num_components == 1);
873 assert(offset->bit_size == 32);
874 return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
875
876 case nir_address_format_64bit_global_32bit_offset:
877 case nir_address_format_64bit_bounded_global:
878 assert(addr->num_components == 4);
879 assert(addr->bit_size == offset->bit_size);
880 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
881
882 case nir_address_format_32bit_index_offset:
883 assert(addr->num_components == 2);
884 assert(addr->bit_size == offset->bit_size);
885 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
886
887 case nir_address_format_32bit_index_offset_pack64:
888 assert(addr->num_components == 1);
889 assert(offset->bit_size == 32);
890 return nir_pack_64_2x32_split(b,
891 nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
892 nir_unpack_64_2x32_split_y(b, addr));
893
894 case nir_address_format_vec2_index_32bit_offset:
895 assert(addr->num_components == 3);
896 assert(offset->bit_size == 32);
897 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
898
899 case nir_address_format_62bit_generic:
900 assert(addr->num_components == 1);
901 assert(addr->bit_size == 64);
902 assert(offset->bit_size == 64);
903 if (!(modes & ~(nir_var_function_temp |
904 nir_var_shader_temp |
905 nir_var_mem_shared))) {
906 /* If we're sure it's one of these modes, we can do an easy 32-bit
907 * addition and don't need to bother with 64-bit math.
908 */
909 nir_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
910 nir_def *type = nir_unpack_64_2x32_split_y(b, addr);
911 addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
912 return nir_pack_64_2x32_split(b, addr32, type);
913 } else {
914 return nir_iadd(b, addr, offset);
915 }
916
917 case nir_address_format_logical:
918 unreachable("Unsupported address format");
919 }
920 unreachable("Invalid address format");
921 }
922
923 static unsigned
addr_get_offset_bit_size(nir_def * addr,nir_address_format addr_format)924 addr_get_offset_bit_size(nir_def *addr, nir_address_format addr_format)
925 {
926 if (addr_format == nir_address_format_32bit_offset_as_64bit ||
927 addr_format == nir_address_format_32bit_index_offset_pack64)
928 return 32;
929 return addr->bit_size;
930 }
931
932 nir_def *
nir_build_addr_iadd_imm(nir_builder * b,nir_def * addr,nir_address_format addr_format,nir_variable_mode modes,int64_t offset)933 nir_build_addr_iadd_imm(nir_builder *b, nir_def *addr,
934 nir_address_format addr_format,
935 nir_variable_mode modes,
936 int64_t offset)
937 {
938 if (!offset)
939 return addr;
940
941 return nir_build_addr_iadd(
942 b, addr, addr_format, modes,
943 nir_imm_intN_t(b, offset,
944 addr_get_offset_bit_size(addr, addr_format)));
945 }
946
947 static nir_def *
build_addr_for_var(nir_builder * b,nir_variable * var,nir_address_format addr_format)948 build_addr_for_var(nir_builder *b, nir_variable *var,
949 nir_address_format addr_format)
950 {
951 assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
952 nir_var_mem_task_payload |
953 nir_var_mem_global |
954 nir_var_shader_temp | nir_var_function_temp |
955 nir_var_mem_push_const | nir_var_mem_constant));
956
957 const unsigned num_comps = nir_address_format_num_components(addr_format);
958 const unsigned bit_size = nir_address_format_bit_size(addr_format);
959
960 switch (addr_format) {
961 case nir_address_format_2x32bit_global:
962 case nir_address_format_32bit_global:
963 case nir_address_format_64bit_global: {
964 nir_def *base_addr;
965 switch (var->data.mode) {
966 case nir_var_shader_temp:
967 base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
968 break;
969
970 case nir_var_function_temp:
971 base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
972 break;
973
974 case nir_var_mem_constant:
975 base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
976 break;
977
978 case nir_var_mem_shared:
979 base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
980 break;
981
982 case nir_var_mem_global:
983 base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
984 break;
985
986 default:
987 unreachable("Unsupported variable mode");
988 }
989
990 return nir_build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
991 var->data.driver_location);
992 }
993
994 case nir_address_format_32bit_offset:
995 assert(var->data.driver_location <= UINT32_MAX);
996 return nir_imm_int(b, var->data.driver_location);
997
998 case nir_address_format_32bit_offset_as_64bit:
999 assert(var->data.driver_location <= UINT32_MAX);
1000 return nir_imm_int64(b, var->data.driver_location);
1001
1002 case nir_address_format_62bit_generic:
1003 switch (var->data.mode) {
1004 case nir_var_shader_temp:
1005 case nir_var_function_temp:
1006 assert(var->data.driver_location <= UINT32_MAX);
1007 return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
1008
1009 case nir_var_mem_shared:
1010 assert(var->data.driver_location <= UINT32_MAX);
1011 return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
1012
1013 case nir_var_mem_global:
1014 return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
1015 var->data.driver_location);
1016
1017 default:
1018 unreachable("Unsupported variable mode");
1019 }
1020
1021 default:
1022 unreachable("Unsupported address format");
1023 }
1024 }
1025
1026 static nir_def *
build_runtime_addr_mode_check(nir_builder * b,nir_def * addr,nir_address_format addr_format,nir_variable_mode mode)1027 build_runtime_addr_mode_check(nir_builder *b, nir_def *addr,
1028 nir_address_format addr_format,
1029 nir_variable_mode mode)
1030 {
1031 /* The compile-time check failed; do a run-time check */
1032 switch (addr_format) {
1033 case nir_address_format_62bit_generic: {
1034 assert(addr->num_components == 1);
1035 assert(addr->bit_size == 64);
1036 nir_def *mode_enum = nir_ushr_imm(b, addr, 62);
1037 switch (mode) {
1038 case nir_var_function_temp:
1039 case nir_var_shader_temp:
1040 return nir_ieq_imm(b, mode_enum, 0x2);
1041
1042 case nir_var_mem_shared:
1043 return nir_ieq_imm(b, mode_enum, 0x1);
1044
1045 case nir_var_mem_global:
1046 return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
1047 nir_ieq_imm(b, mode_enum, 0x3));
1048
1049 default:
1050 unreachable("Invalid mode check intrinsic");
1051 }
1052 }
1053
1054 default:
1055 unreachable("Unsupported address mode");
1056 }
1057 }
1058
1059 unsigned
nir_address_format_bit_size(nir_address_format addr_format)1060 nir_address_format_bit_size(nir_address_format addr_format)
1061 {
1062 switch (addr_format) {
1063 case nir_address_format_32bit_global:
1064 return 32;
1065 case nir_address_format_2x32bit_global:
1066 return 32;
1067 case nir_address_format_64bit_global:
1068 return 64;
1069 case nir_address_format_64bit_global_32bit_offset:
1070 return 32;
1071 case nir_address_format_64bit_bounded_global:
1072 return 32;
1073 case nir_address_format_32bit_index_offset:
1074 return 32;
1075 case nir_address_format_32bit_index_offset_pack64:
1076 return 64;
1077 case nir_address_format_vec2_index_32bit_offset:
1078 return 32;
1079 case nir_address_format_62bit_generic:
1080 return 64;
1081 case nir_address_format_32bit_offset:
1082 return 32;
1083 case nir_address_format_32bit_offset_as_64bit:
1084 return 64;
1085 case nir_address_format_logical:
1086 return 32;
1087 }
1088 unreachable("Invalid address format");
1089 }
1090
1091 unsigned
nir_address_format_num_components(nir_address_format addr_format)1092 nir_address_format_num_components(nir_address_format addr_format)
1093 {
1094 switch (addr_format) {
1095 case nir_address_format_32bit_global:
1096 return 1;
1097 case nir_address_format_2x32bit_global:
1098 return 2;
1099 case nir_address_format_64bit_global:
1100 return 1;
1101 case nir_address_format_64bit_global_32bit_offset:
1102 return 4;
1103 case nir_address_format_64bit_bounded_global:
1104 return 4;
1105 case nir_address_format_32bit_index_offset:
1106 return 2;
1107 case nir_address_format_32bit_index_offset_pack64:
1108 return 1;
1109 case nir_address_format_vec2_index_32bit_offset:
1110 return 3;
1111 case nir_address_format_62bit_generic:
1112 return 1;
1113 case nir_address_format_32bit_offset:
1114 return 1;
1115 case nir_address_format_32bit_offset_as_64bit:
1116 return 1;
1117 case nir_address_format_logical:
1118 return 1;
1119 }
1120 unreachable("Invalid address format");
1121 }
1122
1123 static nir_def *
addr_to_index(nir_builder * b,nir_def * addr,nir_address_format addr_format)1124 addr_to_index(nir_builder *b, nir_def *addr,
1125 nir_address_format addr_format)
1126 {
1127 switch (addr_format) {
1128 case nir_address_format_32bit_index_offset:
1129 assert(addr->num_components == 2);
1130 return nir_channel(b, addr, 0);
1131 case nir_address_format_32bit_index_offset_pack64:
1132 return nir_unpack_64_2x32_split_y(b, addr);
1133 case nir_address_format_vec2_index_32bit_offset:
1134 assert(addr->num_components == 3);
1135 return nir_trim_vector(b, addr, 2);
1136 default:
1137 unreachable("Invalid address format");
1138 }
1139 }
1140
1141 static nir_def *
addr_to_offset(nir_builder * b,nir_def * addr,nir_address_format addr_format)1142 addr_to_offset(nir_builder *b, nir_def *addr,
1143 nir_address_format addr_format)
1144 {
1145 switch (addr_format) {
1146 case nir_address_format_32bit_index_offset:
1147 assert(addr->num_components == 2);
1148 return nir_channel(b, addr, 1);
1149 case nir_address_format_32bit_index_offset_pack64:
1150 return nir_unpack_64_2x32_split_x(b, addr);
1151 case nir_address_format_vec2_index_32bit_offset:
1152 assert(addr->num_components == 3);
1153 return nir_channel(b, addr, 2);
1154 case nir_address_format_32bit_offset:
1155 return addr;
1156 case nir_address_format_32bit_offset_as_64bit:
1157 case nir_address_format_62bit_generic:
1158 return nir_u2u32(b, addr);
1159 default:
1160 unreachable("Invalid address format");
1161 }
1162 }
1163
1164 /** Returns true if the given address format resolves to a global address */
1165 static bool
addr_format_is_global(nir_address_format addr_format,nir_variable_mode mode)1166 addr_format_is_global(nir_address_format addr_format,
1167 nir_variable_mode mode)
1168 {
1169 if (addr_format == nir_address_format_62bit_generic)
1170 return mode == nir_var_mem_global;
1171
1172 return addr_format == nir_address_format_32bit_global ||
1173 addr_format == nir_address_format_2x32bit_global ||
1174 addr_format == nir_address_format_64bit_global ||
1175 addr_format == nir_address_format_64bit_global_32bit_offset ||
1176 addr_format == nir_address_format_64bit_bounded_global;
1177 }
1178
1179 static bool
addr_format_is_offset(nir_address_format addr_format,nir_variable_mode mode)1180 addr_format_is_offset(nir_address_format addr_format,
1181 nir_variable_mode mode)
1182 {
1183 if (addr_format == nir_address_format_62bit_generic)
1184 return mode != nir_var_mem_global;
1185
1186 return addr_format == nir_address_format_32bit_offset ||
1187 addr_format == nir_address_format_32bit_offset_as_64bit;
1188 }
1189
1190 static nir_def *
addr_to_global(nir_builder * b,nir_def * addr,nir_address_format addr_format)1191 addr_to_global(nir_builder *b, nir_def *addr,
1192 nir_address_format addr_format)
1193 {
1194 switch (addr_format) {
1195 case nir_address_format_32bit_global:
1196 case nir_address_format_64bit_global:
1197 case nir_address_format_62bit_generic:
1198 assert(addr->num_components == 1);
1199 return addr;
1200
1201 case nir_address_format_2x32bit_global:
1202 assert(addr->num_components == 2);
1203 return addr;
1204
1205 case nir_address_format_64bit_global_32bit_offset:
1206 case nir_address_format_64bit_bounded_global:
1207 assert(addr->num_components == 4);
1208 return nir_iadd(b, nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)),
1209 nir_u2u64(b, nir_channel(b, addr, 3)));
1210
1211 case nir_address_format_32bit_index_offset:
1212 case nir_address_format_32bit_index_offset_pack64:
1213 case nir_address_format_vec2_index_32bit_offset:
1214 case nir_address_format_32bit_offset:
1215 case nir_address_format_32bit_offset_as_64bit:
1216 case nir_address_format_logical:
1217 unreachable("Cannot get a 64-bit address with this address format");
1218 }
1219
1220 unreachable("Invalid address format");
1221 }
1222
1223 static bool
addr_format_needs_bounds_check(nir_address_format addr_format)1224 addr_format_needs_bounds_check(nir_address_format addr_format)
1225 {
1226 return addr_format == nir_address_format_64bit_bounded_global;
1227 }
1228
1229 static nir_def *
addr_is_in_bounds(nir_builder * b,nir_def * addr,nir_address_format addr_format,unsigned size)1230 addr_is_in_bounds(nir_builder *b, nir_def *addr,
1231 nir_address_format addr_format, unsigned size)
1232 {
1233 assert(addr_format == nir_address_format_64bit_bounded_global);
1234 assert(addr->num_components == 4);
1235 assert(size > 0);
1236 return nir_ult(b, nir_iadd_imm(b, nir_channel(b, addr, 3), size - 1),
1237 nir_channel(b, addr, 2));
1238 }
1239
1240 static void
nir_get_explicit_deref_range(nir_deref_instr * deref,nir_address_format addr_format,uint32_t * out_base,uint32_t * out_range)1241 nir_get_explicit_deref_range(nir_deref_instr *deref,
1242 nir_address_format addr_format,
1243 uint32_t *out_base,
1244 uint32_t *out_range)
1245 {
1246 uint32_t base = 0;
1247 uint32_t range = glsl_get_explicit_size(deref->type, false);
1248
1249 while (true) {
1250 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1251
1252 switch (deref->deref_type) {
1253 case nir_deref_type_array:
1254 case nir_deref_type_array_wildcard:
1255 case nir_deref_type_ptr_as_array: {
1256 const unsigned stride = nir_deref_instr_array_stride(deref);
1257 if (stride == 0)
1258 goto fail;
1259
1260 if (!parent)
1261 goto fail;
1262
1263 if (deref->deref_type != nir_deref_type_array_wildcard &&
1264 nir_src_is_const(deref->arr.index)) {
1265 base += stride * nir_src_as_uint(deref->arr.index);
1266 } else {
1267 if (glsl_get_length(parent->type) == 0)
1268 goto fail;
1269 range += stride * (glsl_get_length(parent->type) - 1);
1270 }
1271 break;
1272 }
1273
1274 case nir_deref_type_struct: {
1275 if (!parent)
1276 goto fail;
1277
1278 base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1279 break;
1280 }
1281
1282 case nir_deref_type_cast: {
1283 nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1284
1285 switch (parent_instr->type) {
1286 case nir_instr_type_load_const: {
1287 nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1288
1289 switch (addr_format) {
1290 case nir_address_format_32bit_offset:
1291 base += load->value[1].u32;
1292 break;
1293 case nir_address_format_32bit_index_offset:
1294 base += load->value[1].u32;
1295 break;
1296 case nir_address_format_vec2_index_32bit_offset:
1297 base += load->value[2].u32;
1298 break;
1299 default:
1300 goto fail;
1301 }
1302
1303 *out_base = base;
1304 *out_range = range;
1305 return;
1306 }
1307
1308 case nir_instr_type_intrinsic: {
1309 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1310 switch (intr->intrinsic) {
1311 case nir_intrinsic_load_vulkan_descriptor:
1312 /* Assume that a load_vulkan_descriptor won't contribute to an
1313 * offset within the resource.
1314 */
1315 break;
1316 default:
1317 goto fail;
1318 }
1319
1320 *out_base = base;
1321 *out_range = range;
1322 return;
1323 }
1324
1325 default:
1326 goto fail;
1327 }
1328 }
1329
1330 default:
1331 goto fail;
1332 }
1333
1334 deref = parent;
1335 }
1336
1337 fail:
1338 *out_base = 0;
1339 *out_range = ~0;
1340 }
1341
1342 static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)1343 canonicalize_generic_modes(nir_variable_mode modes)
1344 {
1345 assert(modes != 0);
1346 if (util_bitcount(modes) == 1)
1347 return modes;
1348
1349 assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1350 nir_var_mem_shared | nir_var_mem_global)));
1351
1352 /* Canonicalize by converting shader_temp to function_temp */
1353 if (modes & nir_var_shader_temp) {
1354 modes &= ~nir_var_shader_temp;
1355 modes |= nir_var_function_temp;
1356 }
1357
1358 return modes;
1359 }
1360
1361 static nir_intrinsic_op
get_store_global_op_from_addr_format(nir_address_format addr_format)1362 get_store_global_op_from_addr_format(nir_address_format addr_format)
1363 {
1364 if (addr_format != nir_address_format_2x32bit_global)
1365 return nir_intrinsic_store_global;
1366 else
1367 return nir_intrinsic_store_global_2x32;
1368 }
1369
1370 static nir_intrinsic_op
get_load_global_op_from_addr_format(nir_address_format addr_format)1371 get_load_global_op_from_addr_format(nir_address_format addr_format)
1372 {
1373 if (addr_format != nir_address_format_2x32bit_global)
1374 return nir_intrinsic_load_global;
1375 else
1376 return nir_intrinsic_load_global_2x32;
1377 }
1378
1379 static nir_intrinsic_op
get_load_global_constant_op_from_addr_format(nir_address_format addr_format)1380 get_load_global_constant_op_from_addr_format(nir_address_format addr_format)
1381 {
1382 if (addr_format != nir_address_format_2x32bit_global)
1383 return nir_intrinsic_load_global_constant;
1384 else
1385 return nir_intrinsic_load_global_2x32; /* no dedicated op, fallback */
1386 }
1387
1388 static nir_def *
build_explicit_io_load(nir_builder * b,nir_intrinsic_instr * intrin,nir_def * addr,nir_address_format addr_format,nir_variable_mode modes,uint32_t align_mul,uint32_t align_offset,unsigned num_components)1389 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1390 nir_def *addr, nir_address_format addr_format,
1391 nir_variable_mode modes,
1392 uint32_t align_mul, uint32_t align_offset,
1393 unsigned num_components)
1394 {
1395 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1396 modes = canonicalize_generic_modes(modes);
1397
1398 if (util_bitcount(modes) > 1) {
1399 if (addr_format_is_global(addr_format, modes)) {
1400 return build_explicit_io_load(b, intrin, addr, addr_format,
1401 nir_var_mem_global,
1402 align_mul, align_offset,
1403 num_components);
1404 } else if (modes & nir_var_function_temp) {
1405 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1406 nir_var_function_temp));
1407 nir_def *res1 =
1408 build_explicit_io_load(b, intrin, addr, addr_format,
1409 nir_var_function_temp,
1410 align_mul, align_offset,
1411 num_components);
1412 nir_push_else(b, NULL);
1413 nir_def *res2 =
1414 build_explicit_io_load(b, intrin, addr, addr_format,
1415 modes & ~nir_var_function_temp,
1416 align_mul, align_offset,
1417 num_components);
1418 nir_pop_if(b, NULL);
1419 return nir_if_phi(b, res1, res2);
1420 } else {
1421 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1422 nir_var_mem_shared));
1423 assert(modes & nir_var_mem_shared);
1424 nir_def *res1 =
1425 build_explicit_io_load(b, intrin, addr, addr_format,
1426 nir_var_mem_shared,
1427 align_mul, align_offset,
1428 num_components);
1429 nir_push_else(b, NULL);
1430 assert(modes & nir_var_mem_global);
1431 nir_def *res2 =
1432 build_explicit_io_load(b, intrin, addr, addr_format,
1433 nir_var_mem_global,
1434 align_mul, align_offset,
1435 num_components);
1436 nir_pop_if(b, NULL);
1437 return nir_if_phi(b, res1, res2);
1438 }
1439 }
1440
1441 assert(util_bitcount(modes) == 1);
1442 const nir_variable_mode mode = modes;
1443
1444 nir_intrinsic_op op;
1445 switch (intrin->intrinsic) {
1446 case nir_intrinsic_load_deref:
1447 switch (mode) {
1448 case nir_var_mem_ubo:
1449 if (addr_format == nir_address_format_64bit_global_32bit_offset)
1450 op = nir_intrinsic_load_global_constant_offset;
1451 else if (addr_format == nir_address_format_64bit_bounded_global)
1452 op = nir_intrinsic_load_global_constant_bounded;
1453 else if (addr_format_is_global(addr_format, mode))
1454 op = nir_intrinsic_load_global_constant;
1455 else
1456 op = nir_intrinsic_load_ubo;
1457 break;
1458 case nir_var_mem_ssbo:
1459 if (addr_format_is_global(addr_format, mode))
1460 op = nir_intrinsic_load_global;
1461 else
1462 op = nir_intrinsic_load_ssbo;
1463 break;
1464 case nir_var_mem_global:
1465 assert(addr_format_is_global(addr_format, mode));
1466 op = get_load_global_op_from_addr_format(addr_format);
1467 break;
1468 case nir_var_uniform:
1469 assert(addr_format_is_offset(addr_format, mode));
1470 assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1471 op = nir_intrinsic_load_kernel_input;
1472 break;
1473 case nir_var_mem_shared:
1474 assert(addr_format_is_offset(addr_format, mode));
1475 op = nir_intrinsic_load_shared;
1476 break;
1477 case nir_var_mem_task_payload:
1478 assert(addr_format_is_offset(addr_format, mode));
1479 op = nir_intrinsic_load_task_payload;
1480 break;
1481 case nir_var_shader_temp:
1482 case nir_var_function_temp:
1483 if (addr_format_is_offset(addr_format, mode)) {
1484 op = nir_intrinsic_load_scratch;
1485 } else {
1486 assert(addr_format_is_global(addr_format, mode));
1487 op = get_load_global_op_from_addr_format(addr_format);
1488 }
1489 break;
1490 case nir_var_mem_push_const:
1491 assert(addr_format == nir_address_format_32bit_offset);
1492 op = nir_intrinsic_load_push_constant;
1493 break;
1494 case nir_var_mem_constant:
1495 if (addr_format_is_offset(addr_format, mode)) {
1496 op = nir_intrinsic_load_constant;
1497 } else {
1498 assert(addr_format_is_global(addr_format, mode));
1499 op = get_load_global_constant_op_from_addr_format(addr_format);
1500 }
1501 break;
1502 default:
1503 unreachable("Unsupported explicit IO variable mode");
1504 }
1505 break;
1506
1507 case nir_intrinsic_load_deref_block_intel:
1508 switch (mode) {
1509 case nir_var_mem_ssbo:
1510 if (addr_format_is_global(addr_format, mode))
1511 op = nir_intrinsic_load_global_block_intel;
1512 else
1513 op = nir_intrinsic_load_ssbo_block_intel;
1514 break;
1515 case nir_var_mem_global:
1516 op = nir_intrinsic_load_global_block_intel;
1517 break;
1518 case nir_var_mem_shared:
1519 op = nir_intrinsic_load_shared_block_intel;
1520 break;
1521 default:
1522 unreachable("Unsupported explicit IO variable mode");
1523 }
1524 break;
1525
1526 default:
1527 unreachable("Invalid intrinsic");
1528 }
1529
1530 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1531
1532 if (op == nir_intrinsic_load_global_constant_offset) {
1533 assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1534 load->src[0] = nir_src_for_ssa(
1535 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1536 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1537 } else if (op == nir_intrinsic_load_global_constant_bounded) {
1538 assert(addr_format == nir_address_format_64bit_bounded_global);
1539 load->src[0] = nir_src_for_ssa(
1540 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)));
1541 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1542 load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1543 } else if (addr_format_is_global(addr_format, mode)) {
1544 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1545 } else if (addr_format_is_offset(addr_format, mode)) {
1546 assert(addr->num_components == 1);
1547 load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1548 } else {
1549 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1550 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1551 }
1552
1553 if (nir_intrinsic_has_access(load))
1554 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1555
1556 if (op == nir_intrinsic_load_constant) {
1557 nir_intrinsic_set_base(load, 0);
1558 nir_intrinsic_set_range(load, b->shader->constant_data_size);
1559 } else if (op == nir_intrinsic_load_kernel_input) {
1560 nir_intrinsic_set_base(load, 0);
1561 nir_intrinsic_set_range(load, b->shader->num_uniforms);
1562 } else if (mode == nir_var_mem_push_const) {
1563 /* Push constants are required to be able to be chased back to the
1564 * variable so we can provide a base/range.
1565 */
1566 nir_variable *var = nir_deref_instr_get_variable(deref);
1567 nir_intrinsic_set_base(load, 0);
1568 nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1569 }
1570
1571 unsigned bit_size = intrin->def.bit_size;
1572 if (bit_size == 1) {
1573 /* TODO: Make the native bool bit_size an option. */
1574 bit_size = 32;
1575 }
1576
1577 if (nir_intrinsic_has_align(load))
1578 nir_intrinsic_set_align(load, align_mul, align_offset);
1579
1580 if (nir_intrinsic_has_range_base(load)) {
1581 unsigned base, range;
1582 nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1583 nir_intrinsic_set_range_base(load, base);
1584 nir_intrinsic_set_range(load, range);
1585 }
1586
1587 load->num_components = num_components;
1588 nir_def_init(&load->instr, &load->def, num_components, bit_size);
1589
1590 assert(bit_size % 8 == 0);
1591
1592 nir_def *result;
1593 if (addr_format_needs_bounds_check(addr_format) &&
1594 op != nir_intrinsic_load_global_constant_bounded) {
1595 /* We don't need to bounds-check global_constant_bounded because bounds
1596 * checking is handled by the intrinsic itself.
1597 *
1598 * The Vulkan spec for robustBufferAccess gives us quite a few options
1599 * as to what we can do with an OOB read. Unfortunately, returning
1600 * undefined values isn't one of them so we return an actual zero.
1601 */
1602 nir_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1603
1604 /* TODO: Better handle block_intel. */
1605 assert(load->num_components == 1);
1606 const unsigned load_size = bit_size / 8;
1607 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1608
1609 nir_builder_instr_insert(b, &load->instr);
1610
1611 nir_pop_if(b, NULL);
1612
1613 result = nir_if_phi(b, &load->def, zero);
1614 } else {
1615 nir_builder_instr_insert(b, &load->instr);
1616 result = &load->def;
1617 }
1618
1619 if (intrin->def.bit_size == 1) {
1620 /* For shared, we can go ahead and use NIR's and/or the back-end's
1621 * standard encoding for booleans rather than forcing a 0/1 boolean.
1622 * This should save an instruction or two.
1623 */
1624 if (mode == nir_var_mem_shared ||
1625 mode == nir_var_shader_temp ||
1626 mode == nir_var_function_temp)
1627 result = nir_b2b1(b, result);
1628 else
1629 result = nir_i2b(b, result);
1630 }
1631
1632 return result;
1633 }
1634
1635 static void
1636 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1637 nir_def *addr, nir_address_format addr_format,
1638 nir_variable_mode modes,
1639 uint32_t align_mul, uint32_t align_offset,
1640 nir_def *value, nir_component_mask_t write_mask)
1641 {
1642 modes = canonicalize_generic_modes(modes);
1643
1644 if (util_bitcount(modes) > 1) {
1645 if (addr_format_is_global(addr_format, modes)) {
1646 build_explicit_io_store(b, intrin, addr, addr_format,
1647 nir_var_mem_global,
1648 align_mul, align_offset,
1649 value, write_mask);
1650 } else if (modes & nir_var_function_temp) {
1651 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1652 nir_var_function_temp));
1653 build_explicit_io_store(b, intrin, addr, addr_format,
1654 nir_var_function_temp,
1655 align_mul, align_offset,
1656 value, write_mask);
1657 nir_push_else(b, NULL);
1658 build_explicit_io_store(b, intrin, addr, addr_format,
1659 modes & ~nir_var_function_temp,
1660 align_mul, align_offset,
1661 value, write_mask);
1662 nir_pop_if(b, NULL);
1663 } else {
1664 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1665 nir_var_mem_shared));
1666 assert(modes & nir_var_mem_shared);
1667 build_explicit_io_store(b, intrin, addr, addr_format,
1668 nir_var_mem_shared,
1669 align_mul, align_offset,
1670 value, write_mask);
1671 nir_push_else(b, NULL);
1672 assert(modes & nir_var_mem_global);
1673 build_explicit_io_store(b, intrin, addr, addr_format,
1674 nir_var_mem_global,
1675 align_mul, align_offset,
1676 value, write_mask);
1677 nir_pop_if(b, NULL);
1678 }
1679 return;
1680 }
1681
1682 assert(util_bitcount(modes) == 1);
1683 const nir_variable_mode mode = modes;
1684
1685 nir_intrinsic_op op;
1686 switch (intrin->intrinsic) {
1687 case nir_intrinsic_store_deref:
1688 assert(write_mask != 0);
1689
1690 switch (mode) {
1691 case nir_var_mem_ssbo:
1692 if (addr_format_is_global(addr_format, mode))
1693 op = get_store_global_op_from_addr_format(addr_format);
1694 else
1695 op = nir_intrinsic_store_ssbo;
1696 break;
1697 case nir_var_mem_global:
1698 assert(addr_format_is_global(addr_format, mode));
1699 op = get_store_global_op_from_addr_format(addr_format);
1700 break;
1701 case nir_var_mem_shared:
1702 assert(addr_format_is_offset(addr_format, mode));
1703 op = nir_intrinsic_store_shared;
1704 break;
1705 case nir_var_mem_task_payload:
1706 assert(addr_format_is_offset(addr_format, mode));
1707 op = nir_intrinsic_store_task_payload;
1708 break;
1709 case nir_var_shader_temp:
1710 case nir_var_function_temp:
1711 if (addr_format_is_offset(addr_format, mode)) {
1712 op = nir_intrinsic_store_scratch;
1713 } else {
1714 assert(addr_format_is_global(addr_format, mode));
1715 op = get_store_global_op_from_addr_format(addr_format);
1716 }
1717 break;
1718 default:
1719 unreachable("Unsupported explicit IO variable mode");
1720 }
1721 break;
1722
1723 case nir_intrinsic_store_deref_block_intel:
1724 assert(write_mask == 0);
1725
1726 switch (mode) {
1727 case nir_var_mem_ssbo:
1728 if (addr_format_is_global(addr_format, mode))
1729 op = nir_intrinsic_store_global_block_intel;
1730 else
1731 op = nir_intrinsic_store_ssbo_block_intel;
1732 break;
1733 case nir_var_mem_global:
1734 op = nir_intrinsic_store_global_block_intel;
1735 break;
1736 case nir_var_mem_shared:
1737 op = nir_intrinsic_store_shared_block_intel;
1738 break;
1739 default:
1740 unreachable("Unsupported explicit IO variable mode");
1741 }
1742 break;
1743
1744 default:
1745 unreachable("Invalid intrinsic");
1746 }
1747
1748 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1749
1750 if (value->bit_size == 1) {
1751 /* For shared, we can go ahead and use NIR's and/or the back-end's
1752 * standard encoding for booleans rather than forcing a 0/1 boolean.
1753 * This should save an instruction or two.
1754 *
1755 * TODO: Make the native bool bit_size an option.
1756 */
1757 if (mode == nir_var_mem_shared ||
1758 mode == nir_var_shader_temp ||
1759 mode == nir_var_function_temp)
1760 value = nir_b2b32(b, value);
1761 else
1762 value = nir_b2iN(b, value, 32);
1763 }
1764
1765 store->src[0] = nir_src_for_ssa(value);
1766 if (addr_format_is_global(addr_format, mode)) {
1767 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1768 } else if (addr_format_is_offset(addr_format, mode)) {
1769 assert(addr->num_components == 1);
1770 store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1771 } else {
1772 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1773 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1774 }
1775
1776 nir_intrinsic_set_write_mask(store, write_mask);
1777
1778 if (nir_intrinsic_has_access(store))
1779 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1780
1781 nir_intrinsic_set_align(store, align_mul, align_offset);
1782
1783 assert(value->num_components == 1 ||
1784 value->num_components == intrin->num_components);
1785 store->num_components = value->num_components;
1786
1787 assert(value->bit_size % 8 == 0);
1788
1789 if (addr_format_needs_bounds_check(addr_format)) {
1790 /* TODO: Better handle block_intel. */
1791 assert(store->num_components == 1);
1792 const unsigned store_size = value->bit_size / 8;
1793 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1794
1795 nir_builder_instr_insert(b, &store->instr);
1796
1797 nir_pop_if(b, NULL);
1798 } else {
1799 nir_builder_instr_insert(b, &store->instr);
1800 }
1801 }
1802
1803 static nir_def *
1804 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1805 nir_def *addr, nir_address_format addr_format,
1806 nir_variable_mode modes)
1807 {
1808 modes = canonicalize_generic_modes(modes);
1809
1810 if (util_bitcount(modes) > 1) {
1811 if (addr_format_is_global(addr_format, modes)) {
1812 return build_explicit_io_atomic(b, intrin, addr, addr_format,
1813 nir_var_mem_global);
1814 } else if (modes & nir_var_function_temp) {
1815 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1816 nir_var_function_temp));
1817 nir_def *res1 =
1818 build_explicit_io_atomic(b, intrin, addr, addr_format,
1819 nir_var_function_temp);
1820 nir_push_else(b, NULL);
1821 nir_def *res2 =
1822 build_explicit_io_atomic(b, intrin, addr, addr_format,
1823 modes & ~nir_var_function_temp);
1824 nir_pop_if(b, NULL);
1825 return nir_if_phi(b, res1, res2);
1826 } else {
1827 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1828 nir_var_mem_shared));
1829 assert(modes & nir_var_mem_shared);
1830 nir_def *res1 =
1831 build_explicit_io_atomic(b, intrin, addr, addr_format,
1832 nir_var_mem_shared);
1833 nir_push_else(b, NULL);
1834 assert(modes & nir_var_mem_global);
1835 nir_def *res2 =
1836 build_explicit_io_atomic(b, intrin, addr, addr_format,
1837 nir_var_mem_global);
1838 nir_pop_if(b, NULL);
1839 return nir_if_phi(b, res1, res2);
1840 }
1841 }
1842
1843 assert(util_bitcount(modes) == 1);
1844 const nir_variable_mode mode = modes;
1845
1846 const unsigned num_data_srcs =
1847 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1848
1849 nir_intrinsic_op op;
1850 switch (mode) {
1851 case nir_var_mem_ssbo:
1852 if (addr_format_is_global(addr_format, mode))
1853 op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1854 else
1855 op = ssbo_atomic_for_deref(intrin->intrinsic);
1856 break;
1857 case nir_var_mem_global:
1858 assert(addr_format_is_global(addr_format, mode));
1859 op = global_atomic_for_deref(addr_format, intrin->intrinsic);
1860 break;
1861 case nir_var_mem_shared:
1862 assert(addr_format_is_offset(addr_format, mode));
1863 op = shared_atomic_for_deref(intrin->intrinsic);
1864 break;
1865 case nir_var_mem_task_payload:
1866 assert(addr_format_is_offset(addr_format, mode));
1867 op = task_payload_atomic_for_deref(intrin->intrinsic);
1868 break;
1869 default:
1870 unreachable("Unsupported explicit IO variable mode");
1871 }
1872
1873 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1874 nir_intrinsic_set_atomic_op(atomic, nir_intrinsic_atomic_op(intrin));
1875
1876 unsigned src = 0;
1877 if (addr_format_is_global(addr_format, mode)) {
1878 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1879 } else if (addr_format_is_offset(addr_format, mode)) {
1880 assert(addr->num_components == 1);
1881 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1882 } else {
1883 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1884 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1885 }
1886 for (unsigned i = 0; i < num_data_srcs; i++) {
1887 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1888 }
1889
1890 /* Global atomics don't have access flags because they assume that the
1891 * address may be non-uniform.
1892 */
1893 if (nir_intrinsic_has_access(atomic))
1894 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1895
1896 assert(intrin->def.num_components == 1);
1897 nir_def_init(&atomic->instr, &atomic->def, 1,
1898 intrin->def.bit_size);
1899
1900 assert(atomic->def.bit_size % 8 == 0);
1901
1902 if (addr_format_needs_bounds_check(addr_format)) {
1903 const unsigned atomic_size = atomic->def.bit_size / 8;
1904 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1905
1906 nir_builder_instr_insert(b, &atomic->instr);
1907
1908 nir_pop_if(b, NULL);
1909 return nir_if_phi(b, &atomic->def,
1910 nir_undef(b, 1, atomic->def.bit_size));
1911 } else {
1912 nir_builder_instr_insert(b, &atomic->instr);
1913 return &atomic->def;
1914 }
1915 }
1916
1917 nir_def *
1918 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1919 nir_def *base_addr,
1920 nir_address_format addr_format)
1921 {
1922 switch (deref->deref_type) {
1923 case nir_deref_type_var:
1924 return build_addr_for_var(b, deref->var, addr_format);
1925
1926 case nir_deref_type_ptr_as_array:
1927 case nir_deref_type_array: {
1928 unsigned stride = nir_deref_instr_array_stride(deref);
1929 assert(stride > 0);
1930
1931 unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format);
1932 nir_def *index = deref->arr.index.ssa;
1933 nir_def *offset;
1934
1935 /* If the access chain has been declared in-bounds, then we know it doesn't
1936 * overflow the type. For nir_deref_type_array, this implies it cannot be
1937 * negative. Also, since types in NIR have a maximum 32-bit size, we know the
1938 * final result will fit in a 32-bit value so we can convert the index to
1939 * 32-bit before multiplying and save ourselves from a 64-bit multiply.
1940 */
1941 if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) {
1942 index = nir_u2u32(b, index);
1943 offset = nir_u2uN(b, nir_amul_imm(b, index, stride), offset_bit_size);
1944 } else {
1945 index = nir_i2iN(b, index, offset_bit_size);
1946 offset = nir_amul_imm(b, index, stride);
1947 }
1948
1949 return nir_build_addr_iadd(b, base_addr, addr_format,
1950 deref->modes, offset);
1951 }
1952
1953 case nir_deref_type_array_wildcard:
1954 unreachable("Wildcards should be lowered by now");
1955 break;
1956
1957 case nir_deref_type_struct: {
1958 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1959 int offset = glsl_get_struct_field_offset(parent->type,
1960 deref->strct.index);
1961 assert(offset >= 0);
1962 return nir_build_addr_iadd_imm(b, base_addr, addr_format,
1963 deref->modes, offset);
1964 }
1965
1966 case nir_deref_type_cast:
1967 /* Nothing to do here */
1968 return base_addr;
1969 }
1970
1971 unreachable("Invalid NIR deref type");
1972 }
1973
1974 void
1975 nir_lower_explicit_io_instr(nir_builder *b,
1976 nir_intrinsic_instr *intrin,
1977 nir_def *addr,
1978 nir_address_format addr_format)
1979 {
1980 b->cursor = nir_after_instr(&intrin->instr);
1981
1982 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1983 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1984 unsigned scalar_size = type_scalar_size_bytes(deref->type);
1985 if (vec_stride == 0) {
1986 vec_stride = scalar_size;
1987 } else {
1988 assert(glsl_type_is_vector(deref->type));
1989 assert(vec_stride >= scalar_size);
1990 }
1991
1992 uint32_t align_mul, align_offset;
1993 if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1994 /* If we don't have an alignment from the deref, assume scalar */
1995 align_mul = scalar_size;
1996 align_offset = 0;
1997 }
1998
1999 /* In order for bounds checking to be correct as per the Vulkan spec,
2000 * we need to check at the individual component granularity. Prior to
2001 * robustness2, we're technically allowed to be sloppy by 16B. Even with
2002 * robustness2, UBO loads are allowed to have a granularity as high as 256B
2003 * depending on hardware limits. However, we have none of that information
2004 * here. Short of adding new address formats, the easiest way to do that
2005 * is to just split any loads and stores into individual components here.
2006 *
2007 * TODO: At some point in the future we may want to add more ops similar to
2008 * nir_intrinsic_load_global_constant_bounded and make bounds checking the
2009 * back-end's problem. Another option would be to somehow plumb more of
2010 * that information through to nir_lower_explicit_io. For now, however,
2011 * scalarizing is at least correct.
2012 */
2013 bool scalarize = vec_stride > scalar_size ||
2014 addr_format_needs_bounds_check(addr_format);
2015
2016 switch (intrin->intrinsic) {
2017 case nir_intrinsic_load_deref: {
2018 nir_def *value;
2019 if (scalarize) {
2020 nir_def *comps[NIR_MAX_VEC_COMPONENTS] = {
2021 NULL,
2022 };
2023 for (unsigned i = 0; i < intrin->num_components; i++) {
2024 unsigned comp_offset = i * vec_stride;
2025 nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2026 deref->modes,
2027 comp_offset);
2028 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
2029 addr_format, deref->modes,
2030 align_mul,
2031 (align_offset + comp_offset) %
2032 align_mul,
2033 1);
2034 }
2035 value = nir_vec(b, comps, intrin->num_components);
2036 } else {
2037 value = build_explicit_io_load(b, intrin, addr, addr_format,
2038 deref->modes, align_mul, align_offset,
2039 intrin->num_components);
2040 }
2041 nir_def_rewrite_uses(&intrin->def, value);
2042 break;
2043 }
2044
2045 case nir_intrinsic_store_deref: {
2046 nir_def *value = intrin->src[1].ssa;
2047 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
2048 if (scalarize) {
2049 for (unsigned i = 0; i < intrin->num_components; i++) {
2050 if (!(write_mask & (1 << i)))
2051 continue;
2052
2053 unsigned comp_offset = i * vec_stride;
2054 nir_def *comp_addr = nir_build_addr_iadd_imm(b, addr, addr_format,
2055 deref->modes,
2056 comp_offset);
2057 build_explicit_io_store(b, intrin, comp_addr, addr_format,
2058 deref->modes, align_mul,
2059 (align_offset + comp_offset) % align_mul,
2060 nir_channel(b, value, i), 1);
2061 }
2062 } else {
2063 build_explicit_io_store(b, intrin, addr, addr_format,
2064 deref->modes, align_mul, align_offset,
2065 value, write_mask);
2066 }
2067 break;
2068 }
2069
2070 case nir_intrinsic_load_deref_block_intel: {
2071 nir_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
2072 deref->modes,
2073 align_mul, align_offset,
2074 intrin->num_components);
2075 nir_def_rewrite_uses(&intrin->def, value);
2076 break;
2077 }
2078
2079 case nir_intrinsic_store_deref_block_intel: {
2080 nir_def *value = intrin->src[1].ssa;
2081 const nir_component_mask_t write_mask = 0;
2082 build_explicit_io_store(b, intrin, addr, addr_format,
2083 deref->modes, align_mul, align_offset,
2084 value, write_mask);
2085 break;
2086 }
2087
2088 default: {
2089 nir_def *value =
2090 build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
2091 nir_def_rewrite_uses(&intrin->def, value);
2092 break;
2093 }
2094 }
2095
2096 nir_instr_remove(&intrin->instr);
2097 }
2098
2099 bool
2100 nir_get_explicit_deref_align(nir_deref_instr *deref,
2101 bool default_to_type_align,
2102 uint32_t *align_mul,
2103 uint32_t *align_offset)
2104 {
2105 if (deref->deref_type == nir_deref_type_var) {
2106 /* If we see a variable, align_mul is effectively infinite because we
2107 * know the offset exactly (up to the offset of the base pointer for the
2108 * given variable mode). We have to pick something, so we choose 256B
2109 * as an arbitrary alignment that seems high enough for any reasonable
2110 * wide-load use-case. Back-ends should clamp alignments down if 256B
2111 * is too large for some reason.
2112 */
2113 *align_mul = 256;
2114 *align_offset = deref->var->data.driver_location % 256;
2115 return true;
2116 }
2117
2118 /* If we're a cast deref that has an alignment, use that. */
2119 if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
2120 *align_mul = deref->cast.align_mul;
2121 *align_offset = deref->cast.align_offset;
2122 return true;
2123 }
2124
2125 /* Otherwise, we need to compute the alignment based on the parent */
2126 nir_deref_instr *parent = nir_deref_instr_parent(deref);
2127 if (parent == NULL) {
2128 assert(deref->deref_type == nir_deref_type_cast);
2129 if (default_to_type_align) {
2130 /* If we don't have a parent, assume the type's alignment, if any. */
2131 unsigned type_align = glsl_get_explicit_alignment(deref->type);
2132 if (type_align == 0)
2133 return false;
2134
2135 *align_mul = type_align;
2136 *align_offset = 0;
2137 return true;
2138 } else {
2139 return false;
2140 }
2141 }
2142
2143 uint32_t parent_mul, parent_offset;
2144 if (!nir_get_explicit_deref_align(parent, default_to_type_align,
2145 &parent_mul, &parent_offset))
2146 return false;
2147
2148 switch (deref->deref_type) {
2149 case nir_deref_type_var:
2150 unreachable("Handled above");
2151
2152 case nir_deref_type_array:
2153 case nir_deref_type_array_wildcard:
2154 case nir_deref_type_ptr_as_array: {
2155 const unsigned stride = nir_deref_instr_array_stride(deref);
2156 if (stride == 0)
2157 return false;
2158
2159 if (deref->deref_type != nir_deref_type_array_wildcard &&
2160 nir_src_is_const(deref->arr.index)) {
2161 unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2162 *align_mul = parent_mul;
2163 *align_offset = (parent_offset + offset) % parent_mul;
2164 } else {
2165 /* If this is a wildcard or an indirect deref, we have to go with the
2166 * power-of-two gcd.
2167 */
2168 *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2169 *align_offset = parent_offset % *align_mul;
2170 }
2171 return true;
2172 }
2173
2174 case nir_deref_type_struct: {
2175 const int offset = glsl_get_struct_field_offset(parent->type,
2176 deref->strct.index);
2177 if (offset < 0)
2178 return false;
2179
2180 *align_mul = parent_mul;
2181 *align_offset = (parent_offset + offset) % parent_mul;
2182 return true;
2183 }
2184
2185 case nir_deref_type_cast:
2186 /* We handled the explicit alignment case above. */
2187 assert(deref->cast.align_mul == 0);
2188 *align_mul = parent_mul;
2189 *align_offset = parent_offset;
2190 return true;
2191 }
2192
2193 unreachable("Invalid deref_instr_type");
2194 }
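
/* Callers typically feed the resulting (align_mul, align_offset) pair
 * straight into nir_intrinsic_set_align().  A back-end that wants a single
 * byte alignment can collapse the pair; a minimal sketch, assuming the
 * standard nir_combined_align() helper:
 *
 *    uint32_t mul, off;
 *    uint32_t byte_align = 1;
 *    if (nir_get_explicit_deref_align(deref, false, &mul, &off))
 *       byte_align = nir_combined_align(mul, off);
 */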
2195
2196 static void
2197 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2198 nir_address_format addr_format)
2199 {
2200 /* Ignore samplers/textures, because they are handled by other passes like `nir_lower_samplers`.
2201 * Only do this for uniform-mode derefs, otherwise it would break GL bindless texture handles
2202 * stored in UBOs.
2203 */
2204 if (nir_deref_mode_is_in_set(deref, nir_var_uniform) &&
2205 (glsl_type_is_sampler(deref->type) ||
2206 glsl_type_is_texture(deref->type)))
2207 return;
2208
2209 /* Just delete the deref if it's not used. We can't use
2210 * nir_deref_instr_remove_if_unused here because it may remove more than
2211 * one deref which could break our list walking since we walk the list
2212 * backwards.
2213 */
2214 if (nir_def_is_unused(&deref->def)) {
2215 nir_instr_remove(&deref->instr);
2216 return;
2217 }
2218
2219 b->cursor = nir_after_instr(&deref->instr);
2220
2221 nir_def *base_addr = NULL;
2222 if (deref->deref_type != nir_deref_type_var) {
2223 base_addr = deref->parent.ssa;
2224 }
2225
2226 nir_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2227 addr_format);
2228 assert(addr->bit_size == deref->def.bit_size);
2229 assert(addr->num_components == deref->def.num_components);
2230
2231 nir_instr_remove(&deref->instr);
2232 nir_def_rewrite_uses(&deref->def, addr);
2233 }
2234
2235 static void
2236 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2237 nir_address_format addr_format)
2238 {
2239 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2240 }
2241
2242 static void
2243 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2244 nir_address_format addr_format)
2245 {
2246 b->cursor = nir_after_instr(&intrin->instr);
2247
2248 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2249
2250 assert(glsl_type_is_array(deref->type));
2251 assert(glsl_get_length(deref->type) == 0);
2252 assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2253 unsigned stride = glsl_get_explicit_stride(deref->type);
2254 assert(stride > 0);
2255
2256 nir_def *addr = &deref->def;
2257
2258 nir_def *offset, *size;
2259 switch (addr_format) {
2260 case nir_address_format_64bit_global_32bit_offset:
2261 case nir_address_format_64bit_bounded_global:
2262 offset = nir_channel(b, addr, 3);
2263 size = nir_channel(b, addr, 2);
2264 break;
2265
2266 case nir_address_format_32bit_index_offset:
2267 case nir_address_format_32bit_index_offset_pack64:
2268 case nir_address_format_vec2_index_32bit_offset: {
2269 offset = addr_to_offset(b, addr, addr_format);
2270 nir_def *index = addr_to_index(b, addr, addr_format);
2271 unsigned access = nir_intrinsic_access(intrin);
2272 size = nir_get_ssbo_size(b, index, .access = access);
2273 break;
2274 }
2275
2276 default:
2277 unreachable("Cannot determine SSBO size");
2278 }
2279
2280 nir_def *remaining = nir_usub_sat(b, size, offset);
2281 nir_def *arr_size = nir_udiv_imm(b, remaining, stride);
2282
2283 nir_def_replace(&intrin->def, arr_size);
2284 }
2285
2286 static void
2287 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2288 nir_address_format addr_format)
2289 {
2290 if (addr_format_is_global(addr_format, 0)) {
2291 /* If the address format is always global, then the driver can use
2292 * global addresses regardless of the mode. In that case, don't create
2293 * a check, just whack the intrinsic to addr_mode_is and delegate to the
2294 * driver lowering.
2295 */
2296 intrin->intrinsic = nir_intrinsic_addr_mode_is;
2297 return;
2298 }
2299
2300 nir_def *addr = intrin->src[0].ssa;
2301
2302 b->cursor = nir_instr_remove(&intrin->instr);
2303
2304 nir_def *is_mode =
2305 build_runtime_addr_mode_check(b, addr, addr_format,
2306 nir_intrinsic_memory_modes(intrin));
2307
2308 nir_def_rewrite_uses(&intrin->def, is_mode);
2309 }
2310
2311 static bool
2312 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2313 nir_address_format addr_format)
2314 {
2315 bool progress = false;
2316
2317 nir_builder b = nir_builder_create(impl);
2318
2319 /* Walk in reverse order so that we can see the full deref chain when we
2320 * lower the access operations. We lower them assuming that the derefs
2321 * will be turned into address calculations later.
2322 */
2323 nir_foreach_block_reverse(block, impl) {
2324 nir_foreach_instr_reverse_safe(instr, block) {
2325 switch (instr->type) {
2326 case nir_instr_type_deref: {
2327 nir_deref_instr *deref = nir_instr_as_deref(instr);
2328 if (nir_deref_mode_is_in_set(deref, modes)) {
2329 lower_explicit_io_deref(&b, deref, addr_format);
2330 progress = true;
2331 }
2332 break;
2333 }
2334
2335 case nir_instr_type_intrinsic: {
2336 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2337 switch (intrin->intrinsic) {
2338 case nir_intrinsic_load_deref:
2339 case nir_intrinsic_store_deref:
2340 case nir_intrinsic_load_deref_block_intel:
2341 case nir_intrinsic_store_deref_block_intel:
2342 case nir_intrinsic_deref_atomic:
2343 case nir_intrinsic_deref_atomic_swap: {
2344 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2345 if (nir_deref_mode_is_in_set(deref, modes)) {
2346 lower_explicit_io_access(&b, intrin, addr_format);
2347 progress = true;
2348 }
2349 break;
2350 }
2351
2352 case nir_intrinsic_deref_buffer_array_length: {
2353 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2354 if (nir_deref_mode_is_in_set(deref, modes)) {
2355 lower_explicit_io_array_length(&b, intrin, addr_format);
2356 progress = true;
2357 }
2358 break;
2359 }
2360
2361 case nir_intrinsic_deref_mode_is: {
2362 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2363 if (nir_deref_mode_is_in_set(deref, modes)) {
2364 lower_explicit_io_mode_check(&b, intrin, addr_format);
2365 progress = true;
2366 }
2367 break;
2368 }
2369
2370 case nir_intrinsic_launch_mesh_workgroups_with_payload_deref: {
2371 if (modes & nir_var_mem_task_payload) {
2372 /* Get address and size of the payload variable. */
2373 nir_deref_instr *deref = nir_src_as_deref(intrin->src[1]);
2374 assert(deref->deref_type == nir_deref_type_var);
2375 unsigned base = deref->var->data.explicit_location;
2376 unsigned size = glsl_get_explicit_size(deref->var->type, false);
2377
2378 /* Replace the current instruction with the explicit intrinsic. */
2379 nir_def *dispatch_3d = intrin->src[0].ssa;
2380 b.cursor = nir_instr_remove(instr);
2381 nir_launch_mesh_workgroups(&b, dispatch_3d, .base = base, .range = size);
2382 progress = true;
2383 }
2384
2385 break;
2386 }
2387
2388 default:
2389 break;
2390 }
2391 break;
2392 }
2393
2394 default:
2395 /* Nothing to do */
2396 break;
2397 }
2398 }
2399 }
2400
2401 if (progress) {
2402 nir_metadata_preserve(impl, nir_metadata_none);
2403 } else {
2404 nir_metadata_preserve(impl, nir_metadata_all);
2405 }
2406
2407 return progress;
2408 }
2409
2410 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2411 *
2412 * This pass is intended to be used for any I/O which touches memory external
2413 * to the shader or which is directly visible to the client. It requires that
2414 * all data types in the given modes have explicit stride/offset decorations
2415 * to tell it exactly how to calculate the offset/address for the given load,
2416 * store, or atomic operation. If the offset/stride information does not come
2417 * from the client explicitly (as with shared variables in GL or Vulkan),
2418 * nir_lower_vars_to_explicit_types() can be used to add them.
2419 *
2420 * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2421 * pointer chains which may contain cast derefs. It does so by walking the
2422 * deref chain backwards and simply replacing each deref, one at a time, with
2423 * the appropriate address calculation. The pass takes a nir_address_format
2424 * parameter which describes how the offset or address is to be represented
2425 * during calculations. By ensuring that the address is always in a
2426 * consistent format, pointers can safely be conjured from thin air by the
2427 * driver, stored to variables, passed through phis, etc.
2428 *
2429 * The one exception to the simple algorithm described above is for handling
2430 * row-major matrices, in which case we may look down one additional level of
2431 * the deref chain.
2432 *
2433 * This pass is also capable of handling OpenCL generic pointers. If the
2434 * address mode is global, it will lower any ambiguous (more than one mode)
2435 * access to global and pass through the deref_mode_is run-time checks as
2436 * addr_mode_is. This assumes the driver has somehow mapped shared and
2437 * scratch memory to the global address space. For other modes such as
2438 * 62bit_generic, there is an enum embedded in the address and we lower
2439 * ambiguous access to an if-ladder and deref_mode_is to a check against the
2440 * embedded enum. If nir_lower_explicit_io is called on any shader that
2441 * contains generic pointers, it must either be used on all of the generic
2442 * modes or none.
2443 */
2444 bool
2445 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2446 nir_address_format addr_format)
2447 {
2448 bool progress = false;
2449
2450 nir_foreach_function_impl(impl, shader) {
2451 if (impl && nir_lower_explicit_io_impl(impl, modes, addr_format))
2452 progress = true;
2453 }
2454
2455 return progress;
2456 }
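
/* As an illustration only (driver policy, not something this file dictates):
 * a Vulkan-style driver might lower descriptor-based UBO/SSBO access with a
 * bounded global format so that the bounds checks built above apply:
 *
 *    nir_lower_explicit_io(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
 *                          nir_address_format_64bit_bounded_global);
 *
 * Which modes and which nir_address_format to use is entirely up to the
 * driver; this is just a sketch of how the entry point is called.
 */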
2457
2458 static bool
2459 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2460 nir_variable_mode modes,
2461 glsl_type_size_align_func type_info)
2462 {
2463 bool progress = false;
2464
2465 nir_foreach_block(block, impl) {
2466 nir_foreach_instr(instr, block) {
2467 if (instr->type != nir_instr_type_deref)
2468 continue;
2469
2470 nir_deref_instr *deref = nir_instr_as_deref(instr);
2471 if (!nir_deref_mode_is_in_set(deref, modes))
2472 continue;
2473
2474 unsigned size, alignment;
2475 const struct glsl_type *new_type =
2476 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2477 if (new_type != deref->type) {
2478 progress = true;
2479 deref->type = new_type;
2480 }
2481 if (deref->deref_type == nir_deref_type_cast) {
2482 /* See also glsl_type::get_explicit_type_for_size_align() */
2483 unsigned new_stride = align(size, alignment);
2484 if (new_stride != deref->cast.ptr_stride) {
2485 deref->cast.ptr_stride = new_stride;
2486 progress = true;
2487 }
2488 }
2489 }
2490 }
2491
2492 if (progress) {
2493 nir_metadata_preserve(impl, nir_metadata_control_flow |
2494 nir_metadata_live_defs |
2495 nir_metadata_loop_analysis);
2496 } else {
2497 nir_metadata_preserve(impl, nir_metadata_all);
2498 }
2499
2500 return progress;
2501 }
2502
2503 static bool
2504 lower_vars_to_explicit(nir_shader *shader,
2505 struct exec_list *vars, nir_variable_mode mode,
2506 glsl_type_size_align_func type_info)
2507 {
2508 bool progress = false;
2509 unsigned offset;
2510 switch (mode) {
2511 case nir_var_uniform:
2512 assert(shader->info.stage == MESA_SHADER_KERNEL);
2513 offset = 0;
2514 break;
2515 case nir_var_function_temp:
2516 case nir_var_shader_temp:
2517 offset = shader->scratch_size;
2518 break;
2519 case nir_var_mem_shared:
2520 offset = shader->info.shared_size;
2521 break;
2522 case nir_var_mem_task_payload:
2523 offset = shader->info.task_payload_size;
2524 break;
2525 case nir_var_mem_node_payload:
2526 assert(!shader->info.cs.node_payloads_size);
2527 offset = 0;
2528 break;
2529 case nir_var_mem_global:
2530 offset = shader->global_mem_size;
2531 break;
2532 case nir_var_mem_constant:
2533 offset = shader->constant_data_size;
2534 break;
2535 case nir_var_shader_call_data:
2536 case nir_var_ray_hit_attrib:
2537 case nir_var_mem_node_payload_in:
2538 offset = 0;
2539 break;
2540 default:
2541 unreachable("Unsupported mode");
2542 }
2543 nir_foreach_variable_in_list(var, vars) {
2544 if (var->data.mode != mode)
2545 continue;
2546
2547 unsigned size, alignment;
2548 const struct glsl_type *explicit_type =
2549 glsl_get_explicit_type_for_size_align(var->type, type_info,
2550 &size, &alignment);
2551
2552 if (explicit_type != var->type)
2553 var->type = explicit_type;
2554
2555 UNUSED bool is_empty_struct =
2556 glsl_type_is_struct_or_ifc(explicit_type) &&
2557 glsl_get_length(explicit_type) == 0;
2558
2559 assert(util_is_power_of_two_nonzero(alignment) || is_empty_struct ||
2560 glsl_type_is_cmat(glsl_without_array(explicit_type)));
2561 assert(util_is_power_of_two_or_zero(var->data.alignment));
2562 alignment = MAX2(alignment, var->data.alignment);
2563
2564 var->data.driver_location = ALIGN_POT(offset, alignment);
2565 offset = var->data.driver_location + size;
2566 progress = true;
2567 }
2568
2569 switch (mode) {
2570 case nir_var_uniform:
2571 assert(shader->info.stage == MESA_SHADER_KERNEL);
2572 shader->num_uniforms = offset;
2573 break;
2574 case nir_var_shader_temp:
2575 case nir_var_function_temp:
2576 shader->scratch_size = offset;
2577 break;
2578 case nir_var_mem_shared:
2579 shader->info.shared_size = offset;
2580 break;
2581 case nir_var_mem_task_payload:
2582 shader->info.task_payload_size = offset;
2583 break;
2584 case nir_var_mem_node_payload:
2585 shader->info.cs.node_payloads_size = offset;
2586 break;
2587 case nir_var_mem_global:
2588 shader->global_mem_size = offset;
2589 break;
2590 case nir_var_mem_constant:
2591 shader->constant_data_size = offset;
2592 break;
2593 case nir_var_shader_call_data:
2594 case nir_var_ray_hit_attrib:
2595 case nir_var_mem_node_payload_in:
2596 break;
2597 default:
2598 unreachable("Unsupported mode");
2599 }
2600
2601 return progress;
2602 }
2603
2604 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2605 * generic pointers, it must either be used on all of the generic modes or
2606 * none.
2607 */
2608 bool
2609 nir_lower_vars_to_explicit_types(nir_shader *shader,
2610 nir_variable_mode modes,
2611 glsl_type_size_align_func type_info)
2612 {
2613 /* TODO: Situations which need to be handled to support more modes:
2614 * - row-major matrices
2615 * - compact shader inputs/outputs
2616 * - interface types
2617 */
2618 ASSERTED nir_variable_mode supported =
2619 nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2620 nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2621 nir_var_shader_call_data | nir_var_ray_hit_attrib |
2622 nir_var_mem_task_payload | nir_var_mem_node_payload |
2623 nir_var_mem_node_payload_in;
2624 assert(!(modes & ~supported) && "unsupported");
2625
2626 bool progress = false;
2627
2628 if (modes & nir_var_uniform)
2629 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2630 if (modes & nir_var_mem_global)
2631 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2632
2633 if (modes & nir_var_mem_shared) {
2634 assert(!shader->info.shared_memory_explicit_layout);
2635 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2636 }
2637
2638 if (modes & nir_var_shader_temp)
2639 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2640 if (modes & nir_var_mem_constant)
2641 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2642 if (modes & nir_var_shader_call_data)
2643 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2644 if (modes & nir_var_ray_hit_attrib)
2645 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2646 if (modes & nir_var_mem_task_payload)
2647 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2648 if (modes & nir_var_mem_node_payload)
2649 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload, type_info);
2650 if (modes & nir_var_mem_node_payload_in)
2651 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
2652
2653 nir_foreach_function_impl(impl, shader) {
2654 if (modes & nir_var_function_temp)
2655 progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
2656
2657 progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
2658 }
2659
2660 return progress;
2661 }
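
/* A common pairing with nir_lower_explicit_io(), shown only as an
 * illustrative sketch ("shared_type_info" stands in for whatever
 * glsl_type_size_align_func the driver uses): give shared and scratch
 * variables explicit layouts first, then lower their derefs to 32-bit
 * offsets:
 *
 *    nir_lower_vars_to_explicit_types(shader,
 *                                     nir_var_mem_shared | nir_var_function_temp,
 *                                     shared_type_info);
 *    nir_lower_explicit_io(shader,
 *                          nir_var_mem_shared | nir_var_function_temp,
 *                          nir_address_format_32bit_offset);
 */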
2662
2663 static void
2664 write_constant(void *dst, size_t dst_size,
2665 const nir_constant *c, const struct glsl_type *type)
2666 {
2667 if (c->is_null_constant) {
2668 memset(dst, 0, dst_size);
2669 return;
2670 }
2671
2672 if (glsl_type_is_vector_or_scalar(type)) {
2673 const unsigned num_components = glsl_get_vector_elements(type);
2674 const unsigned bit_size = glsl_get_bit_size(type);
2675 if (bit_size == 1) {
2676 /* Booleans are special-cased to be 32-bit
2677 *
2678 * TODO: Make the native bool bit_size an option.
2679 */
2680 assert(num_components * 4 <= dst_size);
2681 for (unsigned i = 0; i < num_components; i++) {
2682 int32_t b32 = -(int)c->values[i].b;
2683 memcpy((char *)dst + i * 4, &b32, 4);
2684 }
2685 } else {
2686 assert(bit_size >= 8 && bit_size % 8 == 0);
2687 const unsigned byte_size = bit_size / 8;
2688 assert(num_components * byte_size <= dst_size);
2689 for (unsigned i = 0; i < num_components; i++) {
2690 /* Annoyingly, thanks to packed structs, we can't make any
2691 * assumptions about the alignment of dst. To avoid any strange
2692 * issues with unaligned writes, we always use memcpy.
2693 */
2694 memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2695 }
2696 }
2697 } else if (glsl_type_is_array_or_matrix(type)) {
2698 const unsigned array_len = glsl_get_length(type);
2699 const unsigned stride = glsl_get_explicit_stride(type);
2700 assert(stride > 0);
2701 const struct glsl_type *elem_type = glsl_get_array_element(type);
2702 for (unsigned i = 0; i < array_len; i++) {
2703 unsigned elem_offset = i * stride;
2704 assert(elem_offset < dst_size);
2705 write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2706 c->elements[i], elem_type);
2707 }
2708 } else {
2709 assert(glsl_type_is_struct_or_ifc(type));
2710 const unsigned num_fields = glsl_get_length(type);
2711 for (unsigned i = 0; i < num_fields; i++) {
2712 const int field_offset = glsl_get_struct_field_offset(type, i);
2713 assert(field_offset >= 0 && field_offset < dst_size);
2714 const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2715 write_constant((char *)dst + field_offset, dst_size - field_offset,
2716 c->elements[i], field_type);
2717 }
2718 }
2719 }
2720
2721 void
2722 nir_gather_explicit_io_initializers(nir_shader *shader,
2723 void *dst, size_t dst_size,
2724 nir_variable_mode mode)
2725 {
2726 /* It doesn't really make sense to gather initializers for more than one
2727 * mode at a time. If this ever becomes well-defined, we can drop the
2728 * assert then.
2729 */
2730 assert(util_bitcount(mode) == 1);
2731
2732 nir_foreach_variable_with_modes(var, shader, mode) {
2733 assert(var->data.driver_location < dst_size);
2734 write_constant((char *)dst + var->data.driver_location,
2735 dst_size - var->data.driver_location,
2736 var->constant_initializer, var->type);
2737 }
2738 }
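
/* Sketch of the intended use for constant data (assuming the driver has
 * already run nir_lower_vars_to_explicit_types() on nir_var_mem_constant so
 * that constant_data_size and each variable's driver_location are set):
 *
 *    shader->constant_data = rzalloc_size(shader, shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, shader->constant_data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 */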
2739
2740 /**
2741 * Return the offset source number for a load/store intrinsic or -1 if there's no offset.
2742 */
2743 int
2744 nir_get_io_offset_src_number(const nir_intrinsic_instr *instr)
2745 {
2746 switch (instr->intrinsic) {
2747 case nir_intrinsic_load_input:
2748 case nir_intrinsic_load_per_primitive_input:
2749 case nir_intrinsic_load_output:
2750 case nir_intrinsic_load_shared:
2751 case nir_intrinsic_load_task_payload:
2752 case nir_intrinsic_load_uniform:
2753 case nir_intrinsic_load_push_constant:
2754 case nir_intrinsic_load_kernel_input:
2755 case nir_intrinsic_load_global:
2756 case nir_intrinsic_load_global_2x32:
2757 case nir_intrinsic_load_global_constant:
2758 case nir_intrinsic_load_global_etna:
2759 case nir_intrinsic_load_scratch:
2760 case nir_intrinsic_load_fs_input_interp_deltas:
2761 case nir_intrinsic_shared_atomic:
2762 case nir_intrinsic_shared_atomic_swap:
2763 case nir_intrinsic_task_payload_atomic:
2764 case nir_intrinsic_task_payload_atomic_swap:
2765 case nir_intrinsic_global_atomic:
2766 case nir_intrinsic_global_atomic_2x32:
2767 case nir_intrinsic_global_atomic_swap:
2768 case nir_intrinsic_global_atomic_swap_2x32:
2769 case nir_intrinsic_load_coefficients_agx:
2770 return 0;
2771 case nir_intrinsic_load_ubo:
2772 case nir_intrinsic_load_ssbo:
2773 case nir_intrinsic_load_input_vertex:
2774 case nir_intrinsic_load_per_vertex_input:
2775 case nir_intrinsic_load_per_vertex_output:
2776 case nir_intrinsic_load_per_primitive_output:
2777 case nir_intrinsic_load_interpolated_input:
2778 case nir_intrinsic_store_output:
2779 case nir_intrinsic_store_shared:
2780 case nir_intrinsic_store_task_payload:
2781 case nir_intrinsic_store_global:
2782 case nir_intrinsic_store_global_2x32:
2783 case nir_intrinsic_store_global_etna:
2784 case nir_intrinsic_store_scratch:
2785 case nir_intrinsic_ssbo_atomic:
2786 case nir_intrinsic_ssbo_atomic_swap:
2787 case nir_intrinsic_ldc_nv:
2788 case nir_intrinsic_ldcx_nv:
2789 return 1;
2790 case nir_intrinsic_store_ssbo:
2791 case nir_intrinsic_store_per_vertex_output:
2792 case nir_intrinsic_store_per_primitive_output:
2793 return 2;
2794 default:
2795 return -1;
2796 }
2797 }
2798
2799 /**
2800 * Return the offset source for a load/store intrinsic.
2801 */
2802 nir_src *
2803 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2804 {
2805 const int idx = nir_get_io_offset_src_number(instr);
2806 return idx >= 0 ? &instr->src[idx] : NULL;
2807 }
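
/* For example, a pass that wants to fold a constant offset into the base
 * index can do (essentially what add_const_offset_to_base_block() below
 * does):
 *
 *    nir_src *off = nir_get_io_offset_src(intrin);
 *    if (off != NULL && nir_src_is_const(*off))
 *       nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) +
 *                                      nir_src_as_uint(*off));
 */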
2808
2809 /**
2810 * Return the vertex index source number for a load/store per_vertex intrinsic or -1 if there's no vertex index.
2811 */
2812 int
2813 nir_get_io_arrayed_index_src_number(const nir_intrinsic_instr *instr)
2814 {
2815 switch (instr->intrinsic) {
2816 case nir_intrinsic_load_per_vertex_input:
2817 case nir_intrinsic_load_per_vertex_output:
2818 case nir_intrinsic_load_per_primitive_output:
2819 return 0;
2820 case nir_intrinsic_store_per_vertex_output:
2821 case nir_intrinsic_store_per_primitive_output:
2822 return 1;
2823 default:
2824 return -1;
2825 }
2826 }
2827
2828 /**
2829 * Return the vertex index source for a load/store per_vertex intrinsic.
2830 */
2831 nir_src *
2832 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2833 {
2834 const int idx = nir_get_io_arrayed_index_src_number(instr);
2835 return idx >= 0 ? &instr->src[idx] : NULL;
2836 }
2837
2838 /**
2839 * Return the numeric constant that identifies a NULL pointer for each address
2840 * format.
2841 */
2842 const nir_const_value *
2843 nir_address_format_null_value(nir_address_format addr_format)
2844 {
2845 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2846 [nir_address_format_32bit_global] = { { 0 } },
2847 [nir_address_format_2x32bit_global] = { { 0 } },
2848 [nir_address_format_64bit_global] = { { 0 } },
2849 [nir_address_format_64bit_global_32bit_offset] = { { 0 } },
2850 [nir_address_format_64bit_bounded_global] = { { 0 } },
2851 [nir_address_format_32bit_index_offset] = { { .u32 = ~0 }, { .u32 = ~0 } },
2852 [nir_address_format_32bit_index_offset_pack64] = { { .u64 = ~0ull } },
2853 [nir_address_format_vec2_index_32bit_offset] = { { .u32 = ~0 }, { .u32 = ~0 }, { .u32 = ~0 } },
2854 [nir_address_format_32bit_offset] = { { .u32 = ~0 } },
2855 [nir_address_format_32bit_offset_as_64bit] = { { .u64 = ~0ull } },
2856 [nir_address_format_62bit_generic] = { { .u64 = 0 } },
2857 [nir_address_format_logical] = { { .u32 = ~0 } },
2858 };
2859
2860 assert(addr_format < ARRAY_SIZE(null_values));
2861 return null_values[addr_format];
2862 }
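
/* A lowering pass can materialize the null pointer as an SSA value with the
 * generic immediate builder; a minimal sketch, assuming the usual
 * nir_address_format_num_components()/nir_address_format_bit_size() queries:
 *
 *    nir_def *null_ptr =
 *       nir_build_imm(b, nir_address_format_num_components(addr_format),
 *                     nir_address_format_bit_size(addr_format),
 *                     nir_address_format_null_value(addr_format));
 */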
2863
2864 nir_def *
2865 nir_build_addr_ieq(nir_builder *b, nir_def *addr0, nir_def *addr1,
2866 nir_address_format addr_format)
2867 {
2868 switch (addr_format) {
2869 case nir_address_format_32bit_global:
2870 case nir_address_format_2x32bit_global:
2871 case nir_address_format_64bit_global:
2872 case nir_address_format_64bit_bounded_global:
2873 case nir_address_format_32bit_index_offset:
2874 case nir_address_format_vec2_index_32bit_offset:
2875 case nir_address_format_32bit_offset:
2876 case nir_address_format_62bit_generic:
2877 return nir_ball_iequal(b, addr0, addr1);
2878
2879 case nir_address_format_64bit_global_32bit_offset:
2880 return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2881 nir_channels(b, addr1, 0xb));
2882
2883 case nir_address_format_32bit_offset_as_64bit:
2884 assert(addr0->num_components == 1 && addr1->num_components == 1);
2885 return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2886
2887 case nir_address_format_32bit_index_offset_pack64:
2888 assert(addr0->num_components == 1 && addr1->num_components == 1);
2889 return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2890
2891 case nir_address_format_logical:
2892 unreachable("Unsupported address format");
2893 }
2894
2895 unreachable("Invalid address format");
2896 }
2897
2898 nir_def *
2899 nir_build_addr_isub(nir_builder *b, nir_def *addr0, nir_def *addr1,
2900 nir_address_format addr_format)
2901 {
2902 switch (addr_format) {
2903 case nir_address_format_32bit_global:
2904 case nir_address_format_64bit_global:
2905 case nir_address_format_32bit_offset:
2906 case nir_address_format_32bit_index_offset_pack64:
2907 case nir_address_format_62bit_generic:
2908 assert(addr0->num_components == 1);
2909 assert(addr1->num_components == 1);
2910 return nir_isub(b, addr0, addr1);
2911
2912 case nir_address_format_2x32bit_global:
2913 return nir_isub(b, addr_to_global(b, addr0, addr_format),
2914 addr_to_global(b, addr1, addr_format));
2915
2916 case nir_address_format_32bit_offset_as_64bit:
2917 assert(addr0->num_components == 1);
2918 assert(addr1->num_components == 1);
2919 return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2920
2921 case nir_address_format_64bit_global_32bit_offset:
2922 case nir_address_format_64bit_bounded_global:
2923 return nir_isub(b, addr_to_global(b, addr0, addr_format),
2924 addr_to_global(b, addr1, addr_format));
2925
2926 case nir_address_format_32bit_index_offset:
2927 assert(addr0->num_components == 2);
2928 assert(addr1->num_components == 2);
2929 /* Assume the same buffer index. */
2930 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2931
2932 case nir_address_format_vec2_index_32bit_offset:
2933 assert(addr0->num_components == 3);
2934 assert(addr1->num_components == 3);
2935 /* Assume the same buffer index. */
2936 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2937
2938 case nir_address_format_logical:
2939 unreachable("Unsupported address format");
2940 }
2941
2942 unreachable("Invalid address format");
2943 }
2944
2945 static bool
2946 is_input(nir_intrinsic_instr *intrin)
2947 {
2948 return intrin->intrinsic == nir_intrinsic_load_input ||
2949 intrin->intrinsic == nir_intrinsic_load_per_primitive_input ||
2950 intrin->intrinsic == nir_intrinsic_load_input_vertex ||
2951 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2952 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2953 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2954 }
2955
2956 static bool
2957 is_output(nir_intrinsic_instr *intrin)
2958 {
2959 return intrin->intrinsic == nir_intrinsic_load_output ||
2960 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2961 intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2962 intrin->intrinsic == nir_intrinsic_store_output ||
2963 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2964 intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2965 }
2966
2967 static bool
2968 is_dual_slot(nir_intrinsic_instr *intrin)
2969 {
2970 if (intrin->intrinsic == nir_intrinsic_store_output ||
2971 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2972 intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2973 return nir_src_bit_size(intrin->src[0]) == 64 &&
2974 nir_src_num_components(intrin->src[0]) >= 3;
2975 }
2976
2977 return intrin->def.bit_size == 64 &&
2978 intrin->def.num_components >= 3;
2979 }
2980
2981 /**
2982 * This pass adds constant offsets to instr->const_index[0] for input/output
2983 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
2984 * unchanged - since we don't know what part of a compound variable is
2985 * accessed, we allocate storage for the entire thing. For drivers that use
2986 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2987 * the offset source will be 0, so that they don't have to add it in manually.
2988 */
2989
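/* For instance (illustrative numbers only): a load_input with base=4, a
 * constant offset source of 2, and io_semantics.location=VARYING_SLOT_VAR0
 * becomes a load_input with base=6, location=VARYING_SLOT_VAR2, and an
 * offset source of 0.
 */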
2990 static bool
2991 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2992 nir_variable_mode modes)
2993 {
2994 bool progress = false;
2995 nir_foreach_instr_safe(instr, block) {
2996 if (instr->type != nir_instr_type_intrinsic)
2997 continue;
2998
2999 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3000
3001 if (((modes & nir_var_shader_in) && is_input(intrin)) ||
3002 ((modes & nir_var_shader_out) && is_output(intrin))) {
3003 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3004
3005 /* NV_mesh_shader: ignore MS primitive indices. */
3006 if (b->shader->info.stage == MESA_SHADER_MESH &&
3007 sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
3008 !(b->shader->info.per_primitive_outputs &
3009 BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
3010 continue;
3011
3012 nir_src *offset = nir_get_io_offset_src(intrin);
3013
3014 /* TODO: Better handling of per-view variables here */
3015 if (nir_src_is_const(*offset) &&
3016 !nir_intrinsic_io_semantics(intrin).per_view) {
3017 unsigned off = nir_src_as_uint(*offset);
3018
3019 nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
3020
3021 sem.location += off;
3022 /* non-indirect indexing should reduce num_slots */
3023 sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
3024 nir_intrinsic_set_io_semantics(intrin, sem);
3025
3026 b->cursor = nir_before_instr(&intrin->instr);
3027 nir_src_rewrite(offset, nir_imm_int(b, 0));
3028 progress = true;
3029 }
3030 }
3031 }
3032
3033 return progress;
3034 }
3035
3036 bool
3037 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
3038 {
3039 bool progress = false;
3040
3041 nir_foreach_function_impl(impl, nir) {
3042 bool impl_progress = false;
3043 nir_builder b = nir_builder_create(impl);
3044 nir_foreach_block(block, impl) {
3045 impl_progress |= add_const_offset_to_base_block(block, &b, modes);
3046 }
3047 progress |= impl_progress;
3048 if (impl_progress)
3049 nir_metadata_preserve(impl, nir_metadata_control_flow);
3050 else
3051 nir_metadata_preserve(impl, nir_metadata_all);
3052 }
3053
3054 return progress;
3055 }
3056
3057 bool
3058 nir_lower_color_inputs(nir_shader *nir)
3059 {
3060 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3061 bool progress = false;
3062
3063 nir_builder b = nir_builder_create(impl);
3064
3065 nir_foreach_block(block, impl) {
3066 nir_foreach_instr_safe(instr, block) {
3067 if (instr->type != nir_instr_type_intrinsic)
3068 continue;
3069
3070 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3071
3072 if (intrin->intrinsic != nir_intrinsic_load_input &&
3073 intrin->intrinsic != nir_intrinsic_load_interpolated_input)
3074 continue;
3075
3076 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
3077
3078 if (sem.location != VARYING_SLOT_COL0 &&
3079 sem.location != VARYING_SLOT_COL1)
3080 continue;
3081
3082 /* Default to FLAT (for load_input) */
3083 enum glsl_interp_mode interp = INTERP_MODE_FLAT;
3084 bool sample = false;
3085 bool centroid = false;
3086
3087 if (intrin->intrinsic == nir_intrinsic_load_interpolated_input) {
3088 nir_intrinsic_instr *baryc =
3089 nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
3090
3091 centroid =
3092 baryc->intrinsic == nir_intrinsic_load_barycentric_centroid;
3093 sample =
3094 baryc->intrinsic == nir_intrinsic_load_barycentric_sample;
3095 assert(centroid || sample ||
3096 baryc->intrinsic == nir_intrinsic_load_barycentric_pixel);
3097
3098 interp = nir_intrinsic_interp_mode(baryc);
3099 }
3100
3101 b.cursor = nir_before_instr(instr);
3102 nir_def *load = NULL;
3103
3104 if (sem.location == VARYING_SLOT_COL0) {
3105 load = nir_load_color0(&b);
3106 nir->info.fs.color0_interp = interp;
3107 nir->info.fs.color0_sample = sample;
3108 nir->info.fs.color0_centroid = centroid;
3109 } else {
3110 assert(sem.location == VARYING_SLOT_COL1);
3111 load = nir_load_color1(&b);
3112 nir->info.fs.color1_interp = interp;
3113 nir->info.fs.color1_sample = sample;
3114 nir->info.fs.color1_centroid = centroid;
3115 }
3116
3117 if (intrin->num_components != 4) {
3118 unsigned start = nir_intrinsic_component(intrin);
3119 unsigned count = intrin->num_components;
3120 load = nir_channels(&b, load, BITFIELD_RANGE(start, count));
3121 }
3122
3123 nir_def_replace(&intrin->def, load);
3124 progress = true;
3125 }
3126 }
3127
3128 if (progress) {
3129 nir_metadata_preserve(impl, nir_metadata_control_flow);
3130 } else {
3131 nir_metadata_preserve(impl, nir_metadata_all);
3132 }
3133 return progress;
3134 }
3135
3136 bool
3137 nir_io_add_intrinsic_xfb_info(nir_shader *nir)
3138 {
3139 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3140 bool progress = false;
3141
3142 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
3143 nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4;
3144
3145 nir_foreach_block(block, impl) {
3146 nir_foreach_instr_safe(instr, block) {
3147 if (instr->type != nir_instr_type_intrinsic)
3148 continue;
3149
3150 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3151
3152 if (!nir_intrinsic_has_io_xfb(intr))
3153 continue;
3154
3155 /* No indirect indexing allowed. The index is implied to be 0. */
3156 ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
3157 assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
3158
3159 /* Running this pass a second time should be a no-op. */
3160 if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
3161 nir_intrinsic_io_xfb(intr).out[1].num_components ||
3162 nir_intrinsic_io_xfb2(intr).out[0].num_components ||
3163 nir_intrinsic_io_xfb2(intr).out[1].num_components)
3164 continue;
3165
3166 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
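/* Make the write mask relative to component 0 of the slot rather than
 * to the intrinsic's first component.
 */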
3167 unsigned writemask = nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
3168
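/* xfb[0] describes components 0-1 and xfb[1] describes components 2-3;
 * they are stored in the io_xfb and io_xfb2 indices respectively.
 */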
3169 nir_io_xfb xfb[2];
3170 memset(xfb, 0, sizeof(xfb));
3171
3172 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
3173 nir_xfb_output_info *out = &nir->xfb_info->outputs[i];
3174 if (out->location == sem.location) {
3175 unsigned xfb_mask = writemask & out->component_mask;
3176
3177 /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
3178 "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
3179 i, out->buffer,
3180 out->offset,
3181 out->location,
3182 out->component_offset,
3183 out->component_mask,
3184 xfb_mask, sem.num_slots);*/
3185
3186 while (xfb_mask) {
3187 int start, count;
3188 u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
3189
3190 xfb[start / 2].out[start % 2].num_components = count;
3191 xfb[start / 2].out[start % 2].buffer = out->buffer;
3192 /* out->offset is relative to the first stored xfb component, while
3193 * start is relative to component 0 of the output slot. */
3194 xfb[start / 2].out[start % 2].offset =
3195 out->offset / 4 - out->component_offset + start;
3196
3197 progress = true;
3198 }
3199 }
3200 }
3201
3202 nir_intrinsic_set_io_xfb(intr, xfb[0]);
3203 nir_intrinsic_set_io_xfb2(intr, xfb[1]);
3204 }
3205 }
3206
3207 nir_metadata_preserve(impl, nir_metadata_all);
3208 return progress;
3209 }
3210
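/* Number of vec4 slots occupied by a type; used as the type_size callback
 * for nir_lower_io below.
 */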
3211 static int
3212 type_size_vec4(const struct glsl_type *type, bool bindless)
3213 {
3214 return glsl_count_attribute_slots(type, false);
3215 }
3216
3217 /**
3218 * This runs all compiler passes needed to lower IO, lower indirect IO access,
3219 * set transform feedback info in IO intrinsics, and clean up the IR.
3220 *
3221 * \param renumber_vs_inputs
3222 * Set to true if holes between VS inputs should be removed, which is safe
3223 * to do in any shader linker that can handle that. Set to false if you want
3224 * to keep holes between VS inputs, which is recommended for gallium
3225 * drivers so that the mapping of vertex elements to VS inputs expected
3226 * by gallium frontends isn't broken.
3227 */
3228 void
3229 nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
3230 {
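/* Compute shaders have no shader_in/shader_out variables, so there is
 * nothing to lower.
 */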
3231 if (nir->info.stage == MESA_SHADER_COMPUTE)
3232 return;
3233
3234 bool has_indirect_inputs =
3235 (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
3236
3237 /* Transform feedback requires that indirect outputs are lowered. */
3238 bool has_indirect_outputs =
3239 (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 &&
3240 nir->xfb_info == NULL;
3241
3242 /* TODO: Sorting variables by location is required due to some bug
3243 * in nir_lower_io_to_temporaries. If variables are not sorted,
3244 * dEQP-GLES31.functional.separate_shader.random.0 fails.
3245 *
3246 * This isn't needed if nir_assign_io_var_locations is called because it
3247 * also sorts variables. However, if IO is lowered sooner than that, we
3248 * must sort explicitly here to match what nir_assign_io_var_locations does.
3249 */
3250 unsigned varying_var_mask =
3251 (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) |
3252 (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
3253 nir_sort_variables_by_location(nir, varying_var_mask);
3254
3255 if (!has_indirect_inputs || !has_indirect_outputs) {
3256 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3257 nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3258 !has_indirect_inputs);
3259
3260 /* We need to lower all the copy_deref intrinsics introduced by
3261 * nir_lower_io_to_temporaries before calling nir_lower_io.
3262 */
3263 NIR_PASS_V(nir, nir_split_var_copies);
3264 NIR_PASS_V(nir, nir_lower_var_copies);
3265 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3266 }
3267
3268 /* The correct lower_64bit_to_32 flag is required by st/mesa depending
3269 * on whether the GLSL linker lowers IO or not. Setting the wrong flag
3270 * would break 64-bit vertex attribs for GLSL.
3271 */
3272 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3273 type_size_vec4,
3274 renumber_vs_inputs ? nir_lower_io_lower_64bit_to_32_new :
3275 nir_lower_io_lower_64bit_to_32);
3276
3277 /* nir_io_add_const_offset_to_base needs actual constants. */
3278 NIR_PASS_V(nir, nir_opt_constant_folding);
3279 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
3280
3281 /* Lower and remove dead derefs and variables to clean up the IR. */
3282 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3283 NIR_PASS_V(nir, nir_opt_dce);
3284 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
3285
3286 /* If IO is lowered before var->data.driver_location is assigned, driver
3287 * locations are all 0, which means IO bases are all 0. It's not necessary
3288 * to set driver_location before lowering IO because the only thing that
3289 * identifies outputs is their semantic, and IO bases can always be
3290 * computed from the semantics.
3291 *
3292 * This assigns IO bases from scratch, using IO semantics to tell which
3293 * intrinsics refer to the same IO. If the bases already exist, they
3294 * will be reassigned, sorted by the semantic, and all holes removed.
3295 * This effectively canonicalizes all bases.
3296 *
3297 * This must be done after DCE to remove dead load_input intrinsics.
3298 */
3299 NIR_PASS_V(nir, nir_recompute_io_bases,
3300 (nir->info.stage != MESA_SHADER_VERTEX || renumber_vs_inputs ?
3301 nir_var_shader_in : 0) | nir_var_shader_out);
3302
3303 if (nir->xfb_info)
3304 NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
3305
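/* Let the driver lower mediump IO if it provides a callback for it. */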
3306 if (nir->options->lower_mediump_io)
3307 nir->options->lower_mediump_io(nir);
3308
3309 nir->info.io_lowered = true;
3310 }
3311