/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each SSA definition, whether it is uniform,
 * i.e. whether it has the same value for all invocations of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
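
/* For illustration (a sketch, not taken from any particular shader): sources
 * of divergence are per-invocation values, and divergence propagates forward
 * through uses, e.g.
 *
 *    ssa_0 = intrinsic load_push_constant (...)       -> uniform
 *    ssa_1 = intrinsic load_subgroup_invocation ()    -> divergent
 *    ssa_2 = iadd ssa_0, ssa_1                        -> divergent (divergent source)
 *    ssa_3 = intrinsic read_first_invocation (ssa_2)  -> uniform again
 *                                                        (for subgroup-level divergence)
 */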

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /* Whether the caller requested vertex divergence (meaning between vertices
    * of the same primitive) instead of subgroup invocation divergence
    * (between invocations of the same subgroup). For example, patch input
    * loads are always convergent, while subgroup intrinsics are divergent
    * because vertices of the same primitive can be processed by different
    * subgroups.
    */
   bool vertex_divergence;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr, struct divergence_state *state)
{
   if (instr->def.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->def.divergent = true;
         return true;
      }
   }

   return false;
}


/* On some HW, uniform loads where there is a pending store/atomic from another
 * wave can "tear", so that different invocations see the pre-store value and
 * the post-store value even though they are loading from the same location.
 * This means we have to assume it's not uniform unless it's readonly.
 *
 * TODO The Vulkan memory model is much more strict here and requires an
 * atomic or volatile load for the data race to be valid, which could allow us
 * to do better if it's in use, however we currently don't have that
 * information plumbed through.
 */
static bool
load_may_tear(nir_shader *shader, nir_intrinsic_instr *instr)
{
   return (shader->options->divergence_analysis_options &
           nir_divergence_uniform_load_tears) &&
          !(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE);
}

static bool
visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->def.divergent)
      return false;

   nir_divergence_options options =
      state->shader->options->divergence_analysis_options;
   gl_shader_stage stage = state->stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_relaxed:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_read_invocation_cond_ir3:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_shared_append_amd:
   case nir_intrinsic_shared_consume_amd:
      /* VS/TES/GS invocations of the same primitive can be in different
       * subgroups, so subgroup ops are always divergent between vertices of
       * the same primitive.
       */
      is_divergent = state->vertex_divergence;
      break;

   /* Intrinsics which are always uniform */
   case nir_intrinsic_load_preamble:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_push_constant_zink:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_id_shift_ir3:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_viewport_xy_scale_and_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_fep_w_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
   case nir_intrinsic_load_tcs_primitive_mode_amd:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_sample_positions_amd:
   case nir_intrinsic_load_rasterization_samples_amd:
   case nir_intrinsic_load_ring_gsvs_amd:
   case nir_intrinsic_load_ring_gs2vs_offset_amd:
   case nir_intrinsic_load_streamout_config_amd:
   case nir_intrinsic_load_streamout_write_index_amd:
   case nir_intrinsic_load_streamout_offset_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_ring_attr_amd:
   case nir_intrinsic_load_ring_attr_offset_amd:
   case nir_intrinsic_load_provoking_vtx_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_merged_wave_info_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_resume_shader_address_amd:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_esgs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
   case nir_intrinsic_load_clip_half_line_width_amd:
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
   case nir_intrinsic_load_streamout_buffer_amd:
   case nir_intrinsic_load_ordered_id_amd:
   case nir_intrinsic_load_gs_wave_id_amd:
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
   case nir_intrinsic_load_btd_shader_type_intel:
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_base_workgroup_id:
   case nir_intrinsic_load_alpha_reference_amd:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_load_shared_uniform_block_intel:
   case nir_intrinsic_load_barycentric_optimize_amd:
   case nir_intrinsic_load_poly_line_smooth_enabled:
   case nir_intrinsic_load_rasterization_primitive_amd:
   case nir_intrinsic_unit_test_uniform_amd:
   case nir_intrinsic_load_global_constant_uniform_block_intel:
   case nir_intrinsic_load_debug_log_desc_amd:
   case nir_intrinsic_load_xfb_state_address_gfx12_amd:
   case nir_intrinsic_cmat_length:
   case nir_intrinsic_load_vs_primitive_stride_ir3:
   case nir_intrinsic_load_vs_vertex_stride_ir3:
   case nir_intrinsic_load_hs_patch_stride_ir3:
   case nir_intrinsic_load_tess_factor_base_ir3:
   case nir_intrinsic_load_tess_param_base_ir3:
   case nir_intrinsic_load_primitive_location_ir3:
   case nir_intrinsic_preamble_start_ir3:
   case nir_intrinsic_optimization_barrier_sgpr_amd:
   case nir_intrinsic_load_printf_buffer_address:
   case nir_intrinsic_load_printf_base_identifier:
   case nir_intrinsic_load_core_id_agx:
   case nir_intrinsic_load_samples_log2_agx:
   case nir_intrinsic_load_active_subgroup_count_agx:
   case nir_intrinsic_load_constant_base_ptr:
      is_divergent = false;
      break;

   /* This is divergent because it specifically loads sequential values into
    * successive SIMD lanes.
    */
   case nir_intrinsic_load_global_block_intel:
      is_divergent = true;
      break;

   case nir_intrinsic_decl_reg:
      is_divergent = nir_intrinsic_divergent(instr);
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_shader_record_ptr:
      is_divergent = !(options & nir_divergence_shader_record_ptr_uniform);
      break;
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
      is_divergent = instr->src[0].ssa->divergent;

      if (stage == MESA_SHADER_FRAGMENT) {
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Patch input loads are uniform between vertices of the same primitive. */
         if (state->vertex_divergence)
            is_divergent = false;
         else
            is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      } else {
         is_divergent = true;
      }
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs).
          * Divergent if src[0] is, so nothing else to do.
          */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_back_face_agx:
      assert(stage == MESA_SHADER_FRAGMENT || state->shader->info.internal);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_instance_id:
      is_divergent = !state->vertex_divergence;
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !state->vertex_divergence &&
                        !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !state->vertex_divergence &&
                        !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = !state->vertex_divergence;
      else if (stage == MESA_SHADER_ANY_HIT ||
               stage == MESA_SHADER_CLOSEST_HIT ||
               stage == MESA_SHADER_INTERSECTION)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage) || stage == MESA_SHADER_TESS_CTRL);
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if
    * the source is uniform and the operation is invariant
    */
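   /* For illustration (hypothetical cases, not tied to any particular driver):
    *  - reduce iand, cluster_size=4, uniform source   -> uniform (invariant op)
    *  - reduce iadd, cluster_size=4, uniform source   -> treated as divergent
    *    (the sum depends on how many lanes are active in each cluster)
    *  - reduce iadd, cluster_size=0 (= subgroup_size) -> uniform within the
    *    subgroup, but divergent when vertex_divergence is requested
    */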
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0) {
         /* Cluster size of 0 means the subgroup size.
          * This is uniform within a subgroup, but divergent between
          * vertices of the same primitive because they may be in
          * different subgroups.
          */
         is_divergent = state->vertex_divergence;
         break;
      }
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_inclusive_scan_clusters_ir3: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent || state->vertex_divergence;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   case nir_intrinsic_reduce_clusters_ir3:
      /* This reduces the last invocations in all 8-wide clusters. It should
       * behave the same as reduce with cluster_size == subgroup_size.
       */
      is_divergent = state->vertex_divergence;
      break;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_ldc_nv:
   case nir_intrinsic_ldcx_nv:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ssbo_ir3:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent ||
                     load_may_tear(state->shader, instr);
      break;

   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared_ir3:
      is_divergent = instr->src[0].ssa->divergent ||
         (state->shader->options->divergence_analysis_options &
          nir_divergence_uniform_load_tears);
      break;

   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_ir3:
   case nir_intrinsic_load_deref: {
      if (load_may_tear(state->shader, instr)) {
         is_divergent = true;
         break;
      }

      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
   case nir_intrinsic_image_fragment_mask_load_amd:
   case nir_intrinsic_image_deref_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent ||
                     load_may_tear(state->shader, instr);
      break;

   case nir_intrinsic_image_texel_address:
   case nir_intrinsic_image_deref_texel_address:
   case nir_intrinsic_bindless_image_texel_address:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent;
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent ||
                     load_may_tear(state->shader, instr);
      break;

   case nir_intrinsic_optimization_barrier_vgpr_amd:
      is_divergent = instr->src[0].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_convert_alu_types:
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_fine:
   case nir_intrinsic_ddx_coarse:
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_fine:
   case nir_intrinsic_ddy_coarse:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_rotate:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_vote_any:
   case nir_intrinsic_quad_vote_all:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_load_typed_buffer_amd:
   case nir_intrinsic_image_levels:
   case nir_intrinsic_image_deref_levels:
   case nir_intrinsic_bindless_image_levels:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_descriptor_amd:
   case nir_intrinsic_image_deref_descriptor_amd:
   case nir_intrinsic_bindless_image_descriptor_amd:
   case nir_intrinsic_strict_wqm_coord_amd:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_is_sparse_resident_zink:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_global_constant_bounded:
   case nir_intrinsic_load_global_constant_offset:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_load_constant_agx:
   case nir_intrinsic_load_reg_indirect:
   case nir_intrinsic_load_const_ir3:
   case nir_intrinsic_load_frag_size_ir3:
   case nir_intrinsic_load_frag_offset_ir3:
   case nir_intrinsic_bindless_resource_ir3: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_resource_intel:
      /* Not having the non_uniform flag with divergent sources is undefined
       * behavior. The Intel driver defines it to pick the lowest numbered live
       * SIMD lane (via emit_uniformize).
       */
      if ((nir_intrinsic_resource_access_intel(instr) &
           nir_resource_intel_non_uniform) != 0) {
         unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
         for (unsigned i = 0; i < num_srcs; i++) {
            if (instr->src[i].ssa->divergent) {
               is_divergent = true;
               break;
            }
         }
      }
      break;

   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_inverse_ballot:
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_offset_nv:
   case nir_intrinsic_load_barycentric_coord_pixel:
   case nir_intrinsic_load_barycentric_coord_centroid:
   case nir_intrinsic_load_barycentric_coord_sample:
   case nir_intrinsic_load_barycentric_coord_at_sample:
   case nir_intrinsic_load_barycentric_coord_at_offset:
   case nir_intrinsic_load_persp_center_rhw_ir3:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_coord_xy:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_coord_zw:
   case nir_intrinsic_load_frag_coord_unscaled_ir3:
   case nir_intrinsic_load_pixel_coord:
   case nir_intrinsic_load_fully_covered:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_deref_atomic_swap:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_ssbo_atomic_ir3:
   case nir_intrinsic_ssbo_atomic_swap_ir3:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
   case nir_intrinsic_task_payload_atomic:
   case nir_intrinsic_task_payload_atomic_swap:
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_global_atomic_amd:
   case nir_intrinsic_global_atomic_swap_amd:
   case nir_intrinsic_global_atomic_2x32:
   case nir_intrinsic_global_atomic_swap_2x32:
   case nir_intrinsic_global_atomic_ir3:
   case nir_intrinsic_global_atomic_swap_ir3:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_exclusive_scan_clusters_ir3:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_dpp16_shift_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_elect_any_ir3:
   case nir_intrinsic_load_tlb_color_brcm:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_is_subgroup_invocation_lt_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
   case nir_intrinsic_ordered_xfb_counter_add_gfx11_amd:
   case nir_intrinsic_ordered_add_loop_gfx12_amd:
   case nir_intrinsic_xfb_counter_sub_gfx11_amd:
   case nir_intrinsic_unit_test_divergent_amd:
   case nir_intrinsic_load_stack:
   case nir_intrinsic_load_ray_launch_id:
   case nir_intrinsic_load_ray_instance_custom_index:
   case nir_intrinsic_load_ray_geometry_index:
   case nir_intrinsic_load_ray_world_direction:
   case nir_intrinsic_load_ray_world_origin:
   case nir_intrinsic_load_ray_object_origin:
   case nir_intrinsic_load_ray_object_direction:
   case nir_intrinsic_load_ray_t_min:
   case nir_intrinsic_load_ray_t_max:
   case nir_intrinsic_load_ray_object_to_world:
   case nir_intrinsic_load_ray_world_to_object:
   case nir_intrinsic_load_ray_hit_kind:
   case nir_intrinsic_load_ray_flags:
   case nir_intrinsic_load_cull_mask:
   case nir_intrinsic_load_sysval_nv:
   case nir_intrinsic_emit_vertex_nv:
   case nir_intrinsic_end_primitive_nv:
   case nir_intrinsic_report_ray_intersection:
   case nir_intrinsic_rq_proceed:
   case nir_intrinsic_rq_load:
   case nir_intrinsic_load_ray_triangle_vertex_positions:
   case nir_intrinsic_cmat_extract:
   case nir_intrinsic_cmat_muladd_amd:
   case nir_intrinsic_dpas_intel:
   case nir_intrinsic_isberd_nv:
   case nir_intrinsic_al2p_nv:
   case nir_intrinsic_ald_nv:
   case nir_intrinsic_ipa_nv:
   case nir_intrinsic_ldtram_nv:
   case nir_intrinsic_printf:
   case nir_intrinsic_load_gs_header_ir3:
   case nir_intrinsic_load_tcs_header_ir3:
   case nir_intrinsic_load_rel_patch_id_ir3:
   case nir_intrinsic_brcst_active_ir3:
   case nir_intrinsic_load_helper_op_id_agx:
   case nir_intrinsic_load_helper_arg_lo_agx:
   case nir_intrinsic_load_helper_arg_hi_agx:
   case nir_intrinsic_stack_map_agx:
   case nir_intrinsic_stack_unmap_agx:
   case nir_intrinsic_load_exported_agx:
   case nir_intrinsic_load_local_pixel_agx:
   case nir_intrinsic_load_coefficients_agx:
   case nir_intrinsic_load_active_subgroup_invocation_agx:
   case nir_intrinsic_load_sample_mask:
   case nir_intrinsic_quad_ballot_agx:
   case nir_intrinsic_load_agx:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr, struct divergence_state *state)
{
   if (instr->def.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_def(nir_def *def, struct divergence_state *state)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var,
                        struct divergence_state *state)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   /* Handle system value variables. */
   if (var->data.mode == nir_var_system_value) {
      /* Fake the instruction to reuse visit_intrinsic for all sysvals. */
      nir_intrinsic_instr fake_instr;

      memset(&fake_instr, 0, sizeof(fake_instr));
      fake_instr.intrinsic =
         nir_intrinsic_from_system_value(var->data.location);

      visit_intrinsic(&fake_instr, state);
      return !fake_instr.def.divergent;
   }

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref,
            struct divergence_state *state)
{
   if (deref->def.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var, state);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_instr *instr, struct divergence_state *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr), state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic(nir_instr_as_intrinsic(instr), state);
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr), state);
   case nir_instr_type_load_const:
      return visit_def(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_undef:
      return visit_def(&nir_instr_as_undef(instr)->def, state);
   case nir_instr_type_deref:
      return visit_deref(state->shader, nir_instr_as_deref(instr), state);
   case nir_instr_type_debug_info:
      return false;
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump) {
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      } else {
         has_changed |= update_instr_divergence(instr, state);
      }
   }

   bool divergent = state->divergent_loop_cf ||
                    state->divergent_loop_continue ||
                    state->divergent_loop_break;
   if (divergent != block->divergent) {
      block->divergent = divergent;
      has_changed = true;
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represents the joining point of different paths
 *     created by an "if-then-else" branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
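/* A sketch for illustration (hypothetical NIR, not from any real shader):
 *
 *    if ssa_cond {
 *       block b1: ssa_1 = ...
 *    } else {
 *       block b2: ssa_2 = ...
 *    }
 *    block b3: ssa_3 = phi b1: ssa_1, b2: ssa_2
 *
 * ssa_3 is divergent if ssa_cond is divergent (invocations join from
 * different predecessors) or if ssa_1 or ssa_2 is divergent. */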
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->def.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and two sources defined, the definition is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->def.divergent = true;
      return true;
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (2) mu: these only exist at loop headers and
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
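/* A sketch for illustration (hypothetical NIR, not from any real shader):
 *
 *    loop {
 *       block b1 (header): ssa_i = phi preheader: ssa_init, b2: ssa_next
 *       ...
 *       block b2: ssa_next = iadd ssa_i, 1
 *    }
 *
 * ssa_i is divergent if ssa_init or ssa_next is divergent, or if a divergent
 * continue lets invocations reach the header carrying values from different
 * loop-carried ssa-defs. */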
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->def.divergent)
      return false;

   nir_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (3) eta: represents values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
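/* A sketch for illustration (hypothetical NIR in LCSSA form, not from any
 * real shader):
 *
 *    loop {
 *       block b1: ssa_1 = ...
 *       if ssa_exit_cond { break; }
 *    }
 *    block b2: ssa_2 = phi b1: ssa_1
 *
 * ssa_2 is divergent if ssa_1 is divergent or if the break is divergent,
 * since invocations may then leave the loop in different iterations with
 * different values of ssa_1. */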
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->def.divergent)
      return false;

   if (divergent_break) {
      phi->def.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (state->first_visit)
         phi->def.divergent = false;
      progress |= visit_if_merge_phi(phi, if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop-body,
    * which means that a following break might be taken by only some invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   assert(!nir_loop_has_continue_construct(loop));
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_phi(phi, loop_header) {
      if (!state->first_visit && phi->def.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->def.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->def.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_phi(phi, loop_header) {
         repeat |= visit_loop_header_phi(phi, loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (state->first_visit)
         phi->def.divergent = false;
      progress |= visit_loop_exit_phi(phi, loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

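/* A usage sketch (an assumption about typical driver flow, not mandated by
 * this file): bring the shader into LCSSA form first, run the analysis, then
 * query the per-def results through nir_def::divergent or
 * nir_src_is_divergent(), e.g.
 *
 *    nir_convert_to_lcssa(nir, true, true);      // LCSSA pass; argument values are illustrative
 *    nir_divergence_analysis(nir);
 *    if (nir_src_is_divergent(intrin->src[0]))
 *       lower_to_waterfall_loop(intrin);         // hypothetical driver helper
 */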
void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

/* Compute divergence between vertices of the same primitive. This uses
 * the same divergent field in nir_def and nir_loop as the regular divergence
 * pass.
 */
void
nir_vertex_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = false;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .vertex_divergence = true,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .first_visit = true,
   };
   update_instr_divergence(instr, &state);
   return true;
}

bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}