/*
 * Copyright © 2010 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_fs.h"
#include "brw_fs_builder.h"
#include "brw_fs_live_variables.h"
#include "brw_nir.h"
#include "brw_cfg.h"
#include "brw_private.h"
#include "intel_nir.h"
#include "shader_enums.h"
#include "dev/intel_debug.h"
#include "dev/intel_wa.h"

#include <memory>

using namespace brw;

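/* Record the size of a push constant block in dwords, in 32-byte registers
 * (8 dwords each), and in bytes.
 */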
static void
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
{
   block->dwords = dwords;
   block->regs = DIV_ROUND_UP(dwords, 8);
   block->size = block->regs * 32;
}

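/* Split the push constants into a cross-thread block, shared by all threads
 * in the dispatch, and a per-thread block.  When a subgroup ID parameter is
 * present it must sit in the last param dword, so everything up to its
 * register goes cross-thread and the final register is per-thread; otherwise
 * all params are pushed cross-thread.
 */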
static void
cs_fill_push_const_info(const struct intel_device_info *devinfo,
                        struct brw_cs_prog_data *cs_prog_data)
{
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
   int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);

   /* The thread ID should be stored in the last param dword */
   assert(subgroup_id_index == -1 ||
          subgroup_id_index == (int)prog_data->nr_params - 1);

   unsigned cross_thread_dwords, per_thread_dwords;
   if (subgroup_id_index >= 0) {
      /* Fill all but the last register with cross-thread payload */
      cross_thread_dwords = 8 * (subgroup_id_index / 8);
      per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
      assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
   } else {
      /* Fill all data using cross-thread payload */
      cross_thread_dwords = prog_data->nr_params;
      per_thread_dwords = 0u;
   }

   fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
   fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);

   assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
          cs_prog_data->push.per_thread.size == 0);
   assert(cs_prog_data->push.cross_thread.dwords +
          cs_prog_data->push.per_thread.dwords ==
             prog_data->nr_params);
}

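/* Run the scalar backend for a single dispatch width: build the compute
 * thread payload, translate NIR into backend IR, optimize, apply hardware
 * workarounds, and allocate registers.  Returns false if compilation failed
 * at this width.
 */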
static bool
run_cs(fs_visitor &s, bool allow_spilling)
{
   assert(gl_shader_stage_is_compute(s.stage));
   const fs_builder bld = fs_builder(&s).at_end();

   s.payload_ = new cs_thread_payload(s);

   if (s.devinfo->platform == INTEL_PLATFORM_HSW && s.prog_data->total_shared > 0) {
      /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
      const fs_builder abld = bld.exec_all().group(1, 0);
      abld.MOV(retype(brw_sr0_reg(1), BRW_TYPE_UW),
               suboffset(retype(brw_vec1_grf(0, 0), BRW_TYPE_UW), 1));
   }

   nir_to_brw(&s);

   if (s.failed)
      return false;

   s.emit_cs_terminate();

   brw_calculate_cfg(s);

   brw_fs_optimize(s);

   s.assign_curb_setup();

   brw_fs_lower_3src_null_dest(s);
   brw_fs_workaround_memory_fence_before_eot(s);
   brw_fs_workaround_emit_dummy_mov_instruction(s);

   brw_allocate_registers(s, allow_spilling);

   return !s.failed;
}

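/* Compile a compute shader.  The shader is compiled at SIMD8, SIMD16 and/or
 * SIMD32 as permitted by the SIMD selection heuristics, the best variant is
 * selected, and machine code is generated for every variant kept in
 * prog_mask.
 */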
const unsigned *
brw_compile_cs(const struct brw_compiler *compiler,
               struct brw_compile_cs_params *params)
{
   const nir_shader *nir = params->base.nir;
   const struct brw_cs_prog_key *key = params->key;
   struct brw_cs_prog_data *prog_data = params->prog_data;

   const bool debug_enabled =
      brw_should_print_shader(nir, params->base.debug_flag ?
                                   params->base.debug_flag : DEBUG_CS);

   prog_data->base.stage = MESA_SHADER_COMPUTE;
   prog_data->base.total_shared = nir->info.shared_size;
   prog_data->base.ray_queries = nir->info.ray_queries;
   prog_data->base.total_scratch = 0;

   if (!nir->info.workgroup_size_variable) {
      prog_data->local_size[0] = nir->info.workgroup_size[0];
      prog_data->local_size[1] = nir->info.workgroup_size[1];
      prog_data->local_size[2] = nir->info.workgroup_size[2];
   }

   brw_simd_selection_state simd_state{
      .devinfo = compiler->devinfo,
      .prog_data = prog_data,
      .required_width = brw_required_dispatch_width(&nir->info),
   };

   std::unique_ptr<fs_visitor> v[3];

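   /* Try each dispatch width (SIMD8, SIMD16, SIMD32) that the selection
    * heuristics allow, compiling a clone of the NIR shader lowered for that
    * width.
    */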
   for (unsigned simd = 0; simd < 3; simd++) {
      if (!brw_simd_should_compile(simd_state, simd))
         continue;

      const unsigned dispatch_width = 8u << simd;

      nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
      brw_nir_apply_key(shader, compiler, &key->base,
                        dispatch_width);

      NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);

      /* Clean up after the local index and ID calculations. */
      NIR_PASS(_, shader, nir_opt_constant_folding);
      NIR_PASS(_, shader, nir_opt_dce);

      brw_postprocess_nir(shader, compiler, debug_enabled,
                          key->base.robust_flags);

      v[simd] = std::make_unique<fs_visitor>(compiler, &params->base,
                                             &key->base,
                                             &prog_data->base,
                                             shader, dispatch_width,
                                             params->base.stats != NULL,
                                             debug_enabled);

      const int first = brw_simd_first_compiled(simd_state);
      if (first >= 0)
         v[simd]->import_uniforms(v[first].get());

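      /* Only allow spilling if no narrower width has compiled successfully
       * yet, or if the workgroup size is variable and this width may still be
       * required at dispatch time.
       */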
      const bool allow_spilling = first < 0 || nir->info.workgroup_size_variable;

      if (run_cs(*v[simd], allow_spilling)) {
         cs_fill_push_const_info(compiler->devinfo, prog_data);

         brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
      } else {
         simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg);
         if (simd > 0) {
            brw_shader_perf_log(compiler, params->base.log_data,
                                "SIMD%u shader failed to compile: %s\n",
                                dispatch_width, v[simd]->fail_msg);
         }
      }
   }

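   /* Pick the best dispatch width among the successfully compiled variants;
    * if none compiled, report the per-width error messages and fail.
    */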
   const int selected_simd = brw_simd_select(simd_state);
   if (selected_simd < 0) {
      params->base.error_str =
         ralloc_asprintf(params->base.mem_ctx,
                         "Can't compile shader: "
                         "SIMD8 '%s', SIMD16 '%s' and SIMD32 '%s'.\n",
                         simd_state.error[0], simd_state.error[1],
                         simd_state.error[2]);
      return NULL;
   }

   assert(selected_simd < 3);

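   /* With a fixed workgroup size only the selected variant is needed, so trim
    * prog_mask down to it; a variable workgroup size keeps every compiled
    * variant available.
    */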
   if (!nir->info.workgroup_size_variable)
      prog_data->prog_mask = 1 << selected_simd;

   fs_generator g(compiler, &params->base, &prog_data->base,
                  MESA_SHADER_COMPUTE);
   if (unlikely(debug_enabled)) {
      char *name = ralloc_asprintf(params->base.mem_ctx,
                                   "%s compute shader %s",
                                   nir->info.label ?
                                   nir->info.label : "unnamed",
                                   nir->info.name);
      g.enable_debug(name);
   }

   uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);

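   /* Generate machine code for every variant left in prog_mask, recording the
    * per-variant code offsets and, when requested, compile statistics.
    */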
   struct brw_compile_stats *stats = params->base.stats;
   for (unsigned simd = 0; simd < 3; simd++) {
      if (prog_data->prog_mask & (1u << simd)) {
         assert(v[simd]);
         prog_data->prog_offset[simd] =
            g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats,
                            v[simd]->performance_analysis.require(), stats);
         if (stats)
            stats->max_dispatch_width = max_dispatch_width;
         stats = stats ? stats + 1 : NULL;
         max_dispatch_width = 8u << simd;
      }
   }

   g.add_const_data(nir->constant_data, nir->constant_data_size);

   return g.get_assembly();
}