1 /*
2 * Copyright © 2010 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_fs.h"
7 #include "brw_fs_builder.h"
8 #include "brw_fs_live_variables.h"
9 #include "brw_nir.h"
10 #include "brw_cfg.h"
11 #include "brw_private.h"
12 #include "intel_nir.h"
13 #include "shader_enums.h"
14 #include "dev/intel_debug.h"
15 #include "dev/intel_wa.h"
16
17 #include <memory>
18
19 using namespace brw;
20
21 static void
fill_push_const_block_info(struct brw_push_const_block * block,unsigned dwords)22 fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
23 {
24 block->dwords = dwords;
25 block->regs = DIV_ROUND_UP(dwords, 8);
26 block->size = block->regs * 32;
27 }
28
29 static void
cs_fill_push_const_info(const struct intel_device_info * devinfo,struct brw_cs_prog_data * cs_prog_data)30 cs_fill_push_const_info(const struct intel_device_info *devinfo,
31 struct brw_cs_prog_data *cs_prog_data)
32 {
33 const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
34 int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
35
36 /* The thread ID should be stored in the last param dword */
37 assert(subgroup_id_index == -1 ||
38 subgroup_id_index == (int)prog_data->nr_params - 1);
39
40 unsigned cross_thread_dwords, per_thread_dwords;
41 if (subgroup_id_index >= 0) {
42 /* Fill all but the last register with cross-thread payload */
43 cross_thread_dwords = 8 * (subgroup_id_index / 8);
44 per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
45 assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
46 } else {
47 /* Fill all data using cross-thread payload */
48 cross_thread_dwords = prog_data->nr_params;
49 per_thread_dwords = 0u;
50 }
51
52 fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
53 fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
54
55 assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
56 cs_prog_data->push.per_thread.size == 0);
57 assert(cs_prog_data->push.cross_thread.dwords +
58 cs_prog_data->push.per_thread.dwords ==
59 prog_data->nr_params);
60 }
61
/* Lower a compute shader from NIR all the way to register-allocated IR
 * for one SIMD width.  Returns false (with s.failed set) when this
 * width cannot be compiled, e.g. register allocation fails with
 * spilling disallowed.  The pass ordering below is significant.
 */
static bool
run_cs(fs_visitor &s, bool allow_spilling)
{
   assert(gl_shader_stage_is_compute(s.stage));
   const fs_builder bld = fs_builder(&s).at_end();

   s.payload_ = new cs_thread_payload(s);

   if (s.devinfo->platform == INTEL_PLATFORM_HSW && s.prog_data->total_shared > 0) {
      /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
      const fs_builder abld = bld.exec_all().group(1, 0);
      abld.MOV(retype(brw_sr0_reg(1), BRW_TYPE_UW),
               suboffset(retype(brw_vec1_grf(0, 0), BRW_TYPE_UW), 1));
   }

   /* Emit backend IR from the NIR shader. */
   nir_to_brw(&s);

   if (s.failed)
      return false;

   s.emit_cs_terminate();

   brw_calculate_cfg(s);

   brw_fs_optimize(s);

   /* Push-constant (CURBE) layout must be fixed before the workaround
    * passes and register allocation below run.
    */
   s.assign_curb_setup();

   brw_fs_lower_3src_null_dest(s);
   brw_fs_workaround_memory_fence_before_eot(s);
   brw_fs_workaround_emit_dummy_mov_instruction(s);

   /* May set s.failed if allocation is impossible without spilling and
    * allow_spilling is false.
    */
   brw_allocate_registers(s, allow_spilling);

   return !s.failed;
}
98
99 const unsigned *
brw_compile_cs(const struct brw_compiler * compiler,struct brw_compile_cs_params * params)100 brw_compile_cs(const struct brw_compiler *compiler,
101 struct brw_compile_cs_params *params)
102 {
103 const nir_shader *nir = params->base.nir;
104 const struct brw_cs_prog_key *key = params->key;
105 struct brw_cs_prog_data *prog_data = params->prog_data;
106
107 const bool debug_enabled =
108 brw_should_print_shader(nir, params->base.debug_flag ?
109 params->base.debug_flag : DEBUG_CS);
110
111 prog_data->base.stage = MESA_SHADER_COMPUTE;
112 prog_data->base.total_shared = nir->info.shared_size;
113 prog_data->base.ray_queries = nir->info.ray_queries;
114 prog_data->base.total_scratch = 0;
115
116 if (!nir->info.workgroup_size_variable) {
117 prog_data->local_size[0] = nir->info.workgroup_size[0];
118 prog_data->local_size[1] = nir->info.workgroup_size[1];
119 prog_data->local_size[2] = nir->info.workgroup_size[2];
120 }
121
122 brw_simd_selection_state simd_state{
123 .devinfo = compiler->devinfo,
124 .prog_data = prog_data,
125 .required_width = brw_required_dispatch_width(&nir->info),
126 };
127
128 std::unique_ptr<fs_visitor> v[3];
129
130 for (unsigned simd = 0; simd < 3; simd++) {
131 if (!brw_simd_should_compile(simd_state, simd))
132 continue;
133
134 const unsigned dispatch_width = 8u << simd;
135
136 nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
137 brw_nir_apply_key(shader, compiler, &key->base,
138 dispatch_width);
139
140 NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);
141
142 /* Clean up after the local index and ID calculations. */
143 NIR_PASS(_, shader, nir_opt_constant_folding);
144 NIR_PASS(_, shader, nir_opt_dce);
145
146 brw_postprocess_nir(shader, compiler, debug_enabled,
147 key->base.robust_flags);
148
149 v[simd] = std::make_unique<fs_visitor>(compiler, ¶ms->base,
150 &key->base,
151 &prog_data->base,
152 shader, dispatch_width,
153 params->base.stats != NULL,
154 debug_enabled);
155
156 const int first = brw_simd_first_compiled(simd_state);
157 if (first >= 0)
158 v[simd]->import_uniforms(v[first].get());
159
160 const bool allow_spilling = first < 0 || nir->info.workgroup_size_variable;
161
162 if (run_cs(*v[simd], allow_spilling)) {
163 cs_fill_push_const_info(compiler->devinfo, prog_data);
164
165 brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
166 } else {
167 simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg);
168 if (simd > 0) {
169 brw_shader_perf_log(compiler, params->base.log_data,
170 "SIMD%u shader failed to compile: %s\n",
171 dispatch_width, v[simd]->fail_msg);
172 }
173 }
174 }
175
176 const int selected_simd = brw_simd_select(simd_state);
177 if (selected_simd < 0) {
178 params->base.error_str =
179 ralloc_asprintf(params->base.mem_ctx,
180 "Can't compile shader: "
181 "SIMD8 '%s', SIMD16 '%s' and SIMD32 '%s'.\n",
182 simd_state.error[0], simd_state.error[1],
183 simd_state.error[2]);
184 return NULL;
185 }
186
187 assert(selected_simd < 3);
188
189 if (!nir->info.workgroup_size_variable)
190 prog_data->prog_mask = 1 << selected_simd;
191
192 fs_generator g(compiler, ¶ms->base, &prog_data->base,
193 MESA_SHADER_COMPUTE);
194 if (unlikely(debug_enabled)) {
195 char *name = ralloc_asprintf(params->base.mem_ctx,
196 "%s compute shader %s",
197 nir->info.label ?
198 nir->info.label : "unnamed",
199 nir->info.name);
200 g.enable_debug(name);
201 }
202
203 uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);
204
205 struct brw_compile_stats *stats = params->base.stats;
206 for (unsigned simd = 0; simd < 3; simd++) {
207 if (prog_data->prog_mask & (1u << simd)) {
208 assert(v[simd]);
209 prog_data->prog_offset[simd] =
210 g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats,
211 v[simd]->performance_analysis.require(), stats);
212 if (stats)
213 stats->max_dispatch_width = max_dispatch_width;
214 stats = stats ? stats + 1 : NULL;
215 max_dispatch_width = 8u << simd;
216 }
217 }
218
219 g.add_const_data(nir->constant_data, nir->constant_data_size);
220
221 return g.get_assembly();
222 }
223
224