/*
 * Copyright © 2006-2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "elk_fs.h"
#include "elk_fs_builder.h"

using namespace elk;

elk_vs_thread_payload::elk_vs_thread_payload(const elk_fs_visitor &v)
{
   unsigned r = 0;

   /* R0: Thread header. */
   r += reg_unit(v.devinfo);

   /* R1: URB handles. */
   urb_handles = elk_ud8_grf(r, 0);
   r += reg_unit(v.devinfo);

   num_regs = r;
}

elk_tcs_thread_payload::elk_tcs_thread_payload(const elk_fs_visitor &v)
{
   struct elk_vue_prog_data *vue_prog_data = elk_vue_prog_data(v.prog_data);
   struct elk_tcs_prog_data *tcs_prog_data = elk_tcs_prog_data(v.prog_data);
   struct elk_tcs_prog_key *tcs_key = (struct elk_tcs_prog_key *) v.key;

   if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH) {
      patch_urb_output = elk_ud1_grf(0, 0);
      primitive_id = elk_vec1_grf(0, 1);

      /* r1-r4 contain the ICP handles. */
      icp_handle_start = elk_ud8_grf(1, 0);

      num_regs = 5;
   } else {
      assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH);
      assert(tcs_key->input_vertices <= ELK_MAX_TCS_INPUT_VERTICES);

      unsigned r = 0;

      r += reg_unit(v.devinfo);

      patch_urb_output = elk_ud8_grf(r, 0);
      r += reg_unit(v.devinfo);

      if (tcs_prog_data->include_primitive_id) {
         primitive_id = elk_vec8_grf(r, 0);
         r += reg_unit(v.devinfo);
      }

      /* ICP handles occupy the next 1-32 registers. */
      icp_handle_start = elk_ud8_grf(r, 0);
      r += elk_tcs_prog_key_input_vertices(tcs_key) * reg_unit(v.devinfo);
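      /* e.g., 8 input vertices with a one-GRF register unit advance r by 8
       * GRFs here (illustrative figures; the assert above caps the count at
       * ELK_MAX_TCS_INPUT_VERTICES).
       */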

      num_regs = r;
   }
}

elk_tes_thread_payload::elk_tes_thread_payload(const elk_fs_visitor &v)
{
   unsigned r = 0;

   /* R0: Thread Header. */
   patch_urb_input = retype(elk_vec1_grf(0, 0), ELK_REGISTER_TYPE_UD);
   primitive_id = elk_vec1_grf(0, 1);
   r += reg_unit(v.devinfo);

   /* R1-3: gl_TessCoord.xyz. */
   for (unsigned i = 0; i < 3; i++) {
      coords[i] = elk_vec8_grf(r, 0);
      r += reg_unit(v.devinfo);
   }

   /* R4: URB output handles. */
   urb_output = elk_ud8_grf(r, 0);
   r += reg_unit(v.devinfo);

   num_regs = r;
}

elk_gs_thread_payload::elk_gs_thread_payload(elk_fs_visitor &v)
{
   struct elk_vue_prog_data *vue_prog_data = elk_vue_prog_data(v.prog_data);
   struct elk_gs_prog_data *gs_prog_data = elk_gs_prog_data(v.prog_data);
   const fs_builder bld = fs_builder(&v).at_end();

   /* R0: thread header. */
   unsigned r = reg_unit(v.devinfo);

   /* R1: output URB handles. */
   urb_handles = bld.vgrf(ELK_REGISTER_TYPE_UD);
   bld.AND(urb_handles, elk_ud8_grf(r, 0), elk_imm_ud(0xFFFF));

   /* R1: Instance ID stored in bits 31:27 */
   instance_id = bld.vgrf(ELK_REGISTER_TYPE_UD);
   bld.SHR(instance_id, elk_ud8_grf(r, 0), elk_imm_ud(27u));
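   /* Both values are packed into the same GRF: the AND keeps the URB handle
    * in bits 15:0, and the SHR moves the instance ID from bits 31:27 down to
    * the low bits.
    */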

   r += reg_unit(v.devinfo);

   if (gs_prog_data->include_primitive_id) {
      primitive_id = elk_ud8_grf(r, 0);
      r += reg_unit(v.devinfo);
   }

   /* Always enable VUE handles so we can safely use pull model if needed.
    *
    * The push model for a GS uses a ton of register space even for trivial
    * scenarios with just a few inputs, so just make things easier and a bit
    * safer by always having pull model available.
    */
   gs_prog_data->base.include_vue_handles = true;

   /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
   icp_handle_start = elk_ud8_grf(r, 0);
   r += v.nir->info.gs.vertices_in * reg_unit(v.devinfo);

   num_regs = r;

   /* Use a maximum of 24 registers for push-model inputs. */
   const unsigned max_push_components = 24;

   /* If pushing our inputs would take too many registers, reduce the URB read
    * length (which is in HWords, or 8 registers), and resort to pulling.
    *
    * Note that the GS reads <URB Read Length> HWords for every vertex - so we
    * have to multiply by VerticesIn to obtain the total storage requirement.
    */
   if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
       max_push_components) {
      vue_prog_data->urb_read_length =
         ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
   }
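   /* e.g., a read length of 2 HWords with 3 incoming vertices would push
    * 8 * 2 * 3 = 48 components, exceeding the limit of 24, so the read
    * length would be cut to ROUND_DOWN_TO(24 / 3, 8) / 8 = 1 HWord.
    */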
}

static inline void
setup_fs_payload_gfx6(elk_fs_thread_payload &payload,
                      const elk_fs_visitor &v,
                      bool &source_depth_to_render_target)
{
   struct elk_wm_prog_data *prog_data = elk_wm_prog_data(v.prog_data);

   const unsigned payload_width = MIN2(16, v.dispatch_width);
   assert(v.dispatch_width % payload_width == 0);
   assert(v.devinfo->ver >= 6);
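   /* payload_width caps each chunk at SIMD16; dispatch_width / payload_width
    * below is the number of 16-wide chunks, so j indexes the chunk when the
    * dispatch is wider than a single payload.
    */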

   payload.num_regs = 0;

   /* R0: PS thread payload header. */
   payload.num_regs++;

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R1: masks, pixel X/Y coordinates. */
      payload.subspan_coord_reg[j] = payload.num_regs++;
   }

   for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) {
      /* R3-26: barycentric interpolation coordinates. These appear in the
       * same order that they appear in the elk_barycentric_mode enum. Each
       * set of coordinates occupies 2 registers if dispatch width == 8 and 4
       * registers if dispatch width == 16. Coordinates only appear if they
       * were enabled using the "Barycentric Interpolation Mode" bits in
       * WM_STATE.
       */
      for (int i = 0; i < ELK_BARYCENTRIC_MODE_COUNT; ++i) {
         if (prog_data->barycentric_interp_modes & (1 << i)) {
            payload.barycentric_coord_reg[i][j] = payload.num_regs;
            payload.num_regs += payload_width / 4;
         }
      }

      /* R27-28: interpolated depth if uses source depth */
      if (prog_data->uses_src_depth) {
         payload.source_depth_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */
      if (prog_data->uses_src_w) {
         payload.source_w_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R31: MSAA position offsets. */
      if (prog_data->uses_pos_offset) {
         payload.sample_pos_reg[j] = payload.num_regs;
         payload.num_regs++;
      }

      /* R32-33: MSAA input coverage mask */
      if (prog_data->uses_sample_mask) {
         assert(v.devinfo->ver >= 7);
         payload.sample_mask_in_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }
   }

   if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      source_depth_to_render_target = true;
   }
}

#undef P /* prompted depth */
#undef C /* computed */
#undef N /* non-promoted? */

#define P 0
#define C 1
#define N 2

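/* One entry per combination of the ELK_WM_IZ_* lookup bits, giving the depth
 * mode (P = prompted, C = computed, N = non-promoted) and which of source
 * depth, source-depth-to-render-target, destination depth, and destination
 * stencil are present in the payload.
 */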
static const struct {
   GLuint mode:2;
   GLuint sd_present:1;
   GLuint sd_to_rt:1;
   GLuint dd_present:1;
   GLuint ds_present:1;
} wm_iz_table[ELK_WM_IZ_BIT_MAX] =
{
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { N, 0, 1, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 0 },
 { C, 0, 1, 1, 0 },
 { C, 0, 1, 1, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 1 },
 { N, 0, 1, 0, 1 },
 { N, 0, 1, 0, 1 },
 { P, 0, 0, 0, 0 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { P, 0, 0, 0, 0 },
 { N, 1, 1, 0, 1 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { P, 0, 0, 0, 0 },
 { C, 0, 0, 0, 1 },
 { P, 0, 0, 0, 0 },
 { C, 0, 1, 0, 1 },
 { P, 0, 0, 0, 0 },
 { C, 1, 1, 0, 1 },
 { C, 0, 1, 0, 1 },
 { C, 0, 1, 0, 1 },
 { P, 0, 0, 0, 0 },
 { C, 1, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { P, 0, 0, 0, 0 },
 { C, 1, 1, 1, 1 },
 { C, 0, 1, 1, 1 },
 { C, 0, 1, 1, 1 }
};

/**
 * Set up the gfx4 FS payload. The line_aa setting (ELK_NEVER, ELK_ALWAYS or
 * ELK_SOMETIMES) and the iz_lookup bitmask of ELK_WM_IZ_* flags are taken
 * from the elk_wm_prog_key rather than passed as parameters.
 */
static inline void
setup_fs_payload_gfx4(elk_fs_thread_payload &payload,
                      const elk_fs_visitor &v,
                      bool &source_depth_to_render_target,
                      bool &runtime_check_aads_emit)
{
   assert(v.dispatch_width <= 16);

   struct elk_wm_prog_data *prog_data = elk_wm_prog_data(v.prog_data);
   elk_wm_prog_key *key = (elk_wm_prog_key *) v.key;

   GLuint reg = 1;
   bool kill_stats_promoted_workaround = false;
   int lookup = key->iz_lookup;

   assert(lookup < ELK_WM_IZ_BIT_MAX);

   /* Crazy workaround in the windowizer, which we need to track in
    * our register allocation and render target writes. See the "If
    * statistics are enabled..." paragraph of 11.5.3.2: Early Depth
    * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
    */
   if (key->stats_wm &&
       (lookup & ELK_WM_IZ_PS_KILL_ALPHATEST_BIT) &&
       wm_iz_table[lookup].mode == P) {
      kill_stats_promoted_workaround = true;
   }
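   /* When the workaround applies, source depth is forced into the payload
    * and written to the render target below.
    */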

   payload.subspan_coord_reg[0] = reg++;

   if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
       kill_stats_promoted_workaround) {
      payload.source_depth_reg[0] = reg;
      reg += 2;
   }

   if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
      source_depth_to_render_target = true;

   if (wm_iz_table[lookup].ds_present || key->line_aa != ELK_NEVER) {
      payload.aa_dest_stencil_reg[0] = reg;
      runtime_check_aads_emit =
         !wm_iz_table[lookup].ds_present && key->line_aa == ELK_SOMETIMES;
      reg++;
   }

   if (wm_iz_table[lookup].dd_present) {
      payload.dest_depth_reg[0] = reg;
      reg += 2;
   }

   payload.num_regs = reg;
}

#undef P /* prompted depth */
#undef C /* computed */
#undef N /* non-promoted? */

elk_fs_thread_payload::elk_fs_thread_payload(const elk_fs_visitor &v,
                                             bool &source_depth_to_render_target,
                                             bool &runtime_check_aads_emit)
   : subspan_coord_reg(),
     source_depth_reg(),
     source_w_reg(),
     aa_dest_stencil_reg(),
     dest_depth_reg(),
     sample_pos_reg(),
     sample_mask_in_reg(),
     depth_w_coef_reg(),
     barycentric_coord_reg()
{
   if (v.devinfo->ver >= 6)
      setup_fs_payload_gfx6(*this, v, source_depth_to_render_target);
   else
      setup_fs_payload_gfx4(*this, v, source_depth_to_render_target,
                            runtime_check_aads_emit);
}

elk_cs_thread_payload::elk_cs_thread_payload(const elk_fs_visitor &v)
{
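   /* R0: thread header. */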
   num_regs = reg_unit(v.devinfo);
}

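/* Copies the subgroup ID into `dest` from the uniform slot reserved for it,
 * located via elk_get_subgroup_id_param_index(). Only valid for compute
 * stages, as the assert enforces.
 */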
void
elk_cs_thread_payload::load_subgroup_id(const fs_builder &bld,
                                        elk_fs_reg &dest) const
{
   auto devinfo = bld.shader->devinfo;
   dest = retype(dest, ELK_REGISTER_TYPE_UD);

   assert(gl_shader_stage_is_compute(bld.shader->stage));
   int index = elk_get_subgroup_id_param_index(devinfo,
                                               bld.shader->stage_prog_data);
   bld.MOV(dest, elk_fs_reg(UNIFORM, index, ELK_REGISTER_TYPE_UD));
}