1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29
30 #include "compiler/shader_enums.h"
31 #include "nir/nir.h"
32 #include "rogue.h"
33 #include "util/macros.h"
34
35 /**
36 * \file rogue_build_data.c
37 *
38 * \brief Contains functions to collect build data for the driver.
39 */
40
41 /* N.B. This will all be hoisted into the driver. */
42
43 /**
44 * \brief Allocates the coefficient registers that will contain the iterator
45 * data for the fragment shader input varyings.
46 *
47 * \param[in] args The iterator argument data.
48 * \return The total number of coefficient registers required by the iterators.
49 */
alloc_iterator_regs(struct rogue_iterator_args * args)50 static unsigned alloc_iterator_regs(struct rogue_iterator_args *args)
51 {
52 unsigned coeffs = 0;
53
54 for (unsigned u = 0; u < args->num_fpu_iterators; ++u) {
55 /* Ensure there aren't any gaps. */
56 assert(args->base[u] == ~0);
57
58 args->base[u] = coeffs;
59 coeffs += ROGUE_COEFF_ALIGN * args->components[u];
60 }
61
62 return coeffs;
63 }
64
65 /**
66 * \brief Reserves an iterator for a fragment shader input varying,
67 * and calculates its setup data.
68 *
69 * \param[in] args The iterator argument data.
70 * \param[in] i The iterator index.
71 * \param[in] type The interpolation type of the varying.
72 * \param[in] f16 Whether the data type is F16 or F32.
73 * \param[in] components The number of components in the varying.
74 */
reserve_iterator(struct rogue_iterator_args * args,unsigned i,enum glsl_interp_mode type,bool f16,unsigned components)75 static void reserve_iterator(struct rogue_iterator_args *args,
76 unsigned i,
77 enum glsl_interp_mode type,
78 bool f16,
79 unsigned components)
80 {
81 struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };
82
83 assert(components >= 1 && components <= 4);
84
85 /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
86 assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
87 assert(i < ARRAY_SIZE(args->fpu_iterators));
88
89 switch (type) {
90 /* Default interpolation is smooth. */
91 case INTERP_MODE_NONE:
92 data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
93 data.perspective = true;
94 break;
95
96 case INTERP_MODE_NOPERSPECTIVE:
97 data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
98 data.perspective = false;
99 break;
100
101 default:
102 unreachable("Unimplemented interpolation type.");
103 }
104
105 /* Number of components in this varying
106 * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
107 */
108 data.size = (components - 1);
109
110 /* TODO: Investigate F16 support. */
111 assert(!f16);
112 data.f16 = f16;
113
114 /* Offsets within the vertex. */
115 data.f32_offset = 2 * i;
116 data.f16_offset = data.f32_offset;
117
118 ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
119 args->destination[i] = i;
120 args->base[i] = ~0;
121 args->components[i] = components;
122 ++args->num_fpu_iterators;
123 }
124
/**
 * \brief Counts the variables of a NIR shader that match the given mode(s).
 *
 * \param[in] nir The NIR shader.
 * \param[in] mode The variable mode(s) to match.
 * \return The number of matching variables.
 */
static inline unsigned nir_count_variables_with_modes(const nir_shader *nir,
                                                      nir_variable_mode mode)
{
   unsigned total = 0;

   nir_foreach_variable_with_modes (var, nir, mode)
      total++;

   return total;
}
136
137 /**
138 * \brief Collects the fragment shader I/O data to feed-back to the driver.
139 *
140 * \sa #collect_io_data()
141 *
142 * \param[in] common_data Common build data.
143 * \param[in] fs_data Fragment-specific build data.
144 * \param[in] nir NIR fragment shader.
145 */
collect_io_data_fs(struct rogue_common_build_data * common_data,struct rogue_fs_build_data * fs_data,nir_shader * nir)146 static void collect_io_data_fs(struct rogue_common_build_data *common_data,
147 struct rogue_fs_build_data *fs_data,
148 nir_shader *nir)
149 {
150 unsigned num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
151 assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
152
153 /* Process inputs (if present). */
154 if (num_inputs) {
155 /* If the fragment shader has inputs, the first iterator
156 * must be used for the W component.
157 */
158 reserve_iterator(&fs_data->iterator_args,
159 0,
160 INTERP_MODE_NOPERSPECTIVE,
161 false,
162 1);
163
164 nir_foreach_shader_in_variable (var, nir) {
165 unsigned i = (var->data.location - VARYING_SLOT_VAR0) + 1;
166 unsigned components = glsl_get_components(var->type);
167 enum glsl_interp_mode interp = var->data.interpolation;
168 bool f16 = glsl_type_is_16bit(var->type);
169
170 /* Check that arguments are either F16 or F32. */
171 assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
172 assert(f16 || glsl_type_is_32bit(var->type));
173
174 /* Check input location. */
175 assert(var->data.location >= VARYING_SLOT_VAR0 &&
176 var->data.location <= VARYING_SLOT_VAR31);
177
178 reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
179 }
180
181 common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
182 assert(common_data->coeffs);
183 assert(common_data->coeffs <= rogue_reg_infos[ROGUE_REG_CLASS_COEFF].num);
184 }
185
186 /* TODO: Process outputs. */
187 }
188
189 /**
190 * \brief Allocates the vertex shader outputs.
191 *
192 * \param[in] outputs The vertex shader output data.
193 * \return The total number of vertex outputs required.
194 */
alloc_vs_outputs(struct rogue_vertex_outputs * outputs)195 static unsigned alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
196 {
197 unsigned vs_outputs = 0;
198
199 for (unsigned u = 0; u < outputs->num_output_vars; ++u) {
200 /* Ensure there aren't any gaps. */
201 assert(outputs->base[u] == ~0);
202
203 outputs->base[u] = vs_outputs;
204 vs_outputs += outputs->components[u];
205 }
206
207 return vs_outputs;
208 }
209
210 /**
211 * \brief Counts the varyings used by the vertex shader.
212 *
213 * \param[in] outputs The vertex shader output data.
214 * \return The number of varyings used.
215 */
count_vs_varyings(struct rogue_vertex_outputs * outputs)216 static unsigned count_vs_varyings(struct rogue_vertex_outputs *outputs)
217 {
218 unsigned varyings = 0;
219
220 /* Skip the position. */
221 for (unsigned u = 1; u < outputs->num_output_vars; ++u)
222 varyings += outputs->components[u];
223
224 return varyings;
225 }
226
227 /**
228 * \brief Reserves space for a vertex shader input.
229 *
230 * \param[in] inputs The vertex input data.
231 * \param[in] i The vertex input index.
232 * \param[in] components The number of components in the input.
233 */
reserve_vs_input(struct rogue_vertex_inputs * inputs,unsigned i,unsigned components)234 static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
235 unsigned i,
236 unsigned components)
237 {
238 assert(components >= 1 && components <= 4);
239
240 assert(i < ARRAY_SIZE(inputs->base));
241
242 inputs->base[i] = ~0;
243 inputs->components[i] = components;
244 ++inputs->num_input_vars;
245 }
246
247 /**
248 * \brief Reserves space for a vertex shader output.
249 *
250 * \param[in] outputs The vertex output data.
251 * \param[in] i The vertex output index.
252 * \param[in] components The number of components in the output.
253 */
reserve_vs_output(struct rogue_vertex_outputs * outputs,unsigned i,unsigned components)254 static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
255 unsigned i,
256 unsigned components)
257 {
258 assert(components >= 1 && components <= 4);
259
260 assert(i < ARRAY_SIZE(outputs->base));
261
262 outputs->base[i] = ~0;
263 outputs->components[i] = components;
264 ++outputs->num_output_vars;
265 }
266
267 /**
268 * \brief Collects the vertex shader I/O data to feed-back to the driver.
269 *
270 * \sa #collect_io_data()
271 *
272 * \param[in] common_data Common build data.
273 * \param[in] vs_data Vertex-specific build data.
274 * \param[in] nir NIR vertex shader.
275 */
collect_io_data_vs(struct rogue_common_build_data * common_data,struct rogue_vs_build_data * vs_data,nir_shader * nir)276 static void collect_io_data_vs(struct rogue_common_build_data *common_data,
277 struct rogue_vs_build_data *vs_data,
278 nir_shader *nir)
279 {
280 ASSERTED bool out_pos_present = false;
281 ASSERTED unsigned num_outputs =
282 nir_count_variables_with_modes(nir, nir_var_shader_out);
283
284 /* Process outputs. */
285
286 /* We should always have at least a position variable. */
287 assert(num_outputs > 0 && "Unsupported number of vertex shader outputs.");
288
289 nir_foreach_shader_out_variable (var, nir) {
290 unsigned components = glsl_get_components(var->type);
291
292 /* Check that outputs are F32. */
293 /* TODO: Support other types. */
294 assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
295 assert(glsl_type_is_32bit(var->type));
296
297 if (var->data.location == VARYING_SLOT_POS) {
298 assert(components == 4);
299 out_pos_present = true;
300
301 reserve_vs_output(&vs_data->outputs, 0, components);
302 } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
303 (var->data.location <= VARYING_SLOT_VAR31)) {
304 unsigned i = (var->data.location - VARYING_SLOT_VAR0) + 1;
305 reserve_vs_output(&vs_data->outputs, i, components);
306 } else {
307 unreachable("Unsupported vertex output type.");
308 }
309 }
310
311 /* Always need the output position to be present. */
312 assert(out_pos_present);
313
314 vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
315 assert(vs_data->num_vertex_outputs);
316 assert(vs_data->num_vertex_outputs <
317 rogue_reg_infos[ROGUE_REG_CLASS_VTXOUT].num);
318
319 vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
320 }
321
322 /**
323 * \brief Collects I/O data to feed-back to the driver.
324 *
325 * Collects the inputs/outputs/memory required, and feeds that back to the
326 * driver. Done at this stage rather than at the start of rogue_to_binary, so
327 * that all the I/O of all the shader stages is known before backend
328 * compilation, which would let us do things like cull unused inputs.
329 *
330 * \param[in] ctx Shared multi-stage build context.
331 * \param[in] nir NIR shader.
332 */
333 PUBLIC
rogue_collect_io_data(struct rogue_build_ctx * ctx,nir_shader * nir)334 void rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
335 {
336 gl_shader_stage stage = nir->info.stage;
337 struct rogue_common_build_data *common_data = &ctx->common_data[stage];
338
339 /* Collect stage-specific data. */
340 switch (stage) {
341 case MESA_SHADER_FRAGMENT:
342 return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
343
344 case MESA_SHADER_VERTEX:
345 return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
346
347 default:
348 break;
349 }
350
351 unreachable("Unsupported stage.");
352 }
353
354 /**
355 * \brief Returns the allocated coefficient register index for a component of an
356 * input varying location.
357 *
358 * \param[in] args The allocated iterator argument data.
359 * \param[in] location The input varying location, or ~0 for the W coefficient.
360 * \param[in] component The requested component.
361 * \return The coefficient register index.
362 */
363 PUBLIC
rogue_coeff_index_fs(struct rogue_iterator_args * args,gl_varying_slot location,unsigned component)364 unsigned rogue_coeff_index_fs(struct rogue_iterator_args *args,
365 gl_varying_slot location,
366 unsigned component)
367 {
368 unsigned i;
369
370 /* Special case: W coefficient. */
371 if (location == ~0) {
372 /* The W component shouldn't be the only one. */
373 assert(args->num_fpu_iterators > 1);
374 assert(args->destination[0] == 0);
375 return 0;
376 }
377
378 i = (location - VARYING_SLOT_VAR0) + 1;
379 assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
380 assert(i < args->num_fpu_iterators);
381 assert(component < args->components[i]);
382 assert(args->base[i] != ~0);
383
384 return args->base[i] + (ROGUE_COEFF_ALIGN * component);
385 }
386
387 /**
388 * \brief Returns the allocated vertex output index for a component of an input
389 * varying location.
390 *
391 * \param[in] outputs The vertex output data.
392 * \param[in] location The output varying location.
393 * \param[in] component The requested component.
394 * \return The vertex output index.
395 */
396 PUBLIC
rogue_output_index_vs(struct rogue_vertex_outputs * outputs,gl_varying_slot location,unsigned component)397 unsigned rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
398 gl_varying_slot location,
399 unsigned component)
400 {
401 unsigned i;
402
403 if (location == VARYING_SLOT_POS) {
404 /* Always at location 0. */
405 assert(outputs->base[0] == 0);
406 i = 0;
407 } else if ((location >= VARYING_SLOT_VAR0) &&
408 (location <= VARYING_SLOT_VAR31)) {
409 i = (location - VARYING_SLOT_VAR0) + 1;
410 } else {
411 unreachable("Unsupported vertex output type.");
412 }
413
414 assert(i < outputs->num_output_vars);
415 assert(component < outputs->components[i]);
416 assert(outputs->base[i] != ~0);
417
418 return outputs->base[i] + component;
419 }
420