xref: /aosp_15_r20/external/mesa3d/src/imagination/rogue/rogue_build_data.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 
30 #include "compiler/shader_enums.h"
31 #include "nir/nir.h"
32 #include "rogue.h"
33 #include "util/macros.h"
34 
35 /**
36  * \file rogue_build_data.c
37  *
38  * \brief Contains functions to collect build data for the driver.
39  */
40 
41 /* N.B. This will all be hoisted into the driver. */
42 
43 /**
44  * \brief Allocates the coefficient registers that will contain the iterator
45  * data for the fragment shader input varyings.
46  *
47  * \param[in] args The iterator argument data.
48  * \return The total number of coefficient registers required by the iterators.
49  */
alloc_iterator_regs(struct rogue_iterator_args * args)50 static unsigned alloc_iterator_regs(struct rogue_iterator_args *args)
51 {
52    unsigned coeffs = 0;
53 
54    for (unsigned u = 0; u < args->num_fpu_iterators; ++u) {
55       /* Ensure there aren't any gaps. */
56       assert(args->base[u] == ~0);
57 
58       args->base[u] = coeffs;
59       coeffs += ROGUE_COEFF_ALIGN * args->components[u];
60    }
61 
62    return coeffs;
63 }
64 
65 /**
66  * \brief Reserves an iterator for a fragment shader input varying,
67  * and calculates its setup data.
68  *
69  * \param[in] args The iterator argument data.
70  * \param[in] i The iterator index.
71  * \param[in] type The interpolation type of the varying.
72  * \param[in] f16 Whether the data type is F16 or F32.
73  * \param[in] components The number of components in the varying.
74  */
reserve_iterator(struct rogue_iterator_args * args,unsigned i,enum glsl_interp_mode type,bool f16,unsigned components)75 static void reserve_iterator(struct rogue_iterator_args *args,
76                              unsigned i,
77                              enum glsl_interp_mode type,
78                              bool f16,
79                              unsigned components)
80 {
81    struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };
82 
83    assert(components >= 1 && components <= 4);
84 
85    /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
86    assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
87    assert(i < ARRAY_SIZE(args->fpu_iterators));
88 
89    switch (type) {
90    /* Default interpolation is smooth. */
91    case INTERP_MODE_NONE:
92       data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
93       data.perspective = true;
94       break;
95 
96    case INTERP_MODE_NOPERSPECTIVE:
97       data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
98       data.perspective = false;
99       break;
100 
101    default:
102       unreachable("Unimplemented interpolation type.");
103    }
104 
105    /* Number of components in this varying
106     * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
107     */
108    data.size = (components - 1);
109 
110    /* TODO: Investigate F16 support. */
111    assert(!f16);
112    data.f16 = f16;
113 
114    /* Offsets within the vertex. */
115    data.f32_offset = 2 * i;
116    data.f16_offset = data.f32_offset;
117 
118    ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
119    args->destination[i] = i;
120    args->base[i] = ~0;
121    args->components[i] = components;
122    ++args->num_fpu_iterators;
123 }
124 
/**
 * \brief Counts the variables of a shader that nir_foreach_variable_with_modes
 * visits for the given mode(s).
 *
 * \param[in] nir NIR shader.
 * \param[in] mode The variable mode(s) to count.
 * \return The number of variables visited.
 */
static inline unsigned nir_count_variables_with_modes(const nir_shader *nir,
                                                      nir_variable_mode mode)
{
   unsigned total = 0;

   nir_foreach_variable_with_modes (var, nir, mode) {
      total += 1;
   }

   return total;
}
136 
137 /**
138  * \brief Collects the fragment shader I/O data to feed-back to the driver.
139  *
140  * \sa #collect_io_data()
141  *
142  * \param[in] common_data Common build data.
143  * \param[in] fs_data Fragment-specific build data.
144  * \param[in] nir NIR fragment shader.
145  */
collect_io_data_fs(struct rogue_common_build_data * common_data,struct rogue_fs_build_data * fs_data,nir_shader * nir)146 static void collect_io_data_fs(struct rogue_common_build_data *common_data,
147                                struct rogue_fs_build_data *fs_data,
148                                nir_shader *nir)
149 {
150    unsigned num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
151    assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
152 
153    /* Process inputs (if present). */
154    if (num_inputs) {
155       /* If the fragment shader has inputs, the first iterator
156        * must be used for the W component.
157        */
158       reserve_iterator(&fs_data->iterator_args,
159                        0,
160                        INTERP_MODE_NOPERSPECTIVE,
161                        false,
162                        1);
163 
164       nir_foreach_shader_in_variable (var, nir) {
165          unsigned i = (var->data.location - VARYING_SLOT_VAR0) + 1;
166          unsigned components = glsl_get_components(var->type);
167          enum glsl_interp_mode interp = var->data.interpolation;
168          bool f16 = glsl_type_is_16bit(var->type);
169 
170          /* Check that arguments are either F16 or F32. */
171          assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
172          assert(f16 || glsl_type_is_32bit(var->type));
173 
174          /* Check input location. */
175          assert(var->data.location >= VARYING_SLOT_VAR0 &&
176                 var->data.location <= VARYING_SLOT_VAR31);
177 
178          reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
179       }
180 
181       common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
182       assert(common_data->coeffs);
183       assert(common_data->coeffs <= rogue_reg_infos[ROGUE_REG_CLASS_COEFF].num);
184    }
185 
186    /* TODO: Process outputs. */
187 }
188 
189 /**
190  * \brief Allocates the vertex shader outputs.
191  *
192  * \param[in] outputs The vertex shader output data.
193  * \return The total number of vertex outputs required.
194  */
alloc_vs_outputs(struct rogue_vertex_outputs * outputs)195 static unsigned alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
196 {
197    unsigned vs_outputs = 0;
198 
199    for (unsigned u = 0; u < outputs->num_output_vars; ++u) {
200       /* Ensure there aren't any gaps. */
201       assert(outputs->base[u] == ~0);
202 
203       outputs->base[u] = vs_outputs;
204       vs_outputs += outputs->components[u];
205    }
206 
207    return vs_outputs;
208 }
209 
210 /**
211  * \brief Counts the varyings used by the vertex shader.
212  *
213  * \param[in] outputs The vertex shader output data.
214  * \return The number of varyings used.
215  */
count_vs_varyings(struct rogue_vertex_outputs * outputs)216 static unsigned count_vs_varyings(struct rogue_vertex_outputs *outputs)
217 {
218    unsigned varyings = 0;
219 
220    /* Skip the position. */
221    for (unsigned u = 1; u < outputs->num_output_vars; ++u)
222       varyings += outputs->components[u];
223 
224    return varyings;
225 }
226 
227 /**
228  * \brief Reserves space for a vertex shader input.
229  *
230  * \param[in] inputs The vertex input data.
231  * \param[in] i The vertex input index.
232  * \param[in] components The number of components in the input.
233  */
reserve_vs_input(struct rogue_vertex_inputs * inputs,unsigned i,unsigned components)234 static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
235                              unsigned i,
236                              unsigned components)
237 {
238    assert(components >= 1 && components <= 4);
239 
240    assert(i < ARRAY_SIZE(inputs->base));
241 
242    inputs->base[i] = ~0;
243    inputs->components[i] = components;
244    ++inputs->num_input_vars;
245 }
246 
247 /**
248  * \brief Reserves space for a vertex shader output.
249  *
250  * \param[in] outputs The vertex output data.
251  * \param[in] i The vertex output index.
252  * \param[in] components The number of components in the output.
253  */
reserve_vs_output(struct rogue_vertex_outputs * outputs,unsigned i,unsigned components)254 static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
255                               unsigned i,
256                               unsigned components)
257 {
258    assert(components >= 1 && components <= 4);
259 
260    assert(i < ARRAY_SIZE(outputs->base));
261 
262    outputs->base[i] = ~0;
263    outputs->components[i] = components;
264    ++outputs->num_output_vars;
265 }
266 
267 /**
268  * \brief Collects the vertex shader I/O data to feed-back to the driver.
269  *
270  * \sa #collect_io_data()
271  *
272  * \param[in] common_data Common build data.
273  * \param[in] vs_data Vertex-specific build data.
274  * \param[in] nir NIR vertex shader.
275  */
collect_io_data_vs(struct rogue_common_build_data * common_data,struct rogue_vs_build_data * vs_data,nir_shader * nir)276 static void collect_io_data_vs(struct rogue_common_build_data *common_data,
277                                struct rogue_vs_build_data *vs_data,
278                                nir_shader *nir)
279 {
280    ASSERTED bool out_pos_present = false;
281    ASSERTED unsigned num_outputs =
282       nir_count_variables_with_modes(nir, nir_var_shader_out);
283 
284    /* Process outputs. */
285 
286    /* We should always have at least a position variable. */
287    assert(num_outputs > 0 && "Unsupported number of vertex shader outputs.");
288 
289    nir_foreach_shader_out_variable (var, nir) {
290       unsigned components = glsl_get_components(var->type);
291 
292       /* Check that outputs are F32. */
293       /* TODO: Support other types. */
294       assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
295       assert(glsl_type_is_32bit(var->type));
296 
297       if (var->data.location == VARYING_SLOT_POS) {
298          assert(components == 4);
299          out_pos_present = true;
300 
301          reserve_vs_output(&vs_data->outputs, 0, components);
302       } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
303                  (var->data.location <= VARYING_SLOT_VAR31)) {
304          unsigned i = (var->data.location - VARYING_SLOT_VAR0) + 1;
305          reserve_vs_output(&vs_data->outputs, i, components);
306       } else {
307          unreachable("Unsupported vertex output type.");
308       }
309    }
310 
311    /* Always need the output position to be present. */
312    assert(out_pos_present);
313 
314    vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
315    assert(vs_data->num_vertex_outputs);
316    assert(vs_data->num_vertex_outputs <
317           rogue_reg_infos[ROGUE_REG_CLASS_VTXOUT].num);
318 
319    vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
320 }
321 
322 /**
323  * \brief Collects I/O data to feed-back to the driver.
324  *
325  * Collects the inputs/outputs/memory required, and feeds that back to the
326  * driver. Done at this stage rather than at the start of rogue_to_binary, so
327  * that all the I/O of all the shader stages is known before backend
328  * compilation, which would let us do things like cull unused inputs.
329  *
330  * \param[in] ctx Shared multi-stage build context.
331  * \param[in] nir NIR shader.
332  */
333 PUBLIC
rogue_collect_io_data(struct rogue_build_ctx * ctx,nir_shader * nir)334 void rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
335 {
336    gl_shader_stage stage = nir->info.stage;
337    struct rogue_common_build_data *common_data = &ctx->common_data[stage];
338 
339    /* Collect stage-specific data. */
340    switch (stage) {
341    case MESA_SHADER_FRAGMENT:
342       return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
343 
344    case MESA_SHADER_VERTEX:
345       return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
346 
347    default:
348       break;
349    }
350 
351    unreachable("Unsupported stage.");
352 }
353 
354 /**
355  * \brief Returns the allocated coefficient register index for a component of an
356  * input varying location.
357  *
358  * \param[in] args The allocated iterator argument data.
359  * \param[in] location The input varying location, or ~0 for the W coefficient.
360  * \param[in] component The requested component.
361  * \return The coefficient register index.
362  */
363 PUBLIC
rogue_coeff_index_fs(struct rogue_iterator_args * args,gl_varying_slot location,unsigned component)364 unsigned rogue_coeff_index_fs(struct rogue_iterator_args *args,
365                               gl_varying_slot location,
366                               unsigned component)
367 {
368    unsigned i;
369 
370    /* Special case: W coefficient. */
371    if (location == ~0) {
372       /* The W component shouldn't be the only one. */
373       assert(args->num_fpu_iterators > 1);
374       assert(args->destination[0] == 0);
375       return 0;
376    }
377 
378    i = (location - VARYING_SLOT_VAR0) + 1;
379    assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
380    assert(i < args->num_fpu_iterators);
381    assert(component < args->components[i]);
382    assert(args->base[i] != ~0);
383 
384    return args->base[i] + (ROGUE_COEFF_ALIGN * component);
385 }
386 
387 /**
388  * \brief Returns the allocated vertex output index for a component of an input
389  * varying location.
390  *
391  * \param[in] outputs The vertex output data.
392  * \param[in] location The output varying location.
393  * \param[in] component The requested component.
394  * \return The vertex output index.
395  */
396 PUBLIC
rogue_output_index_vs(struct rogue_vertex_outputs * outputs,gl_varying_slot location,unsigned component)397 unsigned rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
398                                gl_varying_slot location,
399                                unsigned component)
400 {
401    unsigned i;
402 
403    if (location == VARYING_SLOT_POS) {
404       /* Always at location 0. */
405       assert(outputs->base[0] == 0);
406       i = 0;
407    } else if ((location >= VARYING_SLOT_VAR0) &&
408               (location <= VARYING_SLOT_VAR31)) {
409       i = (location - VARYING_SLOT_VAR0) + 1;
410    } else {
411       unreachable("Unsupported vertex output type.");
412    }
413 
414    assert(i < outputs->num_output_vars);
415    assert(component < outputs->components[i]);
416    assert(outputs->base[i] != ~0);
417 
418    return outputs->base[i] + component;
419 }
420