xref: /aosp_15_r20/external/mesa3d/src/compiler/shader_info.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27 
28 #include "util/bitset.h"
29 #include "util/mesa-blake3.h"
30 #include "shader_enums.h"
31 #include <stdint.h>
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
/* Number of entries in shader_info::xfb_stride (transform feedback buffer strides). */
37 #define MAX_XFB_BUFFERS        4
/* Number of entries in shader_info::inlinable_uniform_dw_offsets. */
38 #define MAX_INLINABLE_UNIFORMS 4
39 
/**
 * Metadata describing a single shader: identification (name, label, source
 * hash), its stage, resource counts, bitmasks of the I/O, system values,
 * textures and images it uses, assorted feature/lowering flags, and a union
 * of stage-specific data (vs, gs, fs, cs, tess, mesh).
 *
 * NOTE(review): many fields are bit-fields declared next to each other, so
 * the declaration order and widths determine the struct layout -- keep that
 * in mind when adding or moving fields.
 */
40 typedef struct shader_info {
41    const char *name;
42 
43    /* Descriptive name provided by the client; may be NULL */
44    const char *label;
45 
46    /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
47    bool internal;
48 
49    /* BLAKE3 of the original source, used by shader detection in drivers. */
50    blake3_hash source_blake3;
51 
52    /** The shader stage, such as MESA_SHADER_VERTEX. */
53    gl_shader_stage stage:8;
54 
55    /** The shader stage in a non SSO linked program that follows this stage,
56      * such as MESA_SHADER_FRAGMENT.
57      */
58    gl_shader_stage next_stage:8;
59 
60    /* Number of textures used by this shader */
61    uint8_t num_textures;
62    /* Number of uniform buffers used by this shader */
63    uint8_t num_ubos;
64    /* Number of atomic buffers used by this shader */
65    uint8_t num_abos;
66    /* Number of shader storage buffers (max .driver_location + 1) used by this
67     * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
68     * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
69     * and atomic counters in nir_shader->info.
70     */
71    uint8_t num_ssbos;
72    /* Number of images used by this shader */
73    uint8_t num_images;
74 
75    /* Which inputs are actually read */
76    uint64_t inputs_read;
77    /* Which inputs occupy 2 slots. */
78    uint64_t dual_slot_inputs;
79    /* Which outputs are actually written */
80    uint64_t outputs_written;
81    /* Which outputs are actually read */
82    uint64_t outputs_read;
83    /* Which system values are actually read */
84    BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
85 
86    /* Which I/O is per-primitive; for read/written information, combine with
87     * the fields above.
88     */
89    uint64_t per_primitive_inputs;
90    uint64_t per_primitive_outputs;
91 
92    /* Which outputs are per-view */
93    uint64_t per_view_outputs;
94 
95    /* Which 16-bit inputs and outputs are used corresponding to
96     * VARYING_SLOT_VARn_16BIT.
97     */
98    uint16_t inputs_read_16bit;
99    uint16_t outputs_written_16bit;
100    uint16_t outputs_read_16bit;
101    uint16_t inputs_read_indirectly_16bit;
102    uint16_t outputs_accessed_indirectly_16bit;
103 
104    /* Which patch inputs are actually read */
105    uint32_t patch_inputs_read;
106    /* Which patch outputs are actually written */
107    uint32_t patch_outputs_written;
108    /* Which patch outputs are read */
109    uint32_t patch_outputs_read;
110 
111    /* Which inputs are read indirectly (subset of inputs_read) */
112    uint64_t inputs_read_indirectly;
113    /* Which outputs are read or written indirectly */
114    uint64_t outputs_accessed_indirectly;
115    /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
116    uint64_t patch_inputs_read_indirectly;
117    /* Which patch outputs are read or written indirectly */
118    uint64_t patch_outputs_accessed_indirectly;
119 
120    /** Bitfield of which textures are used */
121    BITSET_DECLARE(textures_used, 128);
122 
123    /** Bitfield of which textures are used by texelFetch() */
124    BITSET_DECLARE(textures_used_by_txf, 128);
125 
126    /** Bitfield of which samplers are used */
127    BITSET_DECLARE(samplers_used, 32);
128 
129    /** Bitfield of which images are used */
130    BITSET_DECLARE(images_used, 64);
131    /** Bitfield of which images are buffers. */
132    BITSET_DECLARE(image_buffers, 64);
133    /** Bitfield of which images are MSAA. */
134    BITSET_DECLARE(msaa_images, 64);
135 
136    /* SPV_KHR_float_controls: execution mode for floating point ops */
137    uint32_t float_controls_execution_mode;
138 
139    /**
140     * Size of shared variables accessed by compute/task/mesh shaders.
141     */
142    unsigned shared_size;
143 
144    /**
145     * Size of task payload variables accessed by task/mesh shaders.
146     */
147    unsigned task_payload_size;
148 
149    /**
150     * Number of ray tracing queries in the shader (counts all elements of all
151     * variables).
152     */
153    unsigned ray_queries;
154 
155    /**
156     * Local workgroup size used by compute/task/mesh shaders.
157     */
158    uint16_t workgroup_size[3];
159 
       /* Subgroup size setting (see enum gl_subgroup_size) and subgroup count.
        * NOTE(review): num_subgroups is presumably per workgroup -- confirm.
        */
160    enum gl_subgroup_size subgroup_size;
161    uint8_t num_subgroups;
162 
163    /**
164     * Uses subgroup intrinsics which can communicate across a quad.
165     */
166    bool uses_wide_subgroup_intrinsics;
167 
168    /* Transform feedback buffer strides in dwords, max. 1K - 4. */
169    uint8_t xfb_stride[MAX_XFB_BUFFERS];
170 
       /* Dword offsets of the inlinable uniforms (at most
        * MAX_INLINABLE_UNIFORMS entries).  NOTE(review): presumably only the
        * first num_inlinable_uniforms entries are valid -- confirm.
        */
171    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
172    uint8_t num_inlinable_uniforms:4;
173 
174    /* The size of the gl_ClipDistance[] array, if declared. */
175    uint8_t clip_distance_array_size:4;
176 
177    /* The size of the gl_CullDistance[] array, if declared. */
178    uint8_t cull_distance_array_size:4;
179 
180    /* Whether or not this shader ever uses textureGather() */
181    bool uses_texture_gather:1;
182 
183    /* Whether texture size, levels, or samples is queried. */
184    bool uses_resource_info_query:1;
185 
186    /**
187     * True if this shader uses the fddx/fddy opcodes.
188     *
189     * Note that this does not include the "fine" and "coarse" variants.
190     */
191    bool uses_fddx_fddy:1;
192 
193    /** Has divergence analysis ever been run? */
194    bool divergence_analysis_run:1;
195 
196    /* Bitmask of bit-sizes used with ALU instructions. */
197    uint8_t bit_sizes_float;
198    uint8_t bit_sizes_int;
199 
200    /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
201    bool first_ubo_is_default_ubo:1;
202 
203    /* Whether or not separate shader objects were used */
204    bool separate_shader:1;
205 
206    /** Was this shader linked with any transform feedback varyings? */
207    bool has_transform_feedback_varyings:1;
208 
209    /* Whether flrp has been lowered. */
210    bool flrp_lowered:1;
211 
212    /* Whether nir_lower_io has been called to lower derefs.
213     * nir_variables for inputs and outputs might not be present in the IR.
214     */
215    bool io_lowered:1;
216 
217    /** Has nir_lower_var_copies been called?  Used to avoid calling any
218     * lowering/optimization that would introduce any copy_deref later.
219     */
220    bool var_copies_lowered:1;
221 
222    /* Whether the shader writes memory, including transform feedback. */
223    bool writes_memory:1;
224 
225    /* Whether gl_Layer is viewport-relative */
226    bool layer_viewport_relative:1;
227 
228    /* Whether explicit barriers are used */
229    bool uses_control_barrier : 1;
230    bool uses_memory_barrier : 1;
231 
232    /* Whether ARB_bindless_texture ops or variables are used */
233    bool uses_bindless : 1;
234 
235    /**
236     * Shared memory types have explicit layout set.  Used for
237     * SPV_KHR_workgroup_storage_explicit_layout.
238     */
239    bool shared_memory_explicit_layout:1;
240 
241    /**
242     * Used for VK_KHR_zero_initialize_workgroup_memory.
243     */
244    bool zero_initialize_shared_memory:1;
245 
246    /**
247     * Used for ARB_compute_variable_group_size.
248     */
249    bool workgroup_size_variable:1;
250 
251    /**
252     * Whether the shader uses printf instructions.
253     */
254    bool uses_printf:1;
255 
256    /**
257     * VK_KHR_shader_maximal_reconvergence
258     */
259    bool maximally_reconverges:1;
260 
261    /* Use ACO instead of LLVM on AMD. */
262    bool use_aco_amd:1;
263 
264    /**
265      * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
266      *
267      * From the ARB_fragment_program specification:
268      *
269      *    "The following rules apply to multiplication:
270      *
271      *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
272      *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
273      *         representable numbers (IEEE "not a number" and "infinity"
274      *         encodings may be exceptions).
275      *      3. +1.0 * <x> = <x>, for all <x>."
276      *
277      * However, in effect this was due to DX9 semantics implying that 0*x=0 even
278      * for inf/nan if the hardware generated them instead of float_min/max.  So,
279      * you should not have an exception for inf/nan to rule 2 above.
280      *
281      * One implementation of this behavior would be to flush all generated NaNs
282      * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
283      * generate NaNs, and the only way the GPU saw one was to possibly feed it
284      * in as a uniform.
         *
         * NOTE(review): unlike the neighbouring 1-bit flags, this is declared
         * as a full bool -- confirm whether that is intentional.
285      */
286    bool use_legacy_math_rules;
287 
288    /*
289     * Arrangement of invocations used to calculate derivatives in
290     * compute/task/mesh shaders.  From KHR_compute_shader_derivatives.
291     */
292    enum gl_derivative_group derivative_group:2;
293 
       /* Stage-specific data.  All members share storage, so only one of them
        * is meaningful for a given shader; presumably selected by `stage` --
        * confirm against the code that fills this struct.
        */
294    union {
          /* Member `vs` (vertex shaders). */
295       struct {
296          /* Which inputs are doubles */
297          uint64_t double_inputs;
298 
299          /* For AMD-specific driver-internal shaders. It replaces vertex
300           * buffer loads with code generating VS inputs from scalar registers.
301           *
302           * Valid values: SI_VS_BLIT_SGPRS_POS_*
303           */
304          uint8_t blit_sgprs_amd:4;
305 
306          /* Software TES executing as HW VS */
307          bool tes_agx:1;
308 
309          /* True if the shader writes position in window space coordinates pre-transform */
310          bool window_space_position:1;
311 
312          /** Is an edge flag input needed? */
313          bool needs_edge_flag:1;
314       } vs;
315 
          /* Member `gs` (geometry shaders). */
316       struct {
317          /** The output primitive type */
318          enum mesa_prim output_primitive;
319 
320          /** The input primitive type */
321          enum mesa_prim input_primitive;
322 
323          /** The maximum number of vertices the geometry shader might write. */
324          uint16_t vertices_out;
325 
326          /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
327          uint8_t invocations;
328 
329          /** The number of vertices received per input primitive (max. 6) */
330          uint8_t vertices_in:3;
331 
332          /** Whether or not this shader uses EndPrimitive */
333          bool uses_end_primitive:1;
334 
335          /** The streams used in this shader (max. 4) */
336          uint8_t active_stream_mask:4;
337       } gs;
338 
          /* Member `fs` (fragment shaders). */
339       struct {
340          bool uses_discard:1;
341          bool uses_fbfetch_output:1;
342          bool fbfetch_coherent:1;
343          bool color_is_dual_source:1;
344 
345          /**
346           * True if this fragment shader requires full quad invocations.
347           */
348          bool require_full_quads:1;
349 
350          /**
351           * Whether the derivative group must be equivalent to the quad group.
352           */
353          bool quad_derivatives:1;
354 
355          /**
356           * True if this fragment shader requires helper invocations.  This
357           * can be caused by the use of ALU derivative ops, texture
358           * instructions which do implicit derivatives, the use of quad
359           * subgroup operations or if the shader requires full quads.
360           */
361          bool needs_quad_helper_invocations:1;
362 
363          /**
364           * Whether any inputs are declared with the "sample" qualifier.
365           */
366          bool uses_sample_qualifier:1;
367 
368          /**
369           * Whether sample shading is used.
370           */
371          bool uses_sample_shading:1;
372 
373          /**
374           * Whether early fragment tests are enabled as defined by
375           * ARB_shader_image_load_store.
376           */
377          bool early_fragment_tests:1;
378 
379          /**
380           * Defined by INTEL_conservative_rasterization.
381           */
382          bool inner_coverage:1;
383 
384          bool post_depth_coverage:1;
385 
386          /**
387           * \name ARB_fragment_coord_conventions
388           * @{
389           */
390          bool pixel_center_integer:1;
391          bool origin_upper_left:1;
392          /*@}*/
393 
394          bool pixel_interlock_ordered:1;
395          bool pixel_interlock_unordered:1;
396          bool sample_interlock_ordered:1;
397          bool sample_interlock_unordered:1;
398 
399          /**
400           * Flags whether NIR's base types on the FS color outputs should be
401           * ignored.
402           *
403           * GLSL requires that fragment shader output base types match the
404           * render target's base types for the behavior to be defined.  From
405           * the GL 4.6 spec:
406           *
407           *     "If the values written by the fragment shader do not match the
408           *      format(s) of the corresponding color buffer(s), the result is
409           *      undefined."
410           *
411           * However, for NIR shaders translated from TGSI, we don't have the
412           * output types any more, so the driver will need to do whatever
413           * fixups are necessary to handle effectively untyped data being
414           * output from the FS.
415           */
416          bool untyped_color_outputs:1;
417 
418          /** gl_FragDepth layout for ARB_conservative_depth. */
419          enum gl_frag_depth_layout depth_layout:3;
420 
421          /**
422           * Interpolation qualifiers for drivers that lower color inputs
423           * to system values.
424           */
425          unsigned color0_interp:3; /* glsl_interp_mode */
426          bool color0_sample:1;
427          bool color0_centroid:1;
428          unsigned color1_interp:3; /* glsl_interp_mode */
429          bool color1_sample:1;
430          bool color1_centroid:1;
431 
432          /* Bitmask of gl_advanced_blend_mode values that may be used with this
433           * shader.
434           */
435          unsigned advanced_blend_modes;
436 
437          /**
438           * Defined by AMD_shader_early_and_late_fragment_tests.
439           */
440          bool early_and_late_fragment_tests:1;
441          enum gl_frag_stencil_layout stencil_front_layout:3;
442          enum gl_frag_stencil_layout stencil_back_layout:3;
443       } fs;
444 
          /* Member `cs` (compute shaders, going by the name). */
445       struct {
446          uint16_t workgroup_size_hint[3];
447 
448          uint8_t user_data_components_amd:4;
449 
450          /*
451           * If the shader might run with shared mem on top of `shared_size`.
452           */
453          bool has_variable_shared_mem:1;
454 
455          /**
456           * If the shader has any use of a cooperative matrix. From
457           * SPV_KHR_cooperative_matrix.
458           */
459          bool has_cooperative_matrix:1;
460 
461          /**
462           * Number of bytes of shared imageblock memory per thread. Currently,
463           * this requires that the workgroup size is 32x32x1 and that
464           * shared_size = 0. These requirements could be lifted in the future.
465           * However, there is no current OpenGL/Vulkan API support for
466           * imageblocks. This is only used internally to accelerate blit/copy.
467           */
468          uint8_t image_block_size_per_thread_agx;
469 
470          /**
471           * pointer size is:
472           *   AddressingModelLogical:    0    (default)
473           *   AddressingModelPhysical32: 32
474           *   AddressingModelPhysical64: 64
475           */
476          unsigned ptr_size;
477 
478          /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */
479          uint32_t shader_index;
480 
481          /** Maximum size required by any output node payload array */
482          uint32_t node_payloads_size;
483 
484          /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */
485          uint32_t workgroup_count[3];
486       } cs;
487 
488       /* Applies to both TCS and TES. */
489       struct {
490          enum tess_primitive_mode _primitive_mode;
491 
492          /** The number of vertices in the TCS output patch. */
493          uint8_t tcs_vertices_out;
494          unsigned spacing:2; /*gl_tess_spacing*/
495 
496          /** Is the vertex order counterclockwise? */
497          bool ccw:1;
498          bool point_mode:1;
499 
500          /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
501           * with a vertex index that is NOT the invocation id
502           */
503          uint64_t tcs_cross_invocation_inputs_read;
504 
505          /* Bit mask of TCS per-vertex outputs that are used
506           * with a vertex index that is NOT the invocation id
507           */
508          uint64_t tcs_cross_invocation_outputs_read;
509       } tess;
510 
511       /* Applies to MESH and TASK. */
512       struct {
513          /* Bit mask of MS outputs that are used
514           * with an index that is NOT the local invocation index.
515           */
516          uint64_t ms_cross_invocation_output_access;
517 
518          /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT)
519           * when they are known compile-time constants.
520           * 0 means they are not known.
521           */
522          uint32_t ts_mesh_dispatch_dimensions[3];
523 
524          uint16_t max_vertices_out;
525          uint16_t max_primitives_out;
526          enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */
527 
528          /* TODO: remove this when we stop supporting NV_mesh_shader. */
529          bool nv;
530       } mesh;
531    };
532 } shader_info;
533 
534 #ifdef __cplusplus
535 }
536 #endif
537 
538 #endif /* SHADER_INFO_H */
539