1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #ifndef SHADER_INFO_H 26 #define SHADER_INFO_H 27 28 #include "util/bitset.h" 29 #include "util/mesa-blake3.h" 30 #include "shader_enums.h" 31 #include <stdint.h> 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #define MAX_XFB_BUFFERS 4 38 #define MAX_INLINABLE_UNIFORMS 4 39 40 typedef struct shader_info { 41 const char *name; 42 43 /* Descriptive name provided by the client; may be NULL */ 44 const char *label; 45 46 /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */ 47 bool internal; 48 49 /* BLAKE3 of the original source, used by shader detection in drivers. */ 50 blake3_hash source_blake3; 51 52 /** The shader stage, such as MESA_SHADER_VERTEX. */ 53 gl_shader_stage stage:8; 54 55 /** The shader stage in a non SSO linked program that follows this stage, 56 * such as MESA_SHADER_FRAGMENT. 57 */ 58 gl_shader_stage next_stage:8; 59 60 /* Number of textures used by this shader */ 61 uint8_t num_textures; 62 /* Number of uniform buffers used by this shader */ 63 uint8_t num_ubos; 64 /* Number of atomic buffers used by this shader */ 65 uint8_t num_abos; 66 /* Number of shader storage buffers (max .driver_location + 1) used by this 67 * shader. In the case of nir_lower_atomics_to_ssbo being used, this will 68 * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs 69 * and atomic counters in nir_shader->info. 70 */ 71 uint8_t num_ssbos; 72 /* Number of images used by this shader */ 73 uint8_t num_images; 74 75 /* Which inputs are actually read */ 76 uint64_t inputs_read; 77 /* Which inputs occupy 2 slots. */ 78 uint64_t dual_slot_inputs; 79 /* Which outputs are actually written */ 80 uint64_t outputs_written; 81 /* Which outputs are actually read */ 82 uint64_t outputs_read; 83 /* Which system values are actually read */ 84 BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX); 85 86 /* Which I/O is per-primitive, for read/written information combine with 87 * the fields above. 88 */ 89 uint64_t per_primitive_inputs; 90 uint64_t per_primitive_outputs; 91 92 /* Which I/O is per-view */ 93 uint64_t per_view_outputs; 94 95 /* Which 16-bit inputs and outputs are used corresponding to 96 * VARYING_SLOT_VARn_16BIT. 97 */ 98 uint16_t inputs_read_16bit; 99 uint16_t outputs_written_16bit; 100 uint16_t outputs_read_16bit; 101 uint16_t inputs_read_indirectly_16bit; 102 uint16_t outputs_accessed_indirectly_16bit; 103 104 /* Which patch inputs are actually read */ 105 uint32_t patch_inputs_read; 106 /* Which patch outputs are actually written */ 107 uint32_t patch_outputs_written; 108 /* Which patch outputs are read */ 109 uint32_t patch_outputs_read; 110 111 /* Which inputs are read indirectly (subset of inputs_read) */ 112 uint64_t inputs_read_indirectly; 113 /* Which outputs are read or written indirectly */ 114 uint64_t outputs_accessed_indirectly; 115 /* Which patch inputs are read indirectly (subset of patch_inputs_read) */ 116 uint64_t patch_inputs_read_indirectly; 117 /* Which patch outputs are read or written indirectly */ 118 uint64_t patch_outputs_accessed_indirectly; 119 120 /** Bitfield of which textures are used */ 121 BITSET_DECLARE(textures_used, 128); 122 123 /** Bitfield of which textures are used by texelFetch() */ 124 BITSET_DECLARE(textures_used_by_txf, 128); 125 126 /** Bitfield of which samplers are used */ 127 BITSET_DECLARE(samplers_used, 32); 128 129 /** Bitfield of which images are used */ 130 BITSET_DECLARE(images_used, 64); 131 /** Bitfield of which images are buffers. */ 132 BITSET_DECLARE(image_buffers, 64); 133 /** Bitfield of which images are MSAA. */ 134 BITSET_DECLARE(msaa_images, 64); 135 136 /* SPV_KHR_float_controls: execution mode for floating point ops */ 137 uint32_t float_controls_execution_mode; 138 139 /** 140 * Size of shared variables accessed by compute/task/mesh shaders. 141 */ 142 unsigned shared_size; 143 144 /** 145 * Size of task payload variables accessed by task/mesh shaders. 146 */ 147 unsigned task_payload_size; 148 149 /** 150 * Number of ray tracing queries in the shader (counts all elements of all 151 * variables). 152 */ 153 unsigned ray_queries; 154 155 /** 156 * Local workgroup size used by compute/task/mesh shaders. 157 */ 158 uint16_t workgroup_size[3]; 159 160 enum gl_subgroup_size subgroup_size; 161 uint8_t num_subgroups; 162 163 /** 164 * Uses subgroup intrinsics which can communicate across a quad. 165 */ 166 bool uses_wide_subgroup_intrinsics; 167 168 /* Transform feedback buffer strides in dwords, max. 1K - 4. */ 169 uint8_t xfb_stride[MAX_XFB_BUFFERS]; 170 171 uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS]; 172 uint8_t num_inlinable_uniforms:4; 173 174 /* The size of the gl_ClipDistance[] array, if declared. */ 175 uint8_t clip_distance_array_size:4; 176 177 /* The size of the gl_CullDistance[] array, if declared. */ 178 uint8_t cull_distance_array_size:4; 179 180 /* Whether or not this shader ever uses textureGather() */ 181 bool uses_texture_gather:1; 182 183 /* Whether texture size, levels, or samples is queried. */ 184 bool uses_resource_info_query:1; 185 186 /** 187 * True if this shader uses the fddx/fddy opcodes. 188 * 189 * Note that this does not include the "fine" and "coarse" variants. 190 */ 191 bool uses_fddx_fddy:1; 192 193 /** Has divergence analysis ever been run? */ 194 bool divergence_analysis_run:1; 195 196 /* Bitmask of bit-sizes used with ALU instructions. */ 197 uint8_t bit_sizes_float; 198 uint8_t bit_sizes_int; 199 200 /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */ 201 bool first_ubo_is_default_ubo:1; 202 203 /* Whether or not separate shader objects were used */ 204 bool separate_shader:1; 205 206 /** Was this shader linked with any transform feedback varyings? */ 207 bool has_transform_feedback_varyings:1; 208 209 /* Whether flrp has been lowered. */ 210 bool flrp_lowered:1; 211 212 /* Whether nir_lower_io has been called to lower derefs. 213 * nir_variables for inputs and outputs might not be present in the IR. 214 */ 215 bool io_lowered:1; 216 217 /** Has nir_lower_var_copies called. To avoid calling any 218 * lowering/optimization that would introduce any copy_deref later. 219 */ 220 bool var_copies_lowered:1; 221 222 /* Whether the shader writes memory, including transform feedback. */ 223 bool writes_memory:1; 224 225 /* Whether gl_Layer is viewport-relative */ 226 bool layer_viewport_relative:1; 227 228 /* Whether explicit barriers are used */ 229 bool uses_control_barrier : 1; 230 bool uses_memory_barrier : 1; 231 232 /* Whether ARB_bindless_texture ops or variables are used */ 233 bool uses_bindless : 1; 234 235 /** 236 * Shared memory types have explicit layout set. Used for 237 * SPV_KHR_workgroup_storage_explicit_layout. 238 */ 239 bool shared_memory_explicit_layout:1; 240 241 /** 242 * Used for VK_KHR_zero_initialize_workgroup_memory. 243 */ 244 bool zero_initialize_shared_memory:1; 245 246 /** 247 * Used for ARB_compute_variable_group_size. 248 */ 249 bool workgroup_size_variable:1; 250 251 /** 252 * Whether the shader uses printf instructions. 253 */ 254 bool uses_printf:1; 255 256 /** 257 * VK_KHR_shader_maximal_reconvergence 258 */ 259 bool maximally_reconverges:1; 260 261 /* Use ACO instead of LLVM on AMD. */ 262 bool use_aco_amd:1; 263 264 /** 265 * Set if this shader uses legacy (DX9 or ARB assembly) math rules. 266 * 267 * From the ARB_fragment_program specification: 268 * 269 * "The following rules apply to multiplication: 270 * 271 * 1. <x> * <y> == <y> * <x>, for all <x> and <y>. 272 * 2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to 273 * *representable numbers (IEEE "not a number" and "infinity" 274 * *encodings may be exceptions). 275 * 3. +1.0 * <x> = <x>, for all <x>."" 276 * 277 * However, in effect this was due to DX9 semantics implying that 0*x=0 even 278 * for inf/nan if the hardware generated them instead of float_min/max. So, 279 * you should not have an exception for inf/nan to rule 2 above. 280 * 281 * One implementation of this behavior would be to flush all generated NaNs 282 * to zero, at which point 0*Inf=Nan=0. Most DX9/ARB-asm hardware did not 283 * generate NaNs, and the only way the GPU saw one was to possibly feed it 284 * in as a uniform. 285 */ 286 bool use_legacy_math_rules; 287 288 /* 289 * Arrangement of invocations used to calculate derivatives in 290 * compute/task/mesh shaders. From KHR_compute_shader_derivatives. 291 */ 292 enum gl_derivative_group derivative_group:2; 293 294 union { 295 struct { 296 /* Which inputs are doubles */ 297 uint64_t double_inputs; 298 299 /* For AMD-specific driver-internal shaders. It replaces vertex 300 * buffer loads with code generating VS inputs from scalar registers. 301 * 302 * Valid values: SI_VS_BLIT_SGPRS_POS_* 303 */ 304 uint8_t blit_sgprs_amd:4; 305 306 /* Software TES executing as HW VS */ 307 bool tes_agx:1; 308 309 /* True if the shader writes position in window space coordinates pre-transform */ 310 bool window_space_position:1; 311 312 /** Is an edge flag input needed? */ 313 bool needs_edge_flag:1; 314 } vs; 315 316 struct { 317 /** The output primitive type */ 318 enum mesa_prim output_primitive; 319 320 /** The input primitive type */ 321 enum mesa_prim input_primitive; 322 323 /** The maximum number of vertices the geometry shader might write. */ 324 uint16_t vertices_out; 325 326 /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ 327 uint8_t invocations; 328 329 /** The number of vertices received per input primitive (max. 6) */ 330 uint8_t vertices_in:3; 331 332 /** Whether or not this shader uses EndPrimitive */ 333 bool uses_end_primitive:1; 334 335 /** The streams used in this shaders (max. 4) */ 336 uint8_t active_stream_mask:4; 337 } gs; 338 339 struct { 340 bool uses_discard:1; 341 bool uses_fbfetch_output:1; 342 bool fbfetch_coherent:1; 343 bool color_is_dual_source:1; 344 345 /** 346 * True if this fragment shader requires full quad invocations. 347 */ 348 bool require_full_quads:1; 349 350 /** 351 * Whether the derivative group must be equivalent to the quad group. 352 */ 353 bool quad_derivatives:1; 354 355 /** 356 * True if this fragment shader requires helper invocations. This 357 * can be caused by the use of ALU derivative ops, texture 358 * instructions which do implicit derivatives, the use of quad 359 * subgroup operations or if the shader requires full quads. 360 */ 361 bool needs_quad_helper_invocations:1; 362 363 /** 364 * Whether any inputs are declared with the "sample" qualifier. 365 */ 366 bool uses_sample_qualifier:1; 367 368 /** 369 * Whether sample shading is used. 370 */ 371 bool uses_sample_shading:1; 372 373 /** 374 * Whether early fragment tests are enabled as defined by 375 * ARB_shader_image_load_store. 376 */ 377 bool early_fragment_tests:1; 378 379 /** 380 * Defined by INTEL_conservative_rasterization. 381 */ 382 bool inner_coverage:1; 383 384 bool post_depth_coverage:1; 385 386 /** 387 * \name ARB_fragment_coord_conventions 388 * @{ 389 */ 390 bool pixel_center_integer:1; 391 bool origin_upper_left:1; 392 /*@}*/ 393 394 bool pixel_interlock_ordered:1; 395 bool pixel_interlock_unordered:1; 396 bool sample_interlock_ordered:1; 397 bool sample_interlock_unordered:1; 398 399 /** 400 * Flags whether NIR's base types on the FS color outputs should be 401 * ignored. 402 * 403 * GLSL requires that fragment shader output base types match the 404 * render target's base types for the behavior to be defined. From 405 * the GL 4.6 spec: 406 * 407 * "If the values written by the fragment shader do not match the 408 * format(s) of the corresponding color buffer(s), the result is 409 * undefined." 410 * 411 * However, for NIR shaders translated from TGSI, we don't have the 412 * output types any more, so the driver will need to do whatever 413 * fixups are necessary to handle effectively untyped data being 414 * output from the FS. 415 */ 416 bool untyped_color_outputs:1; 417 418 /** gl_FragDepth layout for ARB_conservative_depth. */ 419 enum gl_frag_depth_layout depth_layout:3; 420 421 /** 422 * Interpolation qualifiers for drivers that lowers color inputs 423 * to system values. 424 */ 425 unsigned color0_interp:3; /* glsl_interp_mode */ 426 bool color0_sample:1; 427 bool color0_centroid:1; 428 unsigned color1_interp:3; /* glsl_interp_mode */ 429 bool color1_sample:1; 430 bool color1_centroid:1; 431 432 /* Bitmask of gl_advanced_blend_mode values that may be used with this 433 * shader. 434 */ 435 unsigned advanced_blend_modes; 436 437 /** 438 * Defined by AMD_shader_early_and_late_fragment_tests. 439 */ 440 bool early_and_late_fragment_tests:1; 441 enum gl_frag_stencil_layout stencil_front_layout:3; 442 enum gl_frag_stencil_layout stencil_back_layout:3; 443 } fs; 444 445 struct { 446 uint16_t workgroup_size_hint[3]; 447 448 uint8_t user_data_components_amd:4; 449 450 /* 451 * If the shader might run with shared mem on top of `shared_size`. 452 */ 453 bool has_variable_shared_mem:1; 454 455 /** 456 * If the shader has any use of a cooperative matrix. From 457 * SPV_KHR_cooperative_matrix. 458 */ 459 bool has_cooperative_matrix:1; 460 461 /** 462 * Number of bytes of shared imageblock memory per thread. Currently, 463 * this requires that the workgroup size is 32x32x1 and that 464 * shared_size = 0. These requirements could be lifted in the future. 465 * However, there is no current OpenGL/Vulkan API support for 466 * imageblocks. This is only used internally to accelerate blit/copy. 467 */ 468 uint8_t image_block_size_per_thread_agx; 469 470 /** 471 * pointer size is: 472 * AddressingModelLogical: 0 (default) 473 * AddressingModelPhysical32: 32 474 * AddressingModelPhysical64: 64 475 */ 476 unsigned ptr_size; 477 478 /** Index provided by VkPipelineShaderStageNodeCreateInfoAMDX or ShaderIndexAMDX */ 479 uint32_t shader_index; 480 481 /** Maximum size required by any output node payload array */ 482 uint32_t node_payloads_size; 483 484 /** Static workgroup count for overwriting the enqueued workgroup count. (0 if dynamic) */ 485 uint32_t workgroup_count[3]; 486 } cs; 487 488 /* Applies to both TCS and TES. */ 489 struct { 490 enum tess_primitive_mode _primitive_mode; 491 492 /** The number of vertices in the TCS output patch. */ 493 uint8_t tcs_vertices_out; 494 unsigned spacing:2; /*gl_tess_spacing*/ 495 496 /** Is the vertex order counterclockwise? */ 497 bool ccw:1; 498 bool point_mode:1; 499 500 /* Bit mask of TCS per-vertex inputs (VS outputs) that are used 501 * with a vertex index that is NOT the invocation id 502 */ 503 uint64_t tcs_cross_invocation_inputs_read; 504 505 /* Bit mask of TCS per-vertex outputs that are used 506 * with a vertex index that is NOT the invocation id 507 */ 508 uint64_t tcs_cross_invocation_outputs_read; 509 } tess; 510 511 /* Applies to MESH and TASK. */ 512 struct { 513 /* Bit mask of MS outputs that are used 514 * with an index that is NOT the local invocation index. 515 */ 516 uint64_t ms_cross_invocation_output_access; 517 518 /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT) 519 * when they are known compile-time constants. 520 * 0 means they are not known. 521 */ 522 uint32_t ts_mesh_dispatch_dimensions[3]; 523 524 uint16_t max_vertices_out; 525 uint16_t max_primitives_out; 526 enum mesa_prim primitive_type; /* POINTS, LINES or TRIANGLES. */ 527 528 /* TODO: remove this when we stop supporting NV_mesh_shader. */ 529 bool nv; 530 } mesh; 531 }; 532 } shader_info; 533 534 #ifdef __cplusplus 535 } 536 #endif 537 538 #endif /* SHADER_INFO_H */ 539