1 /* 2 * Copyright 2024 Advanced Micro Devices, Inc. 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #ifndef AC_NIR_META_H 8 #define AC_NIR_META_H 9 10 #include "ac_gpu_info.h" 11 #include "nir.h" 12 #include "util/box.h" 13 14 union ac_ps_resolve_key { 15 struct { 16 bool use_aco:1; 17 bool src_is_array:1; 18 uint8_t log_samples:2; 19 uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */ 20 uint8_t last_dst_channel:2; 21 bool x_clamp_to_edge:1; 22 bool y_clamp_to_edge:1; 23 bool a16:1; 24 bool d16:1; 25 }; 26 uint64_t key; /* use with hash_table_u64 */ 27 }; 28 29 /* Only immutable settings. */ 30 struct ac_ps_resolve_options { 31 const nir_shader_compiler_options *nir_options; 32 const struct radeon_info *info; 33 bool use_aco; /* global driver setting */ 34 bool no_fmask; /* FMASK disabled by a debug option, ignored on GFX11+ */ 35 bool print_key; /* print ac_ps_resolve_key into stderr */ 36 }; 37 38 nir_shader * 39 ac_create_resolve_ps(const struct ac_ps_resolve_options *options, 40 const union ac_ps_resolve_key *key); 41 42 /* Universal optimized compute shader for image blits and clears. */ 43 #define SI_MAX_COMPUTE_BLIT_LANE_SIZE 16 44 #define SI_MAX_COMPUTE_BLIT_SAMPLES 8 45 46 /* This describes all possible variants of the compute blit shader. */ 47 union ac_cs_blit_key { 48 struct { 49 bool use_aco:1; 50 /* Workgroup settings. */ 51 uint8_t wg_dim:2; /* 1, 2, or 3 */ 52 bool has_start_xyz:1; 53 /* The size of a block of pixels that a single thread will process. */ 54 uint8_t log_lane_width:3; 55 uint8_t log_lane_height:2; 56 uint8_t log_lane_depth:2; 57 /* Declaration modifiers. */ 58 bool is_clear:1; 59 bool src_is_1d:1; 60 bool dst_is_1d:1; 61 bool src_is_msaa:1; 62 bool dst_is_msaa:1; 63 bool src_has_z:1; 64 bool dst_has_z:1; 65 bool a16:1; 66 bool d16:1; 67 uint8_t log_samples:2; 68 bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */ 69 /* Source coordinate modifiers. */ 70 bool x_clamp_to_edge:1; 71 bool y_clamp_to_edge:1; 72 bool flip_x:1; 73 bool flip_y:1; 74 /* Output modifiers. */ 75 bool sint_to_uint:1; 76 bool uint_to_sint:1; 77 bool dst_is_srgb:1; 78 bool use_integer_one:1; 79 uint8_t last_src_channel:2; /* this shouldn't be greater than last_dst_channel */ 80 uint8_t last_dst_channel:2; 81 }; 82 uint64_t key; 83 }; 84 85 struct ac_cs_blit_options { 86 /* Global options. */ 87 const nir_shader_compiler_options *nir_options; 88 const struct radeon_info *info; 89 bool use_aco; /* global driver setting */ 90 bool no_fmask; /* FMASK disabled by a global debug option, ignored on GFX11+ */ 91 bool print_key; /* print ac_ps_resolve_key into stderr */ 92 bool fail_if_slow; /* fail if a gfx blit is faster, set to false on compute queues */ 93 94 bool is_nested; /* for internal use, don't set */ 95 }; 96 97 struct ac_cs_blit_description 98 { 99 struct { 100 struct radeon_surf *surf; 101 uint8_t dim; /* 1 = 1D texture, 2 = 2D texture, 3 = 3D texture */ 102 bool is_array; /* array or cube texture */ 103 unsigned width0; /* level 0 width */ 104 unsigned height0; /* level 0 height */ 105 uint8_t num_samples; 106 uint8_t level; 107 struct pipe_box box; /* negative width, height only legal for src */ 108 enum pipe_format format; /* format reinterpretation */ 109 } dst, src; 110 111 bool is_gfx_queue; 112 bool dst_has_dcc; 113 bool sample0_only; /* copy sample 0 instead of resolving */ 114 union pipe_color_union clear_color; /* if src.surf == NULL, this is the clear color */ 115 }; 116 117 /* Dispatch parameters generated by the blit. */ 118 struct ac_cs_blit_dispatch { 119 union ac_cs_blit_key shader_key; 120 uint32_t user_data[8]; /* for nir_intrinsic_load_user_data_amd */ 121 122 unsigned wg_size[3]; /* variable workgroup size (NUM_THREAD_FULL) */ 123 unsigned last_wg_size[3]; /* workgroup size of the last workgroup (NUM_THREAD_PARTIAL) */ 124 unsigned num_workgroups[3]; /* DISPATCH_DIRECT parameters */ 125 }; 126 127 struct ac_cs_blit_dispatches { 128 unsigned num_dispatches; 129 struct ac_cs_blit_dispatch dispatches[7]; 130 }; 131 132 nir_shader * 133 ac_create_blit_cs(const struct ac_cs_blit_options *options, const union ac_cs_blit_key *key); 134 135 bool 136 ac_prepare_compute_blit(const struct ac_cs_blit_options *options, 137 const struct ac_cs_blit_description *blit, 138 struct ac_cs_blit_dispatches *dispatches); 139 140 /* clear_buffer/copy_buffer compute shader. */ 141 union ac_cs_clear_copy_buffer_key { 142 struct { 143 bool is_clear:1; 144 unsigned dwords_per_thread:3; /* 1..4 allowed */ 145 bool clear_value_size_is_12:1; 146 bool src_is_sparse:1; 147 /* Unaligned clears and copies. */ 148 unsigned src_align_offset:2; /* how much is the source address unaligned */ 149 unsigned dst_align_offset:4; /* the first thread shouldn't write this many bytes */ 150 unsigned dst_last_thread_bytes:4; /* if non-zero, the last thread should write this many bytes */ 151 bool dst_single_thread_unaligned:1; /* only 1 thread executes, both previous fields apply */ 152 bool has_start_thread:1; /* whether the first few threads should be skipped, making later 153 waves start on a 256B boundary */ 154 }; 155 uint64_t key; 156 }; 157 158 struct ac_cs_clear_copy_buffer_options { 159 const nir_shader_compiler_options *nir_options; 160 const struct radeon_info *info; 161 bool print_key; /* print the shader key into stderr */ 162 bool fail_if_slow; /* fail if a gfx blit is faster, set to false on compute queues */ 163 }; 164 165 struct ac_cs_clear_copy_buffer_info { 166 unsigned dst_offset; 167 unsigned src_offset; 168 unsigned size; 169 unsigned clear_value_size; 170 uint32_t clear_value[4]; 171 unsigned dwords_per_thread; /* Set to 0 to let the code choose the optimal value. */ 172 bool render_condition_enabled; 173 bool dst_is_vram; 174 bool src_is_vram; 175 bool src_is_sparse; 176 }; 177 178 struct ac_cs_clear_copy_buffer_dispatch { 179 union ac_cs_clear_copy_buffer_key shader_key; 180 uint32_t user_data[6]; /* for nir_intrinsic_load_user_data_amd */ 181 unsigned num_ssbos; 182 unsigned workgroup_size; 183 unsigned num_threads; 184 185 struct { 186 unsigned offset; 187 unsigned size; 188 } ssbo[2]; 189 }; 190 191 nir_shader * 192 ac_create_clear_copy_buffer_cs(struct ac_cs_clear_copy_buffer_options *options, 193 union ac_cs_clear_copy_buffer_key *key); 194 195 bool 196 ac_prepare_cs_clear_copy_buffer(const struct ac_cs_clear_copy_buffer_options *options, 197 const struct ac_cs_clear_copy_buffer_info *info, 198 struct ac_cs_clear_copy_buffer_dispatch *out); 199 200 #endif 201