/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
#include "libagx.h"
#include <agx_pack.h>

uint3
libagx_txs(constant struct agx_texture_packed *ptr, uint16_t lod,
           unsigned nr_comps, bool is_buffer, bool is_1d, bool is_2d,
           bool is_cube, bool is_array)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* From the Vulkan spec:
    *
    *    OpImageQuery*... return 0 if the bound descriptor is a null descriptor
    */
   if (d.null)
      return 0;

   /* Buffer textures are lowered to 2D so the original size is irrecoverable.
    * Instead, we stash it in the software-defined section.
    */
   if (is_buffer)
      return d.buffer_size_sw;

   /* Load standard dimensions */
   uint3 size = (uint3)(d.width, d.height, d.depth);
   lod += d.first_level;

   /* Linear 2D arrays are special.
    *
    * TODO: Optimize this, since linear 2D arrays aren't needed for APIs and
    * this just gets used internally for blits.
    */
   if (is_2d && is_array && d.layout == AGX_LAYOUT_LINEAR)
      size.z = d.depth_linear;

   /* 1D arrays have their layer count in the second component */
   if (is_1d && is_array)
      size.y = size.z;

   /* Adjust for LOD, do not adjust array size */
   for (uint c = 0; c < (nr_comps - (uint)is_array); ++c)
      size[c] = max(size[c] >> lod, 1u);

   /* Cube maps have equal width and height, so we save some instructions by
    * only reading one. Dead code elimination will remove the redundant
    * instructions.
    */
   if (is_cube)
      size.y = size.x;

   return size;
}

uint
libagx_texture_samples(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;

   /* We may assume the input is multisampled, so just check the samples */
   return (d.samples == AGX_SAMPLE_COUNT_2) ? 2 : 4;
}

uint
libagx_texture_levels(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;
   else
      return (d.last_level - d.first_level) + 1;
}

/*
 * Fix robustness behaviour of txf with out-of-bounds LOD. The hardware
 * returns the correct out-of-bounds colour for out-of-bounds coordinates,
 * just not LODs. So translate an out-of-bounds LOD into an out-of-bounds
 * coordinate to get correct behaviour in 1 instruction.
 *
 * Returns the fixed X-coordinate.
 *
 * TODO: This looks like it might be an erratum workaround on G13 (Apple does
 * it), maybe check if G15 is affected.
 */
uint
libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
                            bool check_lod, ushort lod, bool check_layer,
                            uint layer, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   bool valid = true;

   if (check_lod)
      valid &= lod <= (d.last_level - d.first_level);

   if (check_layer) {
      bool linear = (d.layout == AGX_LAYOUT_LINEAR);
      valid &= layer < (linear ? d.depth_linear : d.depth);
   }

   /* The maximum tail offset is 0xF, so by returning 0xFFF0 for out-of-bounds
    * we stay at or below 0xFFFF and keep robustness after offsetting.
    */
   return valid ? x : 0xFFF0;
}
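
/*
 * Worked example for the lowering above, with illustrative numbers: a texture
 * with first_level = 0 and last_level = 3 has four levels, so a txf at
 * lod = 5 with check_lod set fails the "lod <= 3" test and the X-coordinate
 * becomes 0xFFF0. Even after the maximum tail offset of 0xF is added, the
 * coordinate is at most 0xFFFF, still far past any level's width, so the
 * hardware's own out-of-bounds handling returns the robustness colour.
 */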

static uint32_t
calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
                               uint16_t tile_h_px, uint32_t aligned_width_px)
{
   /* Modulo by the tile width/height to get the offsets within the tile */
   ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
   uint32_t tile_mask = upsample(tile_mask_vec.y, tile_mask_vec.x);
   uint32_t coord_xy = upsample(coord.y, coord.x);
   ushort2 offs_px = as_ushort2(coord_xy & tile_mask);
   uint32_t offset_within_tile_px = nir_interleave_agx(offs_px.x, offs_px.y);

   /* Get the coordinates of the corner of the tile */
   ushort2 tile_px = as_ushort2(coord_xy & ~tile_mask);

   /* tile row start (px) =
    *   (y // tile height) * (# of tiles/row) * (# of pix/tile) =
    *   align_down(y, tile height) / tile height * width_tl * tile width *
    *      tile height =
    *   align_down(y, tile height) * width_tl * tile width
    */
   uint32_t tile_row_start_px = tile_px.y * aligned_width_px;

   /* tile column start (px) =
    *   (x // tile width) * (# of pix/tile) =
    *   align_down(x, tile width) / tile width * tile width * tile height =
    *   align_down(x, tile width) * tile height
    */
   uint32_t tile_col_start_px = tile_px.x * tile_h_px;

   /* Get the total offset */
   return tile_row_start_px + tile_col_start_px + offset_within_tile_px;
}

uint64_t
libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
                           uint4 coord, uint sample_idx,
                           uint bytes_per_sample_B, bool is_1d, bool is_msaa,
                           bool is_layered, bool return_index)
{
   agx_unpack(NULL, ptr, PBE, d);

   /* We do not allow atomics on linear 2D or linear 2D array images, as there
    * are no known use cases. So in this path the image is twiddled, unless it
    * is 1D: a 1D image is always effectively linear, even with a nominally
    * twiddled layout, since that degrades to 1x1 tiles. (1D arrays take this
    * path, not the buffer path.)
    */
   uint total_px;
   if (is_1d) {
      total_px = coord.x;
   } else {
      uint aligned_width_px;
      if (is_msaa) {
         aligned_width_px = d.aligned_width_msaa_sw;
      } else {
         uint width_px = max(d.width >> d.level, 1u);
         aligned_width_px = align(width_px, d.tile_width_sw);
      }

      total_px = calculate_twiddled_coordinates(
         convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
         aligned_width_px);
   }

   uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;

   if (is_layered) {
      total_px += coord[is_1d ? 1 : 2] *
                  ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
   }

   uint total_sa = (total_px << samples_log2) + sample_idx;

   if (return_index)
      return total_sa;
   else
      return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
             (uint64_t)(total_sa * bytes_per_sample_B);
}
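
/*
 * Minimal usage sketch, not part of the driver interface: the address of a
 * texel in a plain (non-array, non-MSAA) 2D image via the helper above. The
 * helper name and the assumption that bytes_per_pixel_B matches the bound
 * format's pixel size are ours, purely for illustration.
 */
static uint64_t
example_2d_texel_address(constant const struct agx_pbe_packed *ptr, uint x,
                         uint y, uint bytes_per_pixel_B)
{
   return libagx_image_texel_address(ptr, (uint4)(x, y, 0, 0),
                                     0 /* sample_idx */, bytes_per_pixel_B,
                                     false /* is_1d */, false /* is_msaa */,
                                     false /* is_layered */,
                                     false /* return_index */);
}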

uint64_t
libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
                            uint4 coord, uint bytes_per_pixel_B)
{
   agx_unpack(NULL, ptr, PBE, d);

   uint32_t x_el = d.buffer_offset_sw + coord.x;
   return d.buffer + (uint64_t)(x_el * bytes_per_pixel_B);
}

/* Buffer texture lowerings */
bool
libagx_texture_is_rgb32(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);
   return d.channels == AGX_CHANNELS_R32G32B32_EMULATED;
}

uint4
libagx_texture_load_rgb32(constant struct agx_texture_packed *ptr, uint coord,
                          bool is_float)
{
   agx_unpack(NULL, ptr, TEXTURE, d);
   global uint3 *data = (global uint3 *)(d.address + 12 * coord);

   return (uint4)(*data, is_float ? as_uint(1.0f) : 1);
}

uint
libagx_buffer_texture_offset(constant struct agx_texture_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   return x + d.buffer_offset_sw;
}

uint
libagx_buffer_image_offset(constant struct agx_pbe_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, PBE, d);

   return x + d.buffer_offset_sw;
}
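
/*
 * Hedged sketch, not used by the lowering passes: one way the RGB32 query and
 * load above might compose in a buffer texture fetch. When the descriptor
 * uses the emulated R32G32B32 layout, the texel is loaded directly from
 * memory; otherwise the caller's hardware-fetched result is used. The helper
 * name and the idea of passing in a pre-fetched texel are ours, purely for
 * illustration.
 */
static uint4
example_buffer_fetch_select(constant struct agx_texture_packed *ptr,
                            uint coord, bool is_float, uint4 hw_fetched_texel)
{
   if (libagx_texture_is_rgb32(ptr))
      return libagx_texture_load_rgb32(ptr, coord, is_float);
   else
      return hw_fetched_texel;
}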