/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
#include "libagx.h"
#include <agx_pack.h>

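/*
 * Implements size queries (OpImageQuerySize / textureSize) against a packed
 * texture descriptor. Worked example of the LOD loop below (illustrative
 * numbers, not from a real descriptor): a 1024x512 2D array texture with 6
 * layers queried at lod = 2 yields (256, 128, 6), since width and height are
 * minified per level while the layer count is not.
 */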
uint3
libagx_txs(constant struct agx_texture_packed *ptr, uint16_t lod,
           unsigned nr_comps, bool is_buffer, bool is_1d, bool is_2d,
           bool is_cube, bool is_array)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* From the Vulkan spec:
    *
    *    OpImageQuery*... return 0 if the bound descriptor is a null descriptor
    */
   if (d.null)
      return 0;

   /* Buffer textures are lowered to 2D, so the original size is
    * irrecoverable. Instead, we stash it in the software-defined section.
    */
   if (is_buffer)
      return d.buffer_size_sw;

   /* Load standard dimensions */
   uint3 size = (uint3)(d.width, d.height, d.depth);
   lod += d.first_level;

   /* Linear 2D arrays are special.
    *
    * TODO: Optimize this, since linear 2D arrays aren't needed for APIs and
    * this just gets used internally for blits.
    */
   if (is_2d && is_array && d.layout == AGX_LAYOUT_LINEAR)
      size.z = d.depth_linear;

   /* 1D arrays have their second component as the layer count */
   if (is_1d && is_array)
      size.y = size.z;

   /* Adjust for LOD, but do not adjust the array size */
   for (uint c = 0; c < (nr_comps - (uint)is_array); ++c)
      size[c] = max(size[c] >> lod, 1u);

   /* Cube maps have equal width and height, so we save some instructions by
    * reading only one. Dead code elimination will remove the redundant
    * instructions.
    */
   if (is_cube)
      size.y = size.x;

   return size;
}

uint
libagx_texture_samples(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;

   /* We may assume the input is multisampled, so just check the samples */
   return (d.samples == AGX_SAMPLE_COUNT_2) ? 2 : 4;
}

uint
libagx_texture_levels(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;
   else
      return (d.last_level - d.first_level) + 1;
}

/*
 * Fix the robustness behaviour of txf with an out-of-bounds LOD. The
 * hardware returns the correct out-of-bounds colour for out-of-bounds
 * coordinates, just not for out-of-bounds LODs. So translate an
 * out-of-bounds LOD into an out-of-bounds coordinate to get correct
 * behaviour in one instruction.
 *
 * Returns the fixed X-coordinate.
 *
 * TODO: This looks like it might be an erratum workaround on G13 (Apple does
 * it); maybe check whether G15 is affected.
 */
uint
libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
                            bool check_lod, ushort lod, bool check_layer,
                            uint layer, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   bool valid = true;

   if (check_lod)
      valid &= lod <= (d.last_level - d.first_level);

   if (check_layer) {
      bool linear = (d.layout == AGX_LAYOUT_LINEAR);
      valid &= layer < (linear ? d.depth_linear : d.depth);
   }

   /* The maximum tail offset is 0xF, so by returning 0xFFF0 for
    * out-of-bounds we stay under 0xFFFF and keep robustness after
    * offsetting.
    */
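   /* Illustrative arithmetic: even with the largest offset, 0xFFF0 + 0xF =
    * 0xFFFF, which still far exceeds any supported texture dimension, so the
    * fetch remains out-of-bounds after offsetting.
    */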
   return valid ? x : 0xFFF0;
}

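/*
 * Map a 2D pixel coordinate to its element index in a twiddled image
 * (tiled, with bits interleaved Morton-style inside each tile). Worked
 * example with illustrative numbers: for 4x4 px tiles and aligned_width_px
 * = 16, coord (5, 5) sits in the tile whose corner is (4, 4); the in-tile
 * offset (1, 1) interleaves to 0b11 = 3, the tile row starts at 4 * 16 = 64
 * px and the tile column at 4 * 4 = 16 px, so the result is 64 + 16 + 3 =
 * 83.
 */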
static uint32_t
calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
                               uint16_t tile_h_px, uint32_t aligned_width_px)
{
   /* Modulo by the tile width/height to get the offsets within the tile */
   ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
   uint32_t tile_mask = upsample(tile_mask_vec.y, tile_mask_vec.x);
   uint32_t coord_xy = upsample(coord.y, coord.x);
   ushort2 offs_px = as_ushort2(coord_xy & tile_mask);
   uint32_t offset_within_tile_px = nir_interleave_agx(offs_px.x, offs_px.y);

   /* Get the coordinates of the corner of the tile */
   ushort2 tile_px = as_ushort2(coord_xy & ~tile_mask);

   /* tile row start (px) =
    *   (y // tile height) * (# of tiles/row) * (# of px/tile) =
    *   align_down(y, tile height) / tile height * width_tl * tile width *
    *      tile height =
    *   align_down(y, tile height) * width_tl * tile width
    */
   uint32_t tile_row_start_px = tile_px.y * aligned_width_px;

   /* tile column start (px) =
    *   (x // tile width) * (# of px/tile) =
    *   align_down(x, tile width) / tile width * tile width * tile height =
    *   align_down(x, tile width) * tile height
    */
   uint32_t tile_col_start_px = tile_px.x * tile_h_px;

   /* Get the total offset */
   return tile_row_start_px + tile_col_start_px + offset_within_tile_px;
}

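/*
 * Compute the texel address of an image (or its flat sample index, when
 * return_index is set). Unit suffixes follow the code's convention: _px
 * counts pixels, _sa counts samples, _B counts bytes.
 */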
uint64_t
libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
                           uint4 coord, uint sample_idx,
                           uint bytes_per_sample_B, bool is_1d, bool is_msaa,
                           bool is_layered, bool return_index)
{
   agx_unpack(NULL, ptr, PBE, d);

   /* We do not allow atomics on linear 2D or linear 2D arrays, as there are
    * no known use cases, so in this path we are twiddled. The one exception
    * is 1D images, which are always effectively linear: even a twiddled 1D
    * layout degrades to linear-equivalent 1x1 tiles. (1D arrays use this
    * path, not the buffer path.)
    */
   uint total_px;
   if (is_1d) {
      total_px = coord.x;
   } else {
      uint aligned_width_px;
      if (is_msaa) {
         aligned_width_px = d.aligned_width_msaa_sw;
      } else {
         uint width_px = max(d.width >> d.level, 1u);
         aligned_width_px = align(width_px, d.tile_width_sw);
      }

      total_px = calculate_twiddled_coordinates(
         convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
         aligned_width_px);
   }

   uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;

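   /* d.layer_stride_sw is in bytes; dividing by the per-sample size and
    * shifting out the sample count converts it to a per-layer stride in
    * pixels, the same units as total_px.
    */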
   if (is_layered) {
      total_px += coord[is_1d ? 1 : 2] *
                  ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
   }

   uint total_sa = (total_px << samples_log2) + sample_idx;

   if (return_index)
      return total_sa;
   else
      return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
             (uint64_t)(total_sa * bytes_per_sample_B);
}

uint64_t
libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
                            uint4 coord, uint bytes_per_pixel_B)
{
   agx_unpack(NULL, ptr, PBE, d);

   uint32_t x_el = d.buffer_offset_sw + coord.x;
   return d.buffer + (uint64_t)(x_el * bytes_per_pixel_B);
}

/* Buffer texture lowerings */
bool
libagx_texture_is_rgb32(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);
   return d.channels == AGX_CHANNELS_R32G32B32_EMULATED;
}

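/*
 * Load a texel from an emulated RGB32 buffer texture: texels are packed 12
 * bytes apart (three 32-bit channels), and alpha is synthesized, as integer
 * 1 or as the bit pattern of 1.0f for float formats.
 */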
uint4
libagx_texture_load_rgb32(constant struct agx_texture_packed *ptr, uint coord,
                          bool is_float)
{
   agx_unpack(NULL, ptr, TEXTURE, d);
   global uint3 *data = (global uint3 *)(d.address + 12 * coord);

   return (uint4)(*data, is_float ? as_uint(1.0f) : 1);
}

uint
libagx_buffer_texture_offset(constant struct agx_texture_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   return x + d.buffer_offset_sw;
}

uint
libagx_buffer_image_offset(constant struct agx_pbe_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, PBE, d);

   return x + d.buffer_offset_sw;
}