xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/agx_helpers.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #pragma once
7 
8 #include <stdbool.h>
9 #include "asahi/compiler/agx_compile.h"
10 #include "asahi/layout/layout.h"
11 #include "shaders/compression.h"
12 #include "agx_device.h"
13 #include "agx_pack.h"
14 #include "agx_ppp.h"
15 
16 #define AGX_MAX_OCCLUSION_QUERIES (65536)
17 #define AGX_MAX_VIEWPORTS         (16)
18 
/* Pack a data structure of type T in place at *ptr via agx_pack, then advance
 * ptr past it. The single-iteration for loop is a trick so the pointer bump
 * runs after agx_pack's attached block completes. */
#define agx_push(ptr, T, cfg)                                                  \
   for (unsigned _loop = 0; _loop < 1; ++_loop, ptr += AGX_##T##_LENGTH)       \
      agx_pack(ptr, T, cfg)

/* Copy an already-packed structure of type T to *ptr and advance ptr.
 *
 * NOTE(review): this expands to multiple statements with no do { } while (0)
 * wrapper, so it misbehaves inside an unbraced if/else. Confirm all call
 * sites use it only at plain statement level before reusing it elsewhere. */
#define agx_push_packed(ptr, src, T)                                           \
   STATIC_ASSERT(sizeof(src) == AGX_##T##_LENGTH);                             \
   memcpy(ptr, &src, sizeof(src));                                             \
   ptr += sizeof(src);
27 
28 static inline enum agx_sampler_states
agx_translate_sampler_state_count(unsigned count,bool extended)29 agx_translate_sampler_state_count(unsigned count, bool extended)
30 {
31    assert(count <= 17 && "max 17 sampler state registers supported");
32 
33    if (count == 0) {
34       return AGX_SAMPLER_STATES_0;
35    } else if (extended) {
36       if (count <= 8)
37          return AGX_SAMPLER_STATES_8_EXTENDED;
38       else
39          return AGX_SAMPLER_STATES_16_EXTENDED;
40    } else {
41       if (count <= 4)
42          return AGX_SAMPLER_STATES_4_COMPACT;
43       else if (count <= 8)
44          return AGX_SAMPLER_STATES_8_COMPACT;
45       else if (count <= 12)
46          return AGX_SAMPLER_STATES_12_COMPACT;
47       else
48          return AGX_SAMPLER_STATES_16_COMPACT;
49    }
50 }
51 
/*
 * Pack a sampler descriptor suitable for txf-style texel fetches: nearest mip
 * selection, with all three wrap modes clamping to a transparent-black border
 * so out-of-bounds fetches return zero.
 */
static void
agx_pack_txf_sampler(struct agx_sampler_packed *out)
{
   agx_pack(out, SAMPLER, cfg) {
      /* Allow mipmapping. This is respected by txf, weirdly. */
      cfg.mip_filter = AGX_MIP_FILTER_NEAREST;

      /* Out-of-bounds reads must return 0 */
      cfg.wrap_s = AGX_WRAP_CLAMP_TO_BORDER;
      cfg.wrap_t = AGX_WRAP_CLAMP_TO_BORDER;
      cfg.wrap_r = AGX_WRAP_CLAMP_TO_BORDER;
      cfg.border_colour = AGX_BORDER_COLOUR_TRANSPARENT_BLACK;
   }
}
66 
67 /* Channels agree for RGBA but are weird for force 0/1 */
68 
69 static inline enum agx_channel
agx_channel_from_pipe(enum pipe_swizzle in)70 agx_channel_from_pipe(enum pipe_swizzle in)
71 {
72    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_X == AGX_CHANNEL_R);
73    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Y == AGX_CHANNEL_G);
74    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Z == AGX_CHANNEL_B);
75    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_W == AGX_CHANNEL_A);
76    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_0 & 0x4);
77    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_1 & 0x4);
78    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_NONE & 0x4);
79 
80    if ((in & 0x4) == 0)
81       return (enum agx_channel)in;
82    else if (in == PIPE_SWIZZLE_1)
83       return AGX_CHANNEL_1;
84    else
85       return AGX_CHANNEL_0;
86 }
87 
88 static inline enum agx_layout
agx_translate_layout(enum ail_tiling tiling)89 agx_translate_layout(enum ail_tiling tiling)
90 {
91    switch (tiling) {
92    case AIL_TILING_TWIDDLED:
93    case AIL_TILING_TWIDDLED_COMPRESSED:
94       return AGX_LAYOUT_TWIDDLED;
95    case AIL_TILING_LINEAR:
96       return AGX_LAYOUT_LINEAR;
97    }
98 
99    unreachable("Invalid tiling");
100 }
101 
102 static enum agx_sample_count
agx_translate_sample_count(unsigned samples)103 agx_translate_sample_count(unsigned samples)
104 {
105    switch (samples) {
106    case 2:
107       return AGX_SAMPLE_COUNT_2;
108    case 4:
109       return AGX_SAMPLE_COUNT_4;
110    default:
111       unreachable("Invalid sample count");
112    }
113 }
114 
115 static inline enum agx_index_size
agx_translate_index_size(uint8_t size_B)116 agx_translate_index_size(uint8_t size_B)
117 {
118    /* Index sizes are encoded logarithmically */
119    STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8);
120    STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16);
121    STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32);
122 
123    assert((size_B == 1) || (size_B == 2) || (size_B == 4));
124    return __builtin_ctz(size_B);
125 }
126 
127 static inline uint8_t
agx_index_size_to_B(enum agx_index_size size)128 agx_index_size_to_B(enum agx_index_size size)
129 {
130    return 1 << size;
131 }
132 
133 static enum agx_conservative_depth
agx_translate_depth_layout(enum gl_frag_depth_layout layout)134 agx_translate_depth_layout(enum gl_frag_depth_layout layout)
135 {
136    switch (layout) {
137    case FRAG_DEPTH_LAYOUT_ANY:
138       return AGX_CONSERVATIVE_DEPTH_ANY;
139    case FRAG_DEPTH_LAYOUT_LESS:
140       return AGX_CONSERVATIVE_DEPTH_LESS;
141    case FRAG_DEPTH_LAYOUT_GREATER:
142       return AGX_CONSERVATIVE_DEPTH_GREATER;
143    case FRAG_DEPTH_LAYOUT_UNCHANGED:
144       return AGX_CONSERVATIVE_DEPTH_UNCHANGED;
145    default:
146       unreachable("depth layout should have been canonicalized");
147    }
148 }
149 
150 static void
agx_ppp_fragment_face_2(struct agx_ppp_update * ppp,enum agx_object_type object_type,struct agx_shader_info * info)151 agx_ppp_fragment_face_2(struct agx_ppp_update *ppp,
152                         enum agx_object_type object_type,
153                         struct agx_shader_info *info)
154 {
155    agx_ppp_push(ppp, FRAGMENT_FACE_2, cfg) {
156       cfg.object_type = object_type;
157       cfg.conservative_depth =
158          info ? agx_translate_depth_layout(info->depth_layout)
159               : AGX_CONSERVATIVE_DEPTH_UNCHANGED;
160    }
161 }
162 
/*
 * Pack a line width into the hardware's 4:4 unsigned fixed-point format,
 * biased by one ULP so that 1.0 encodes as 15 and the maximum (16.0)
 * encodes as 0xFF.
 *
 * Fix: the previous code computed ((unsigned)(w * 16.0f)) - 1 uncheck­ed, so
 * any width below 1/16 wrapped to UINT_MAX and clamped to 0xFF -- i.e. a
 * near-zero width produced the *widest* possible line. Guard the bias so
 * tiny widths encode as the thinnest line (0) instead.
 */
static inline uint32_t
agx_pack_line_width(float line_width)
{
   /* Convert to 4:4 fixed point */
   unsigned line_width_fixed = (unsigned)(line_width * 16.0f);

   /* Apply the -1 bias without wrapping below zero */
   if (line_width_fixed > 0)
      line_width_fixed -= 1;

   /* Clamp to maximum representable line width */
   return (line_width_fixed > 0xFF) ? 0xFF : line_width_fixed;
}
172 
/*
 * Despite having both a layout *and* a flag that I only see Metal use with null
 * textures, AGX doesn't seem to have "real" null textures. Instead we need to
 * bind an arbitrary address and throw away the results to read all 0's.
 * Accordingly, the caller must pass some address that lives at least as long as
 * the texture descriptor itself.
 */
static void
agx_set_null_texture(struct agx_texture_packed *tex, uint64_t valid_address)
{
   agx_pack(tex, TEXTURE, cfg) {
      cfg.layout = AGX_LAYOUT_NULL;
      cfg.channels = AGX_CHANNELS_R8;
      cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */;

      /* Force every channel to constant zero so reads return all 0's no
       * matter what memory backs the dummy address. */
      cfg.swizzle_r = AGX_CHANNEL_0;
      cfg.swizzle_g = AGX_CHANNEL_0;
      cfg.swizzle_b = AGX_CHANNEL_0;
      cfg.swizzle_a = AGX_CHANNEL_0;

      /* Must be valid memory; the hardware still dereferences it. */
      cfg.address = valid_address;
      cfg.null = true;
   }
}
195 
/*
 * Pack a null PBE (pixel backend / storage image) descriptor. Writes through
 * this descriptor land at the caller-provided sink address, which therefore
 * must remain valid and writable for the descriptor's lifetime.
 */
static void
agx_set_null_pbe(struct agx_pbe_packed *pbe, uint64_t sink)
{
   agx_pack(pbe, PBE, cfg) {
      /* Minimal 1x1 single-level image; contents are discarded anyway. */
      cfg.width = 1;
      cfg.height = 1;
      cfg.levels = 1;
      cfg.layout = AGX_LAYOUT_NULL;
      cfg.channels = AGX_CHANNELS_R8;
      cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */;
      cfg.swizzle_r = AGX_CHANNEL_R;
      cfg.swizzle_g = AGX_CHANNEL_R;
      cfg.swizzle_b = AGX_CHANNEL_R;
      cfg.swizzle_a = AGX_CHANNEL_R;

      /* All stores are redirected to the throwaway sink buffer. */
      cfg.buffer = sink;
   }
}
213 
214 /*
215  * Determine the maximum vertex/divided instance index.  For robustness,
216  * the index will be clamped to this before reading (if soft fault is
217  * disabled).
218  *
219  * Index i accesses up to (exclusive) offset:
220  *
221  *    src_offset + (i * stride) + elsize_B
222  *
223  * so we require
224  *
225  *    src_offset + (i * stride) + elsize_B <= size
226  *
227  * <==>
228  *
229  *    i <= floor((size - src_offset - elsize_B) / stride)
230  */
231 static inline uint32_t
agx_calculate_vbo_clamp(uint64_t vbuf,uint64_t sink,enum pipe_format format,uint32_t size_B,uint32_t stride_B,uint32_t offset_B,uint64_t * vbuf_out)232 agx_calculate_vbo_clamp(uint64_t vbuf, uint64_t sink, enum pipe_format format,
233                         uint32_t size_B, uint32_t stride_B, uint32_t offset_B,
234                         uint64_t *vbuf_out)
235 {
236    unsigned elsize_B = util_format_get_blocksize(format);
237    unsigned subtracted_B = offset_B + elsize_B;
238 
239    /* If at least one index is valid, determine the max. Otherwise, direct reads
240     * to zero.
241     */
242    if (size_B >= subtracted_B) {
243       *vbuf_out = vbuf + offset_B;
244 
245       /* If stride is zero, do not clamp, everything is valid. */
246       if (stride_B)
247          return ((size_B - subtracted_B) / stride_B);
248       else
249          return UINT32_MAX;
250    } else {
251       *vbuf_out = sink;
252       return 0;
253    }
254 }
255 
/*
 * Gather the device-dependent shader key from the probed device parameters.
 */
static struct agx_device_key
agx_gather_device_key(struct agx_device *dev)
{
   return (struct agx_device_key){
      /* Set for multi-cluster G13 parts and any multi-die part; presumably a
       * coherency workaround on those configurations -- semantics live with
       * the key's consumers. */
      .needs_g13x_coherency = (dev->params.gpu_generation == 13 &&
                               dev->params.num_clusters_total > 1) ||
                              dev->params.num_dies > 1,
      .soft_fault = agx_has_soft_fault(dev),
   };
}
266 
/*
 * Fill the push-constant block for the libagx decompress shader, targeting a
 * single (layer, level) slice of the image whose GPU base address is ptr.
 * Fields not listed are zero-initialized by the compound literal.
 */
static void
agx_fill_decompress_push(struct libagx_decompress_push *push,
                         struct ail_layout *layout, unsigned layer,
                         unsigned level, uint64_t ptr)
{
   *push = (struct libagx_decompress_push){
      .tile_uncompressed = ail_tile_mode_uncompressed(layout->format),

      /* GPU address of this slice's compression metadata */
      .metadata = ptr + layout->metadata_offset_B +
                  layout->level_offsets_compressed_B[level] +
                  (layer * layout->compression_layer_stride_B),

      /* NOTE(review): byte stride divided by 8 to get a stride in tiles --
       * presumably 8 bytes of metadata per tile; confirm against ail. */
      .metadata_layer_stride_tl = layout->compression_layer_stride_B / 8,
      .metadata_width_tl = ail_metadata_width_tl(layout, level),
      .metadata_height_tl = ail_metadata_height_tl(layout, level),
   };
}
282 
struct agx_border_packed;

/* Pack a border colour, given as four raw 32-bit channel values, into the
 * hardware descriptor form for the given format. Implemented out of line. */
void agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
                     enum pipe_format format);
287