/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include <stdbool.h>
#include "asahi/compiler/agx_compile.h"
#include "asahi/layout/layout.h"
#include "shaders/compression.h"
#include "agx_device.h"
#include "agx_pack.h"
#include "agx_ppp.h"

#define AGX_MAX_OCCLUSION_QUERIES (65536)
#define AGX_MAX_VIEWPORTS (16)

#define agx_push(ptr, T, cfg)                                                  \
   for (unsigned _loop = 0; _loop < 1; ++_loop, ptr += AGX_##T##_LENGTH)       \
      agx_pack(ptr, T, cfg)

#define agx_push_packed(ptr, src, T)                                           \
   STATIC_ASSERT(sizeof(src) == AGX_##T##_LENGTH);                             \
   memcpy(ptr, &src, sizeof(src));                                             \
   ptr += sizeof(src);
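
/*
 * Usage sketch for the macros above (the descriptor type T and the field
 * `foo` are hypothetical, purely for illustration): agx_pack() packs a single
 * T descriptor through `cfg`, while agx_push() does the same and additionally
 * advances the write pointer by AGX_T_LENGTH, so consecutive pushes append to
 * a control stream:
 *
 *    uint8_t *out = map; // CPU mapping of a command buffer
 *    agx_push(out, T, cfg) {
 *       cfg.foo = ...;
 *    }
 *    agx_push_packed(out, prepacked, T); // append an already-packed T
 */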

static inline enum agx_sampler_states
agx_translate_sampler_state_count(unsigned count, bool extended)
{
   assert(count <= 17 && "max 17 sampler state registers supported");

   if (count == 0) {
      return AGX_SAMPLER_STATES_0;
   } else if (extended) {
      if (count <= 8)
         return AGX_SAMPLER_STATES_8_EXTENDED;
      else
         return AGX_SAMPLER_STATES_16_EXTENDED;
   } else {
      if (count <= 4)
         return AGX_SAMPLER_STATES_4_COMPACT;
      else if (count <= 8)
         return AGX_SAMPLER_STATES_8_COMPACT;
      else if (count <= 12)
         return AGX_SAMPLER_STATES_12_COMPACT;
      else
         return AGX_SAMPLER_STATES_16_COMPACT;
   }
}
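
/*
 * Worked example of the mapping above: 6 non-extended sampler states round up
 * to the 8-state compact encoding, while the same count with extended states
 * selects the 8-state extended encoding:
 *
 *    agx_translate_sampler_state_count(6, false) == AGX_SAMPLER_STATES_8_COMPACT
 *    agx_translate_sampler_state_count(6, true)  == AGX_SAMPLER_STATES_8_EXTENDED
 */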

static void
agx_pack_txf_sampler(struct agx_sampler_packed *out)
{
   agx_pack(out, SAMPLER, cfg) {
      /* Allow mipmapping. This is respected by txf, weirdly. */
      cfg.mip_filter = AGX_MIP_FILTER_NEAREST;

      /* Out-of-bounds reads must return 0 */
      cfg.wrap_s = AGX_WRAP_CLAMP_TO_BORDER;
      cfg.wrap_t = AGX_WRAP_CLAMP_TO_BORDER;
      cfg.wrap_r = AGX_WRAP_CLAMP_TO_BORDER;
      cfg.border_colour = AGX_BORDER_COLOUR_TRANSPARENT_BLACK;
   }
}

/* Channel enums agree with pipe_swizzle for R/G/B/A but differ for the
 * force-0/force-1 swizzles.
 */

static inline enum agx_channel
agx_channel_from_pipe(enum pipe_swizzle in)
{
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_X == AGX_CHANNEL_R);
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Y == AGX_CHANNEL_G);
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Z == AGX_CHANNEL_B);
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_W == AGX_CHANNEL_A);
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_0 & 0x4);
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_1 & 0x4);
   STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_NONE & 0x4);

   if ((in & 0x4) == 0)
      return (enum agx_channel)in;
   else if (in == PIPE_SWIZZLE_1)
      return AGX_CHANNEL_1;
   else
      return AGX_CHANNEL_0;
}
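
/*
 * For example, per the mapping above: PIPE_SWIZZLE_X passes through unchanged
 * as AGX_CHANNEL_R, while PIPE_SWIZZLE_1 becomes AGX_CHANNEL_1 and both
 * PIPE_SWIZZLE_0 and PIPE_SWIZZLE_NONE collapse to AGX_CHANNEL_0.
 */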

static inline enum agx_layout
agx_translate_layout(enum ail_tiling tiling)
{
   switch (tiling) {
   case AIL_TILING_TWIDDLED:
   case AIL_TILING_TWIDDLED_COMPRESSED:
      return AGX_LAYOUT_TWIDDLED;
   case AIL_TILING_LINEAR:
      return AGX_LAYOUT_LINEAR;
   }

   unreachable("Invalid tiling");
}

static enum agx_sample_count
agx_translate_sample_count(unsigned samples)
{
   switch (samples) {
   case 2:
      return AGX_SAMPLE_COUNT_2;
   case 4:
      return AGX_SAMPLE_COUNT_4;
   default:
      unreachable("Invalid sample count");
   }
}

static inline enum agx_index_size
agx_translate_index_size(uint8_t size_B)
{
   /* Index sizes are encoded logarithmically */
   STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8);
   STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16);
   STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32);

   assert((size_B == 1) || (size_B == 2) || (size_B == 4));
   return __builtin_ctz(size_B);
}

static inline uint8_t
agx_index_size_to_B(enum agx_index_size size)
{
   return 1 << size;
}
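
/*
 * Worked example of the logarithmic encoding: 4-byte indices pack as
 * __builtin_ctz(4) == 2 == AGX_INDEX_SIZE_U32, and decode back via
 * agx_index_size_to_B(AGX_INDEX_SIZE_U32) == (1 << 2) == 4.
 */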

static enum agx_conservative_depth
agx_translate_depth_layout(enum gl_frag_depth_layout layout)
{
   switch (layout) {
   case FRAG_DEPTH_LAYOUT_ANY:
      return AGX_CONSERVATIVE_DEPTH_ANY;
   case FRAG_DEPTH_LAYOUT_LESS:
      return AGX_CONSERVATIVE_DEPTH_LESS;
   case FRAG_DEPTH_LAYOUT_GREATER:
      return AGX_CONSERVATIVE_DEPTH_GREATER;
   case FRAG_DEPTH_LAYOUT_UNCHANGED:
      return AGX_CONSERVATIVE_DEPTH_UNCHANGED;
   default:
      unreachable("depth layout should have been canonicalized");
   }
}

static void
agx_ppp_fragment_face_2(struct agx_ppp_update *ppp,
                        enum agx_object_type object_type,
                        struct agx_shader_info *info)
{
   agx_ppp_push(ppp, FRAGMENT_FACE_2, cfg) {
      cfg.object_type = object_type;
      cfg.conservative_depth =
         info ? agx_translate_depth_layout(info->depth_layout)
              : AGX_CONSERVATIVE_DEPTH_UNCHANGED;
   }
}

static inline uint32_t
agx_pack_line_width(float line_width)
{
   /* Line width is packed in a 4:4 fixed point format with a -1 bias */
   unsigned line_width_fixed = ((unsigned)(line_width * 16.0f)) - 1;

   /* Clamp to maximum line width */
   return MIN2(line_width_fixed, 0xFF);
}
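
/*
 * Worked example of the 4:4 encoding: a width of 1.0 packs to
 * (1 * 16) - 1 = 15 = 0x0F, and 16.0 packs to 255 = 0xFF, the saturation
 * point. Note the subtraction assumes line_width >= 1/16; smaller values
 * would wrap around before the clamp.
 */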

/*
 * Despite having both a layout *and* a flag that I only see Metal use with
 * null textures, AGX doesn't seem to have "real" null textures. Instead we
 * need to bind an arbitrary address and throw away the results to read all
 * 0's. Accordingly, the caller must pass some address that lives at least as
 * long as the texture descriptor itself.
 */
static void
agx_set_null_texture(struct agx_texture_packed *tex, uint64_t valid_address)
{
   agx_pack(tex, TEXTURE, cfg) {
      cfg.layout = AGX_LAYOUT_NULL;
      cfg.channels = AGX_CHANNELS_R8;
      cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */;
      cfg.swizzle_r = AGX_CHANNEL_0;
      cfg.swizzle_g = AGX_CHANNEL_0;
      cfg.swizzle_b = AGX_CHANNEL_0;
      cfg.swizzle_a = AGX_CHANNEL_0;
      cfg.address = valid_address;
      cfg.null = true;
   }
}

static void
agx_set_null_pbe(struct agx_pbe_packed *pbe, uint64_t sink)
{
   agx_pack(pbe, PBE, cfg) {
      cfg.width = 1;
      cfg.height = 1;
      cfg.levels = 1;
      cfg.layout = AGX_LAYOUT_NULL;
      cfg.channels = AGX_CHANNELS_R8;
      cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */;
      cfg.swizzle_r = AGX_CHANNEL_R;
      cfg.swizzle_g = AGX_CHANNEL_R;
      cfg.swizzle_b = AGX_CHANNEL_R;
      cfg.swizzle_a = AGX_CHANNEL_R;
      cfg.buffer = sink;
   }
}

/*
 * Determine the maximum vertex/divided instance index. For robustness, the
 * index will be clamped to this before reading (if soft fault is disabled).
 *
 * Index i accesses up to (exclusive) offset:
 *
 *    src_offset + (i * stride) + elsize_B
 *
 * so we require
 *
 *    src_offset + (i * stride) + elsize_B <= size
 *
 * <==>
 *
 *    i <= floor((size - src_offset - elsize_B) / stride)
 */
static inline uint32_t
agx_calculate_vbo_clamp(uint64_t vbuf, uint64_t sink, enum pipe_format format,
                        uint32_t size_B, uint32_t stride_B, uint32_t offset_B,
                        uint64_t *vbuf_out)
{
   unsigned elsize_B = util_format_get_blocksize(format);
   unsigned subtracted_B = offset_B + elsize_B;

   /* If at least one index is valid, determine the max. Otherwise, redirect
    * all reads to the sink so they return zero.
    */
   if (size_B >= subtracted_B) {
      *vbuf_out = vbuf + offset_B;

      /* If stride is zero, do not clamp, everything is valid. */
      if (stride_B)
         return ((size_B - subtracted_B) / stride_B);
      else
         return UINT32_MAX;
   } else {
      *vbuf_out = sink;
      return 0;
   }
}
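
/*
 * Worked example of the clamp above (hypothetical numbers): with size_B = 100,
 * offset_B = 4, stride_B = 16, and a 4-byte element, subtracted_B = 8 and the
 * returned clamp is floor((100 - 8) / 16) = 5, so indices 0..5 read in bounds
 * (index 5 accesses up to byte 4 + 5*16 + 4 = 88 <= 100, while index 6 would
 * reach 104).
 */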

static struct agx_device_key
agx_gather_device_key(struct agx_device *dev)
{
   return (struct agx_device_key){
      .needs_g13x_coherency = (dev->params.gpu_generation == 13 &&
                               dev->params.num_clusters_total > 1) ||
                              dev->params.num_dies > 1,
      .soft_fault = agx_has_soft_fault(dev),
   };
}

static void
agx_fill_decompress_push(struct libagx_decompress_push *push,
                         struct ail_layout *layout, unsigned layer,
                         unsigned level, uint64_t ptr)
{
   *push = (struct libagx_decompress_push){
      .tile_uncompressed = ail_tile_mode_uncompressed(layout->format),
      .metadata = ptr + layout->metadata_offset_B +
                  layout->level_offsets_compressed_B[level] +
                  (layer * layout->compression_layer_stride_B),
      .metadata_layer_stride_tl = layout->compression_layer_stride_B / 8,
      .metadata_width_tl = ail_metadata_width_tl(layout, level),
      .metadata_height_tl = ail_metadata_height_tl(layout, level),
   };
}

struct agx_border_packed;

void agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
                     enum pipe_format format);