/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "asahi/compiler/agx_compile.h"
#include "util/macros.h"
#include "agx_linker.h"
#include "agx_nir_lower_vbo.h"
#include "agx_pack.h"
#include "agx_usc.h"
#include "agx_uvs.h"

#include "hk_device.h"
#include "hk_device_memory.h"
#include "hk_private.h"

#include "nir_xfb_info.h"
#include "shader_enums.h"
#include "vk_pipeline_cache.h"

#include "nir.h"

#include "vk_shader.h"

struct hk_physical_device;
struct hk_pipeline_compilation_ctx;
struct vk_descriptor_set_layout;
struct vk_graphics_pipeline_state;
struct vk_pipeline_cache;
struct vk_pipeline_layout;
struct vk_pipeline_robustness_state;
struct vk_shader_module;

/* TODO: Make dynamic */
#define HK_ROOT_UNIFORM       104
#define HK_IMAGE_HEAP_UNIFORM 108

struct hk_shader_info {
   union {
      struct {
         uint32_t attribs_read;
         BITSET_DECLARE(attrib_components_read, AGX_MAX_ATTRIBS * 4);
         uint8_t cull_distance_array_size;
         uint8_t _pad[7];
      } vs;

      struct {
         /* Local workgroup size */
         uint16_t local_size[3];

         uint8_t _pad[26];
      } cs;

      struct {
         struct agx_interp_info interp;
         struct agx_fs_epilog_link_info epilog_key;

         bool reads_sample_mask;
         bool post_depth_coverage;
         bool uses_sample_shading;
         bool early_fragment_tests;
         bool writes_memory;

         uint8_t _pad[7];
      } fs;

      struct {
         uint8_t spacing;
         uint8_t mode;
         enum mesa_prim out_prim;
         bool point_mode;
         bool ccw;
         uint8_t _pad[27];
      } ts;

      struct {
         uint64_t per_vertex_outputs;
         uint32_t output_stride;
         uint8_t output_patch_size;
         uint8_t nr_patch_outputs;
         uint8_t _pad[18];
      } tcs;

      struct {
         unsigned count_words;
         enum mesa_prim out_prim;
         uint8_t _pad[27];
      } gs;

      /* Used to initialize the union for other stages */
      uint8_t _pad[32];
   };

   struct agx_unlinked_uvs_layout uvs;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   gl_shader_stage stage : 8;
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;
   uint8_t _pad0[1];

   /* XXX: is there a less goofy way to do this? I really don't want dynamic
    * allocation here.
    */
   nir_xfb_info xfb_info;
   nir_xfb_output_info xfb_outputs[64];
};
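
/* Editorial note on the layout above (an assumption, not from the original
 * source): nir_xfb_info ends in a flexible array of nir_xfb_output_info, so
 * placing xfb_outputs[] immediately after xfb_info gives it inline backing
 * storage for up to 64 outputs, avoiding dynamic allocation at the cost of a
 * fixed upper bound.
 */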

/*
 * Hash table keys for fast-linked shader variants. These contain the entire
 * prolog/epilog key so we only do 1 hash table lookup instead of 2 in the
 * general case where the linked shader is already ready.
 */
struct hk_fast_link_key_vs {
   struct agx_vs_prolog_key prolog;
};

struct hk_fast_link_key_fs {
   unsigned nr_samples_shaded;
   struct agx_fs_prolog_key prolog;
   struct agx_fs_epilog_key epilog;
};

struct hk_shader {
   struct agx_shader_part b;

   struct hk_shader_info info;
   struct agx_fragment_face_2_packed frag_face;
   struct agx_counts_packed counts;

   const void *code_ptr;
   uint32_t code_size;

   const void *data_ptr;
   uint32_t data_size;

   /* BO for any uploaded shader part */
   struct agx_bo *bo;

   /* Cache of fast-linked variants */
   struct {
      simple_mtx_t lock;
      struct hash_table *ht;
   } linked;

   /* If there is only a single possible linked variant, a direct pointer to
    * it. TODO: Union with the cache to save some space?
    */
   struct hk_linked_shader *only_linked;

   /* Address of the uploaded preamble section. Preambles are uploaded
    * separately from fast-linked main shaders.
    */
   uint64_t preamble_addr;

   /* Address of the start of the shader data section */
   uint64_t data_addr;
};
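
/* Illustrative sketch (hypothetical helper, not part of the original header):
 * because hk_fast_link_key_* embeds the entire prolog/epilog key, probing the
 * cache of fast-linked variants is a single hash table lookup under the
 * shader's lock. Assumes util/hash_table.h and util/simple_mtx.h are reachable
 * via the includes above, and that linked.ht was created with hash/equals
 * functions matching the key type.
 */
static inline struct hk_linked_shader *
hk_find_linked_example(struct hk_shader *s, const void *key)
{
   struct hk_linked_shader *linked = NULL;

   simple_mtx_lock(&s->linked.lock);
   struct hash_entry *ent = _mesa_hash_table_search(s->linked.ht, key);
   if (ent != NULL)
      linked = ent->data;
   simple_mtx_unlock(&s->linked.lock);

   return linked;
}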

enum hk_vs_variant {
   /* Hardware vertex shader, used when the next stage is fragment */
   HK_VS_VARIANT_HW,

   /* Vertex shader run as a hardware compute shader, used when the next
    * stage is geometry or tessellation
    */
   HK_VS_VARIANT_SW,

   HK_VS_VARIANTS,
};

enum hk_gs_variant {
   /* Hardware vertex shader used for rasterization */
   HK_GS_VARIANT_RAST,

   /* Main compute shader */
   HK_GS_VARIANT_MAIN,
   HK_GS_VARIANT_MAIN_NO_RAST,

   /* Count compute shader */
   HK_GS_VARIANT_COUNT,
   HK_GS_VARIANT_COUNT_NO_RAST,

   /* Pre-GS compute shader */
   HK_GS_VARIANT_PRE,
   HK_GS_VARIANT_PRE_NO_RAST,

   HK_GS_VARIANTS,
};

/* clang-format off */
static const char *hk_gs_variant_name[] = {
   [HK_GS_VARIANT_RAST]          = "Rasterization",
   [HK_GS_VARIANT_MAIN]          = "Main",
   [HK_GS_VARIANT_MAIN_NO_RAST]  = "Main (rast. discard)",
   [HK_GS_VARIANT_COUNT]         = "Count",
   [HK_GS_VARIANT_COUNT_NO_RAST] = "Count (rast. discard)",
   [HK_GS_VARIANT_PRE]           = "Pre-GS",
   [HK_GS_VARIANT_PRE_NO_RAST]   = "Pre-GS (rast. discard)",
};
/* clang-format on */

static inline unsigned
hk_num_variants(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
      return HK_VS_VARIANTS;

   case MESA_SHADER_GEOMETRY:
      return HK_GS_VARIANTS;

   default:
      return 1;
   }
}

/*
 * An hk_api_shader maps 1:1 to a VkShaderEXT object. An hk_api_shader may
 * contain multiple hardware hk_shaders, built at shader compile time. This
 * complexity is required to efficiently implement the legacy geometry
 * pipeline.
 */
struct hk_api_shader {
   struct vk_shader vk;

   /* Is this an internal passthrough geometry shader? */
   bool is_passthrough;

   struct hk_shader variants[];
};
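
/* Illustrative (hypothetical sizing, not from the original header): because
 * variants[] is a flexible array member, an hk_api_shader would be allocated
 * with one hk_shader per variant of its stage, e.g.
 *
 *    size_t size = sizeof(struct hk_api_shader) +
 *                  hk_num_variants(stage) * sizeof(struct hk_shader);
 */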

#define hk_foreach_variant(api_shader, var)                                    \
   for (struct hk_shader *var = api_shader->variants;                          \
        var < api_shader->variants + hk_num_variants(api_shader->vk.stage);    \
        ++var)
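
/* Example usage (illustrative): visit every compiled variant of a shader.
 *
 *    hk_foreach_variant(shader, variant) {
 *       process(variant);
 *    }
 */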

static inline const char *
hk_variant_name(struct hk_api_shader *obj, struct hk_shader *variant)
{
   unsigned i = variant - obj->variants;
   assert(i < hk_num_variants(obj->vk.stage));

   if (hk_num_variants(obj->vk.stage) == 1) {
      return NULL;
   } else if (obj->vk.stage == MESA_SHADER_GEOMETRY) {
      assert(i < ARRAY_SIZE(hk_gs_variant_name));
      return hk_gs_variant_name[i];
   } else {
      assert(i < 2);
      return i == HK_VS_VARIANT_SW ? "Software" : "Hardware";
   }
}

static inline struct hk_shader *
hk_only_variant(struct hk_api_shader *obj)
{
   if (!obj)
      return NULL;

   assert(hk_num_variants(obj->vk.stage) == 1);
   return &obj->variants[0];
}

static inline struct hk_shader *
hk_any_variant(struct hk_api_shader *obj)
{
   if (!obj)
      return NULL;

   return &obj->variants[0];
}

static inline struct hk_shader *
hk_main_gs_variant(struct hk_api_shader *obj, bool rast_disc)
{
   return &obj->variants[HK_GS_VARIANT_MAIN + rast_disc];
}

static inline struct hk_shader *
hk_count_gs_variant(struct hk_api_shader *obj, bool rast_disc)
{
   return &obj->variants[HK_GS_VARIANT_COUNT + rast_disc];
}

static inline struct hk_shader *
hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc)
{
   return &obj->variants[HK_GS_VARIANT_PRE + rast_disc];
}
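
/* Sanity checks (editorial addition): the `+ rast_disc` indexing above
 * assumes each *_NO_RAST variant immediately follows its rasterizing
 * counterpart in enum hk_gs_variant.
 */
static_assert(HK_GS_VARIANT_MAIN_NO_RAST == HK_GS_VARIANT_MAIN + 1,
              "GS variant order");
static_assert(HK_GS_VARIANT_COUNT_NO_RAST == HK_GS_VARIANT_COUNT + 1,
              "GS variant order");
static_assert(HK_GS_VARIANT_PRE_NO_RAST == HK_GS_VARIANT_PRE + 1,
              "GS variant order");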

#define HK_MAX_LINKED_USC_SIZE                                                 \
   (AGX_USC_PRESHADER_LENGTH + AGX_USC_FRAGMENT_PROPERTIES_LENGTH +            \
    AGX_USC_REGISTERS_LENGTH + AGX_USC_SHADER_LENGTH + AGX_USC_SHARED_LENGTH + \
    AGX_USC_SAMPLER_LENGTH + (AGX_USC_UNIFORM_LENGTH * 9))

struct hk_linked_shader {
   struct agx_linked_shader b;

   /* Distinct from hk_shader::counts due to the addition of cf_binding_count,
    * which is delayed since it depends on cull distance.
    */
   struct agx_fragment_shader_word_0_packed fs_counts;

   /* Baked USC words to bind this linked shader */
   struct {
      uint8_t data[HK_MAX_LINKED_USC_SIZE];
      size_t size;
   } usc;
};

struct hk_linked_shader *hk_fast_link(struct hk_device *dev, bool fragment,
                                      struct hk_shader *main,
                                      struct agx_shader_part *prolog,
                                      struct agx_shader_part *epilog,
                                      unsigned nr_samples_shaded);

extern const struct vk_device_shader_ops hk_device_shader_ops;

uint64_t
hk_physical_device_compiler_flags(const struct hk_physical_device *pdev);

static inline nir_address_format
hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
{
   switch (robustness) {
   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
      return nir_address_format_64bit_global_32bit_offset;
   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
   case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
      return nir_address_format_64bit_bounded_global;
   default:
      unreachable("Invalid robust buffer access behavior");
   }
}
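
/* Note (editorial, based on NIR address format semantics): the bounded global
 * format carries a base/size pair so every access can be bounds-checked, which
 * is what robustBufferAccess requires; the unchecked 64-bit global format with
 * a 32-bit offset avoids that cost when robustness is disabled.
 */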

bool hk_lower_uvs_index(nir_shader *s, unsigned vs_uniform_base);

bool
hk_nir_lower_descriptors(nir_shader *nir,
                         const struct vk_pipeline_robustness_state *rs,
                         uint32_t set_layout_count,
                         struct vk_descriptor_set_layout *const *set_layouts);

void hk_lower_nir(struct hk_device *dev, nir_shader *nir,
                  const struct vk_pipeline_robustness_state *rs,
                  bool is_multiview, uint32_t set_layout_count,
                  struct vk_descriptor_set_layout *const *set_layouts);

VkResult hk_compile_shader(struct hk_device *dev,
                           struct vk_shader_compile_info *info,
                           const struct vk_graphics_pipeline_state *state,
                           const VkAllocationCallbacks *pAllocator,
                           struct hk_api_shader **shader_out);

void hk_preprocess_nir_internal(struct vk_physical_device *vk_pdev,
                                nir_shader *nir);

void hk_api_shader_destroy(struct vk_device *vk_dev,
                           struct vk_shader *vk_shader,
                           const VkAllocationCallbacks *pAllocator);

const nir_shader_compiler_options *
hk_get_nir_options(struct vk_physical_device *vk_pdev, gl_shader_stage stage,
                   UNUSED const struct vk_pipeline_robustness_state *rs);

struct hk_api_shader *hk_meta_shader(struct hk_device *dev,
                                     hk_internal_builder_t builder, void *data,
                                     size_t data_size);

static inline struct hk_shader *
hk_meta_kernel(struct hk_device *dev, hk_internal_builder_t builder, void *data,
               size_t data_size)
{
   return hk_only_variant(hk_meta_shader(dev, builder, data, data_size));
}

struct hk_passthrough_gs_key {
   /* Bit mask of outputs written by the VS/TES, to be passed through */
   uint64_t outputs;

   /* Clip/cull distance array sizes; nonzero implies the corresponding
    * distances are written as outputs
    */
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   /* Decomposed primitive */
   enum mesa_prim prim;

   /* Transform feedback info. Must add nir_xfb_info_size to get the key size */
   nir_xfb_info xfb_info;
};
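
/* Illustrative sketch (hypothetical helper, not in the original header):
 * since the key ends in the variable-length xfb_info, its total size is the
 * fixed prefix plus nir_xfb_info_size() for the actual output count. Assumes
 * offsetof is available via the includes above.
 */
static inline size_t
hk_passthrough_gs_key_size_example(const struct hk_passthrough_gs_key *key)
{
   return offsetof(struct hk_passthrough_gs_key, xfb_info) +
          nir_xfb_info_size(key->xfb_info.output_count);
}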

void hk_nir_passthrough_gs(struct nir_builder *b, const void *key_);