xref: /aosp_15_r20/external/mesa3d/src/asahi/vulkan/hk_shader.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Valve Corporation
3  * Copyright 2024 Alyssa Rosenzweig
4  * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #pragma once
9 
10 #include "asahi/compiler/agx_compile.h"
11 #include "util/macros.h"
12 #include "agx_linker.h"
13 #include "agx_nir_lower_vbo.h"
14 #include "agx_pack.h"
15 #include "agx_usc.h"
16 #include "agx_uvs.h"
17 
18 #include "hk_device.h"
19 #include "hk_device_memory.h"
20 #include "hk_private.h"
21 
22 #include "nir_xfb_info.h"
23 #include "shader_enums.h"
24 #include "vk_pipeline_cache.h"
25 
26 #include "nir.h"
27 
28 #include "vk_shader.h"
29 
30 struct hk_physical_device;
31 struct hk_pipeline_compilation_ctx;
32 struct vk_descriptor_set_layout;
33 struct vk_graphics_pipeline_state;
34 struct vk_pipeline_cache;
35 struct vk_pipeline_layout;
36 struct vk_pipeline_robustness_state;
37 struct vk_shader_module;
38 
/* Fixed uniform slots reserved by the driver for the root descriptor table
 * and the image heap. TODO: Make dynamic
 */
#define HK_ROOT_UNIFORM       104
#define HK_IMAGE_HEAP_UNIFORM 108
42 
/* Stage-specific metadata recorded at compile time for an hk_shader.
 *
 * The leading union is selected by `stage`; the explicit _pad members
 * zero-fill each arm to a common size (see the trailing _pad[32] arm used to
 * initialize the union for stages without dedicated info), keeping the struct
 * fully initialized.
 */
struct hk_shader_info {
   union {
      struct {
         /* Bitmask of vertex attributes read by the shader */
         uint32_t attribs_read;

         /* Per-component read mask, 4 components per attribute */
         BITSET_DECLARE(attrib_components_read, AGX_MAX_ATTRIBS * 4);

         uint8_t cull_distance_array_size;
         uint8_t _pad[7];
      } vs;

      struct {
         /* Local workgroup size */
         uint16_t local_size[3];

         uint8_t _pad[26];
      } cs;

      struct {
         /* Varying interpolation info (see agx_interp_info) */
         struct agx_interp_info interp;

         /* Link-time key for the fragment epilog this shader requires */
         struct agx_fs_epilog_link_info epilog_key;

         bool reads_sample_mask;
         bool post_depth_coverage;
         bool uses_sample_shading;
         bool early_fragment_tests;
         bool writes_memory;

         uint8_t _pad[7];
      } fs;

      struct {
         /* Tessellation domain parameters. NOTE(review): the encodings of
          * spacing/mode are defined on the compiler side, not visible here.
          */
         uint8_t spacing;
         uint8_t mode;
         enum mesa_prim out_prim;
         bool point_mode;
         bool ccw;
         uint8_t _pad[27];
      } ts;

      struct {
         /* Mask of per-vertex outputs written by the TCS */
         uint64_t per_vertex_outputs;
         uint32_t output_stride;
         uint8_t output_patch_size;
         uint8_t nr_patch_outputs;
         uint8_t _pad[18];
      } tcs;

      struct {
         unsigned count_words;
         enum mesa_prim out_prim;
         uint8_t _pad[27];
      } gs;

      /* Used to initialize the union for other stages */
      uint8_t _pad[32];
   };

   struct agx_unlinked_uvs_layout uvs;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   gl_shader_stage stage : 8;
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;
   uint8_t _pad0[1];

   /* XXX: is there a less goofy way to do this? I really don't want dynamic
    * allocation here.
    */
   nir_xfb_info xfb_info;
   nir_xfb_output_info xfb_outputs[64];
};
115 
/*
 * Hash table keys for fast-linked shader variants. These contain the entire
 * prolog/epilog key so we only do 1 hash table lookup instead of 2 in the
 * general case where the linked shader is already ready.
 */
struct hk_fast_link_key_vs {
   /* Vertex prologs have no epilog, so the prolog key is the whole key */
   struct agx_vs_prolog_key prolog;
};

struct hk_fast_link_key_fs {
   /* Matches the nr_samples_shaded argument of hk_fast_link() */
   unsigned nr_samples_shaded;

   struct agx_fs_prolog_key prolog;
   struct agx_fs_epilog_key epilog;
};
130 
/* A single compiled hardware shader variant. An hk_api_shader (below) may own
 * several of these, one per hk_vs_variant/hk_gs_variant as applicable.
 */
struct hk_shader {
   struct agx_shader_part b;

   struct hk_shader_info info;
   struct agx_fragment_face_2_packed frag_face;
   struct agx_counts_packed counts;

   /* Compiled machine code for the shader, and its size in bytes */
   const void *code_ptr;
   uint32_t code_size;

   /* Constant data section for the shader, and its size in bytes */
   const void *data_ptr;
   uint32_t data_size;

   /* BO for any uploaded shader part */
   struct agx_bo *bo;

   /* Cache of fast linked variants, guarded by the lock */
   struct {
      simple_mtx_t lock;
      struct hash_table *ht;
   } linked;

   /* If there's only a single possibly linked variant, direct pointer. TODO:
    * Union with the cache to save some space?
    */
   struct hk_linked_shader *only_linked;

   /* Address to the uploaded preamble section. Preambles are uploaded
    * separately from fast-linked main shaders.
    */
   uint64_t preamble_addr;

   /* Address of the start of the shader data section */
   uint64_t data_addr;
};
166 
/* Variants compiled for a vertex (or tessellation evaluation) shader. The
 * variant used depends on the next enabled pipeline stage.
 */
enum hk_vs_variant {
   /* Hardware vertex shader, when next stage is fragment */
   HK_VS_VARIANT_HW,

   /* Hardware compute shader, when next is geometry/tessellation */
   HK_VS_VARIANT_SW,

   /* Number of variants, keep last */
   HK_VS_VARIANTS,
};
176 
/* Variants compiled for a geometry shader. Each compute variant comes in a
 * pair: the plain variant immediately followed by its rasterizer-discard
 * twin, so `variant + rast_disc` indexes the right one (see the getters
 * below).
 */
enum hk_gs_variant {
   /* Hardware vertex shader used for rasterization */
   HK_GS_VARIANT_RAST,

   /* Main compute shader */
   HK_GS_VARIANT_MAIN,
   HK_GS_VARIANT_MAIN_NO_RAST,

   /* Count compute shader */
   HK_GS_VARIANT_COUNT,
   HK_GS_VARIANT_COUNT_NO_RAST,

   /* Pre-GS compute shader */
   HK_GS_VARIANT_PRE,
   HK_GS_VARIANT_PRE_NO_RAST,

   /* Number of variants, keep last */
   HK_GS_VARIANTS,
};

/* Debug names for each GS variant. The pointers are const so the table can
 * live in read-only storage and cannot be accidentally reassigned; it is
 * defined in the header, so keep it static.
 */
/* clang-format off */
static const char *const hk_gs_variant_name[] = {
   [HK_GS_VARIANT_RAST] = "Rasterization",
   [HK_GS_VARIANT_MAIN] = "Main",
   [HK_GS_VARIANT_MAIN_NO_RAST] = "Main (rast. discard)",
   [HK_GS_VARIANT_COUNT] = "Count",
   [HK_GS_VARIANT_COUNT_NO_RAST] = "Count (rast. discard)",
   [HK_GS_VARIANT_PRE] = "Pre-GS",
   [HK_GS_VARIANT_PRE_NO_RAST] = "Pre-GS (rast. discard)",
};
/* clang-format on */
207 
208 static inline unsigned
hk_num_variants(gl_shader_stage stage)209 hk_num_variants(gl_shader_stage stage)
210 {
211    switch (stage) {
212    case MESA_SHADER_VERTEX:
213    case MESA_SHADER_TESS_EVAL:
214       return HK_VS_VARIANTS;
215 
216    case MESA_SHADER_GEOMETRY:
217       return HK_GS_VARIANTS;
218 
219    default:
220       return 1;
221    }
222 }
223 
/*
 * An hk_api shader maps 1:1 to a VkShader object. An hk_api_shader may contain
 * multiple hardware hk_shader's, built at shader compile time. This complexity
 * is required to efficiently implement the legacy geometry pipeline.
 */
struct hk_api_shader {
   struct vk_shader vk;

   /* Is this an internal passthrough geometry shader? */
   bool is_passthrough;

   /* One hk_shader per variant; the length is hk_num_variants(vk.stage) */
   struct hk_shader variants[];
};
237 
/* Iterate `var` (a struct hk_shader *) over every variant of an
 * hk_api_shader, using hk_num_variants() of its stage as the bound.
 */
#define hk_foreach_variant(api_shader, var)                                    \
   for (struct hk_shader *var = api_shader->variants;                          \
        var < api_shader->variants + hk_num_variants(api_shader->vk.stage);    \
        ++var)
242 
243 static const char *
hk_variant_name(struct hk_api_shader * obj,struct hk_shader * variant)244 hk_variant_name(struct hk_api_shader *obj, struct hk_shader *variant)
245 {
246    unsigned i = variant - obj->variants;
247    assert(i < hk_num_variants(obj->vk.stage));
248 
249    if (hk_num_variants(obj->vk.stage) == 1) {
250       return NULL;
251    } else if (obj->vk.stage == MESA_SHADER_GEOMETRY) {
252       assert(i < ARRAY_SIZE(hk_gs_variant_name));
253       return hk_gs_variant_name[i];
254    } else {
255       assert(i < 2);
256       return i == HK_VS_VARIANT_SW ? "Software" : "Hardware";
257    }
258 }
259 
260 static struct hk_shader *
hk_only_variant(struct hk_api_shader * obj)261 hk_only_variant(struct hk_api_shader *obj)
262 {
263    if (!obj)
264       return NULL;
265 
266    assert(hk_num_variants(obj->vk.stage) == 1);
267    return &obj->variants[0];
268 }
269 
270 static struct hk_shader *
hk_any_variant(struct hk_api_shader * obj)271 hk_any_variant(struct hk_api_shader *obj)
272 {
273    if (!obj)
274       return NULL;
275 
276    return &obj->variants[0];
277 }
278 
279 static struct hk_shader *
hk_main_gs_variant(struct hk_api_shader * obj,bool rast_disc)280 hk_main_gs_variant(struct hk_api_shader *obj, bool rast_disc)
281 {
282    return &obj->variants[HK_GS_VARIANT_MAIN + rast_disc];
283 }
284 
285 static struct hk_shader *
hk_count_gs_variant(struct hk_api_shader * obj,bool rast_disc)286 hk_count_gs_variant(struct hk_api_shader *obj, bool rast_disc)
287 {
288    return &obj->variants[HK_GS_VARIANT_COUNT + rast_disc];
289 }
290 
291 static struct hk_shader *
hk_pre_gs_variant(struct hk_api_shader * obj,bool rast_disc)292 hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc)
293 {
294    return &obj->variants[HK_GS_VARIANT_PRE + rast_disc];
295 }
296 
/* Worst-case size in bytes of the USC control words needed to bind a linked
 * shader: one of each fixed-length word plus up to 9 uniform words.
 */
#define HK_MAX_LINKED_USC_SIZE                                                 \
   (AGX_USC_PRESHADER_LENGTH + AGX_USC_FRAGMENT_PROPERTIES_LENGTH +            \
    AGX_USC_REGISTERS_LENGTH + AGX_USC_SHADER_LENGTH + AGX_USC_SHARED_LENGTH + \
    AGX_USC_SAMPLER_LENGTH + (AGX_USC_UNIFORM_LENGTH * 9))

/* A fast-linked shader: a main hk_shader joined with prolog/epilog parts
 * (see hk_fast_link), ready to be bound via the baked USC words below.
 */
struct hk_linked_shader {
   struct agx_linked_shader b;

   /* Distinct from hk_shader::counts due to addition of cf_binding_count, which
    * is delayed since it depends on cull distance.
    */
   struct agx_fragment_shader_word_0_packed fs_counts;

   /* Baked USC words to bind this linked shader */
   struct {
      uint8_t data[HK_MAX_LINKED_USC_SIZE];
      size_t size;
   } usc;
};
316 
/* Link `main` with the given prolog/epilog parts into an hk_linked_shader.
 * nr_samples_shaded matches hk_fast_link_key_fs and is only meaningful for
 * fragment shaders. NOTE(review): presumably consults hk_shader::linked as a
 * cache -- confirm in the implementation.
 */
struct hk_linked_shader *hk_fast_link(struct hk_device *dev, bool fragment,
                                      struct hk_shader *main,
                                      struct agx_shader_part *prolog,
                                      struct agx_shader_part *epilog,
                                      unsigned nr_samples_shaded);

/* This driver's implementation of the common Vulkan shader object ops */
extern const struct vk_device_shader_ops hk_device_shader_ops;

/* Compiler flags keying the pipeline cache for this physical device */
uint64_t
hk_physical_device_compiler_flags(const struct hk_physical_device *pdev);
327 
328 static inline nir_address_format
hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)329 hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
330 {
331    switch (robustness) {
332    case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
333       return nir_address_format_64bit_global_32bit_offset;
334    case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
335    case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
336       return nir_address_format_64bit_bounded_global;
337    default:
338       unreachable("Invalid robust buffer access behavior");
339    }
340 }
341 
/* NIR pass: lower UVS output indexing with the VS uniform area based at
 * vs_uniform_base (see agx_uvs.h).
 */
bool hk_lower_uvs_index(nir_shader *s, unsigned vs_uniform_base);

/* NIR pass: lower descriptor access according to the robustness state and
 * the bound descriptor set layouts.
 */
bool
hk_nir_lower_descriptors(nir_shader *nir,
                         const struct vk_pipeline_robustness_state *rs,
                         uint32_t set_layout_count,
                         struct vk_descriptor_set_layout *const *set_layouts);

/* Run the driver's common NIR lowering on a shader before compilation */
void hk_lower_nir(struct hk_device *dev, nir_shader *nir,
                  const struct vk_pipeline_robustness_state *rs,
                  bool is_multiview, uint32_t set_layout_count,
                  struct vk_descriptor_set_layout *const *set_layouts);

/* Compile a shader per the given compile info, allocating the resulting
 * hk_api_shader (and all its variants) with pAllocator.
 */
VkResult hk_compile_shader(struct hk_device *dev,
                           struct vk_shader_compile_info *info,
                           const struct vk_graphics_pipeline_state *state,
                           const VkAllocationCallbacks *pAllocator,
                           struct hk_api_shader **shader_out);

/* Stage-independent NIR preprocessing, shared with the meta shader path */
void hk_preprocess_nir_internal(struct vk_physical_device *vk_pdev,
                                nir_shader *nir);

/* Destroy an hk_api_shader, freeing with pAllocator */
void hk_api_shader_destroy(struct vk_device *vk_dev,
                           struct vk_shader *vk_shader,
                           const VkAllocationCallbacks *pAllocator);

/* NIR compiler options for the given stage on this physical device */
const nir_shader_compiler_options *
hk_get_nir_options(struct vk_physical_device *vk_pdev, gl_shader_stage stage,
                   UNUSED const struct vk_pipeline_robustness_state *rs);

/* Build an internal (meta) shader with the given builder callback;
 * data/data_size parameterize the builder. NOTE(review): presumably cached
 * by the device keyed on (builder, data) -- confirm in the implementation.
 */
struct hk_api_shader *hk_meta_shader(struct hk_device *dev,
                                     hk_internal_builder_t builder, void *data,
                                     size_t data_size);
374 
375 static inline struct hk_shader *
hk_meta_kernel(struct hk_device * dev,hk_internal_builder_t builder,void * data,size_t data_size)376 hk_meta_kernel(struct hk_device *dev, hk_internal_builder_t builder, void *data,
377                size_t data_size)
378 {
379    return hk_only_variant(hk_meta_shader(dev, builder, data, data_size));
380 }
381 
/* Key describing an internal passthrough geometry shader, built by
 * hk_nir_passthrough_gs below.
 */
struct hk_passthrough_gs_key {
   /* Bit mask of outputs written by the VS/TES, to be passed through */
   uint64_t outputs;

   /* Clip/cull sizes, implies clip/cull written in output */
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   /* Decomposed primitive */
   enum mesa_prim prim;

   /* Transform feedback info. Must add nir_xfb_info_size to get the key size */
   nir_xfb_info xfb_info;
};

/* Builder callback emitting a passthrough GS from an hk_passthrough_gs_key */
void hk_nir_passthrough_gs(struct nir_builder *b, const void *key_);
401