/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_shader.h"

#include "nvk_cmd_buffer.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_sampler.h"

#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vk_pipeline_layout.h"
#include "vk_shader_module.h"
#include "vk_ycbcr_conversion.h"

#include "nak.h"
#include "nir.h"
#include "nir_builder.h"
#include "compiler/spirv/nir_spirv.h"

#include "nv50_ir_driver.h"

#include "util/mesa-sha1.h"
#include "util/u_debug.h"

#include "cla097.h"
#include "clb097.h"
#include "clc397.h"
#include "clc597.h"

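/* Size/alignment callback for nir_lower_vars_to_explicit_types(): shared
 * variables are laid out vector-by-vector with natural component alignment,
 * booleans being treated as 32-bit.
 */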
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

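/* Returns the set of shader stages compiled with NAK.  By default this is
 * all stages on Maxwell A and later; the NVK_USE_NAK environment variable
 * (e.g. "vs,fs" or "all") overrides the default.
 */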
VkShaderStageFlags
nvk_nak_stages(const struct nv_device_info *info)
{
   const VkShaderStageFlags all =
      VK_SHADER_STAGE_VERTEX_BIT |
      VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
      VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
      VK_SHADER_STAGE_GEOMETRY_BIT |
      VK_SHADER_STAGE_FRAGMENT_BIT |
      VK_SHADER_STAGE_COMPUTE_BIT;

   const struct debug_control flags[] = {
      { "vs", BITFIELD64_BIT(MESA_SHADER_VERTEX) },
      { "tcs", BITFIELD64_BIT(MESA_SHADER_TESS_CTRL) },
      { "tes", BITFIELD64_BIT(MESA_SHADER_TESS_EVAL) },
      { "gs", BITFIELD64_BIT(MESA_SHADER_GEOMETRY) },
      { "fs", BITFIELD64_BIT(MESA_SHADER_FRAGMENT) },
      { "cs", BITFIELD64_BIT(MESA_SHADER_COMPUTE) },
      { "all", all },
      { NULL, 0 },
   };

   const char *env_str = getenv("NVK_USE_NAK");
   if (env_str == NULL)
      return info->cls_eng3d >= MAXWELL_A ? all : 0;
   else
      return parse_debug_string(env_str, flags);
}

static bool
use_nak(const struct nvk_physical_device *pdev, gl_shader_stage stage)
{
   return nvk_nak_stages(&pdev->info) & mesa_to_vk_shader_stage(stage);
}

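/* Packs the options that can affect compiled shader binaries into a single
 * 64-bit value:
 *
 *   bits  0..7   codegen debug flags
 *   bits  8..11  codegen optimization level
 *   bit   12     NVK_DEBUG_NO_CBUF
 *   bit   13     EDB buffer views
 *   bits 16..47  NAK stage mask (see nvk_nak_stages)
 *   bits 48..63  NAK debug flags
 */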
uint64_t
nvk_physical_device_compiler_flags(const struct nvk_physical_device *pdev)
{
   bool no_cbufs = pdev->debug_flags & NVK_DEBUG_NO_CBUF;
   bool use_edb_buffer_views = nvk_use_edb_buffer_views(pdev);
   uint64_t prog_debug = nvk_cg_get_prog_debug();
   uint64_t prog_optimize = nvk_cg_get_prog_optimize();
   uint64_t nak_stages = nvk_nak_stages(&pdev->info);
   uint64_t nak_flags = nak_debug_flags(pdev->nak);

   assert(prog_debug <= UINT8_MAX);
   assert(prog_optimize < 16);
   assert(nak_stages <= UINT32_MAX);
   assert(nak_flags <= UINT16_MAX);

   return prog_debug
      | (prog_optimize << 8)
      | ((uint64_t)no_cbufs << 12)
      | ((uint64_t)use_edb_buffer_views << 13)
      | (nak_stages << 16)
      | (nak_flags << 48);
}

static const nir_shader_compiler_options *
nvk_get_nir_options(struct vk_physical_device *vk_pdev,
                    gl_shader_stage stage,
                    UNUSED const struct vk_pipeline_robustness_state *rs)
{
   const struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   if (use_nak(pdev, stage))
      return nak_nir_options(pdev->nak);
   else
      return nvk_cg_nir_options(pdev, stage);
}

nir_address_format
nvk_ubo_addr_format(const struct nvk_physical_device *pdev,
                    const struct vk_pipeline_robustness_state *rs)
{
   if (nvk_use_bindless_cbuf(&pdev->info)) {
      return nir_address_format_vec2_index_32bit_offset;
   } else if (rs->null_uniform_buffer_descriptor) {
      /* We need bounds checking for null descriptors */
      return nir_address_format_64bit_bounded_global;
   } else {
      switch (rs->uniform_buffers) {
      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
         return nir_address_format_64bit_global_32bit_offset;
      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
         return nir_address_format_64bit_bounded_global;
      default:
         unreachable("Invalid robust buffer access behavior");
      }
   }
}

nir_address_format
nvk_ssbo_addr_format(const struct nvk_physical_device *pdev,
                     const struct vk_pipeline_robustness_state *rs)
{
   if (rs->null_storage_buffer_descriptor) {
      /* We need bounds checking for null descriptors */
      return nir_address_format_64bit_bounded_global;
   } else {
      switch (rs->storage_buffers) {
      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
         return nir_address_format_64bit_global_32bit_offset;
      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
      case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
         return nir_address_format_64bit_bounded_global;
      default:
         unreachable("Invalid robust buffer access behavior");
      }
   }
}

static struct spirv_to_nir_options
nvk_get_spirv_options(struct vk_physical_device *vk_pdev,
                      UNUSED gl_shader_stage stage,
                      const struct vk_pipeline_robustness_state *rs)
{
   const struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   return (struct spirv_to_nir_options) {
      .ssbo_addr_format = nvk_ssbo_addr_format(pdev, rs),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .ubo_addr_format = nvk_ubo_addr_format(pdev, rs),
      .shared_addr_format = nir_address_format_32bit_offset,
      .min_ssbo_alignment = NVK_MIN_SSBO_ALIGNMENT,
      .min_ubo_alignment = nvk_min_cbuf_alignment(&pdev->info),
   };
}

static void
nvk_preprocess_nir(struct vk_physical_device *vk_pdev, nir_shader *nir)
{
   const struct nvk_physical_device *pdev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   if (use_nak(pdev, nir->info.stage))
      nak_preprocess_nir(nir, pdev->nak);
   else
      nvk_cg_preprocess_nir(nir);
}

static void
nvk_populate_fs_key(struct nak_fs_key *key,
                    const struct vk_graphics_pipeline_state *state)
{
   memset(key, 0, sizeof(*key));

   key->sample_info_cb = 0;
   key->sample_locations_offset = nvk_root_descriptor_offset(draw.sample_locations);
   key->sample_masks_offset = nvk_root_descriptor_offset(draw.sample_masks);

   /* Turn underestimate on when no state is available or if explicitly set */
   if (state == NULL || state->rs == NULL ||
       state->rs->conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT)
      key->uses_underestimate = true;

   if (state == NULL)
      return;

   if (state->pipeline_flags &
       VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
      key->zs_self_dep = true;

   /* We force per-sample interpolation whenever sampleShadingEnable is set
    * regardless of minSampleShading or rasterizationSamples.
    *
    * When sampleShadingEnable is set, few guarantees are made about the
    * location of interpolation of the inputs.  The only real guarantees are
    * that the inputs are interpolated within the pixel and that you get at
    * least `rasterizationSamples * minSampleShading` unique positions.
    * Importantly, it does not require that, when `rasterizationSamples *
    * minSampleShading <= 1.0`, those positions are at the fragment
    * center.  Therefore, it's valid to just always do per-sample (which maps
    * to CENTROID on NVIDIA hardware) all the time and let the hardware sort
    * it out based on what we set in HYBRID_ANTI_ALIAS_CONTROL::passes.
    *
    * Also, we set HYBRID_ANTI_ALIAS_CONTROL::centroid at draw time based on
    * `rasterizationSamples * minSampleShading` so it should be per-pixel
    * whenever we're running only a single pass.  However, this would still be
    * correct even if it got interpolated at some other sample.
    *
    * The one caveat here is that we have to be careful about gl_SampleMaskIn.
    * When `nak_fs_key::force_sample_shading = true` we also turn any reads of
    * gl_SampleMaskIn into `1 << gl_SampleID` because the hardware sample mask
    * is actually per-fragment, not per-pass.  We handle this by smashing
    * minSampleShading to 1.0 whenever gl_SampleMaskIn is read.
    */
   const struct vk_multisample_state *ms = state->ms;
   if (ms != NULL && ms->sample_shading_enable)
      key->force_sample_shading = true;
}

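/* Hashes the pieces of vk_graphics_pipeline_state that influence either the
 * compiled fragment shader (nak_fs_key, multiview) or the data serialized
 * alongside the binary (min_sample_shading).
 */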
static void
nvk_hash_graphics_state(struct vk_physical_device *device,
                        const struct vk_graphics_pipeline_state *state,
                        VkShaderStageFlags stages,
                        blake3_hash blake3_out)
{
   struct mesa_blake3 blake3_ctx;
   _mesa_blake3_init(&blake3_ctx);
   if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
      struct nak_fs_key key;
      nvk_populate_fs_key(&key, state);
      _mesa_blake3_update(&blake3_ctx, &key, sizeof(key));

      const bool is_multiview = state->rp->view_mask != 0;
      _mesa_blake3_update(&blake3_ctx, &is_multiview, sizeof(is_multiview));

      /* This doesn't impact the shader compile but it does go in the
       * nvk_shader and gets [de]serialized along with the binary so we
       * need to hash it.
       */
      if (state->ms && state->ms->sample_shading_enable) {
         _mesa_blake3_update(&blake3_ctx, &state->ms->min_sample_shading,
                             sizeof(state->ms->min_sample_shading));
      }
   }
   _mesa_blake3_final(&blake3_ctx, blake3_out);
}

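/* Intrinsic lowering run after nir_lower_explicit_io:
 *
 *  - load_ubo with a 1-component index becomes ldc_nv, and with a
 *    2-component index (packed into a 64-bit handle) becomes ldcx_nv.
 *  - load_global_constant_offset/_bounded become plain global constant
 *    loads; the bounded variant is wrapped in an if/phi so out-of-bounds
 *    reads return zero.
 */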
static bool
lower_load_intrinsic(nir_builder *b, nir_intrinsic_instr *load,
                     UNUSED void *_data)
{
   switch (load->intrinsic) {
   case nir_intrinsic_load_ubo: {
      b->cursor = nir_before_instr(&load->instr);

      nir_def *index = load->src[0].ssa;
      nir_def *offset = load->src[1].ssa;
      const enum gl_access_qualifier access = nir_intrinsic_access(load);
      const uint32_t align_mul = nir_intrinsic_align_mul(load);
      const uint32_t align_offset = nir_intrinsic_align_offset(load);

      nir_def *val;
      if (load->src[0].ssa->num_components == 1) {
         val = nir_ldc_nv(b, load->num_components, load->def.bit_size,
                          index, offset, .access = access,
                          .align_mul = align_mul,
                          .align_offset = align_offset);
      } else if (load->src[0].ssa->num_components == 2) {
         nir_def *handle = nir_pack_64_2x32(b, load->src[0].ssa);
         val = nir_ldcx_nv(b, load->num_components, load->def.bit_size,
                           handle, offset, .access = access,
                           .align_mul = align_mul,
                           .align_offset = align_offset);
      } else {
         unreachable("Invalid UBO index");
      }
      nir_def_rewrite_uses(&load->def, val);
      return true;
   }

   case nir_intrinsic_load_global_constant_offset:
   case nir_intrinsic_load_global_constant_bounded: {
      b->cursor = nir_before_instr(&load->instr);

      nir_def *base_addr = load->src[0].ssa;
      nir_def *offset = load->src[1].ssa;

      nir_def *zero = NULL;
      if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
         nir_def *bound = load->src[2].ssa;

         unsigned bit_size = load->def.bit_size;
         assert(bit_size >= 8 && bit_size % 8 == 0);
         unsigned byte_size = bit_size / 8;

         zero = nir_imm_zero(b, load->num_components, bit_size);

         unsigned load_size = byte_size * load->num_components;

         nir_def *sat_offset =
            nir_umin(b, offset, nir_imm_int(b, UINT32_MAX - (load_size - 1)));
         nir_def *in_bounds =
            nir_ilt(b, nir_iadd_imm(b, sat_offset, load_size - 1), bound);

         nir_push_if(b, in_bounds);
      }

      nir_def *val =
         nir_build_load_global_constant(b, load->def.num_components,
                                        load->def.bit_size,
                                        nir_iadd(b, base_addr, nir_u2u64(b, offset)),
                                        .align_mul = nir_intrinsic_align_mul(load),
                                        .align_offset = nir_intrinsic_align_offset(load));

      if (load->intrinsic == nir_intrinsic_load_global_constant_bounded) {
         nir_pop_if(b, NULL);
         val = nir_if_phi(b, val, zero);
      }

      nir_def_rewrite_uses(&load->def, val);
      return true;
   }

   default:
      return false;
   }
}

struct lower_ycbcr_state {
   uint32_t set_layout_count;
   struct vk_descriptor_set_layout * const *set_layouts;
};

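/* Callback for nir_vk_lower_ycbcr_tex: looks up the YCbCr conversion state
 * attached to the immutable sampler at (set, binding, array_index), if any.
 */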
static const struct vk_ycbcr_conversion_state *
lookup_ycbcr_conversion(const void *_state, uint32_t set,
                        uint32_t binding, uint32_t array_index)
{
   const struct lower_ycbcr_state *state = _state;
   assert(set < state->set_layout_count);
   assert(state->set_layouts[set] != NULL);
   const struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(state->set_layouts[set]);
   assert(binding < set_layout->binding_count);

   const struct nvk_descriptor_set_binding_layout *bind_layout =
      &set_layout->binding[binding];

   if (bind_layout->immutable_samplers == NULL)
      return NULL;

   array_index = MIN2(array_index, bind_layout->array_size - 1);

   const struct nvk_sampler *sampler =
      bind_layout->immutable_samplers[array_index];

   return sampler && sampler->vk.ycbcr_conversion ?
          &sampler->vk.ycbcr_conversion->state : NULL;
}

static inline bool
nir_has_image_var(nir_shader *nir)
{
   nir_foreach_image_variable(_, nir)
      return true;

   return false;
}

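/* NVK-specific NIR lowering that happens after SPIR-V translation but before
 * backend compilation: input attachments and patch-vertex counts, YCbCr
 * conversions, push constants, non-uniform access, descriptors, explicit
 * UBO/SSBO/global/shared I/O, and shared-memory zero-initialization.  On
 * return, *cbuf_map_out describes which constant buffers the shader expects
 * to be bound.
 */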
void
nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
              const struct vk_pipeline_robustness_state *rs,
              bool is_multiview,
              uint32_t set_layout_count,
              struct vk_descriptor_set_layout * const *set_layouts,
              struct nvk_cbuf_map *cbuf_map_out)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_input_attachments,
               &(nir_input_attachment_options) {
                  .use_fragcoord_sysval = use_nak(pdev, nir->info.stage),
                  .use_layer_id_sysval = use_nak(pdev, nir->info.stage) ||
                                         is_multiview,
                  .use_view_id_for_layer = is_multiview,
               });
   }

   if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS(_, nir, nir_lower_patch_vertices,
               nir->info.tess.tcs_vertices_out, NULL);
   }

   const struct lower_ycbcr_state ycbcr_state = {
      .set_layout_count = set_layout_count,
      .set_layouts = set_layouts,
   };
   NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex,
            lookup_ycbcr_conversion, &ycbcr_state);

   nir_lower_compute_system_values_options csv_options = {
      .has_base_workgroup_id = true,
   };
   NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);

   /* Lower push constants before lower_descriptors */
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
            nir_address_format_32bit_offset);

   /* Lower non-uniform access before lower_descriptors */
   enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
      nir_lower_non_uniform_ubo_access;

   if (pdev->info.cls_eng3d < TURING_A) {
      lower_non_uniform_access_types |= nir_lower_non_uniform_texture_access |
                                        nir_lower_non_uniform_image_access;
   }

   /* In practice, most shaders do not have non-uniform-qualified accesses,
    * so we run a cheap check first and skip the lowering when it finds
    * nothing.
    */
   if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
      struct nir_lower_non_uniform_access_options opts = {
         .types = lower_non_uniform_access_types,
         .callback = NULL,
      };
      NIR_PASS(_, nir, nir_opt_non_uniform_access);
      NIR_PASS(_, nir, nir_lower_non_uniform_access, &opts);
   }

   /* TODO: Kepler image lowering requires image params to be loaded from the
    * descriptor set which we don't currently support.
    */
   assert(pdev->info.cls_eng3d >= MAXWELL_A || !nir_has_image_var(nir));

   struct nvk_cbuf_map *cbuf_map = NULL;
   if (use_nak(pdev, nir->info.stage) &&
       !(pdev->debug_flags & NVK_DEBUG_NO_CBUF)) {
      cbuf_map = cbuf_map_out;

      /* Large constant support assumes cbufs */
      NIR_PASS(_, nir, nir_opt_large_constants, NULL, 32);
   } else {
      /* Codegen sometimes puts stuff in cbuf 1 and adds 1 to our cbuf indices
       * so we can't really rely on it for lowering to cbufs and instead place
       * the root descriptors in both cbuf 0 and cbuf 1.
       */
      *cbuf_map_out = (struct nvk_cbuf_map) {
         .cbuf_count = 2,
         .cbufs = {
            { .type = NVK_CBUF_TYPE_ROOT_DESC },
            { .type = NVK_CBUF_TYPE_ROOT_DESC },
         }
      };
   }

   NIR_PASS(_, nir, nvk_nir_lower_descriptors, pdev, rs,
            set_layout_count, set_layouts, cbuf_map);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
            nir_address_format_64bit_global);
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
            nvk_ssbo_addr_format(pdev, rs));
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
            nvk_ubo_addr_format(pdev, rs));
   NIR_PASS(_, nir, nir_shader_intrinsics_pass,
            lower_load_intrinsic, nir_metadata_none, NULL);

   if (!nir->info.shared_memory_explicit_layout) {
      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
               nir_var_mem_shared, shared_var_info);
   }
   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
            nir_address_format_32bit_offset);

   if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
      /* QMD::SHARED_MEMORY_SIZE requires an alignment of 256B, so it's safe
       * to align everything up to 16B and write whole vec4s.
       */
      nir->info.shared_size = align(nir->info.shared_size, 16);
      NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
               nir->info.shared_size, 16);

      /* We need to call lower_compute_system_values again because
       * nir_zero_initialize_shared_memory generates load_invocation_id which
       * has to be lowered to load_invocation_index.
       */
      NIR_PASS(_, nir, nir_lower_compute_system_values, NULL);
   }
}

#ifndef NDEBUG
static void
nvk_shader_dump(struct nvk_shader *shader)
{
   unsigned pos;

   if (shader->info.stage != MESA_SHADER_COMPUTE) {
      _debug_printf("dumping HDR for %s shader\n",
                    _mesa_shader_stage_to_string(shader->info.stage));
      for (pos = 0; pos < ARRAY_SIZE(shader->info.hdr); ++pos)
         _debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n",
                       pos * sizeof(shader->info.hdr[0]), shader->info.hdr[pos]);
   }
   _debug_printf("shader binary code (0x%x bytes):", shader->code_size);
   for (pos = 0; pos < shader->code_size / 4; ++pos) {
      if ((pos % 8) == 0)
         _debug_printf("\n");
      _debug_printf("%08x ", ((const uint32_t *)shader->code_ptr)[pos]);
   }
   _debug_printf("\n");
}
#endif

static VkResult
nvk_compile_nir_with_nak(struct nvk_physical_device *pdev,
                         nir_shader *nir,
                         VkShaderCreateFlagsEXT shader_flags,
                         const struct vk_pipeline_robustness_state *rs,
                         const struct nak_fs_key *fs_key,
                         struct nvk_shader *shader)
{
   const bool dump_asm =
      shader_flags & VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;

   nir_variable_mode robust2_modes = 0;
   if (rs->uniform_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
      robust2_modes |= nir_var_mem_ubo;
   if (rs->storage_buffers == VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT)
      robust2_modes |= nir_var_mem_ssbo;

   shader->nak = nak_compile_shader(nir, dump_asm, pdev->nak, robust2_modes, fs_key);
   shader->info = shader->nak->info;
   shader->code_ptr = shader->nak->code;
   shader->code_size = shader->nak->code_size;

   return VK_SUCCESS;
}

static VkResult
nvk_compile_nir(struct nvk_device *dev, nir_shader *nir,
                VkShaderCreateFlagsEXT shader_flags,
                const struct vk_pipeline_robustness_state *rs,
                const struct nak_fs_key *fs_key,
                struct nvk_shader *shader)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   if (use_nak(pdev, nir->info.stage)) {
      result = nvk_compile_nir_with_nak(pdev, nir, shader_flags, rs,
                                        fs_key, shader);
   } else {
      result = nvk_cg_compile_nir(pdev, nir, fs_key, shader);
   }
   if (result != VK_SUCCESS)
      return result;

   if (nir->constant_data_size > 0) {
      uint32_t data_align = nvk_min_cbuf_alignment(&pdev->info);
      uint32_t data_size = align(nir->constant_data_size, data_align);

      void *data = malloc(data_size);
      if (data == NULL)
         return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

      memcpy(data, nir->constant_data, nir->constant_data_size);

      assert(nir->constant_data_size <= data_size);
      memset(data + nir->constant_data_size, 0,
             data_size - nir->constant_data_size);

      shader->data_ptr = data;
      shader->data_size = data_size;
   }

   return VK_SUCCESS;
}

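/* Copies the shader header (if any), code, and constant data into a single
 * allocation from the device shader heap.  Layout and alignment rules:
 * Kepler through Pascal require the first instruction (not the header) to be
 * 0x80-aligned, constant data must honor the minimum cbuf alignment, and on
 * pre-Volta 3D classes the header address is made relative to the heap base.
 */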
static VkResult
nvk_shader_upload(struct nvk_device *dev, struct nvk_shader *shader)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   uint32_t hdr_size = 0;
   if (shader->info.stage != MESA_SHADER_COMPUTE) {
      if (pdev->info.cls_eng3d >= TURING_A)
         hdr_size = TU102_SHADER_HEADER_SIZE;
      else
         hdr_size = GF100_SHADER_HEADER_SIZE;
   }

   /* Fermi   needs 0x40 alignment
    * Kepler+ needs the first instruction to be 0x80 aligned, so we waste 0x30 bytes
    */
   int alignment = pdev->info.cls_eng3d >= KEPLER_A ? 0x80 : 0x40;

   uint32_t total_size = 0;
   if (pdev->info.cls_eng3d >= KEPLER_A &&
       pdev->info.cls_eng3d < TURING_A &&
       hdr_size > 0) {
      /* The instructions are what has to be aligned so we need to start at a
       * small offset (0x30 B) into the upload area.
       */
      total_size = alignment - hdr_size;
   }

   const uint32_t hdr_offset = total_size;
   total_size += hdr_size;

   const uint32_t code_offset = total_size;
   assert(code_offset % alignment == 0);
   total_size += shader->code_size;

   uint32_t data_offset = 0;
   if (shader->data_size > 0) {
      uint32_t cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
      alignment = MAX2(alignment, cbuf_alignment);
      total_size = align(total_size, cbuf_alignment);
      data_offset = total_size;
      total_size += shader->data_size;
   }

   char *data = malloc(total_size);
   if (data == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   assert(hdr_size <= sizeof(shader->info.hdr));
   memcpy(data + hdr_offset, shader->info.hdr, hdr_size);
   memcpy(data + code_offset, shader->code_ptr, shader->code_size);
   if (shader->data_size > 0)
      memcpy(data + data_offset, shader->data_ptr, shader->data_size);

#ifndef NDEBUG
   if (debug_get_bool_option("NV50_PROG_DEBUG", false))
      nvk_shader_dump(shader);
#endif

   VkResult result = nvk_heap_upload(dev, &dev->shader_heap, data,
                                     total_size, alignment,
                                     &shader->upload_addr);
   if (result == VK_SUCCESS) {
      shader->upload_size = total_size;

      shader->hdr_addr = shader->upload_addr + hdr_offset;
      if (pdev->info.cls_eng3d < VOLTA_A) {
         const uint64_t heap_base_addr =
            nvk_heap_contiguous_base_address(&dev->shader_heap);
         assert(shader->upload_addr - heap_base_addr < UINT32_MAX);
         shader->hdr_addr -= heap_base_addr;
      }
      shader->data_addr = shader->upload_addr + data_offset;
   }
   free(data);

   return result;
}

static const struct vk_shader_ops nvk_shader_ops;

static void
nvk_shader_destroy(struct vk_device *vk_dev,
                   struct vk_shader *vk_shader,
                   const VkAllocationCallbacks* pAllocator)
{
   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);

   if (shader->upload_size > 0) {
      nvk_heap_free(dev, &dev->shader_heap,
                    shader->upload_addr,
                    shader->upload_size);
   }

   if (shader->nak) {
      nak_shader_bin_destroy(shader->nak);
   } else {
      /* This came from codegen or deserialization; just free it */
      free((void *)shader->code_ptr);
   }

   free((void *)shader->data_ptr);

   vk_shader_free(&dev->vk, pAllocator, &shader->vk);
}

static VkResult
nvk_compile_shader(struct nvk_device *dev,
                   struct vk_shader_compile_info *info,
                   const struct vk_graphics_pipeline_state *state,
                   const VkAllocationCallbacks* pAllocator,
                   struct vk_shader **shader_out)
{
   struct nvk_shader *shader;
   VkResult result;

   /* We consume the NIR, regardless of success or failure */
   nir_shader *nir = info->nir;

   shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info->stage,
                             pAllocator, sizeof(*shader));
   if (shader == NULL) {
      ralloc_free(nir);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* TODO: Multiview with ESO */
   const bool is_multiview = state && state->rp->view_mask != 0;

   nvk_lower_nir(dev, nir, info->robustness, is_multiview,
                 info->set_layout_count, info->set_layouts,
                 &shader->cbuf_map);

   struct nak_fs_key fs_key_tmp, *fs_key = NULL;
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      nvk_populate_fs_key(&fs_key_tmp, state);
      fs_key = &fs_key_tmp;
   }

   result = nvk_compile_nir(dev, nir, info->flags, info->robustness,
                            fs_key, shader);
   ralloc_free(nir);
   if (result != VK_SUCCESS) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return result;
   }

   result = nvk_shader_upload(dev, shader);
   if (result != VK_SUCCESS) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return result;
   }

   if (info->stage == MESA_SHADER_FRAGMENT) {
      if (shader->info.fs.uses_sample_shading) {
         shader->min_sample_shading = 1;
      } else if (state != NULL && state->ms != NULL &&
                 state->ms->sample_shading_enable) {
         shader->min_sample_shading =
            CLAMP(state->ms->min_sample_shading, 0, 1);
      } else {
         shader->min_sample_shading = 0;
      }
   }

   *shader_out = &shader->vk;

   return VK_SUCCESS;
}

VkResult
nvk_compile_nir_shader(struct nvk_device *dev, nir_shader *nir,
                       const VkAllocationCallbacks *alloc,
                       struct nvk_shader **shader_out)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   const struct vk_pipeline_robustness_state rs_none = {
      .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
      .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
      .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
   };

   assert(nir->info.stage == MESA_SHADER_COMPUTE);
   if (nir->options == NULL)
      nir->options = nvk_get_nir_options(&pdev->vk, nir->info.stage, &rs_none);

   struct vk_shader_compile_info info = {
      .stage = nir->info.stage,
      .nir = nir,
      .robustness = &rs_none,
   };

   struct vk_shader *shader;
   VkResult result = nvk_compile_shader(dev, &info, NULL, alloc, &shader);
   if (result != VK_SUCCESS)
      return result;

   *shader_out = container_of(shader, struct nvk_shader, vk);

   return VK_SUCCESS;
}

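/* vk_device_shader_ops::compile entry point.  On failure, every shader
 * compiled so far is destroyed, the remaining NIR is freed (each
 * nvk_compile_shader call consumes its own NIR), and the output array is
 * zeroed before returning the error.
 */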
static VkResult
nvk_compile_shaders(struct vk_device *vk_dev,
                    uint32_t shader_count,
                    struct vk_shader_compile_info *infos,
                    const struct vk_graphics_pipeline_state *state,
                    const VkAllocationCallbacks* pAllocator,
                    struct vk_shader **shaders_out)
{
   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);

   for (uint32_t i = 0; i < shader_count; i++) {
      VkResult result = nvk_compile_shader(dev, &infos[i], state,
                                           pAllocator, &shaders_out[i]);
      if (result != VK_SUCCESS) {
         /* Clean up all the shaders before this point */
         for (uint32_t j = 0; j < i; j++)
            nvk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);

         /* Clean up all the NIR after this point */
         for (uint32_t j = i + 1; j < shader_count; j++)
            ralloc_free(infos[j].nir);

         /* Memset the output array */
         memset(shaders_out, 0, shader_count * sizeof(*shaders_out));

         return result;
      }
   }

   return VK_SUCCESS;
}

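/* Binary [de]serialization.  The blob layout must stay in sync with
 * nvk_shader_serialize(): nak_shader_info, nvk_cbuf_map, min_sample_shading,
 * code_size, data_size, then the raw code and data bytes.
 */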
static VkResult
nvk_deserialize_shader(struct vk_device *vk_dev,
                       struct blob_reader *blob,
                       uint32_t binary_version,
                       const VkAllocationCallbacks* pAllocator,
                       struct vk_shader **shader_out)
{
   struct nvk_device *dev = container_of(vk_dev, struct nvk_device, vk);
   struct nvk_shader *shader;
   VkResult result;

   struct nak_shader_info info;
   blob_copy_bytes(blob, &info, sizeof(info));

   struct nvk_cbuf_map cbuf_map;
   blob_copy_bytes(blob, &cbuf_map, sizeof(cbuf_map));

   float min_sample_shading;
   blob_copy_bytes(blob, &min_sample_shading, sizeof(min_sample_shading));

   const uint32_t code_size = blob_read_uint32(blob);
   const uint32_t data_size = blob_read_uint32(blob);
   if (blob->overrun)
      return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   shader = vk_shader_zalloc(&dev->vk, &nvk_shader_ops, info.stage,
                             pAllocator, sizeof(*shader));
   if (shader == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   shader->info = info;
   shader->cbuf_map = cbuf_map;
   shader->min_sample_shading = min_sample_shading;
   shader->code_size = code_size;
   shader->data_size = data_size;

   shader->code_ptr = malloc(code_size);
   if (shader->code_ptr == NULL) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   shader->data_ptr = malloc(data_size);
   if (shader->data_ptr == NULL) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   blob_copy_bytes(blob, (void *)shader->code_ptr, shader->code_size);
   blob_copy_bytes(blob, (void *)shader->data_ptr, shader->data_size);
   if (blob->overrun) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
   }

   result = nvk_shader_upload(dev, shader);
   if (result != VK_SUCCESS) {
      nvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
      return result;
   }

   *shader_out = &shader->vk;

   return VK_SUCCESS;
}

static bool
nvk_shader_serialize(struct vk_device *vk_dev,
                     const struct vk_shader *vk_shader,
                     struct blob *blob)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);

   /* We can't currently cache assembly */
   if (shader->nak != NULL && shader->nak->asm_str != NULL)
      return false;

   blob_write_bytes(blob, &shader->info, sizeof(shader->info));
   blob_write_bytes(blob, &shader->cbuf_map, sizeof(shader->cbuf_map));
   blob_write_bytes(blob, &shader->min_sample_shading,
                    sizeof(shader->min_sample_shading));

   blob_write_uint32(blob, shader->code_size);
   blob_write_uint32(blob, shader->data_size);
   blob_write_bytes(blob, shader->code_ptr, shader->code_size);
   blob_write_bytes(blob, shader->data_ptr, shader->data_size);

   return !blob->out_of_memory;
}

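/* snprintf into a fixed-size char array, asserting that the string was not
 * truncated.  Used for the name/description fields of the
 * VK_KHR_pipeline_executable_properties structs below.
 */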
#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})

static VkResult
nvk_shader_get_executable_properties(
   UNUSED struct vk_device *device,
   const struct vk_shader *vk_shader,
   uint32_t *executable_count,
   VkPipelineExecutablePropertiesKHR *properties)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          properties, executable_count);

   vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
      props->stages = mesa_to_vk_shader_stage(shader->info.stage);
      props->subgroupSize = 32;
      WRITE_STR(props->name, "%s",
                _mesa_shader_stage_to_string(shader->info.stage));
      WRITE_STR(props->description, "%s shader",
                _mesa_shader_stage_to_string(shader->info.stage));
   }

   return vk_outarray_status(&out);
}

static VkResult
nvk_shader_get_executable_statistics(
   UNUSED struct vk_device *device,
   const struct vk_shader *vk_shader,
   uint32_t executable_index,
   uint32_t *statistic_count,
   VkPipelineExecutableStatisticKHR *statistics)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          statistics, statistic_count);

   assert(executable_index == 0);

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Instruction count");
      WRITE_STR(stat->description, "Number of instructions used by this shader");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->info.num_instrs;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Code Size");
      WRITE_STR(stat->description,
                "Size of the compiled shader binary, in bytes");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->code_size;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Number of GPRs");
      WRITE_STR(stat->description, "Number of GPRs used by this pipeline");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->info.num_gprs;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "SLM Size");
      WRITE_STR(stat->description,
                "Size of shader local (scratch) memory, in bytes");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = shader->info.slm_size;
   }

   return vk_outarray_status(&out);
}

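/* Standard Vulkan two-call idiom: when pData is NULL, report the required
 * size in dataSize and succeed; otherwise copy up to dataSize bytes and
 * return false if the text was truncated so the caller can report
 * VK_INCOMPLETE.
 */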
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

static VkResult
nvk_shader_get_executable_internal_representations(
   UNUSED struct vk_device *device,
   const struct vk_shader *vk_shader,
   uint32_t executable_index,
   uint32_t *internal_representation_count,
   VkPipelineExecutableInternalRepresentationKHR *internal_representations)
{
   struct nvk_shader *shader = container_of(vk_shader, struct nvk_shader, vk);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          internal_representations,
                          internal_representation_count);
   bool incomplete_text = false;

   assert(executable_index == 0);

   if (shader->nak != NULL && shader->nak->asm_str != NULL) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "NAK assembly");
         WRITE_STR(ir->description, "NAK assembly");
         if (!write_ir_text(ir, shader->nak->asm_str))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

static const struct vk_shader_ops nvk_shader_ops = {
   .destroy = nvk_shader_destroy,
   .serialize = nvk_shader_serialize,
   .get_executable_properties = nvk_shader_get_executable_properties,
   .get_executable_statistics = nvk_shader_get_executable_statistics,
   .get_executable_internal_representations =
      nvk_shader_get_executable_internal_representations,
};

const struct vk_device_shader_ops nvk_device_shader_ops = {
   .get_nir_options = nvk_get_nir_options,
   .get_spirv_options = nvk_get_spirv_options,
   .preprocess_nir = nvk_preprocess_nir,
   .hash_graphics_state = nvk_hash_graphics_state,
   .compile = nvk_compile_shaders,
   .deserialize = nvk_deserialize_shader,
   .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
   .cmd_bind_shaders = nvk_cmd_bind_shaders,
};