/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_program.h"

#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_descriptors.h"
#include "zink_helpers.h"
#include "zink_pipeline.h"
#include "zink_render_pass.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_inlines.h"

#include "util/memstream.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "nir_serialize.h"
#include "nir/nir_draw_helpers.h"

/* for pipeline cache */
#define XXH_INLINE_ALL
#include "util/xxhash.h"

static void
gfx_program_precompile_job(void *data, void *gdata, int thread_index);
struct zink_gfx_program *
create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch);

void
debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr)
{
   sprintf(buf, "zink_gfx_program");
}

void
debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr)
{
   sprintf(buf, "zink_compute_program");
}

ALWAYS_INLINE static bool
shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms)
{
   if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
       zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) &&
          (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size,
                                    key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)));
}

ALWAYS_INLINE static bool
shader_key_matches(const struct zink_shader_module *zm,
                   const struct zink_shader_key *key, unsigned num_uniforms,
                   bool has_inline, bool has_nonseamless)
{
   const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? sizeof(uint32_t) : 0;
   if (has_inline) {
      if (zm->num_uniforms != num_uniforms ||
          (num_uniforms &&
           memcmp(zm->key + zm->key_size + nonseamless_size,
                  key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))))
         return false;
   }
   if (!has_nonseamless) {
      if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask ||
          (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)))
         return false;
   }
   if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle)
      return false;
   return !memcmp(zm->key, key, zm->key_size);
}

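/* Variant keys are stored as one flexible blob appended to the module,
 * laid out as (inferred from the memcpys above and below):
 *
 *   [zink_shader_key data (key_size bytes)]
 *   [nonseamless cube mask (uint32_t, absent when the ext handles it)]
 *   [inlined uniform values (num_uniforms * uint32_t)]
 *   [zink_zs_swizzle_key (only when zs shader swizzling is needed)]
 *
 * shader_module_hash() covers the first three segments; swizzle data is
 * XORed into the hash separately since it is not part of the key proper.
 */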
static uint32_t
shader_module_hash(const struct zink_shader_module *zm)
{
   const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0;
   unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t);
   return _mesa_hash_data(zm->key, key_size);
}

ALWAYS_INLINE static void
gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen,
                          struct zink_shader *zs, struct zink_gfx_program *prog,
                          struct zink_gfx_pipeline_state *state,
                          bool has_inline, //is inlining enabled?
                          bool has_nonseamless, //is nonseamless ext present?
                          unsigned *inline_size, unsigned *nonseamless_size)
{
   gl_shader_stage stage = zs->info.stage;
   struct zink_shader_key *key = &state->shader_keys.key[stage];
   if (has_inline && ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) {
      if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS))
         *inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (!has_nonseamless && key->base.nonseamless_cube_mask)
      *nonseamless_size = sizeof(uint32_t);
}

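/* Compile (or, for generated TCS, synthesize) a module for one stage and
 * append it to the program's variant cache.  The variant key described
 * above is packed directly after the struct; conceptually the allocation is
 *
 *   sizeof(struct zink_shader_module) + key->size + nonseamless_size +
 *   inline_size * sizeof(uint32_t) + optional zs swizzle key
 *
 * (illustrative; the exact expression is in the malloc below).
 */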
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                               struct zink_shader *zs, struct zink_gfx_program *prog,
                               gl_shader_stage stage,
                               struct zink_gfx_pipeline_state *state,
                               unsigned inline_size, unsigned nonseamless_size,
                               bool has_inline, //is inlining enabled?
                               bool has_nonseamless) //is nonseamless ext present?
{
   struct zink_shader_module *zm;
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle ||
                                            (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle);
   zm = malloc(sizeof(struct zink_shader_module) + key->size +
               (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) +
               (shadow_needs_shader_swizzle ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices;
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx); //TODO async
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   zm->num_uniforms = inline_size;
   if (!is_nongenerated_tcs) {
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
   } else {
      zm->key_size = 0;
      memset(zm->key, 0, key->size);
   }
   if (!has_nonseamless && nonseamless_size) {
      /* nonseamless mask gets added to base key if it exists */
      memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size);
   }
   zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle;
   zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size;
   if (inline_size)
      memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
      zm->hash = patch_vertices;
   else
      zm->hash = shader_module_hash(zm);
   if (unlikely(shadow_needs_shader_swizzle)) {
      memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
      zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   if (inline_size)
      prog->inlined_variant_count[stage]++;
   util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm);
   return zm;
}

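/* Linear scan of the per-stage variant cache.  A hit is swapped to index 0,
 * a simple move-to-front scheme so that back-to-back lookups of the same
 * variant (the common case) terminate on the first iteration.
 */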
ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen,
                            struct zink_shader *zs, struct zink_gfx_program *prog,
                            gl_shader_stage stage,
                            struct zink_gfx_pipeline_state *state,
                            unsigned inline_size, unsigned nonseamless_size,
                            bool has_inline, //is inlining enabled?
                            bool has_nonseamless) //is nonseamless ext present?
{
   const struct zink_shader_key *key = &state->shader_keys.key[stage];
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) ||
                                            (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle));

   struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? !!inline_size : 0))
            continue;
      } else {
         if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size)
            continue;
         if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

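/* "Optimal key" path: when the screen supports optimal keys, each stage's
 * key fits in a single uint16_t (fragment uses all 16 bits, other stages
 * only the low 8; see the BITFIELD_MASK below), so variants can be stored
 * and compared without the variable-length key machinery above.
 */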
ALWAYS_INLINE static struct zink_shader_module *
create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                       struct zink_shader *zs, struct zink_gfx_program *prog,
                                       gl_shader_stage stage,
                                       struct zink_gfx_pipeline_state *state)
{
   struct zink_shader_module *zm;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   bool shadow_needs_shader_swizzle = false;
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&state->shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&state->shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&state->shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   size_t key_size = sizeof(uint16_t);
   zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0));
   if (!zm) {
      return NULL;
   }
   if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) {
      assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints);
      unsigned patch_vertices = 3;
      if (ctx) {
         struct zink_tcs_key *tcs = (struct zink_tcs_key*)key;
         patch_vertices = tcs->patch_vertices;
      }
      zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base);
   } else {
      zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]),
                                    (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base);
   }
   if (!zm->obj.mod) {
      FREE(zm);
      return NULL;
   }
   zm->shobj = prog->base.uses_shobj;
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   if (key && !is_nongenerated_tcs) {
      zm->key_size = key_size;
      uint16_t *data = (uint16_t*)zm->key;
      /* sanitize actual key bits */
      *data = (*key) & mask;
      if (unlikely(shadow_needs_shader_swizzle))
         memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key));
   }
   zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*);
   util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm);
   return zm;
}

ALWAYS_INLINE static struct zink_shader_module *
get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen,
                                    struct zink_shader *zs, struct zink_gfx_program *prog,
                                    gl_shader_stage stage,
                                    struct zink_gfx_pipeline_state *state)
{
   /* non-generated tcs won't use the shader key */
   const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated;
   bool shadow_needs_shader_swizzle = false;
   uint16_t *key;
   unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8);
   if (zs == prog->last_vertex_stage) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs;
      shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle;
   } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) {
      key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs;
   } else {
      key = NULL;
   }
   struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0];
   unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
   struct zink_shader_module **pzm = shader_cache->data;
   for (unsigned i = 0; i < count; i++) {
      struct zink_shader_module *iter = pzm[i];
      if (is_nongenerated_tcs) {
         /* always match */
      } else if (key) {
         uint16_t val = (*key) & mask;
         /* no key is bigger than uint16_t */
         if (memcmp(iter->key, &val, sizeof(uint16_t)))
            continue;
         if (unlikely(shadow_needs_shader_swizzle)) {
            /* shadow swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)))
               continue;
         }
      }
      if (i > 0) {
         struct zink_shader_module *zero = pzm[0];
         pzm[0] = iter;
         pzm[i] = zero;
      }
      return iter;
   }

   return NULL;
}

static void
zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm)
{
   if (zm->shobj)
      VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL);
   else
      VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL);
   ralloc_free(zm->obj.spirv);
   free(zm);
}

static void
destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc)
{
   while (util_dynarray_contains(sc, void*)) {
      struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*);
      zink_destroy_shader_module(screen, zm);
   }
}

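/* prog->last_variant_hash is maintained incrementally rather than being
 * recomputed: whenever a stage's module changes, the stale per-stage hash
 * is XORed out of the combined hash and the new one XORed in, as in
 *
 *   variant_hash ^= prog->module_hash[i];   // remove stale hash
 *   prog->module_hash[i] = zm->hash;
 *   variant_hash ^= prog->module_hash[i];   // mix in new hash
 */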
ALWAYS_INLINE static void
update_gfx_shader_modules(struct zink_context *ctx,
                      struct zink_screen *screen,
                      struct zink_gfx_program *prog, uint32_t mask,
                      struct zink_gfx_pipeline_state *state,
                      bool has_inline, //is inlining enabled?
                      bool has_nonseamless) //is nonseamless ext present?
{
   bool hash_changed = false;
   bool default_variants = true;
   assert(prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = prog->last_variant_hash;
   prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(mask & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                  inline_size, nonseamless_size, has_inline, has_nonseamless);
      if (!zm)
         zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                             inline_size, nonseamless_size, has_inline, has_nonseamless);
      state->modules[i] = zm->obj.mod;
      if (prog->objs[i].mod == zm->obj.mod)
         continue;
      prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
      variant_hash ^= prog->module_hash[i];
      hash_changed = true;
      default_variants &= zm->default_variant;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (has_inline) {
         if (zm->num_uniforms)
            prog->inline_variants |= BITFIELD_BIT(i);
         else
            prog->inline_variants &= ~BITFIELD_BIT(i);
      }
      variant_hash ^= prog->module_hash[i];
   }

   if (hash_changed && state) {
      if (default_variants)
         prog->last_variant_hash = prog->default_variant_hash;
      else
         prog->last_variant_hash = variant_hash;

      state->modules_changed = true;
   }
}

static void
generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   uint32_t variant_hash = 0;
   bool default_variants = true;
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      unsigned inline_size = 0, nonseamless_size = 0;
      gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state,
                                screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map,
                                &inline_size, &nonseamless_size);
      struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state,
                                                                     inline_size, nonseamless_size,
                                                                     screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map);
      state->modules[i] = zm->obj.mod;
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
      prog->module_hash[i] = zm->hash;
      if (zm->num_uniforms)
         prog->inline_variants |= BITFIELD_BIT(i);
      default_variants &= zm->default_variant;
      variant_hash ^= prog->module_hash[i];
   }

   state->modules_changed = true;

   prog->last_variant_hash = variant_hash;
   if (default_variants)
      prog->default_variant_hash = prog->last_variant_hash;
}

static void
generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
{
   assert(!prog->objs[MESA_SHADER_VERTEX].mod);
   for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
      if (!(prog->stages_present & BITFIELD_BIT(i)))
         continue;

      assert(prog->shaders[i]);

      struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state);
      prog->objs[i] = zm->obj;
      prog->objects[i] = zm->obj.obj;
   }

   state->modules_changed = true;
   prog->last_variant_hash = state->optimal_key;
}

static uint32_t
hash_pipeline_lib_generated_tcs(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   return gkey->optimal_key;
}


static bool
equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(uint32_t));
}

static uint32_t
hash_pipeline_lib(const void *key)
{
   const struct zink_gfx_library_key *gkey = key;
   /* remove generated tcs bits */
   return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
}

static bool
equals_pipeline_lib(const void *a, const void *b)
{
   const struct zink_gfx_library_key *ak = a;
   const struct zink_gfx_library_key *bk = b;
   /* remove generated tcs bits */
   uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
   uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
   return val_a == val_b;
}

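/* The hash/equals pairs below back the driver's pipeline hash tables and
 * sets (e.g. the _mesa_set_init() calls in create_lib_cache() further
 * down).  Input/output keys deliberately hash and compare only the struct
 * prefix up to the cached pipeline handle so the handle itself never
 * affects lookup.
 */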
uint32_t
hash_gfx_input_dynamic(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   return ikey->idx;
}

static bool
equals_gfx_input_dynamic(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   return ikey_a->idx == ikey_b->idx;
}

uint32_t
hash_gfx_input(const void *key)
{
   const struct zink_gfx_input_key *ikey = key;
   if (ikey->uses_dynamic_stride)
      return ikey->input;
   return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline));
}

static bool
equals_gfx_input(const void *a, const void *b)
{
   const struct zink_gfx_input_key *ikey_a = a;
   const struct zink_gfx_input_key *ikey_b = b;
   if (ikey_a->uses_dynamic_stride)
      return ikey_a->element_state == ikey_b->element_state &&
             !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask));
   return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline));
}

uint32_t
hash_gfx_output_ds3(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, sizeof(uint32_t));
}

static bool
equals_gfx_output_ds3(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, sizeof(uint32_t));
}

uint32_t
hash_gfx_output(const void *key)
{
   const uint8_t *data = key;
   return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline));
}

static bool
equals_gfx_output(const void *a, const void *b)
{
   const uint8_t *da = a;
   const uint8_t *db = b;
   return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline));
}

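/* has_inline/has_nonseamless are compile-time constants through these
 * ALWAYS_INLINE wrappers, so the two-level dispatch below effectively
 * instantiates four specialized copies of update_gfx_shader_modules() with
 * the feature branches folded away.
 */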
ALWAYS_INLINE static void
update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driconf.inline_uniforms || prog->needs_inlining)
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                true, has_nonseamless);
   else
      update_gfx_shader_modules(ctx, screen, prog,
                                ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state,
                                false, has_nonseamless);
}

static void
update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_non_seamless_cube_map)
      update_gfx_program_nonseamless(ctx, prog, true);
   else
      update_gfx_program_nonseamless(ctx, prog, false);
}

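/* ctx->gfx_pipeline_state.final_hash follows the same XOR discipline as the
 * variant hash: the current program's last_variant_hash is XORed out before
 * any update that may change it and XORed back in once the update is done.
 */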
void
zink_gfx_program_update(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      gl_shader_stage pstage = ctx->last_vertex_stage->info.stage;
      ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      /* this must be done before prog is updated */
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
            if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i))
               ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod;
         }
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_gfx_stages |= prog->stages_present;
         update_gfx_program(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash);
         zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         prog->base.removed = false;
         generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state);
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(ctx, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
}

ALWAYS_INLINE static bool
update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->info.have_EXT_graphics_pipeline_library)
      util_queue_fence_wait(&prog->base.cache_fence);
   struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
   if (!zm) {
      zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state);
      perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage));
   }

   bool changed = prog->objs[pstage].mod != zm->obj.mod;
   prog->objs[pstage] = zm->obj;
   prog->objects[pstage] = zm->obj.obj;
   return changed;
}

static void
update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog)
{
   const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key;
   const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash;
   if (key->vs_bits != last_prog_key->vs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
   if (key->fs_bits != last_prog_key->fs_bits ||
       /* always recheck shadow swizzles since they aren't directly part of the key */
       unlikely(shadow_needs_shader_swizzle)) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT);
      ctx->gfx_pipeline_state.modules_changed |= changed;
      if (unlikely(shadow_needs_shader_swizzle)) {
         struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data;
         ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t);
      }
   }
   if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated &&
       key->tcs_bits != last_prog_key->tcs_bits) {
      assert(!prog->is_separable);
      bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL);
      ctx->gfx_pipeline_state.modules_changed |= changed;
   }
   prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
}

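/* Separable ("fast-linked") programs are placeholders: once the fully
 * optimized program exists, the program-cache entry is rewritten in place
 * to point at it, the reference the separable program held is dropped, and
 * the separable program is flagged as removed.
 */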
static struct zink_gfx_program *
replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_gfx_program *real = prog->full_prog ?
                                   prog->full_prog :
                                   /* this will be NULL with ZINK_DEBUG_NOOPT */
                                   zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash);
   entry->data = real;
   entry->key = real->shaders;
   real->base.removed = false;
   zink_gfx_program_reference(screen, &prog->full_prog, NULL);
   prog->base.removed = true;
   return real;
}

void
zink_gfx_program_update_optimal(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
      const uint32_t hash = ctx->gfx_hash;
      simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);

      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx));
         if (prog->is_separable) {
            /* shader variants can't be handled by separable programs: sync and compile */
            if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)
               util_queue_fence_wait(&prog->base.cache_fence);
            /* If the optimized linked pipeline is done compiling, swap it into place. */
            if (util_queue_fence_is_signalled(&prog->base.cache_fence) &&
                /* but only if needed for ZINK_DEBUG=noopt */
                (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) {
               prog = replace_separable_prog(ctx, entry, prog);
            }
         }
         update_gfx_program_optimal(ctx, prog);
      } else {
         ctx->dirty_gfx_stages |= ctx->shader_stages;
         prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         prog->base.removed = false;
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
         if (!prog->is_separable) {
            zink_screen_get_pipeline_cache(screen, &prog->base, false);
            perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n");
            generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
         }
      }
      simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(ctx, &prog->base);
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   } else if (ctx->dirty_gfx_stages) {
      /* remove old hash */
      ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val);
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;

      bool must_replace = ctx->curr_program->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx));
      if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) {
         struct zink_gfx_program *prog = ctx->curr_program;

         util_queue_fence_wait(&prog->base.cache_fence);
         /* shader variants can't be handled by separable programs: sync and compile */
         perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n");
         struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)];
         const uint32_t hash = ctx->gfx_hash;
         simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
         struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
         ctx->curr_program = replace_separable_prog(ctx, entry, prog);
         simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]);
      }
      update_gfx_program_optimal(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_gfx_stages = 0;
   ctx->gfx_dirty = false;
   ctx->last_vertex_stage_dirty = false;
}

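/* Background pipeline optimization: a draw first uses a quickly-created
 * pipeline, then one of these jobs recompiles it fully optimized on the
 * cache thread and swaps the result into the cache entry, stashing the old
 * handle in gpl.unoptimized_pipeline.
 */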
static void
optimized_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;
   VkPipeline pipeline;
   if (pc_entry->gpl.gkey)
      pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false);
   else
      pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   if (pipeline) {
      pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline;
      pc_entry->pipeline = pipeline;
   }
}

static void
optimized_shobj_compile_job(void *data, void *gdata, int thread_index)
{
   struct zink_gfx_pipeline_cache_entry *pc_entry = data;
   struct zink_screen *screen = gdata;

   struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT];
   for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
      objs[i].mod = VK_NULL_HANDLE;
      objs[i].spirv = pc_entry->shobjs[i].spirv;
   }
   pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true);
   /* no unoptimized_pipeline dance */
}

void
zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   if (screen->driver_workarounds.disable_optimized_compile)
      return;
   if (zink_debug & ZINK_DEBUG_NOBGC) {
      if (pc_entry->prog->base.uses_shobj)
         optimized_shobj_compile_job(pc_entry, screen, 0);
      else
         optimized_compile_job(pc_entry, screen, 0);
   } else {
      util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence,
                         pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0);
   }
}

void
zink_program_finish(struct zink_context *ctx, struct zink_program *pg)
{
   util_queue_fence_wait(&pg->cache_fence);
   if (pg->is_compute)
      return;
   struct zink_gfx_program *prog = (struct zink_gfx_program*)pg;
   for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
      for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
         hash_table_foreach(&prog->pipelines[r][i], entry) {
            struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
            util_queue_fence_wait(&pc_entry->fence);
         }
      }
   }
}

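/* Compute programs keep one default module (comp->module) plus a small
 * variant cache bucketed only by whether a nonseamless mask is present;
 * a variant is required whenever inlined uniforms, robustness lowering,
 * or zs swizzle data perturb the key.
 */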
static void
update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   struct zink_shader *zs = comp->shader;
   struct zink_shader_module *zm = NULL;
   unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0;
   struct zink_shader_key *key = &ctx->compute_pipeline_state.key;
   ASSERTED bool check_robustness = screen->driver_compiler_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
   assert(zink_cs_key(key)->robust_access == check_robustness);

   if (ctx && zs->info.num_inlinable_uniforms &&
       ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) {
      if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS)
         inline_size = zs->info.num_inlinable_uniforms;
      else
         key->inline_uniforms = false;
   }
   if (key->base.nonseamless_cube_mask)
      nonseamless_size = sizeof(uint32_t);
   if (key->base.needs_zs_shader_swizzle)
      zs_swizzle_size = sizeof(struct zink_zs_swizzle_key);

   if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) {
      struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size];
      unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *);
      struct zink_shader_module **pzm = shader_cache->data;
      for (unsigned i = 0; i < count; i++) {
         struct zink_shader_module *iter = pzm[i];
         if (!shader_key_matches(iter, key, inline_size,
                                 screen->driconf.inline_uniforms,
                                 screen->info.have_EXT_non_seamless_cube_map))
            continue;
         if (unlikely(zs_swizzle_size)) {
            /* zs swizzle data needs a manual compare since it's so fat */
            if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t),
                       &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size))
               continue;
         }
         if (i > 0) {
            struct zink_shader_module *zero = pzm[0];
            pzm[0] = iter;
            pzm[i] = zero;
         }
         zm = iter;
      }
   } else {
      zm = comp->module;
   }

   if (!zm) {
      zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) + zs_swizzle_size);
      if (!zm) {
         return;
      }
      zm->shobj = false;
      zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base);
      if (!zm->obj.spirv) {
         FREE(zm);
         return;
      }
      zm->num_uniforms = inline_size;
      zm->key_size = key->size;
      memcpy(zm->key, key, key->size);
      zm->has_nonseamless = !!nonseamless_size;
      zm->needs_zs_shader_swizzle = !!zs_swizzle_size;
      assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size);
      if (nonseamless_size)
         memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size);
      if (inline_size)
         memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t));
      if (zs_swizzle_size)
         memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size);

      zm->hash = shader_module_hash(zm);
      zm->default_variant = false;
      if (inline_size)
         comp->inlined_variant_count++;

      /* this is otherwise the default variant, which is stored as comp->module */
      if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size)
         util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm);
   }
   if (comp->curr == zm)
      return;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   comp->curr = zm;
   ctx->compute_pipeline_state.module_hash = zm->hash;
   ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
   ctx->compute_pipeline_state.module_changed = true;
}

void
zink_update_compute_program(struct zink_context *ctx)
{
   util_queue_fence_wait(&ctx->curr_compute->base.cache_fence);
   update_cs_shader_module(ctx, ctx->curr_compute);
}

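/* Every graphics pipeline layout carries a single push-constant range
 * covering struct zink_gfx_push_constant and visible to all graphics
 * stages; compute layouts use no push constants.
 */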
VkPipelineLayout
zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags)
{
   VkPipelineLayoutCreateInfo plci = {0};
   plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
   plci.flags = flags;

   plci.pSetLayouts = dsl;
   plci.setLayoutCount = num_dsl;

   VkPushConstantRange pcr;
   if (!is_compute) {
      pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
      pcr.offset = 0;
      pcr.size = sizeof(struct zink_gfx_push_constant);
      plci.pushConstantRangeCount = 1;
      plci.pPushConstantRanges = &pcr;
   }

   VkPipelineLayout layout;
   VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout);
   if (result != VK_SUCCESS) {
      mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result));
      return VK_NULL_HANDLE;
   }

   return layout;
}

static void *
create_program(struct zink_context *ctx, bool is_compute)
{
   struct zink_program *pg = rzalloc_size(NULL, is_compute ? sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program));
   if (!pg)
      return NULL;

   pipe_reference_init(&pg->reference, 1);
   u_rwlock_init(&pg->pipeline_cache_lock);
   util_queue_fence_init(&pg->cache_fence);
   pg->is_compute = is_compute;
   pg->ctx = ctx;
   return (void*)pg;
}

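/* Pair each present producer stage with the next present consumer in
 * pipeline order (VS->TCS->TES->GS->FS), skipping NULL entries, so the
 * compiler can assign matching IO slots between adjacent stages.
 */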
static void
assign_io(struct zink_screen *screen,
          nir_shader *shaders[ZINK_GFX_SHADER_COUNT])
{
   for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) {
      nir_shader *producer = shaders[i];
      for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) {
         nir_shader *consumer = shaders[j];
         if (!consumer)
            continue;
         zink_compiler_assign_io(screen, producer, consumer);
         i = j;
         break;
      }
   }
}

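/* Pipeline-library caches are shared between programs built from the same
 * shader set (a generated TCS is excluded from the set) and are refcounted
 * once per referencing shader; the final unref destroys every cached
 * library pipeline.
 */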
void
zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs)
{
   if (!p_atomic_dec_zero(&libs->refcount))
      return;

   simple_mtx_destroy(&libs->lock);
   set_foreach_remove(&libs->libs, he) {
      struct zink_gfx_library_key *gkey = (void*)he->key;
      VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL);
      FREE(gkey);
   }
   ralloc_free(libs->libs.table);
   FREE(libs);
}

static struct zink_gfx_lib_cache *
create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs)
{
   struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache);
   libs->stages_present = prog->stages_present;
   if (generated_tcs)
      libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   simple_mtx_init(&libs->lock, mtx_plain);
   if (generated_tcs)
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
   else
      _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib);
   return libs;
}

static struct zink_gfx_lib_cache *
find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog)
{
   unsigned stages_present = prog->stages_present;
   bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated;
   if (generated_tcs)
      stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
   unsigned idx = zink_program_cache_stages(stages_present);
   struct set *ht = &screen->pipeline_libs[idx];
   const uint32_t hash = prog->gfx_hash;

   simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
   bool found = false;
   struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found);
   struct zink_gfx_lib_cache *libs;
   if (found) {
      libs = (void*)entry->key;
   } else {
      libs = create_lib_cache(prog, generated_tcs);
      memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders));
      entry->key = libs;
      unsigned refs = 0;
      for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
         if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) {
            simple_mtx_lock(&prog->shaders[i]->lock);
            util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs);
            simple_mtx_unlock(&prog->shaders[i]->lock);
            refs++;
         }
      }
      p_atomic_set(&libs->refcount, refs);
   }
   simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
   return libs;
}

1082 static struct zink_gfx_program *
gfx_program_create(struct zink_context * ctx,struct zink_shader ** stages,unsigned vertices_per_patch,uint32_t gfx_hash)1083 gfx_program_create(struct zink_context *ctx,
1084                         struct zink_shader **stages,
1085                         unsigned vertices_per_patch,
1086                         uint32_t gfx_hash)
1087 {
1088    struct zink_screen *screen = zink_screen(ctx->base.screen);
1089    struct zink_gfx_program *prog = create_program(ctx, false);
1090    if (!prog)
1091       goto fail;
1092 
1093    prog->gfx_hash = gfx_hash;
1094    prog->base.removed = true;
1095    prog->optimal_keys = screen->optimal_keys;
1096 
1097    prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] &&
1098                          prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags;
1099    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1100       util_dynarray_init(&prog->shader_cache[i][0][0], prog);
1101       util_dynarray_init(&prog->shader_cache[i][0][1], prog);
1102       util_dynarray_init(&prog->shader_cache[i][1][0], prog);
1103       util_dynarray_init(&prog->shader_cache[i][1][1], prog);
1104       if (stages[i]) {
1105          prog->shaders[i] = stages[i];
1106          prog->stages_present |= BITFIELD_BIT(i);
1107          if (i != MESA_SHADER_FRAGMENT)
1108             prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated;
1109          prog->needs_inlining |= prog->shaders[i]->needs_inlining;
1110       }
1111    }
1112    if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
1113       util_queue_fence_wait(&stages[MESA_SHADER_TESS_EVAL]->precompile.fence);
1114       prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs =
1115       prog->shaders[MESA_SHADER_TESS_CTRL] =
1116         zink_shader_tcs_create(screen, vertices_per_patch);
1117       prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1118    }
1119    prog->stages_remaining = prog->stages_present;
1120    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1121       if (prog->shaders[i]) {
1122          simple_mtx_lock(&prog->shaders[i]->lock);
1123          _mesa_set_add(prog->shaders[i]->programs, prog);
1124          simple_mtx_unlock(&prog->shaders[i]->lock);
1125          zink_gfx_program_reference(screen, NULL, prog);
1126       }
1127    }
1128    p_atomic_dec(&prog->base.reference.count);
1129 
1130    if (stages[MESA_SHADER_GEOMETRY])
1131       prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY];
1132    else if (stages[MESA_SHADER_TESS_EVAL])
1133       prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL];
1134    else
1135       prog->last_vertex_stage = stages[MESA_SHADER_VERTEX];
1136 
1137    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1138       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1139          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1140          /* only need first 3/4 for point/line/tri/patch */
1141          if (screen->info.have_EXT_extended_dynamic_state &&
1142              i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1143             break;
1144       }
1145    }
1146    return prog;
1147 
1148 fail:
1149    if (prog)
1150       zink_destroy_gfx_program(screen, prog);
1151    return NULL;
1152 }
1153 
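/* Second phase of gfx program creation, safe to run from a worker thread: deserialize each
 * stage's NIR (in reverse stage order so the TES exists before a generated TCS is initialized),
 * run assign_io, re-serialize the results into prog->blobs, attach the shared lib cache when
 * optimal_keys is set, hash the per-stage sha1s into the program blake3, and init descriptors.
 */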
1154 /* NO THREAD-UNSAFE ctx USAGE! */
1155 static struct zink_gfx_program *
1156 gfx_program_init(struct zink_context *ctx, struct zink_gfx_program *prog)
1157 {
1158    struct zink_screen *screen = zink_screen(ctx->base.screen);
1159    nir_shader *nir[ZINK_GFX_SHADER_COUNT];
1160 
1161    /* iterate in reverse order to create TES before generated TCS */
1162    for (int i = MESA_SHADER_FRAGMENT; i >= MESA_SHADER_VERTEX; i--) {
1163       if (prog->shaders[i]) {
1164          util_queue_fence_wait(&prog->shaders[i]->precompile.fence);
1165          /* this may have already been precompiled for separate shader */
1166          if (i == MESA_SHADER_TESS_CTRL && prog->shaders[i]->non_fs.is_generated && prog->shaders[MESA_SHADER_TESS_CTRL]->nir)
1167             zink_shader_tcs_init(screen, prog->shaders[MESA_SHADER_TESS_CTRL], nir[MESA_SHADER_TESS_EVAL], &nir[i]);
1168          else
1169             nir[i] = zink_shader_deserialize(screen, prog->shaders[i]);
1170       } else {
1171          nir[i] = NULL;
1172       }
1173    }
1174    assign_io(screen, nir);
1175    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1176       if (nir[i])
1177          zink_shader_serialize_blob(nir[i], &prog->blobs[i]);
1178       ralloc_free(nir[i]);
1179    }
1180 
1181    if (screen->optimal_keys)
1182       prog->libs = find_or_create_lib_cache(screen, prog);
1183    if (prog->libs)
1184       p_atomic_inc(&prog->libs->refcount);
1185 
1186    struct mesa_blake3 sctx;
1187    _mesa_blake3_init(&sctx);
1188    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1189       if (prog->shaders[i])
1190          _mesa_blake3_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1));
1191    }
1192    _mesa_blake3_final(&sctx, prog->base.blake3);
1193 
1194    if (!zink_descriptor_program_init(ctx, &prog->base))
1195       goto fail;
1196 
1197    return prog;
1198 
1199 fail:
1200    if (prog)
1201       zink_destroy_gfx_program(screen, prog);
1202    return NULL;
1203 }
1204 
1205 struct zink_gfx_program *
1206 zink_create_gfx_program(struct zink_context *ctx,
1207                         struct zink_shader **stages,
1208                         unsigned vertices_per_patch,
1209                         uint32_t gfx_hash)
1210 {
1211    struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash);
1212    if (prog)
1213       prog = gfx_program_init(ctx, prog);
1214    return prog;
1215 }
1216 
1217 /* Creates a replacement, optimized zink_gfx_program for this set of separate shaders; once
1218  * it finishes compiling, it is swapped in, in place of the fast-linked separable program.
1219  */
1220 static void
1221 create_linked_separable_job(void *data, void *gdata, int thread_index)
1222 {
1223    struct zink_gfx_program *prog = data;
1224    /* this is a dead program */
1225    if (prog->base.removed)
1226       return;
1227    prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash);
1228    /* block gfx_shader_prune in the main thread */
1229    util_queue_fence_reset(&prog->full_prog->base.cache_fence);
1230    /* add an ownership ref */
1231    zink_gfx_program_reference(zink_screen(prog->base.ctx->base.screen), NULL, prog->full_prog);
1232    /* this is otherwise a dead program */
1233    if (prog->full_prog->stages_present == prog->full_prog->stages_remaining)
1234       gfx_program_precompile_job(prog->full_prog, gdata, thread_index);
1235    util_queue_fence_signal(&prog->full_prog->base.cache_fence);
1236 }
1237 
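/* Fast-link path for separable shaders. This only applies when every bound stage is a separate
 * shader with a finished precompile, the optimal key is in its default state, and pipeline
 * libraries are usable; anything else falls back to a full zink_create_gfx_program(). The
 * program reuses the precompiled per-stage objects (shader objects or GPL libraries) and,
 * unless ZINK_DEBUG=noopt, queues create_linked_separable_job to build the optimized
 * replacement in the background.
 */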
1238 struct zink_gfx_program *
1239 create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch)
1240 {
1241    struct zink_screen *screen = zink_screen(ctx->base.screen);
1242    bool is_separate = true;
1243    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
1244       is_separate &= !stages[i] || stages[i]->info.separate_shader;
1245    /* filter cases that need real pipelines */
1246    if (!is_separate ||
1247        /* TODO: maybe try variants? grimace */
1248        !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) ||
1249        !zink_can_use_pipeline_libs(ctx))
1250       return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
1251    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1252       /* ensure async shader creation is done */
1253       if (stages[i]) {
1254          util_queue_fence_wait(&stages[i]->precompile.fence);
1255          if (!stages[i]->precompile.obj.mod)
1256             return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash);
1257       }
1258    }
1259 
1260    struct zink_gfx_program *prog = create_program(ctx, false);
1261    if (!prog)
1262       goto fail;
1263 
1264    prog->is_separable = true;
1265    prog->gfx_hash = ctx->gfx_hash;
1266    prog->base.uses_shobj = screen->info.have_EXT_shader_object;
1267 
1268    prog->stages_remaining = prog->stages_present = ctx->shader_stages;
1269    memcpy(prog->shaders, stages, sizeof(prog->shaders));
1270    prog->last_vertex_stage = ctx->last_vertex_stage;
1271 
1272    if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
1273       prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs;
1274       prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
1275    }
1276 
1277    if (!screen->info.have_EXT_shader_object) {
1278       prog->libs = create_lib_cache(prog, false);
1279       /* this libs cache is owned by the program */
1280       p_atomic_set(&prog->libs->refcount, 1);
1281    }
1282 
1283    unsigned refs = 0;
1284    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1285       if (prog->shaders[i]) {
1286          simple_mtx_lock(&prog->shaders[i]->lock);
1287          _mesa_set_add(prog->shaders[i]->programs, prog);
1288          simple_mtx_unlock(&prog->shaders[i]->lock);
1289          if (screen->info.have_EXT_shader_object) {
1290             if (!prog->objects[i])
1291                prog->objects[i] = prog->shaders[i]->precompile.obj.obj;
1292          }
1293          refs++;
1294       }
1295    }
1296    /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are
1297     * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here.
1298     */
1299    p_atomic_add(&prog->base.reference.count, refs - 1);
1300 
1301    for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) {
1302       for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
1303          _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog));
1304          /* only need first 3/4 for point/line/tri/patch */
1305          if (screen->info.have_EXT_extended_dynamic_state &&
1306              i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3))
1307             break;
1308       }
1309    }
1310 
1311    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1312       if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl)
1313          continue;
1314       int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1;
1315       prog->base.dd.binding_usage |= BITFIELD_BIT(idx);
1316       prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl;
1317       /* guarantee a null dsl if previous stages don't have descriptors */
1318       if (prog->shaders[i]->precompile.dsl)
1319          prog->base.num_dsl = idx + 1;
1320       prog->base.dd.bindless |= prog->shaders[i]->bindless;
1321    }
1322    if (prog->base.dd.bindless) {
1323       prog->base.num_dsl = screen->compact_descriptors ? ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES;
1324       prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout;
1325    }
1326    prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT);
1327 
1328    prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key;
1329 
1330    if (!screen->info.have_EXT_shader_object) {
1331       VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl};
1332       struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
1333       if (!gkey) {
1334          mesa_loge("ZINK: failed to allocate gkey!");
1335          goto fail;
1336       }
1337       gkey->optimal_key = prog->last_variant_hash;
1338       assert(gkey->optimal_key);
1339       gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false);
1340       _mesa_set_add(&prog->libs->libs, gkey);
1341    }
1342 
1343    if (!(zink_debug & ZINK_DEBUG_NOOPT))
1344       util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0);
1345 
1346    return prog;
1347 fail:
1348    if (prog)
1349       zink_destroy_gfx_program(screen, prog);
1350    return NULL;
1351 }
1352 
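/* Dump per-executable statistics for a pipeline through the debug callback using
 * VK_KHR_pipeline_executable_properties. The on-stack properties array assumes no more than
 * 10 executables per pipeline.
 */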
1353 static void
1354 print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline, struct util_debug_callback *debug)
1355 {
1356    VkPipelineInfoKHR pinfo = {
1357      VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
1358      NULL,
1359      pipeline
1360    };
1361    unsigned exe_count = 0;
1362    VkPipelineExecutablePropertiesKHR props[10] = {0};
1363    for (unsigned i = 0; i < ARRAY_SIZE(props); i++) {
1364       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
1365       props[i].pNext = NULL;
1366    }
1367    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL);
1368    VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props);
1369    for (unsigned e = 0; e < exe_count; e++) {
1370       VkPipelineExecutableInfoKHR info = {
1371          VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
1372          NULL,
1373          pipeline,
1374          e
1375       };
1376       unsigned count = 0;
1377 
1378       struct u_memstream stream;
1379       char *print_buf;
1380       size_t print_buf_sz;
1381 
1382       if (!u_memstream_open(&stream, &print_buf, &print_buf_sz)) {
1383          mesa_loge("ZINK: failed to open memstream!");
1384          return;
1385       }
1386 
1387       FILE *f = u_memstream_get(&stream);
1388       fprintf(f, "%s shader: ", props[e].name);
1389       VkPipelineExecutableStatisticKHR *stats = NULL;
1390       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL);
1391       stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR));
1392       if (!stats) {
1393          mesa_loge("ZINK: failed to allocate stats!");
1394          return;
1395       }
1396 
1397       for (unsigned i = 0; i < count; i++)
1398          stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
1399       VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats);
1400 
1401       for (unsigned i = 0; i < count; i++) {
1402          if (i)
1403             fprintf(f, ", ");
1404 
1405          switch (stats[i].format) {
1406          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
1407             fprintf(f, "%u %s", stats[i].value.b32, stats[i].name);
1408             break;
1409          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
1410             fprintf(f, "%" PRIi64 " %s", stats[i].value.i64, stats[i].name);
1411             break;
1412          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
1413             fprintf(f, "%" PRIu64 " %s", stats[i].value.u64, stats[i].name);
1414             break;
1415          case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
1416             fprintf(f, "%g %s", stats[i].value.f64, stats[i].name);
1417             break;
1418          default:
1419             unreachable("unknown statistic");
1420          }
1421       }
1422 
1423       /* print_buf is only valid after flushing. */
1424       fflush(f);
1425       util_debug_message(debug, SHADER_INFO, "%s", print_buf);
1426 
1427       u_memstream_close(&stream);
1428       free(print_buf);
1429    }
1430 }
1431 
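/* Compute pipeline hashing: both variants hash the state up to the 'hash' member; the
 * local-size variant additionally mixes in the workgroup dimensions for shaders whose
 * local size is only known at dispatch time.
 */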
1432 static uint32_t
1433 hash_compute_pipeline_state_local_size(const void *key)
1434 {
1435    const struct zink_compute_pipeline_state *state = key;
1436    uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1437    hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash);
1438    return hash;
1439 }
1440 
1441 static uint32_t
1442 hash_compute_pipeline_state(const void *key)
1443 {
1444    const struct zink_compute_pipeline_state *state = key;
1445    return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash));
1446 }
1447 
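/* Sync the compute pipeline state with an incoming grid launch: copy the dispatch block size
 * into the state when the shader uses a dispatch-time local size, track variable shared
 * memory, and mark the state dirty on any change so the pipeline hash gets recomputed.
 */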
1448 void
1449 zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info)
1450 {
1451    if (comp->use_local_size) {
1452       for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) {
1453          if (ctx->compute_pipeline_state.local_size[i] != info->block[i])
1454             ctx->compute_pipeline_state.dirty = true;
1455          ctx->compute_pipeline_state.local_size[i] = info->block[i];
1456       }
1457    }
1458    if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) {
1459       ctx->compute_pipeline_state.dirty = true;
1460       ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem;
1461    }
1462 }
1463 
1464 static bool
1465 equals_compute_pipeline_state(const void *a, const void *b)
1466 {
1467    const struct zink_compute_pipeline_state *sa = a;
1468    const struct zink_compute_pipeline_state *sb = b;
1469    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1470           sa->module == sb->module;
1471 }
1472 
1473 static bool
1474 equals_compute_pipeline_state_local_size(const void *a, const void *b)
1475 {
1476    const struct zink_compute_pipeline_state *sa = a;
1477    const struct zink_compute_pipeline_state *sb = b;
1478    return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) &&
1479           !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) &&
1480           sa->module == sb->module;
1481 }
1482 
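/* Async job that finishes compute program creation: create and compile the shader, hash the
 * serialized NIR into the program blake3, init descriptors, prime the pipeline cache, and,
 * when the program is eligible, precompile a reusable base pipeline.
 */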
1483 static void
1484 precompile_compute_job(void *data, void *gdata, int thread_index)
1485 {
1486    struct zink_compute_program *comp = data;
1487    struct zink_screen *screen = gdata;
1488 
1489    comp->shader = zink_shader_create(screen, comp->nir);
1490    zink_shader_init(screen, comp->shader);
1491    comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module);
1492    assert(comp->module);
1493    comp->module->shobj = false;
1494    comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base);
1495    /* comp->nir will be freed by zink_shader_compile */
1496    comp->nir = NULL;
1497    assert(comp->module->obj.spirv);
1498    util_dynarray_init(&comp->shader_cache[0], comp);
1499    util_dynarray_init(&comp->shader_cache[1], comp);
1500 
1501    struct mesa_blake3 blake3_ctx;
1502    _mesa_blake3_init(&blake3_ctx);
1503    _mesa_blake3_update(&blake3_ctx, comp->shader->blob.data, comp->shader->blob.size);
1504    _mesa_blake3_final(&blake3_ctx, comp->base.blake3);
1505 
1506    zink_descriptor_program_init(comp->base.ctx, &comp->base);
1507 
1508    zink_screen_get_pipeline_cache(screen, &comp->base, true);
1509    if (comp->base.can_precompile)
1510       comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL);
1511    if (comp->base_pipeline)
1512       zink_screen_update_pipeline_cache(screen, &comp->base, true);
1513 }
1514 
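/* Create the compute program CSO. use_local_size is set when the shader declares no fixed
 * workgroup size, which also rules out base-pipeline precompilation. Compilation runs inline
 * under ZINK_DEBUG=nobgc/shaderdb; otherwise it is queued to the background thread behind
 * comp->base.cache_fence.
 */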
1515 static struct zink_compute_program *
1516 create_compute_program(struct zink_context *ctx, nir_shader *nir)
1517 {
1518    struct zink_screen *screen = zink_screen(ctx->base.screen);
1519    struct zink_compute_program *comp = create_program(ctx, true);
1520    if (!comp)
1521       return NULL;
1522    simple_mtx_init(&comp->cache_lock, mtx_plain);
1523    comp->scratch_size = nir->scratch_size;
1524    comp->nir = nir;
1525    comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms;
1526 
1527    comp->use_local_size = !(nir->info.workgroup_size[0] ||
1528                             nir->info.workgroup_size[1] ||
1529                             nir->info.workgroup_size[2]);
1530    comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem;
1531    comp->base.can_precompile = !comp->use_local_size &&
1532                                (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) &&
1533                                (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS));
1534    _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ?
1535                                                        equals_compute_pipeline_state_local_size :
1536                                                        equals_compute_pipeline_state);
1537 
1538    if (zink_debug & (ZINK_DEBUG_NOBGC|ZINK_DEBUG_SHADERDB))
1539       precompile_compute_job(comp, screen, 0);
1540    else
1541       util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence,
1542                         precompile_compute_job, NULL, 0);
1543 
1544    if (zink_debug & ZINK_DEBUG_SHADERDB) {
1545       print_pipeline_stats(screen, comp->base_pipeline, &ctx->dbg);
1546    }
1547 
1548    return comp;
1549 }
1550 
1551 bool
1552 zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i)
1553 {
1554    struct zink_shader *zs = NULL;
1555    switch (stage) {
1556    case MESA_SHADER_VERTEX:
1557    case MESA_SHADER_TESS_CTRL:
1558    case MESA_SHADER_TESS_EVAL:
1559    case MESA_SHADER_GEOMETRY:
1560    case MESA_SHADER_FRAGMENT:
1561       zs = ctx->gfx_stages[stage];
1562       break;
1563    case MESA_SHADER_COMPUTE: {
1564       zs = ctx->curr_compute->shader;
1565       break;
1566    }
1567    default:
1568       unreachable("unknown shader type");
1569    }
1570    if (!zs)
1571       return false;
1572    return zink_shader_descriptor_is_buffer(zs, type, i);
1573 }
1574 
1575 static unsigned
1576 get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type)
1577 {
1578    switch (type) {
1579    case ZINK_DESCRIPTOR_TYPE_UNIFORMS:
1580       return !!zs->has_uniforms;
1581    case ZINK_DESCRIPTOR_TYPE_UBO:
1582    case ZINK_DESCRIPTOR_TYPE_SSBO:
1583       return zs->num_bindings[type];
1584    default:
1585       break;
1586    }
1587    unsigned num_bindings = 0;
1588    for (int i = 0; i < zs->num_bindings[type]; i++)
1589       num_bindings += zs->bindings[type][i].size;
1590    return num_bindings;
1591 }
1592 
1593 unsigned
1594 zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type)
1595 {
1596    unsigned num_bindings = 0;
1597    if (pg->is_compute) {
1598       struct zink_compute_program *comp = (void*)pg;
1599       return get_num_bindings(comp->shader, type);
1600    }
1601    struct zink_gfx_program *prog = (void*)pg;
1602    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
1603       if (prog->shaders[i])
1604          num_bindings += get_num_bindings(prog->shaders[i], type);
1605    }
1606    return num_bindings;
1607 }
1608 
1609 unsigned
1610 zink_program_num_bindings(const struct zink_program *pg)
1611 {
1612    unsigned num_bindings = 0;
1613    for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++)
1614       num_bindings += zink_program_num_bindings_typed(pg, i);
1615    return num_bindings;
1616 }
1617 
1618 static void
1619 deinit_program(struct zink_screen *screen, struct zink_program *pg)
1620 {
1621    util_queue_fence_wait(&pg->cache_fence);
1622    if (pg->layout)
1623       VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL);
1624 
1625    if (pg->pipeline_cache)
1626       VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL);
1627    u_rwlock_destroy(&pg->pipeline_cache_lock);
1628    zink_descriptor_program_deinit(screen, pg);
1629 }
1630 
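/* Full gfx program teardown. max_idx mirrors the early-out used at table-init time so only
 * the pipeline tables that were actually populated get walked, and each cached pipeline waits
 * on its compile fence before destruction. Shader caches and serialized blobs are only owned
 * (and thus destroyed) by non-separable programs.
 */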
1631 void
1632 zink_destroy_gfx_program(struct zink_screen *screen,
1633                          struct zink_gfx_program *prog)
1634 {
1635    unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]);
1636    if (screen->info.have_EXT_extended_dynamic_state) {
1637       /* only need first 3/4 for point/line/tri/patch */
1638       if ((prog->stages_present &
1639           (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) ==
1640           BITFIELD_BIT(MESA_SHADER_TESS_EVAL))
1641          max_idx = 4;
1642       else
1643          max_idx = 3;
1644       max_idx++;
1645    }
1646 
1647    if (prog->is_separable)
1648       zink_gfx_program_reference(screen, &prog->full_prog, NULL);
1649    for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
1650       for (int i = 0; i < max_idx; ++i) {
1651          hash_table_foreach(&prog->pipelines[r][i], entry) {
1652             struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
1653 
1654             util_queue_fence_wait(&pc_entry->fence);
1655             VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1656             VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL);
1657             free(pc_entry);
1658          }
1659       }
1660    }
1661 
1662    deinit_program(screen, &prog->base);
1663 
1664    for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
1665       if (prog->shaders[i]) {
1666          _mesa_set_remove_key(prog->shaders[i]->programs, prog);
1667          prog->shaders[i] = NULL;
1668       }
1669       if (!prog->is_separable) {
1670          destroy_shader_cache(screen, &prog->shader_cache[i][0][0]);
1671          destroy_shader_cache(screen, &prog->shader_cache[i][0][1]);
1672          destroy_shader_cache(screen, &prog->shader_cache[i][1][0]);
1673          destroy_shader_cache(screen, &prog->shader_cache[i][1][1]);
1674          blob_finish(&prog->blobs[i]);
1675       }
1676    }
1677    if (prog->libs)
1678       zink_gfx_lib_cache_unref(screen, prog->libs);
1679 
1680    ralloc_free(prog);
1681 }
1682 
1683 void
1684 zink_destroy_compute_program(struct zink_screen *screen,
1685                              struct zink_compute_program *comp)
1686 {
1687    deinit_program(screen, &comp->base);
1688 
1689    assert(comp->shader);
1690    assert(!comp->shader->spirv);
1691 
1692    zink_shader_free(screen, comp->shader);
1693 
1694    destroy_shader_cache(screen, &comp->shader_cache[0]);
1695    destroy_shader_cache(screen, &comp->shader_cache[1]);
1696 
1697    hash_table_foreach(&comp->pipelines, entry) {
1698       struct compute_pipeline_cache_entry *pc_entry = entry->data;
1699 
1700       VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL);
1701       free(pc_entry);
1702    }
1703    VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL);
1704    zink_destroy_shader_module(screen, comp->module);
1705 
1706    ralloc_free(comp);
1707 }
1708 
1709 ALWAYS_INLINE static bool
1710 compute_can_shortcut(const struct zink_compute_program *comp)
1711 {
1712    return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless;
1713 }
1714 
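/* Return (creating on demand) the VkPipeline for the current compute state. On rehash the old
 * state hash is XORed out of final_hash and the new one XORed in, so module changes compose
 * with state changes. Variant-free programs reuse base_pipeline directly; everything else goes
 * through the pipeline table with a double-checked lock around creation.
 */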
1715 VkPipeline
1716 zink_get_compute_pipeline(struct zink_screen *screen,
1717                       struct zink_compute_program *comp,
1718                       struct zink_compute_pipeline_state *state)
1719 {
1720    struct hash_entry *entry = NULL;
1721    struct compute_pipeline_cache_entry *cache_entry;
1722 
1723    if (!state->dirty && !state->module_changed)
1724       return state->pipeline;
1725    if (state->dirty) {
1726       if (state->pipeline) // avoid on first hash
1727          state->final_hash ^= state->hash;
1728       if (comp->use_local_size)
1729          state->hash = hash_compute_pipeline_state_local_size(state);
1730       else
1731          state->hash = hash_compute_pipeline_state(state);
1732       state->dirty = false;
1733       state->final_hash ^= state->hash;
1734    }
1735 
1736    util_queue_fence_wait(&comp->base.cache_fence);
1737    if (comp->base_pipeline && compute_can_shortcut(comp)) {
1738       state->pipeline = comp->base_pipeline;
1739       return state->pipeline;
1740    }
1741    entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1742 
1743    if (!entry) {
1744       simple_mtx_lock(&comp->cache_lock);
1745       entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state);
1746       if (entry) {
1747          simple_mtx_unlock(&comp->cache_lock);
1748          goto out;
1749       }
1750       VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state);
1751 
1752       if (pipeline == VK_NULL_HANDLE) {
1753          simple_mtx_unlock(&comp->cache_lock);
1754          return VK_NULL_HANDLE;
1755       }
1756 
1757       zink_screen_update_pipeline_cache(screen, &comp->base, false);
1758       if (compute_can_shortcut(comp)) {
1759          simple_mtx_unlock(&comp->cache_lock);
1760          /* don't add base pipeline to cache */
1761          state->pipeline = comp->base_pipeline = pipeline;
1762          return state->pipeline;
1763       }
1764 
1765       struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry);
1766       if (!pc_entry) {
1767          simple_mtx_unlock(&comp->cache_lock);
1768          return VK_NULL_HANDLE;
1769       }
1770 
1771       memcpy(&pc_entry->state, state, sizeof(*state));
1772       pc_entry->pipeline = pipeline;
1773 
1774       entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry);
1775       assert(entry);
1776       simple_mtx_unlock(&comp->cache_lock);
1777    }
1778 out:
1779    cache_entry = entry->data;
1780    state->pipeline = cache_entry->pipeline;
1781    return state->pipeline;
1782 }
1783 
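/* Common stage-bind bookkeeping: ctx->gfx_hash is maintained as an XOR of the bound shaders'
 * hashes (the old shader's hash is XORed out, the new one XORed in), the inlinable-uniform and
 * shader-stage masks are updated, and unbinding invalidates the cached module and the current
 * program so the next draw re-resolves them.
 */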
1784 static void
1785 bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader)
1786 {
1787    if (shader && shader->info.num_inlinable_uniforms)
1788       ctx->shader_has_inlinable_uniforms_mask |= 1 << stage;
1789    else
1790       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage);
1791 
1792    if (ctx->gfx_stages[stage])
1793       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1794 
1795    if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) {
1796       ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
1797       ctx->is_generated_gs_bound = false;
1798    }
1799 
1800    ctx->gfx_stages[stage] = shader;
1801    ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX];
1802    ctx->gfx_pipeline_state.modules_changed = true;
1803    if (shader) {
1804       ctx->shader_stages |= BITFIELD_BIT(stage);
1805       ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash;
1806    } else {
1807       ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE;
1808       if (ctx->curr_program)
1809          ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
1810       ctx->curr_program = NULL;
1811       ctx->shader_stages &= ~BITFIELD_BIT(stage);
1812    }
1813 }
1814 
1815 static enum mesa_prim
1816 gs_output_to_reduced_prim_type(struct shader_info *info)
1817 {
1818    switch (info->gs.output_primitive) {
1819    case MESA_PRIM_POINTS:
1820       return MESA_PRIM_POINTS;
1821 
1822    case MESA_PRIM_LINES:
1823    case MESA_PRIM_LINE_LOOP:
1824    case MESA_PRIM_LINE_STRIP:
1825    case MESA_PRIM_LINES_ADJACENCY:
1826    case MESA_PRIM_LINE_STRIP_ADJACENCY:
1827       return MESA_PRIM_LINES;
1828 
1829    case MESA_PRIM_TRIANGLES:
1830    case MESA_PRIM_TRIANGLE_STRIP:
1831    case MESA_PRIM_TRIANGLE_FAN:
1832    case MESA_PRIM_TRIANGLES_ADJACENCY:
1833    case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
1834       return MESA_PRIM_TRIANGLES;
1835 
1836    default:
1837       unreachable("unexpected output primitive type");
1838    }
1839 }
1840 
1841 static enum mesa_prim
1842 update_rast_prim(struct zink_shader *shader)
1843 {
1844    struct shader_info *info = &shader->info;
1845    if (info->stage == MESA_SHADER_GEOMETRY)
1846       return gs_output_to_reduced_prim_type(info);
1847    else if (info->stage == MESA_SHADER_TESS_EVAL) {
1848       if (info->tess.point_mode)
1849          return MESA_PRIM_POINTS;
1850       else {
1851          switch (info->tess._primitive_mode) {
1852          case TESS_PRIMITIVE_ISOLINES:
1853             return MESA_PRIM_LINES;
1854          case TESS_PRIMITIVE_TRIANGLES:
1855          case TESS_PRIMITIVE_QUADS:
1856             return MESA_PRIM_TRIANGLES;
1857          default:
1858             return MESA_PRIM_COUNT;
1859          }
1860       }
1861    }
1862    return MESA_PRIM_COUNT;
1863 }
1864 
1865 static void
1866 unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
1867 {
1868    if (prev_shader->non_fs.is_generated)
1869       ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY);
1870 
1871    if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
1872        ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent ==
1873        prev_shader) {
1874       bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
1875    }
1876 }
1877 
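/* Recompute which stage feeds rasterization after a bind, preferring GS, then TES, then VS.
 * A change resets stale vs_base key bits when optimal keys aren't in use and re-derives the
 * enabled viewport count from whether the new last stage writes the viewport index/mask.
 */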
1878 static void
1879 bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader)
1880 {
1881    if (prev_shader && stage < MESA_SHADER_GEOMETRY)
1882       unbind_generated_gs(ctx, stage, prev_shader);
1883 
1884    gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES;
1885    if (ctx->gfx_stages[MESA_SHADER_GEOMETRY])
1886       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY];
1887    else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
1888       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
1889    else
1890       ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX];
1891    gl_shader_stage current = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX;
1892 
1893    /* update rast_prim */
1894    ctx->gfx_pipeline_state.shader_rast_prim =
1895       ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) :
1896                                MESA_PRIM_COUNT;
1897 
1898    if (old != current) {
1899       if (!zink_screen(ctx->base.screen)->optimal_keys) {
1900          if (old != MESA_SHADER_STAGES) {
1901             memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1902             ctx->dirty_gfx_stages |= BITFIELD_BIT(old);
1903          } else {
1904             /* always unset vertex shader values when changing to a non-vs last stage */
1905             memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base));
1906          }
1907       }
1908 
1909       unsigned num_viewports = ctx->vp_state.num_viewports;
1910       struct zink_screen *screen = zink_screen(ctx->base.screen);
1911       /* number of enabled viewports is based on whether last vertex stage writes viewport index */
1912       if (ctx->last_vertex_stage) {
1913          if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
1914             ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS);
1915          else
1916             ctx->vp_state.num_viewports = 1;
1917       } else {
1918          ctx->vp_state.num_viewports = 1;
1919       }
1920       ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports;
1921       if (!screen->info.have_EXT_extended_dynamic_state) {
1922          if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports)
1923             ctx->gfx_pipeline_state.dirty = true;
1924          ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports;
1925       }
1926       ctx->last_vertex_stage_dirty = true;
1927    }
1928 }
1929 
1930 static void
1931 zink_bind_vs_state(struct pipe_context *pctx,
1932                    void *cso)
1933 {
1934    struct zink_context *ctx = zink_context(pctx);
1935    if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX])
1936       return;
1937    struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX];
1938    bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso);
1939    bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader);
1940    if (cso) {
1941       struct zink_shader *zs = cso;
1942       ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
1943       ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
1944    } else {
1945       ctx->shader_reads_drawid = false;
1946       ctx->shader_reads_basevertex = false;
1947    }
1948 }
1949 
1950 /* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count:
1951  * in GL, samples==1 means ignore gl_SampleMask[]
1952  * in VK, gl_SampleMask[] is never ignored
1953  */
1954 void
1955 zink_update_fs_key_samples(struct zink_context *ctx)
1956 {
1957    if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1958       return;
1959    if (zink_shader_uses_samples(ctx->gfx_stages[MESA_SHADER_FRAGMENT])) {
1960       bool samples = zink_get_fs_base_key(ctx)->samples;
1961       if (samples != (ctx->fb_state.samples > 1))
1962          zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1;
1963    }
1964 }
1965 
1966 void zink_update_gs_key_rectangular_line(struct zink_context *ctx)
1967 {
1968    bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular;
1969    if (line_rectangular != ctx->rast_state->base.line_rectangular)
1970       zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular;
1971 }
1972 
1973 static void
1974 zink_bind_fs_state(struct pipe_context *pctx,
1975                    void *cso)
1976 {
1977    struct zink_context *ctx = zink_context(pctx);
1978    if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT])
1979       return;
1980    if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) {
1981       ctx->saved_fs = cso;
1982       zink_set_null_fs(ctx);
1983       return;
1984    }
1985    bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? (ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true;
1986    unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0;
1987    bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso);
1988    ctx->fbfetch_outputs = 0;
1989    if (cso) {
1990       shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info;
1991       bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0;
1992       if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
1993           writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) {
1994          ctx->blend_state_changed = true;
1995          ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C);
1996       }
1997       if (info->fs.uses_fbfetch_output) {
1998          if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL)))
1999             ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS);
2000          ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0;
2001       }
2002       zink_update_fs_key_samples(ctx);
2003       if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) {
2004          if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output)
2005             ctx->gfx_pipeline_state.dirty = true;
2006          ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output;
2007       }
2008       zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false);
2009       if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask &&
2010           !zink_screen(pctx->screen)->driver_compiler_workarounds.needs_zs_shader_swizzle)
2011          zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask);
2012       if (!ctx->track_renderpasses && !ctx->blitting)
2013          ctx->rp_tc_info_updated = true;
2014    }
2015    zink_update_fbfetch(ctx);
2016 }
2017 
2018 static void
2019 zink_bind_gs_state(struct pipe_context *pctx,
2020                    void *cso)
2021 {
2022    struct zink_context *ctx = zink_context(pctx);
2023    if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY])
2024       return;
2025    bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso);
2026    bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL);
2027 }
2028 
2029 static void
2030 zink_bind_tcs_state(struct pipe_context *pctx,
2031                    void *cso)
2032 {
2033    bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso);
2034 }
2035 
2036 static void
2037 zink_bind_tes_state(struct pipe_context *pctx,
2038                    void *cso)
2039 {
2040    struct zink_context *ctx = zink_context(pctx);
2041    if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL])
2042       return;
2043    if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) {
2044       if (!cso) {
2045          /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */
2046          if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs)
2047             ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL;
2048       }
2049    }
2050    struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL];
2051    bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso);
2052    bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader);
2053 }
2054 
2055 static void *
2056 zink_create_cs_state(struct pipe_context *pctx,
2057                      const struct pipe_compute_state *shader)
2058 {
2059    struct nir_shader *nir;
2060    if (shader->ir_type != PIPE_SHADER_IR_NIR)
2061       nir = zink_tgsi_to_nir(pctx->screen, shader->prog);
2062    else
2063       nir = (struct nir_shader *)shader->prog;
2064 
2065    if (nir->info.uses_bindless)
2066       zink_descriptors_init_bindless(zink_context(pctx));
2067 
2068    return create_compute_program(zink_context(pctx), nir);
2069 }
2070 
2071 static void
2072 zink_bind_cs_state(struct pipe_context *pctx,
2073                    void *cso)
2074 {
2075    struct zink_context *ctx = zink_context(pctx);
2076    struct zink_compute_program *comp = cso;
2077    if (comp && comp->num_inlinable_uniforms)
2078       ctx->shader_has_inlinable_uniforms_mask |= 1 << MESA_SHADER_COMPUTE;
2079    else
2080       ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE);
2081 
2082    if (ctx->curr_compute) {
2083       zink_batch_reference_program(ctx, &ctx->curr_compute->base);
2084       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
2085       ctx->compute_pipeline_state.module = VK_NULL_HANDLE;
2086       ctx->compute_pipeline_state.module_hash = 0;
2087    }
2088    ctx->compute_pipeline_state.dirty = true;
2089    ctx->curr_compute = comp;
2090    if (comp && comp != ctx->curr_compute) {
2091       ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash;
2092       if (util_queue_fence_is_signalled(&comp->base.cache_fence))
2093          ctx->compute_pipeline_state.module = ctx->curr_compute->curr->obj.mod;
2094       ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash;
2095       if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask)
2096          ctx->compute_dirty = true;
2097    }
2098    zink_select_launch_grid(ctx);
2099 }
2100 
2101 static void
2102 zink_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info)
2103 {
2104    struct zink_compute_program *comp = cso;
2105    struct zink_screen *screen = zink_screen(pctx->screen);
2106 
2107    info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations;
2108    info->private_memory = comp->scratch_size;
2109    if (screen->info.props11.subgroupSize) {
2110       info->preferred_simd_size = screen->info.props11.subgroupSize;
2111       info->simd_sizes = info->preferred_simd_size;
2112    } else {
2113       // just guess it
2114       info->preferred_simd_size = 64;
2115       // only used for actual subgroup support
2116       info->simd_sizes = 0;
2117    }
2118 }
2119 
2120 static void
2121 zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso)
2122 {
2123    struct zink_compute_program *comp = cso;
2124    zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL);
2125 }
2126 
2127 /* caller must lock prog->libs->lock */
2128 struct zink_gfx_library_key *
2129 zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state)
2130 {
2131    struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key);
2132    if (!gkey) {
2133       mesa_loge("ZINK: failed to allocate gkey!");
2134       return NULL;
2135    }
2136 
2137    gkey->optimal_key = state->optimal_key;
2138    assert(gkey->optimal_key);
2139    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++)
2140       gkey->modules[i] = prog->objs[i].mod;
2141    gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
2142    _mesa_set_add(&prog->libs->libs, gkey);
2143    return gkey;
2144 }
2145 
2146 static const char *
2147 print_exe_stages(VkShaderStageFlags stages)
2148 {
2149    if (stages == VK_SHADER_STAGE_VERTEX_BIT)
2150       return "VS";
2151    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
2152       return "VS+GS";
2153    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
2154       return "VS+TCS+TES";
2155    if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT))
2156       return "VS+TCS+TES+GS";
2157    if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
2158       return "TCS";
2159    if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2160       return "TES";
2161    if (stages == VK_SHADER_STAGE_GEOMETRY_BIT)
2162       return "GS";
2163    if (stages == VK_SHADER_STAGE_FRAGMENT_BIT)
2164       return "FS";
2165    if (stages == VK_SHADER_STAGE_COMPUTE_BIT)
2166       return "CS";
2167    unreachable("unhandled combination of stages!");
2168 }
2169 
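/* Background precompile for a linked program: finish init, build modules with a default
 * optimal key (patch_vertices is guessed as 3), prime the pipeline cache, and, without
 * EXT_shader_object, record a GPL pipeline library for later fast-linking.
 */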
2170 static void
2171 gfx_program_precompile_job(void *data, void *gdata, int thread_index)
2172 {
2173    struct zink_screen *screen = gdata;
2174    struct zink_gfx_program *prog = data;
2175 
2176    /* this is threadsafe */
2177    gfx_program_init(prog->base.ctx, prog);
2178 
2179    struct zink_gfx_pipeline_state state = {0};
2180    state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
2181    state.shader_keys_optimal.key.tcs.patch_vertices = 3; // random guess, generated tcs precompile is hard
2182    state.optimal_key = state.shader_keys_optimal.key.val;
2183    generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
2184    zink_screen_get_pipeline_cache(screen, &prog->base, true);
2185    if (!screen->info.have_EXT_shader_object) {
2186       simple_mtx_lock(&prog->libs->lock);
2187       zink_create_pipeline_lib(screen, prog, &state);
2188       simple_mtx_unlock(&prog->libs->lock);
2189    }
2190    zink_screen_update_pipeline_cache(screen, &prog->base, true);
2191 }
2192 
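/* pipe_context::link_shader hook. Skips cases that can't be precompiled (compute, sample
 * shading, missing VS/FS, tessellation without a TES) and dedupes repeated links via the
 * program cache keyed on the XOR of the shader hashes. Under ZINK_DEBUG=shaderdb a full
 * pipeline is compiled immediately and its stats printed; otherwise precompilation runs
 * inline (ZINK_DEBUG=nobgc) or on the background queue.
 */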
2193 static void
2194 zink_link_gfx_shader(struct pipe_context *pctx, void **shaders)
2195 {
2196    struct zink_context *ctx = zink_context(pctx);
2197    struct zink_shader **zshaders = (struct zink_shader **)shaders;
2198    if (shaders[MESA_SHADER_COMPUTE])
2199       return;
2200    /* explicitly block sample shading: this needs full pipelines always */
2201    if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
2202       return;
2203    /* can't precompile fixedfunc */
2204    if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) {
2205       /* handled directly from shader create */
2206       return;
2207    }
2208    unsigned hash = 0;
2209    unsigned shader_stages = 0;
2210    for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
2211       if (zshaders[i]) {
2212          hash ^= zshaders[i]->hash;
2213          shader_stages |= BITFIELD_BIT(i);
2214       }
2215    }
2216    unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
2217    unsigned tess = shader_stages & tess_stages;
2218    /* can't do fixedfunc tes either */
2219    if (tess && !shaders[MESA_SHADER_TESS_EVAL])
2220       return;
2221    struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)];
2222    simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2223    /* link can be called repeatedly with the same shaders: ignore */
2224    if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) {
2225       simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2226       return;
2227    }
2228    struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash);
2229    u_foreach_bit(i, shader_stages)
2230       assert(prog->shaders[i]);
2231    _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
2232    prog->base.removed = false;
2233    simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]);
2234    if (zink_debug & ZINK_DEBUG_SHADERDB) {
2235       struct zink_screen *screen = zink_screen(pctx->screen);
2236       gfx_program_init(ctx, prog);
2237       if (screen->optimal_keys)
2238          generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state);
2239       else
2240          generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state);
2241       VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state,
2242                                                      ctx->gfx_pipeline_state.element_state->binding_map,
2243                                                      shaders[MESA_SHADER_TESS_EVAL] ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true);
2244       print_pipeline_stats(screen, pipeline, &ctx->dbg);
2245       VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL);
2246    } else {
2247       if (zink_screen(pctx->screen)->info.have_EXT_shader_object)
2248          prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
2249       if (zink_debug & ZINK_DEBUG_NOBGC)
2250          gfx_program_precompile_job(prog, pctx->screen, 0);
2251       else
2252          util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, gfx_program_precompile_job, NULL, 0);
2253    }
2254 }
2255 
2256 void
2257 zink_delete_shader_state(struct pipe_context *pctx, void *cso)
2258 {
2259    zink_gfx_shader_free(zink_screen(pctx->screen), cso);
2260 }
2261 
2262 static void
2263 precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen)
2264 {
2265    zs->precompile.obj = zink_shader_compile_separate(screen, zs);
2266    if (!screen->info.have_EXT_shader_object) {
2267       struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0};
2268       objs[zs->info.stage].mod = zs->precompile.obj.mod;
2269       zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage);
2270    }
2271 }
2272 
2273 static void
2274 gfx_shader_init_job(void *data, void *gdata, int thread_index)
2275 {
2276    struct zink_screen *screen = gdata;
2277    struct zink_shader *zs = data;
2278 
2279    zink_shader_init(screen, zs);
2280 
2281    if (zink_debug & ZINK_DEBUG_NOPC) {
2282       ralloc_free(zs->nir);
2283       zs->nir = NULL;
2284       return;
2285    }
2286    if (zs->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB &&
2287       (screen->info.have_EXT_shader_object ||
2288       (screen->info.have_EXT_graphics_pipeline_library && (zs->info.stage == MESA_SHADER_FRAGMENT || zs->info.stage == MESA_SHADER_VERTEX)))) {
2289       /* sample shading can't precompile */
2290       if (zs->info.stage != MESA_SHADER_FRAGMENT || !zs->info.fs.uses_sample_shading)
2291          precompile_separate_shader(zs, screen);
2292    }
2293    ralloc_free(zs->nir);
2294    zs->nir = NULL;
2295 }
2296 
2297 void *
2298 zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
2299 {
2300    struct zink_screen *screen = zink_screen(pctx->screen);
2301    nir_shader *nir;
2302    if (shader->type != PIPE_SHADER_IR_NIR)
2303       nir = zink_tgsi_to_nir(pctx->screen, shader->tokens);
2304    else
2305       nir = (struct nir_shader *)shader->ir.nir;
2306 
2307    if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output)
2308       zink_descriptor_util_init_fbfetch(zink_context(pctx));
2309    if (nir->info.uses_bindless)
2310       zink_descriptors_init_bindless(zink_context(pctx));
2311 
2312    struct zink_shader *zs = zink_shader_create(zink_screen(pctx->screen), nir);
2313    if (zink_debug & ZINK_DEBUG_NOBGC)
2314       gfx_shader_init_job(zs, screen, 0);
2315    else
2316       util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, gfx_shader_init_job, NULL, 0);
2317 
2318    return zs;
2319 }
2320 
2321 static void
2322 zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso)
2323 {
2324    struct zink_screen *screen = zink_screen(pctx->screen);
2325    util_shader_reference(pctx, &screen->shaders, &cso, NULL);
2326 }
2327 
2328 static void *
2329 zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader)
2330 {
2331    bool cache_hit;
2332    struct zink_screen *screen = zink_screen(pctx->screen);
2333    return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit);
2334 }
2335 
2336 
2337 void
2338 zink_program_init(struct zink_context *ctx)
2339 {
2340    ctx->base.create_vs_state = zink_create_cached_shader_state;
2341    ctx->base.bind_vs_state = zink_bind_vs_state;
2342    ctx->base.delete_vs_state = zink_delete_cached_shader_state;
2343 
2344    ctx->base.create_fs_state = zink_create_cached_shader_state;
2345    ctx->base.bind_fs_state = zink_bind_fs_state;
2346    ctx->base.delete_fs_state = zink_delete_cached_shader_state;
2347 
2348    ctx->base.create_gs_state = zink_create_cached_shader_state;
2349    ctx->base.bind_gs_state = zink_bind_gs_state;
2350    ctx->base.delete_gs_state = zink_delete_cached_shader_state;
2351 
2352    ctx->base.create_tcs_state = zink_create_cached_shader_state;
2353    ctx->base.bind_tcs_state = zink_bind_tcs_state;
2354    ctx->base.delete_tcs_state = zink_delete_cached_shader_state;
2355 
2356    ctx->base.create_tes_state = zink_create_cached_shader_state;
2357    ctx->base.bind_tes_state = zink_bind_tes_state;
2358    ctx->base.delete_tes_state = zink_delete_cached_shader_state;
2359 
2360    ctx->base.create_compute_state = zink_create_cs_state;
2361    ctx->base.bind_compute_state = zink_bind_cs_state;
2362    ctx->base.get_compute_state_info = zink_get_compute_state_info;
2363    ctx->base.delete_compute_state = zink_delete_cs_shader_state;
2364 
2365    if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state)
2366       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic);
2367    else
2368       _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input);
2369    if (zink_screen(ctx->base.screen)->have_full_ds3)
2370       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3);
2371    else
2372       _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output);
2373    /* validate struct packing */
2374    STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t));
2375    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) ==
2376                  offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input));
2377    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) ==
2378                  offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input));
2379    STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) ==
2380                  offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input));
2381 
2382    STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t));
2383 
2384    /* no precompile at all */
2385    if (zink_debug & ZINK_DEBUG_NOPC)
2386       return;
2387 
2388    struct zink_screen *screen = zink_screen(ctx->base.screen);
2389    if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB)
2390       ctx->base.link_shader = zink_link_gfx_shader;
2391 }
2392 
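/* computes the effective rasterizer-discard value: 'disable' forces discard
 * off, otherwise the bound rast_state wins; returns whether the effective
 * value changed
 */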
bool
zink_set_rasterizer_discard(struct zink_context *ctx, bool disable)
{
   bool value = disable ? false : (ctx->rast_state ? ctx->rast_state->base.rasterizer_discard : false);
   bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value;
   ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value;
   if (!changed)
      return false;
   if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2)
      ctx->gfx_pipeline_state.dirty |= true;
   ctx->rasterizer_discard_changed = true;
   return true;
}

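/* generic entrypoint for offloading driver work; jobs are funneled onto the
 * screen's cache_get_thread queue alongside shader init jobs
 */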
void
zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data,
                           struct util_queue_fence *fence,
                           pipe_driver_thread_func execute,
                           pipe_driver_thread_func cleanup,
                           const size_t job_size)
{
   struct zink_screen *screen = zink_screen(pscreen);
   util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size);
}

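/* edge flags are only meaningful for filled primitives that end up
 * rasterized as lines or points, and only when the vertex shader actually
 * writes them
 */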
static bool
has_edge_flags(struct zink_context *ctx)
{
   switch (ctx->gfx_pipeline_state.gfx_prim_mode) {
   case MESA_PRIM_POINTS:
   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
   case MESA_PRIM_LINES:
   case MESA_PRIM_LINE_LOOP:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_TRIANGLE_FAN:
   case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
   case MESA_PRIM_QUAD_STRIP:
   case MESA_PRIM_PATCHES:
      return false;
   case MESA_PRIM_TRIANGLES:
   case MESA_PRIM_TRIANGLES_ADJACENCY:
   case MESA_PRIM_QUADS:
   case MESA_PRIM_POLYGON:
   case MESA_PRIM_COUNT:
   default:
      break;
   }
   return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES ||
           ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) &&
          ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags;
}

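/* collapse the full mesa_prim range into the three buckets zink tracks for
 * rasterization-dependent state
 */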
static enum zink_rast_prim
zink_rast_prim_for_pipe(enum mesa_prim prim)
{
   switch (prim) {
   case MESA_PRIM_POINTS:
      return ZINK_PRIM_POINTS;
   case MESA_PRIM_LINES:
      return ZINK_PRIM_LINES;
   case MESA_PRIM_TRIANGLES:
   default:
      return ZINK_PRIM_TRIANGLES;
   }
}

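/* the primitive type produced by a tessellation shader: point_mode takes
 * precedence, isolines yield lines, and both triangles and quads rasterize
 * as triangles
 */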
static enum mesa_prim
zink_tess_prim_type(struct zink_shader *tess)
{
   if (tess->info.tess.point_mode)
      return MESA_PRIM_POINTS;
   else {
      switch (tess->info.tess._primitive_mode) {
      case TESS_PRIMITIVE_ISOLINES:
         return MESA_PRIM_LINES;
      case TESS_PRIMITIVE_TRIANGLES:
      case TESS_PRIMITIVE_QUADS:
         return MESA_PRIM_TRIANGLES;
      default:
         return MESA_PRIM_COUNT;
      }
   }
}

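/* append a dword offset to the shader's inlinable-uniform list; values for
 * these offsets are supplied via set_inlinable_constants (see below)
 */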
static inline void
zink_add_inline_uniform(nir_shader *shader, int offset)
{
   shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset;
   ++shader->info.num_inlinable_uniforms;
}

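/* strips and fans need distinct provoking-vertex remapping in the generated
 * gs; all other primitive types take the simple path
 */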
static unsigned
encode_lower_pv_mode(enum mesa_prim prim_type)
{
   switch (prim_type) {
   case MESA_PRIM_TRIANGLE_STRIP:
   case MESA_PRIM_QUAD_STRIP:
      return ZINK_PVE_PRIMITIVE_TRISTRIP;
   case MESA_PRIM_TRIANGLE_FAN:
      return ZINK_PVE_PRIMITIVE_FAN;
   default:
      return ZINK_PVE_PRIMITIVE_SIMPLE;
   }
}

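/* evaluates which primitive emulation paths (line stipple/smooth, point
 * smooth, provoking vertex, edge flags, quads) the current draw state
 * requires, updates the affected shader keys, and binds a passthrough or
 * quads-emulation geometry shader (creating and caching it on first use)
 * when one is needed
 */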
void
zink_set_primitive_emulation_keys(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool lower_line_stipple = false, lower_line_smooth = false;
   unsigned lower_pv_mode = 0;
   if (!screen->optimal_keys) {
      lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
                           screen->driver_workarounds.no_linestipple &&
                           ctx->rast_state->base.line_stipple_enable &&
                           !ctx->num_so_targets;

      bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS &&
                                screen->driconf.emulate_point_smooth &&
                                ctx->rast_state->base.point_smooth;
      if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) {
         assert(zink_get_gs_key(ctx)->lower_line_stipple ==
                zink_get_fs_key(ctx)->lower_line_stipple);
         zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple;
         zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple;
      }

      lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES &&
                          screen->driver_workarounds.no_linesmooth &&
                          ctx->rast_state->base.line_smooth &&
                          !ctx->num_so_targets;

      if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) {
         assert(zink_get_gs_key(ctx)->lower_line_smooth ==
                zink_get_fs_key(ctx)->lower_line_smooth);
         zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth;
         zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth;
      }

      if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) {
         zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth;
      }

      lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last &&
                      !screen->info.have_EXT_provoking_vertex;
      if (lower_pv_mode)
         lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode);

      if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode)
         zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode;
   }

   bool lower_edge_flags = has_edge_flags(ctx);

   bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS;

   bool lower_filled_quad = lower_quad_prim &&
      ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES;

   if (lower_line_stipple || lower_line_smooth ||
       lower_edge_flags || lower_quad_prim ||
       lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) {
      enum pipe_shader_type prev_vertex_stage =
         ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ?
            MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
      enum zink_rast_prim zink_prim_type =
         zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim);

      //when using transform feedback, primitives must be tessellated
      lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings;

      if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated &&
          ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) {

         /* create the emulation gs on first use and cache it on the last
          * pre-rasterization stage, keyed on prim mode and rast prim
          */
         if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) {
            util_queue_fence_wait(&ctx->gfx_stages[prev_vertex_stage]->precompile.fence);
            nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]);
            nir_shader *nir;
            if (lower_filled_quad) {
               nir = zink_create_quads_emulation_gs(
                  &screen->nir_options,
                  prev_stage);
            } else {
               enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode;
               if (prev_vertex_stage == MESA_SHADER_TESS_EVAL)
                  prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]);
               nir = nir_create_passthrough_gs(
                  &screen->nir_options,
                  prev_stage,
                  prim,
                  ctx->gfx_pipeline_state.rast_prim,
                  lower_edge_flags,
                  lower_line_stipple || lower_quad_prim);
            }
            zink_lower_system_values_to_inlined_uniforms(nir);

            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK);
            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1);
            zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT);
            ralloc_free(prev_stage);
            struct zink_shader *shader = zink_shader_create(screen, nir);
            zink_shader_init(screen, shader);
            shader->needs_inlining = true;
            ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader;
            shader->non_fs.is_generated = true;
            shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage];
            shader->can_inline = true;
            memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride));
         }

         ctx->base.bind_gs_state(&ctx->base,
                                 ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]);
         ctx->is_generated_gs_bound = true;
      }

      /* feed the generated gs its inlined uniforms: the fragment shader's
       * flat-shading mask (64 bits split across two dwords) and the
       * provoking vertex mode
       */
      ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 3,
                                        (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags,
                                                      ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32,
                                                      ctx->gfx_pipeline_state.dyn_state3.pv_last});
   } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] &&
              ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)
      ctx->base.bind_gs_state(&ctx->base, NULL);
}