xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/lima/lima_program.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2017-2019 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
28 
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "compiler/nir/nir_serialize.h"
32 #include "nir/tgsi_to_nir.h"
33 
34 #include "pipe/p_state.h"
35 
36 #include "lima_screen.h"
37 #include "lima_context.h"
38 #include "lima_job.h"
39 #include "lima_program.h"
40 #include "lima_bo.h"
41 #include "lima_disk_cache.h"
42 
43 #include "ir/lima_ir.h"
44 
45 static const nir_shader_compiler_options vs_nir_options = {
46    .lower_ffma16 = true,
47    .lower_ffma32 = true,
48    .lower_ffma64 = true,
49    .lower_fpow = true,
50    .lower_ffract = true,
51    .lower_fdiv = true,
52    .lower_fmod = true,
53    .lower_fsqrt = true,
54    .lower_flrp32 = true,
55    .lower_flrp64 = true,
56    /* could be implemented by clamp */
57    .lower_fsat = true,
58    .lower_bitops = true,
59    .lower_sincos = true,
60    .lower_fceil = true,
61    .lower_insert_byte = true,
62    .lower_insert_word = true,
63    .force_indirect_unrolling = nir_var_all,
64    .force_indirect_unrolling_sampler = true,
65    .lower_varying_from_uniform = true,
66    .max_unroll_iterations = 32,
67    .no_integers = true,
68 };
69 
70 static const nir_shader_compiler_options fs_nir_options = {
71    .lower_ffma16 = true,
72    .lower_ffma32 = true,
73    .lower_ffma64 = true,
74    .lower_fpow = true,
75    .lower_fdiv = true,
76    .lower_fmod = true,
77    .lower_flrp32 = true,
78    .lower_flrp64 = true,
79    .lower_fsign = true,
80    .lower_fdot = true,
81    .lower_fdph = true,
82    .lower_insert_byte = true,
83    .lower_insert_word = true,
84    .lower_bitops = true,
85    .lower_vector_cmp = true,
86    .force_indirect_unrolling = (nir_var_shader_out | nir_var_function_temp),
87    .force_indirect_unrolling_sampler = true,
88    .lower_varying_from_uniform = true,
89    .max_unroll_iterations = 32,
90    .has_ddx_intrinsics = true,
91    .no_integers = true,
92 };
93 
94 const void *
lima_program_get_compiler_options(enum pipe_shader_type shader)95 lima_program_get_compiler_options(enum pipe_shader_type shader)
96 {
97    switch (shader) {
98    case PIPE_SHADER_VERTEX:
99       return &vs_nir_options;
100    case PIPE_SHADER_FRAGMENT:
101       return &fs_nir_options;
102    default:
103       return NULL;
104    }
105 }
106 
/* Size callback passed to nir_lower_io: reports the number of attribute
 * slots of a GLSL type.  The bindless flag is not consulted.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
112 
113 static void
lima_program_optimize_vs_nir(struct nir_shader * s)114 lima_program_optimize_vs_nir(struct nir_shader *s)
115 {
116    bool progress;
117 
118    NIR_PASS_V(s, nir_lower_viewport_transform);
119    NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f);
120    NIR_PASS_V(s, nir_lower_io,
121 	      nir_var_shader_in | nir_var_shader_out, type_size, 0);
122    NIR_PASS_V(s, nir_lower_load_const_to_scalar);
123    NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
124    NIR_PASS_V(s, nir_lower_io_to_scalar,
125               nir_var_shader_in|nir_var_shader_out, NULL, NULL);
126 
127    do {
128       progress = false;
129 
130       NIR_PASS_V(s, nir_lower_vars_to_ssa);
131       NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
132       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
133       NIR_PASS(progress, s, nir_copy_prop);
134       NIR_PASS(progress, s, nir_opt_remove_phis);
135       NIR_PASS(progress, s, nir_opt_dce);
136       NIR_PASS(progress, s, nir_opt_dead_cf);
137       NIR_PASS(progress, s, nir_opt_cse);
138       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
139       NIR_PASS(progress, s, nir_opt_algebraic);
140       NIR_PASS(progress, s, lima_nir_lower_ftrunc);
141       NIR_PASS(progress, s, nir_opt_constant_folding);
142       NIR_PASS(progress, s, nir_opt_undef);
143       NIR_PASS(progress, s, nir_lower_undef_to_zero);
144       NIR_PASS(progress, s, nir_opt_loop_unroll);
145       NIR_PASS(progress, s, nir_lower_undef_to_zero);
146    } while (progress);
147 
148    NIR_PASS_V(s, nir_lower_int_to_float);
149    /* int_to_float pass generates ftrunc, so lower it */
150    NIR_PASS(progress, s, lima_nir_lower_ftrunc);
151    NIR_PASS_V(s, nir_lower_bool_to_float, true);
152 
153    NIR_PASS_V(s, nir_copy_prop);
154    NIR_PASS_V(s, nir_opt_dce);
155    NIR_PASS_V(s, lima_nir_split_loads);
156    NIR_PASS_V(s, nir_convert_from_ssa, true);
157    NIR_PASS_V(s, nir_opt_dce);
158    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
159    nir_sweep(s);
160 }
161 
162 static bool
lima_alu_to_scalar_filter_cb(const nir_instr * instr,const void * data)163 lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
164 {
165    if (instr->type != nir_instr_type_alu)
166       return false;
167 
168    nir_alu_instr *alu = nir_instr_as_alu(instr);
169    switch (alu->op) {
170    case nir_op_frcp:
171    /* nir_op_idiv is lowered to frcp by lower_int_to_floats which
172     * will be run later, so lower idiv here
173     */
174    case nir_op_idiv:
175    case nir_op_frsq:
176    case nir_op_flog2:
177    case nir_op_fexp2:
178    case nir_op_fsqrt:
179    case nir_op_fsin:
180    case nir_op_fcos:
181       return true;
182    default:
183       break;
184    }
185 
186    /* nir vec4 fcsel assumes that each component of the condition will be
187     * used to select the same component from the two options, but Utgard PP
188     * has only 1 component condition. If all condition components are not the
189     * same we need to lower it to scalar.
190     */
191    switch (alu->op) {
192    case nir_op_bcsel:
193    case nir_op_fcsel:
194       break;
195    default:
196       return false;
197    }
198 
199    int num_components = alu->def.num_components;
200 
201    uint8_t swizzle = alu->src[0].swizzle[0];
202 
203    for (int i = 1; i < num_components; i++)
204       if (alu->src[0].swizzle[i] != swizzle)
205          return true;
206 
207    return false;
208 }
209 
210 static bool
lima_vec_to_regs_filter_cb(const nir_instr * instr,unsigned writemask,const void * data)211 lima_vec_to_regs_filter_cb(const nir_instr *instr, unsigned writemask,
212                            const void *data)
213 {
214    assert(writemask > 0);
215    if (util_bitcount(writemask) == 1)
216       return true;
217 
218    return !lima_alu_to_scalar_filter_cb(instr, data);
219 }
220 
221 static void
lima_program_optimize_fs_nir(struct nir_shader * s,struct nir_lower_tex_options * tex_options)222 lima_program_optimize_fs_nir(struct nir_shader *s,
223                              struct nir_lower_tex_options *tex_options)
224 {
225    bool progress;
226 
227    NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
228    NIR_PASS_V(s, nir_lower_io,
229 	      nir_var_shader_in | nir_var_shader_out, type_size, 0);
230    NIR_PASS_V(s, nir_lower_tex, tex_options);
231    NIR_PASS_V(s, lima_nir_lower_txp);
232 
233    do {
234       progress = false;
235       NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
236    } while (progress);
237 
238    do {
239       progress = false;
240 
241       NIR_PASS_V(s, nir_lower_vars_to_ssa);
242       NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL);
243       NIR_PASS(progress, s, nir_copy_prop);
244       NIR_PASS(progress, s, nir_opt_remove_phis);
245       NIR_PASS(progress, s, nir_opt_dce);
246       NIR_PASS(progress, s, nir_opt_dead_cf);
247       NIR_PASS(progress, s, nir_opt_cse);
248       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
249       NIR_PASS(progress, s, nir_opt_algebraic);
250       NIR_PASS(progress, s, nir_opt_constant_folding);
251       NIR_PASS(progress, s, nir_opt_undef);
252       NIR_PASS(progress, s, nir_opt_loop_unroll);
253       NIR_PASS(progress, s, lima_nir_split_load_input);
254    } while (progress);
255 
256    NIR_PASS_V(s, nir_lower_int_to_float);
257    NIR_PASS_V(s, nir_lower_bool_to_float, true);
258 
259    /* Some ops must be lowered after being converted from int ops,
260     * so re-run nir_opt_algebraic after int lowering. */
261    do {
262       progress = false;
263       NIR_PASS(progress, s, nir_opt_algebraic);
264    } while (progress);
265 
266    /* Must be run after optimization loop */
267    NIR_PASS_V(s, lima_nir_scale_trig);
268    NIR_PASS_V(s, lima_nir_ppir_algebraic_late);
269 
270    NIR_PASS_V(s, nir_copy_prop);
271    NIR_PASS_V(s, nir_opt_dce);
272 
273    NIR_PASS_V(s, nir_convert_from_ssa, true);
274    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
275 
276    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, false);
277    NIR_PASS_V(s, nir_lower_vec_to_regs, lima_vec_to_regs_filter_cb, NULL);
278 
279    NIR_PASS_V(s, nir_opt_dce); /* clean up any new dead code from vec to movs */
280 
281    NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
282    NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
283    NIR_PASS_V(s, lima_nir_duplicate_load_consts);
284 
285    NIR_PASS_V(s, nir_trivialize_registers);
286 
287    nir_sweep(s);
288 }
289 
290 static bool
lima_fs_compile_shader(struct lima_context * ctx,struct lima_fs_key * key,struct lima_fs_uncompiled_shader * ufs,struct lima_fs_compiled_shader * fs)291 lima_fs_compile_shader(struct lima_context *ctx,
292                        struct lima_fs_key *key,
293                        struct lima_fs_uncompiled_shader *ufs,
294                        struct lima_fs_compiled_shader *fs)
295 {
296    struct lima_screen *screen = lima_screen(ctx->base.screen);
297    nir_shader *nir = nir_shader_clone(fs, ufs->base.ir.nir);
298 
299    struct nir_lower_tex_options tex_options = {
300       .swizzle_result = ~0u,
301       .lower_invalid_implicit_lod = true,
302    };
303 
304    for (int i = 0; i < ARRAY_SIZE(key->tex); i++) {
305       for (int j = 0; j < 4; j++)
306          tex_options.swizzles[i][j] = key->tex[i].swizzle[j];
307    }
308 
309    lima_program_optimize_fs_nir(nir, &tex_options);
310 
311    if (lima_debug & LIMA_DEBUG_PP)
312       nir_print_shader(nir, stdout);
313 
314    if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->base.debug)) {
315       ralloc_free(nir);
316       return false;
317    }
318 
319    fs->state.uses_discard = nir->info.fs.uses_discard;
320    ralloc_free(nir);
321 
322    return true;
323 }
324 
325 static bool
lima_fs_upload_shader(struct lima_context * ctx,struct lima_fs_compiled_shader * fs)326 lima_fs_upload_shader(struct lima_context *ctx,
327                       struct lima_fs_compiled_shader *fs)
328 {
329    static const uint32_t pp_clear_program[] = {
330       PP_CLEAR_PROGRAM
331    };
332    int shader_size = sizeof(pp_clear_program);
333    void *shader = (void *)pp_clear_program;
334    struct lima_screen *screen = lima_screen(ctx->base.screen);
335 
336    if (fs->state.shader_size) {
337       shader_size = fs->state.shader_size;
338       shader = fs->shader;
339    }
340 
341    fs->bo = lima_bo_create(screen, shader_size, 0);
342    if (!fs->bo) {
343       fprintf(stderr, "lima: create fs shader bo fail\n");
344       return false;
345    }
346 
347    memcpy(lima_bo_map(fs->bo), shader, shader_size);
348 
349    return true;
350 }
351 
352 static struct lima_fs_compiled_shader *
lima_get_compiled_fs(struct lima_context * ctx,struct lima_fs_uncompiled_shader * ufs,struct lima_fs_key * key)353 lima_get_compiled_fs(struct lima_context *ctx,
354                      struct lima_fs_uncompiled_shader *ufs,
355                      struct lima_fs_key *key)
356 {
357    struct lima_screen *screen = lima_screen(ctx->base.screen);
358    struct hash_table *ht;
359    uint32_t key_size;
360 
361    ht = ctx->fs_cache;
362    key_size = sizeof(struct lima_fs_key);
363 
364    struct hash_entry *entry = _mesa_hash_table_search(ht, key);
365    if (entry)
366       return entry->data;
367 
368    /* Not on memory cache, try disk cache */
369    struct lima_fs_compiled_shader *fs =
370       lima_fs_disk_cache_retrieve(screen->disk_cache, key);
371 
372    if (!fs) {
373       /* Not on disk cache, compile and insert into disk cache*/
374       fs = rzalloc(NULL, struct lima_fs_compiled_shader);
375       if (!fs)
376          return NULL;
377 
378       if (!lima_fs_compile_shader(ctx, key, ufs, fs))
379          goto err;
380 
381       lima_fs_disk_cache_store(screen->disk_cache, key, fs);
382    }
383 
384    if (!lima_fs_upload_shader(ctx, fs))
385       goto err;
386 
387    ralloc_free(fs->shader);
388    fs->shader = NULL;
389 
390    /* Insert into memory cache */
391    struct lima_key *dup_key;
392    dup_key = rzalloc_size(fs, key_size);
393    memcpy(dup_key, key, key_size);
394    _mesa_hash_table_insert(ht, dup_key, fs);
395 
396    return fs;
397 
398 err:
399    ralloc_free(fs);
400    return NULL;
401 }
402 
403 static void *
lima_create_fs_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)404 lima_create_fs_state(struct pipe_context *pctx,
405                      const struct pipe_shader_state *cso)
406 {
407    struct lima_context *ctx = lima_context(pctx);
408    struct lima_fs_uncompiled_shader *so = rzalloc(NULL, struct lima_fs_uncompiled_shader);
409 
410    if (!so)
411       return NULL;
412 
413    nir_shader *nir;
414    if (cso->type == PIPE_SHADER_IR_NIR)
415       /* The backend takes ownership of the NIR shader on state
416        * creation. */
417       nir = cso->ir.nir;
418    else {
419       assert(cso->type == PIPE_SHADER_IR_TGSI);
420 
421       nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
422    }
423 
424    so->base.type = PIPE_SHADER_IR_NIR;
425    so->base.ir.nir = nir;
426 
427    /* Serialize the NIR to a binary blob that we can hash for the disk
428     * cache.  Drop unnecessary information (like variable names)
429     * so the serialized NIR is smaller, and also to let us detect more
430     * isomorphic shaders when hashing, increasing cache hits.
431     */
432    struct blob blob;
433    blob_init(&blob);
434    nir_serialize(&blob, nir, true);
435    _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
436    blob_finish(&blob);
437 
438    if (lima_debug & LIMA_DEBUG_PRECOMPILE) {
439       /* Trigger initial compilation with default settings */
440       struct lima_fs_key key;
441       memset(&key, 0, sizeof(key));
442       memcpy(key.nir_sha1, so->nir_sha1, sizeof(so->nir_sha1));
443       for (int i = 0; i < ARRAY_SIZE(key.tex); i++) {
444          for (int j = 0; j < 4; j++)
445             key.tex[i].swizzle[j] = j;
446       }
447       lima_get_compiled_fs(ctx, so, &key);
448    }
449 
450    return so;
451 }
452 
453 static void
lima_bind_fs_state(struct pipe_context * pctx,void * hwcso)454 lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
455 {
456    struct lima_context *ctx = lima_context(pctx);
457 
458    ctx->uncomp_fs = hwcso;
459    ctx->dirty |= LIMA_CONTEXT_DIRTY_UNCOMPILED_FS;
460 }
461 
462 static void
lima_delete_fs_state(struct pipe_context * pctx,void * hwcso)463 lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
464 {
465    struct lima_context *ctx = lima_context(pctx);
466    struct lima_fs_uncompiled_shader *so = hwcso;
467 
468    hash_table_foreach(ctx->fs_cache, entry) {
469       const struct lima_fs_key *key = entry->key;
470       if (!memcmp(key->nir_sha1, so->nir_sha1, sizeof(so->nir_sha1))) {
471          struct lima_fs_compiled_shader *fs = entry->data;
472          _mesa_hash_table_remove(ctx->fs_cache, entry);
473          if (fs->bo)
474             lima_bo_unreference(fs->bo);
475 
476          if (fs == ctx->fs)
477             ctx->fs = NULL;
478 
479          ralloc_free(fs);
480       }
481    }
482 
483    ralloc_free(so->base.ir.nir);
484    ralloc_free(so);
485 }
486 
487 static bool
lima_vs_compile_shader(struct lima_context * ctx,struct lima_vs_key * key,struct lima_vs_uncompiled_shader * uvs,struct lima_vs_compiled_shader * vs)488 lima_vs_compile_shader(struct lima_context *ctx,
489                        struct lima_vs_key *key,
490                        struct lima_vs_uncompiled_shader *uvs,
491                        struct lima_vs_compiled_shader *vs)
492 {
493    nir_shader *nir = nir_shader_clone(vs, uvs->base.ir.nir);
494 
495    lima_program_optimize_vs_nir(nir);
496 
497    if (lima_debug & LIMA_DEBUG_GP)
498       nir_print_shader(nir, stdout);
499 
500    if (!gpir_compile_nir(vs, nir, &ctx->base.debug)) {
501       ralloc_free(nir);
502       return false;
503    }
504 
505    ralloc_free(nir);
506 
507    return true;
508 }
509 
510 static bool
lima_vs_upload_shader(struct lima_context * ctx,struct lima_vs_compiled_shader * vs)511 lima_vs_upload_shader(struct lima_context *ctx,
512                       struct lima_vs_compiled_shader *vs)
513 {
514    struct lima_screen *screen = lima_screen(ctx->base.screen);
515    vs->bo = lima_bo_create(screen, vs->state.shader_size, 0);
516    if (!vs->bo) {
517       fprintf(stderr, "lima: create vs shader bo fail\n");
518       return false;
519    }
520 
521    memcpy(lima_bo_map(vs->bo), vs->shader, vs->state.shader_size);
522 
523    return true;
524 }
525 
526 static struct lima_vs_compiled_shader *
lima_get_compiled_vs(struct lima_context * ctx,struct lima_vs_uncompiled_shader * uvs,struct lima_vs_key * key)527 lima_get_compiled_vs(struct lima_context *ctx,
528                      struct lima_vs_uncompiled_shader *uvs,
529                      struct lima_vs_key *key)
530 {
531    struct lima_screen *screen = lima_screen(ctx->base.screen);
532    struct hash_table *ht;
533    uint32_t key_size;
534 
535    ht = ctx->vs_cache;
536    key_size = sizeof(struct lima_vs_key);
537 
538    struct hash_entry *entry = _mesa_hash_table_search(ht, key);
539    if (entry)
540       return entry->data;
541 
542    /* Not on memory cache, try disk cache */
543    struct lima_vs_compiled_shader *vs =
544       lima_vs_disk_cache_retrieve(screen->disk_cache, key);
545 
546    if (!vs) {
547       /* Not on disk cache, compile and insert into disk cache */
548       vs = rzalloc(NULL, struct lima_vs_compiled_shader);
549       if (!vs)
550          return NULL;
551       if (!lima_vs_compile_shader(ctx, key, uvs, vs))
552          goto err;
553 
554       lima_vs_disk_cache_store(screen->disk_cache, key, vs);
555    }
556 
557    if (!lima_vs_upload_shader(ctx, vs))
558       goto err;
559 
560    ralloc_free(vs->shader);
561    vs->shader = NULL;
562 
563    struct lima_key *dup_key;
564    dup_key = rzalloc_size(vs, key_size);
565    memcpy(dup_key, key, key_size);
566    _mesa_hash_table_insert(ht, dup_key, vs);
567 
568    return vs;
569 
570 err:
571    ralloc_free(vs);
572    return NULL;
573 }
574 
575 bool
lima_update_vs_state(struct lima_context * ctx)576 lima_update_vs_state(struct lima_context *ctx)
577 {
578    if (!(ctx->dirty & LIMA_CONTEXT_DIRTY_UNCOMPILED_VS)) {
579       return true;
580    }
581 
582    struct lima_vs_key local_key;
583    struct lima_vs_key *key = &local_key;
584    memset(key, 0, sizeof(*key));
585    memcpy(key->nir_sha1, ctx->uncomp_vs->nir_sha1,
586           sizeof(ctx->uncomp_vs->nir_sha1));
587 
588    struct lima_vs_compiled_shader *old_vs = ctx->vs;
589    struct lima_vs_compiled_shader *vs = lima_get_compiled_vs(ctx,
590                                                              ctx->uncomp_vs,
591                                                              key);
592    if (!vs)
593       return false;
594 
595    ctx->vs = vs;
596 
597    if (ctx->vs != old_vs)
598       ctx->dirty |= LIMA_CONTEXT_DIRTY_COMPILED_VS;
599 
600    return true;
601 }
602 
603 bool
lima_update_fs_state(struct lima_context * ctx)604 lima_update_fs_state(struct lima_context *ctx)
605 {
606    if (!(ctx->dirty & (LIMA_CONTEXT_DIRTY_UNCOMPILED_FS |
607                        LIMA_CONTEXT_DIRTY_TEXTURES))) {
608       return true;
609    }
610 
611    struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
612    struct lima_fs_key local_key;
613    struct lima_fs_key *key = &local_key;
614    memset(key, 0, sizeof(*key));
615    memcpy(key->nir_sha1, ctx->uncomp_fs->nir_sha1,
616           sizeof(ctx->uncomp_fs->nir_sha1));
617 
618    uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
619                            PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
620    for (int i = 0; i < lima_tex->num_textures; i++) {
621       struct lima_sampler_view *sampler = lima_sampler_view(lima_tex->textures[i]);
622       if (!sampler) {
623          memcpy(key->tex[i].swizzle, identity, 4);
624          continue;
625       }
626       for (int j = 0; j < 4; j++)
627          key->tex[i].swizzle[j] = sampler->swizzle[j];
628    }
629 
630    /* Fill rest with identity swizzle */
631    for (int i = lima_tex->num_textures; i < ARRAY_SIZE(key->tex); i++)
632       memcpy(key->tex[i].swizzle, identity, 4);
633 
634    struct lima_fs_compiled_shader *old_fs = ctx->fs;
635 
636    struct lima_fs_compiled_shader *fs = lima_get_compiled_fs(ctx,
637                                                              ctx->uncomp_fs,
638                                                              key);
639    if (!fs)
640       return false;
641 
642    ctx->fs = fs;
643 
644    if (ctx->fs != old_fs)
645       ctx->dirty |= LIMA_CONTEXT_DIRTY_COMPILED_FS;
646 
647    return true;
648 }
649 
650 static void *
lima_create_vs_state(struct pipe_context * pctx,const struct pipe_shader_state * cso)651 lima_create_vs_state(struct pipe_context *pctx,
652                      const struct pipe_shader_state *cso)
653 {
654    struct lima_context *ctx = lima_context(pctx);
655    struct lima_vs_uncompiled_shader *so = rzalloc(NULL, struct lima_vs_uncompiled_shader);
656 
657    if (!so)
658       return NULL;
659 
660    nir_shader *nir;
661    if (cso->type == PIPE_SHADER_IR_NIR)
662       /* The backend takes ownership of the NIR shader on state
663        * creation. */
664       nir = cso->ir.nir;
665    else {
666       assert(cso->type == PIPE_SHADER_IR_TGSI);
667 
668       nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
669    }
670 
671    so->base.type = PIPE_SHADER_IR_NIR;
672    so->base.ir.nir = nir;
673 
674    /* Serialize the NIR to a binary blob that we can hash for the disk
675     * cache.  Drop unnecessary information (like variable names)
676     * so the serialized NIR is smaller, and also to let us detect more
677     * isomorphic shaders when hashing, increasing cache hits.
678     */
679    struct blob blob;
680    blob_init(&blob);
681    nir_serialize(&blob, nir, true);
682    _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
683    blob_finish(&blob);
684 
685    if (lima_debug & LIMA_DEBUG_PRECOMPILE) {
686       /* Trigger initial compilation with default settings */
687       struct lima_vs_key key;
688       memset(&key, 0, sizeof(key));
689       memcpy(key.nir_sha1, so->nir_sha1, sizeof(so->nir_sha1));
690       lima_get_compiled_vs(ctx, so, &key);
691    }
692 
693    return so;
694 }
695 
696 static void
lima_bind_vs_state(struct pipe_context * pctx,void * hwcso)697 lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
698 {
699    struct lima_context *ctx = lima_context(pctx);
700 
701    ctx->uncomp_vs = hwcso;
702    ctx->dirty |= LIMA_CONTEXT_DIRTY_UNCOMPILED_VS;
703 }
704 
705 static void
lima_delete_vs_state(struct pipe_context * pctx,void * hwcso)706 lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
707 {
708    struct lima_context *ctx = lima_context(pctx);
709    struct lima_vs_uncompiled_shader *so = hwcso;
710 
711    hash_table_foreach(ctx->vs_cache, entry) {
712       const struct lima_vs_key *key = entry->key;
713       if (!memcmp(key->nir_sha1, so->nir_sha1, sizeof(so->nir_sha1))) {
714          struct lima_vs_compiled_shader *vs = entry->data;
715          _mesa_hash_table_remove(ctx->vs_cache, entry);
716          if (vs->bo)
717             lima_bo_unreference(vs->bo);
718 
719          if (vs == ctx->vs)
720             ctx->vs = NULL;
721 
722          ralloc_free(vs);
723       }
724    }
725 
726    ralloc_free(so->base.ir.nir);
727    ralloc_free(so);
728 }
729 
730 static uint32_t
lima_fs_cache_hash(const void * key)731 lima_fs_cache_hash(const void *key)
732 {
733    return _mesa_hash_data(key, sizeof(struct lima_fs_key));
734 }
735 
736 static uint32_t
lima_vs_cache_hash(const void * key)737 lima_vs_cache_hash(const void *key)
738 {
739    return _mesa_hash_data(key, sizeof(struct lima_vs_key));
740 }
741 
742 static bool
lima_fs_cache_compare(const void * key1,const void * key2)743 lima_fs_cache_compare(const void *key1, const void *key2)
744 {
745    return memcmp(key1, key2, sizeof(struct lima_fs_key)) == 0;
746 }
747 
748 static bool
lima_vs_cache_compare(const void * key1,const void * key2)749 lima_vs_cache_compare(const void *key1, const void *key2)
750 {
751    return memcmp(key1, key2, sizeof(struct lima_vs_key)) == 0;
752 }
753 
754 void
lima_program_init(struct lima_context * ctx)755 lima_program_init(struct lima_context *ctx)
756 {
757    ctx->base.create_fs_state = lima_create_fs_state;
758    ctx->base.bind_fs_state = lima_bind_fs_state;
759    ctx->base.delete_fs_state = lima_delete_fs_state;
760 
761    ctx->base.create_vs_state = lima_create_vs_state;
762    ctx->base.bind_vs_state = lima_bind_vs_state;
763    ctx->base.delete_vs_state = lima_delete_vs_state;
764 
765    ctx->fs_cache = _mesa_hash_table_create(ctx, lima_fs_cache_hash,
766                                            lima_fs_cache_compare);
767    ctx->vs_cache = _mesa_hash_table_create(ctx, lima_vs_cache_hash,
768                                            lima_vs_cache_compare);
769 }
770 
771 void
lima_program_fini(struct lima_context * ctx)772 lima_program_fini(struct lima_context *ctx)
773 {
774    hash_table_foreach(ctx->vs_cache, entry) {
775       struct lima_vs_compiled_shader *vs = entry->data;
776       if (vs->bo)
777          lima_bo_unreference(vs->bo);
778       ralloc_free(vs);
779       _mesa_hash_table_remove(ctx->vs_cache, entry);
780    }
781 
782    hash_table_foreach(ctx->fs_cache, entry) {
783       struct lima_fs_compiled_shader *fs = entry->data;
784       if (fs->bo)
785          lima_bo_unreference(fs->bo);
786       ralloc_free(fs);
787       _mesa_hash_table_remove(ctx->fs_cache, entry);
788    }
789 }
790