xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/svga/svga_shader.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2008-2024 Broadcom. All Rights Reserved.
3  * The term “Broadcom” refers to Broadcom Inc.
4  * and/or its subsidiaries.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include "util/u_bitmask.h"
9 #include "util/u_memory.h"
10 #include "util/format/u_format.h"
11 #include "svga_context.h"
12 #include "svga_cmd.h"
13 #include "svga_format.h"
14 #include "svga_shader.h"
15 #include "svga_tgsi.h"
16 #include "svga_resource_texture.h"
17 #include "VGPU10ShaderTokens.h"
18 
19 #include "compiler/nir/nir.h"
20 #include "compiler/glsl/gl_nir.h"
21 #include "nir/nir_to_tgsi.h"
22 
23 
/**
 * Top bit of the 64-bit generic-varying bitmask.
 *
 * It is not used as a real generic index.  It is OR'ed into the shader
 * output bitmask when a FOG output is written, purely so that the mask
 * forms a distinct "signature".  Input/output signatures are what the
 * driver compares to resolve shader linking issues.
 */
#define FOG_GENERIC_BIT ((uint64_t) 1 << 63)
31 
32 
33 /**
34  * Use the shader info to generate a bitmask indicating which generic
35  * inputs are used by the shader.  A set bit indicates that GENERIC[i]
36  * is used.
37  */
38 uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info * info)39 svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
40 {
41    unsigned i;
42    uint64_t mask = 0x0;
43 
44    for (i = 0; i < info->num_inputs; i++) {
45       if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
46          unsigned j = info->input_semantic_index[i];
47          assert(j < sizeof(mask) * 8);
48          mask |= ((uint64_t) 1) << j;
49       }
50    }
51 
52    return mask;
53 }
54 
55 
56 /**
57  * Scan shader info to return a bitmask of written outputs.
58  */
59 uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info * info)60 svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
61 {
62    unsigned i;
63    uint64_t mask = 0x0;
64 
65    for (i = 0; i < info->num_outputs; i++) {
66       switch (info->output_semantic_name[i]) {
67       case TGSI_SEMANTIC_GENERIC:
68          {
69             unsigned j = info->output_semantic_index[i];
70             assert(j < sizeof(mask) * 8);
71             mask |= ((uint64_t) 1) << j;
72          }
73          break;
74       case TGSI_SEMANTIC_FOG:
75          mask |= FOG_GENERIC_BIT;
76          break;
77       }
78    }
79 
80    return mask;
81 }
82 
83 
84 
85 /**
86  * Given a mask of used generic variables (as returned by the above functions)
87  * fill in a table which maps those indexes to small integers.
88  * This table is used by the remap_generic_index() function in
89  * svga_tgsi_decl_sm30.c
90  * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
91  * GENERIC[3] are used.  The remap_table will contain:
92  *   table[1] = 0;
93  *   table[3] = 1;
94  * The remaining table entries will be filled in with the next unused
95  * generic index (in this example, 2).
96  */
97 void
svga_remap_generics(uint64_t generics_mask,int8_t remap_table[MAX_GENERIC_VARYING])98 svga_remap_generics(uint64_t generics_mask,
99                     int8_t remap_table[MAX_GENERIC_VARYING])
100 {
101    /* Note texcoord[0] is reserved so start at 1 */
102    unsigned count = 1, i;
103 
104    for (i = 0; i < MAX_GENERIC_VARYING; i++) {
105       remap_table[i] = -1;
106    }
107 
108    /* for each bit set in generic_mask */
109    while (generics_mask) {
110       unsigned index = ffsll(generics_mask) - 1;
111       remap_table[index] = count++;
112       generics_mask &= ~((uint64_t) 1 << index);
113    }
114 }
115 
116 
117 /**
118  * Use the generic remap table to map a TGSI generic varying variable
119  * index to a small integer.  If the remapping table doesn't have a
120  * valid value for the given index (the table entry is -1) it means
121  * the fragment shader doesn't use that VS output.  Just allocate
122  * the next free value in that case.  Alternately, we could cull
123  * VS instructions that write to register, or replace the register
124  * with a dummy temp register.
125  * XXX TODO: we should do one of the later as it would save precious
126  * texcoord registers.
127  */
128 int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],int generic_index)129 svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
130                          int generic_index)
131 {
132    assert(generic_index < MAX_GENERIC_VARYING);
133 
134    if (generic_index >= MAX_GENERIC_VARYING) {
135       /* just don't return a random/garbage value */
136       generic_index = MAX_GENERIC_VARYING - 1;
137    }
138 
139    if (remap_table[generic_index] == -1) {
140       /* This is a VS output that has no matching PS input.  Find a
141        * free index.
142        */
143       int i, max = 0;
144       for (i = 0; i < MAX_GENERIC_VARYING; i++) {
145          max = MAX2(max, remap_table[i]);
146       }
147       remap_table[generic_index] = max + 1;
148    }
149 
150    return remap_table[generic_index];
151 }
152 
153 static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
154    PIPE_SWIZZLE_X,
155    PIPE_SWIZZLE_Y,
156    PIPE_SWIZZLE_Z,
157    PIPE_SWIZZLE_W,
158    PIPE_SWIZZLE_0,
159    PIPE_SWIZZLE_1,
160    PIPE_SWIZZLE_NONE
161 };
162 
163 static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
164    PIPE_SWIZZLE_X,
165    PIPE_SWIZZLE_Y,
166    PIPE_SWIZZLE_Z,
167    PIPE_SWIZZLE_1,
168    PIPE_SWIZZLE_0,
169    PIPE_SWIZZLE_1,
170    PIPE_SWIZZLE_NONE
171 };
172 
173 static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
174    PIPE_SWIZZLE_0,
175    PIPE_SWIZZLE_0,
176    PIPE_SWIZZLE_0,
177    PIPE_SWIZZLE_X,
178    PIPE_SWIZZLE_0,
179    PIPE_SWIZZLE_1,
180    PIPE_SWIZZLE_NONE
181 };
182 
183 static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
184    PIPE_SWIZZLE_X,
185    PIPE_SWIZZLE_X,
186    PIPE_SWIZZLE_X,
187    PIPE_SWIZZLE_X,
188    PIPE_SWIZZLE_0,
189    PIPE_SWIZZLE_1,
190    PIPE_SWIZZLE_NONE
191 };
192 
193 static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
194    PIPE_SWIZZLE_X,
195    PIPE_SWIZZLE_X,
196    PIPE_SWIZZLE_X,
197    PIPE_SWIZZLE_1,
198    PIPE_SWIZZLE_0,
199    PIPE_SWIZZLE_1,
200    PIPE_SWIZZLE_NONE
201 };
202 
203 static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
204    PIPE_SWIZZLE_X,
205    PIPE_SWIZZLE_X,
206    PIPE_SWIZZLE_X,
207    PIPE_SWIZZLE_Y,
208    PIPE_SWIZZLE_0,
209    PIPE_SWIZZLE_1,
210    PIPE_SWIZZLE_NONE
211 };
212 
213 static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
214    PIPE_SWIZZLE_Y,
215    PIPE_SWIZZLE_Y,
216    PIPE_SWIZZLE_Y,
217    PIPE_SWIZZLE_Y,
218    PIPE_SWIZZLE_0,
219    PIPE_SWIZZLE_1,
220    PIPE_SWIZZLE_NONE
221 };
222 
223 
224 static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)225 vgpu10_return_type(enum pipe_format format)
226 {
227    if (util_format_is_unorm(format))
228       return VGPU10_RETURN_TYPE_UNORM;
229    else if (util_format_is_snorm(format))
230       return VGPU10_RETURN_TYPE_SNORM;
231    else if (util_format_is_pure_uint(format))
232       return VGPU10_RETURN_TYPE_UINT;
233    else if (util_format_is_pure_sint(format))
234       return VGPU10_RETURN_TYPE_SINT;
235    else if (util_format_is_float(format))
236       return VGPU10_RETURN_TYPE_FLOAT;
237    else
238       return VGPU10_RETURN_TYPE_MAX;
239 }
240 
241 
242 /**
243  * A helper function to return TRUE if the specified format
244  * is a supported format for sample_c instruction.
245  */
246 static bool
isValidSampleCFormat(enum pipe_format format)247 isValidSampleCFormat(enum pipe_format format)
248 {
249    return util_format_is_depth_or_stencil(format);
250 }
251 
252 
253 /**
254  * Initialize the shader-neutral fields of svga_compile_key from context
255  * state.  This is basically the texture-related state.
256  */
257 void
svga_init_shader_key_common(const struct svga_context * svga,enum pipe_shader_type shader_type,const struct svga_shader * shader,struct svga_compile_key * key)258 svga_init_shader_key_common(const struct svga_context *svga,
259                             enum pipe_shader_type shader_type,
260                             const struct svga_shader *shader,
261                             struct svga_compile_key *key)
262 {
263    unsigned i, idx = 0;
264    unsigned sampler_slots = 0;
265 
266    assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
267 
268    /* In case the number of samplers and sampler_views doesn't match,
269     * loop over the upper of the two counts.
270     */
271    key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
272                             svga->curr.num_samplers[shader_type]);
273 
274    if (!shader->info.uses_samplers)
275       key->num_textures = 0;
276 
277    key->num_samplers = 0;
278 
279    /* Set sampler_state_mapping only if GL43 is supported and
280     * the number of samplers exceeds SVGA limit or the sampler state
281     * mapping env is set.
282     */
283    bool sampler_state_mapping =
284       svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);
285 
286    key->sampler_state_mapping =
287       key->num_textures && sampler_state_mapping ? 1 : 0;
288 
289    for (i = 0; i < key->num_textures; i++) {
290       struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
291       const struct svga_sampler_state
292          *sampler = svga->curr.sampler[shader_type][i];
293 
294       if (view) {
295          assert(view->texture);
296 
297          enum pipe_texture_target target = view->target;
298          assert(target < (1 << 4)); /* texture_target:4 */
299 
300 	 key->tex[i].target = target;
301 	 key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
302 	 key->tex[i].sampler_view = 1;
303 
304          /* 1D/2D array textures with one slice and cube map array textures
305           * with one cube are treated as non-arrays by the SVGA3D device.
306           * Set the is_array flag only if we know that we have more than 1
307           * element.  This will be used to select shader instruction/resource
308           * types during shader translation.
309           */
310          switch (target) {
311          case PIPE_TEXTURE_1D_ARRAY:
312          case PIPE_TEXTURE_2D_ARRAY:
313             key->tex[i].is_array = view->texture->array_size > 1;
314             break;
315          case PIPE_TEXTURE_CUBE_ARRAY:
316             key->tex[i].is_array = view->texture->array_size > 6;
317             break;
318          default:
319             ; /* nothing / silence compiler warning */
320          }
321 
322          assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
323          key->tex[i].num_samples = view->texture->nr_samples;
324 
325          const enum pipe_swizzle *swizzle_tab;
326          if (target == PIPE_BUFFER) {
327             SVGA3dSurfaceFormat svga_format;
328             unsigned tf_flags;
329 
330             assert(view->texture->target == PIPE_BUFFER);
331 
332             /* Apply any special swizzle mask for the view format if needed */
333 
334             svga_translate_texture_buffer_view_format(view->format,
335                                                       &svga_format, &tf_flags);
336             if (tf_flags & TF_000X)
337                swizzle_tab = set_000X;
338             else if (tf_flags & TF_XXXX)
339                swizzle_tab = set_XXXX;
340             else if (tf_flags & TF_XXX1)
341                swizzle_tab = set_XXX1;
342             else if (tf_flags & TF_XXXY)
343                swizzle_tab = set_XXXY;
344             else
345                swizzle_tab = copy_alpha;
346          }
347          else {
348             /* If we have a non-alpha view into an svga3d surface with an
349              * alpha channel, then explicitly set the alpha channel to 1
350              * when sampling. Note that we need to check the
351              * actual device format to cover also imported surface cases.
352              */
353             swizzle_tab =
354                (!util_format_has_alpha(view->format) &&
355                 svga_texture_device_format_has_alpha(view->texture)) ?
356                 set_alpha : copy_alpha;
357 
358             if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
359                 view->texture->format == PIPE_FORMAT_DXT1_SRGB)
360                swizzle_tab = set_alpha;
361 
362             if (view->format == PIPE_FORMAT_X24S8_UINT ||
363                 view->format == PIPE_FORMAT_X32_S8X24_UINT)
364                swizzle_tab = set_YYYY;
365 
366             /* Save the compare function as we need to handle
367              * depth compare in the shader.
368              */
369             key->tex[i].compare_mode = sampler->compare_mode;
370             key->tex[i].compare_func = sampler->compare_func;
371 
372             /* Set the compare_in_shader bit if the view format
373              * is not a supported format for shadow compare.
374              * In this case, we'll do the comparison in the shader.
375              */
376             if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
377                 !isValidSampleCFormat(view->format)) {
378                key->tex[i].compare_in_shader = true;
379             }
380          }
381 
382          key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
383          key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
384          key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
385          key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
386       }
387       else {
388 	 key->tex[i].sampler_view = 0;
389       }
390 
391       if (sampler) {
392          if (!sampler->normalized_coords) {
393             if (view) {
394                assert(idx < (1 << 5));  /* width_height_idx:5 bitfield */
395                key->tex[i].width_height_idx = idx++;
396 	    }
397             key->tex[i].unnormalized = true;
398             ++key->num_unnormalized_coords;
399 
400             if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
401                 sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
402                 key->tex[i].texel_bias = true;
403             }
404          }
405 
406          if (!sampler_state_mapping) {
407             /* Use the same index if sampler state mapping is not supported */
408             key->tex[i].sampler_index = i;
409             key->num_samplers = i + 1;
410          }
411          else {
412 
413             /* The current samplers list can have redundant entries.
414              * In order to allow the number of bound samplers within the
415              * max limit supported by SVGA, we'll recreate the list with
416              * unique sampler state objects only.
417              */
418 
419             /* Check to see if this sampler is already on the list.
420              * If so, set the sampler index of this sampler to the
421              * same sampler index.
422              */
423             for (unsigned j = 0; j <= i; j++) {
424                if (svga->curr.sampler[shader_type][j] == sampler) {
425 
426                   if (!(sampler_slots & (1 << j))) {
427 
428                      /* if this sampler is not added to the new list yet,
429                       * set its sampler index to the next sampler index,
430                       * increment the sampler count, and mark this
431                       * sampler as added to the list.
432                       */
433 
434                      unsigned next_index =
435                         MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1);
436 
437                      key->tex[i].sampler_index = next_index;
438                      key->num_samplers = next_index + 1;
439 
440                      if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
441                         /* reserve one slot for the alternate sampler */
442                         key->num_samplers++;
443                      }
444 
445                      sampler_slots |= (1 << j);
446                   }
447                   else {
448                      key->tex[i].sampler_index = key->tex[j].sampler_index;
449                   }
450                   break;
451                }
452             }
453          }
454       }
455    }
456 
457    if (svga_have_gl43(svga)) {
458 
459       /* Save info about which constant buffers are to be viewed
460        * as srv raw buffers in the shader key.
461        */
462       if (shader->info.const_buffers_declared &
463           svga->state.raw_constbufs[shader_type]) {
464          key->raw_constbufs = svga->state.raw_constbufs[shader_type] &
465                               shader->info.const_buffers_declared;
466       }
467 
468       /* beginning index for srv for raw constant buffers */
469       key->srv_raw_constbuf_index = PIPE_MAX_SAMPLERS;
470 
471       if (shader->info.uses_images || shader->info.uses_hw_atomic ||
472           shader->info.uses_shader_buffers) {
473 
474          /* Save the uavSpliceIndex which is the index used for the first uav
475           * in the draw pipeline. For compute, uavSpliceIndex is always 0.
476           */
477          if (shader_type != PIPE_SHADER_COMPUTE)
478             key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;
479 
480          unsigned uav_splice_index = key->uav_splice_index;
481 
482          /* Also get the texture data type to be used in the uav declaration */
483          const struct svga_image_view *cur_image_view =
484             &svga->curr.image_views[shader_type][0];
485 
486          for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
487               i++, cur_image_view++) {
488 
489             struct pipe_resource *resource = cur_image_view->desc.resource;
490 
491             if (resource) {
492                key->images[i].return_type =
493                   svga_get_texture_datatype(cur_image_view->desc.format);
494 
495                key->images[i].is_array = resource->array_size > 1;
496 
497                /* Save the image resource target in the shader key because
498                 * for single layer image view, the resource target in the
499                 * tgsi shader is changed to a different texture target.
500                 */
501                key->images[i].resource_target = resource->target;
502                if (resource->target == PIPE_TEXTURE_3D ||
503                    resource->target == PIPE_TEXTURE_1D_ARRAY ||
504                    resource->target == PIPE_TEXTURE_2D_ARRAY ||
505                    resource->target == PIPE_TEXTURE_CUBE ||
506                    resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
507                   key->images[i].is_single_layer =
508                      cur_image_view->desc.u.tex.first_layer ==
509                      cur_image_view->desc.u.tex.last_layer;
510                }
511 
512                key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
513             }
514             else
515                key->images[i].uav_index = SVGA3D_INVALID_ID;
516          }
517 
518          const struct svga_shader_buffer *cur_sbuf =
519             &svga->curr.shader_buffers[shader_type][0];
520 
521          /* Save info about which shader buffers are to be viewed
522           * as srv raw buffers in the shader key.
523           */
524          if (shader->info.shader_buffers_declared &
525              svga->state.raw_shaderbufs[shader_type]) {
526             key->raw_shaderbufs = svga->state.raw_shaderbufs[shader_type] &
527                                   shader->info.shader_buffers_declared;
528             key->srv_raw_shaderbuf_index = key->srv_raw_constbuf_index +
529 		                           SVGA_MAX_CONST_BUFS;
530          }
531 
532          for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
533               i++, cur_sbuf++) {
534 
535             if (cur_sbuf->resource && (!(key->raw_shaderbufs & (1 << i))))
536                key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
537             else
538                key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
539          }
540 
541          const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];
542 
543          for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
544               i++, cur_buf++) {
545 
546             if (cur_buf->resource)
547                key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
548             else
549                key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
550          }
551 
552          key->image_size_used = shader->info.uses_image_size;
553       }
554 
555    }
556 
557    key->clamp_vertex_color = svga->curr.rast ?
558                              svga->curr.rast->templ.clamp_vertex_color : 0;
559 }
560 
561 
562 /** Search for a compiled shader variant with the same compile key */
563 struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader * shader,const struct svga_compile_key * key)564 svga_search_shader_key(const struct svga_shader *shader,
565                        const struct svga_compile_key *key)
566 {
567    struct svga_shader_variant *variant = shader->variants;
568 
569    assert(key);
570 
571    for ( ; variant; variant = variant->next) {
572       if (svga_compile_keys_equal(key, &variant->key))
573          return variant;
574    }
575    return NULL;
576 }
577 
578 /** Search for a shader with the same token key */
579 struct svga_shader *
svga_search_shader_token_key(struct svga_shader * pshader,const struct svga_token_key * key)580 svga_search_shader_token_key(struct svga_shader *pshader,
581                              const struct svga_token_key *key)
582 {
583    struct svga_shader *shader = pshader;
584 
585    assert(key);
586 
587    for ( ; shader; shader = shader->next) {
588       if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
589          return shader;
590    }
591    return NULL;
592 }
593 
594 /**
595  * Helper function to define a gb shader for non-vgpu10 device
596  */
597 static enum pipe_error
define_gb_shader_vgpu9(struct svga_context * svga,struct svga_shader_variant * variant,unsigned codeLen)598 define_gb_shader_vgpu9(struct svga_context *svga,
599                        struct svga_shader_variant *variant,
600                        unsigned codeLen)
601 {
602    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
603    enum pipe_error ret;
604 
605    /**
606     * Create gb memory for the shader and upload the shader code.
607     * Kernel module will allocate an id for the shader and issue
608     * the DefineGBShader command.
609     */
610    variant->gb_shader = sws->shader_create(sws, variant->type,
611                                            variant->tokens, codeLen);
612 
613    svga->hud.shader_mem_used += codeLen;
614 
615    if (!variant->gb_shader)
616       return PIPE_ERROR_OUT_OF_MEMORY;
617 
618    ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);
619 
620    return ret;
621 }
622 
623 /**
624  * Helper function to define a gb shader for vgpu10 device
625  */
626 static enum pipe_error
define_gb_shader_vgpu10(struct svga_context * svga,struct svga_shader_variant * variant,unsigned codeLen)627 define_gb_shader_vgpu10(struct svga_context *svga,
628                         struct svga_shader_variant *variant,
629                         unsigned codeLen)
630 {
631    struct svga_winsys_context *swc = svga->swc;
632    enum pipe_error ret;
633    unsigned len = codeLen + variant->signatureLen;
634 
635    /**
636     * Shaders in VGPU10 enabled device reside in the device COTable.
637     * SVGA driver will allocate an integer ID for the shader and
638     * issue DXDefineShader and DXBindShader commands.
639     */
640    variant->id = util_bitmask_add(svga->shader_id_bm);
641    if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
642       return PIPE_ERROR_OUT_OF_MEMORY;
643    }
644 
645    /* Create gb memory for the shader and upload the shader code */
646    variant->gb_shader = swc->shader_create(swc,
647                                            variant->id, variant->type,
648                                            variant->tokens, codeLen,
649                                            variant->signature,
650                                            variant->signatureLen);
651 
652    svga->hud.shader_mem_used += len;
653 
654    if (!variant->gb_shader) {
655       /* Free the shader ID */
656       assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
657       goto fail_no_allocation;
658    }
659 
660    /**
661     * Since we don't want to do any flush within state emission to avoid
662     * partial state in a command buffer, it's important to make sure that
663     * there is enough room to send both the DXDefineShader & DXBindShader
664     * commands in the same command buffer. So let's send both
665     * commands in one command reservation. If it fails, we'll undo
666     * the shader creation and return an error.
667     */
668    ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
669                                            variant->id, variant->type,
670                                            len);
671 
672    if (ret != PIPE_OK)
673       goto fail;
674 
675    return PIPE_OK;
676 
677 fail:
678    swc->shader_destroy(swc, variant->gb_shader);
679    variant->gb_shader = NULL;
680 
681 fail_no_allocation:
682    util_bitmask_clear(svga->shader_id_bm, variant->id);
683    variant->id = UTIL_BITMASK_INVALID_INDEX;
684 
685    return PIPE_ERROR_OUT_OF_MEMORY;
686 }
687 
688 /**
689  * Issue the SVGA3D commands to define a new shader.
690  * \param variant  contains the shader tokens, etc.  The result->id field will
691  *                 be set here.
692  */
693 enum pipe_error
svga_define_shader(struct svga_context * svga,struct svga_shader_variant * variant)694 svga_define_shader(struct svga_context *svga,
695                    struct svga_shader_variant *variant)
696 {
697    unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
698    enum pipe_error ret;
699 
700    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);
701 
702    variant->id = UTIL_BITMASK_INVALID_INDEX;
703 
704    if (svga_have_gb_objects(svga)) {
705       if (svga_have_vgpu10(svga))
706          ret = define_gb_shader_vgpu10(svga, variant, codeLen);
707       else
708          ret = define_gb_shader_vgpu9(svga, variant, codeLen);
709    }
710    else {
711       /* Allocate an integer ID for the shader */
712       variant->id = util_bitmask_add(svga->shader_id_bm);
713       if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
714          ret = PIPE_ERROR_OUT_OF_MEMORY;
715          goto done;
716       }
717 
718       /* Issue SVGA3D device command to define the shader */
719       ret = SVGA3D_DefineShader(svga->swc,
720                                 variant->id,
721                                 variant->type,
722                                 variant->tokens,
723                                 codeLen);
724       if (ret != PIPE_OK) {
725          /* free the ID */
726          assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
727          util_bitmask_clear(svga->shader_id_bm, variant->id);
728          variant->id = UTIL_BITMASK_INVALID_INDEX;
729       }
730    }
731 
732 done:
733    SVGA_STATS_TIME_POP(svga_sws(svga));
734    return ret;
735 }
736 
737 
738 /**
739  * Issue the SVGA3D commands to set/bind a shader.
740  * \param result  the shader to bind.
741  */
742 enum pipe_error
svga_set_shader(struct svga_context * svga,SVGA3dShaderType type,struct svga_shader_variant * variant)743 svga_set_shader(struct svga_context *svga,
744                 SVGA3dShaderType type,
745                 struct svga_shader_variant *variant)
746 {
747    enum pipe_error ret;
748    unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;
749 
750    assert(type == SVGA3D_SHADERTYPE_VS ||
751           type == SVGA3D_SHADERTYPE_GS ||
752           type == SVGA3D_SHADERTYPE_PS ||
753           type == SVGA3D_SHADERTYPE_HS ||
754           type == SVGA3D_SHADERTYPE_DS ||
755           type == SVGA3D_SHADERTYPE_CS);
756 
757    if (svga_have_gb_objects(svga)) {
758       struct svga_winsys_gb_shader *gbshader =
759          variant ? variant->gb_shader : NULL;
760 
761       if (svga_have_vgpu10(svga))
762          ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
763       else
764          ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
765    }
766    else {
767       ret = SVGA3D_SetShader(svga->swc, type, id);
768    }
769 
770    return ret;
771 }
772 
773 
774 struct svga_shader_variant *
svga_new_shader_variant(struct svga_context * svga,enum pipe_shader_type type)775 svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
776 {
777    struct svga_shader_variant *variant;
778 
779    switch (type) {
780    case PIPE_SHADER_FRAGMENT:
781       variant = CALLOC(1, sizeof(struct svga_fs_variant));
782       break;
783    case PIPE_SHADER_GEOMETRY:
784       variant = CALLOC(1, sizeof(struct svga_gs_variant));
785       break;
786    case PIPE_SHADER_VERTEX:
787       variant = CALLOC(1, sizeof(struct svga_vs_variant));
788       break;
789    case PIPE_SHADER_TESS_EVAL:
790       variant = CALLOC(1, sizeof(struct svga_tes_variant));
791       break;
792    case PIPE_SHADER_TESS_CTRL:
793       variant = CALLOC(1, sizeof(struct svga_tcs_variant));
794       break;
795    case PIPE_SHADER_COMPUTE:
796       variant = CALLOC(1, sizeof(struct svga_cs_variant));
797       break;
798    default:
799       return NULL;
800    }
801 
802    if (variant) {
803       variant->type = svga_shader_type(type);
804       svga->hud.num_shaders++;
805    }
806    return variant;
807 }
808 
809 
810 void
svga_destroy_shader_variant(struct svga_context * svga,struct svga_shader_variant * variant)811 svga_destroy_shader_variant(struct svga_context *svga,
812                             struct svga_shader_variant *variant)
813 {
814    if (svga_have_gb_objects(svga) && variant->gb_shader) {
815       if (svga_have_vgpu10(svga)) {
816          struct svga_winsys_context *swc = svga->swc;
817          swc->shader_destroy(swc, variant->gb_shader);
818          SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
819          util_bitmask_clear(svga->shader_id_bm, variant->id);
820       }
821       else {
822          struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
823          sws->shader_destroy(sws, variant->gb_shader);
824       }
825       variant->gb_shader = NULL;
826    }
827    else {
828       if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
829          SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
830                                                variant->type));
831          util_bitmask_clear(svga->shader_id_bm, variant->id);
832       }
833    }
834 
835    FREE(variant->signature);
836    FREE((unsigned *)variant->tokens);
837    FREE(variant);
838 
839    svga->hud.num_shaders--;
840 }
841 
842 /*
843  * Rebind shaders.
844  * Called at the beginning of every new command buffer to ensure that
845  * shaders are properly paged-in. Instead of sending the SetShader
846  * command, this function sends a private allocation command to
847  * page in a shader. This avoids emitting redundant state to the device
848  * just to page in a resource.
849  */
850 enum pipe_error
svga_rebind_shaders(struct svga_context * svga)851 svga_rebind_shaders(struct svga_context *svga)
852 {
853    struct svga_winsys_context *swc = svga->swc;
854    struct svga_hw_draw_state *hw = &svga->state.hw_draw;
855    enum pipe_error ret;
856 
857    assert(svga_have_vgpu10(svga));
858 
859    /**
860     * If the underlying winsys layer does not need resource rebinding,
861     * just clear the rebind flags and return.
862     */
863    if (swc->resource_rebind == NULL) {
864       svga->rebind.flags.vs = 0;
865       svga->rebind.flags.gs = 0;
866       svga->rebind.flags.fs = 0;
867       svga->rebind.flags.tcs = 0;
868       svga->rebind.flags.tes = 0;
869 
870       return PIPE_OK;
871    }
872 
873    if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
874       ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
875       if (ret != PIPE_OK)
876          return ret;
877    }
878    svga->rebind.flags.vs = 0;
879 
880    if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
881       ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
882       if (ret != PIPE_OK)
883          return ret;
884    }
885    svga->rebind.flags.gs = 0;
886 
887    if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
888       ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
889       if (ret != PIPE_OK)
890          return ret;
891    }
892    svga->rebind.flags.fs = 0;
893 
894    if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
895       ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
896       if (ret != PIPE_OK)
897          return ret;
898    }
899    svga->rebind.flags.tcs = 0;
900 
901    if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
902       ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
903       if (ret != PIPE_OK)
904          return ret;
905    }
906    svga->rebind.flags.tes = 0;
907 
908    return PIPE_OK;
909 }
910 
911 
912 /**
913  * Helper function to create a shader object.
914  */
915 struct svga_shader *
svga_create_shader(struct pipe_context * pipe,const struct pipe_shader_state * templ,enum pipe_shader_type stage,unsigned shader_structlen)916 svga_create_shader(struct pipe_context *pipe,
917                    const struct pipe_shader_state *templ,
918                    enum pipe_shader_type stage,
919                    unsigned shader_structlen)
920 {
921    struct svga_context *svga = svga_context(pipe);
922    struct svga_shader *shader = CALLOC(1, shader_structlen);
923    nir_shader *nir = (nir_shader *)templ->ir.nir;
924 
925    if (shader == NULL)
926       return NULL;
927 
928    shader->id = svga->debug.shader_id++;
929    shader->stage = stage;
930 
931    if (templ->type == PIPE_SHADER_IR_NIR) {
932       /* nir_to_tgsi requires lowered images */
933       NIR_PASS_V(nir, gl_nir_lower_images, false);
934    }
935    shader->tokens = pipe_shader_state_to_tgsi_tokens(pipe->screen, templ);
936    shader->type = PIPE_SHADER_IR_TGSI;
937 
938    /* Collect basic info of the shader */
939    svga_tgsi_scan_shader(shader);
940 
941    /* check for any stream output declarations */
942    if (templ->stream_output.num_outputs) {
943       shader->stream_output = svga_create_stream_output(svga, shader,
944                                                         &templ->stream_output);
945    }
946 
947    return shader;
948 }
949 
950 
951 /**
952  * Helper function to compile a shader.
953  * Depending on the shader IR type, it calls the corresponding
954  * compile shader function.
955  */
956 enum pipe_error
svga_compile_shader(struct svga_context * svga,struct svga_shader * shader,const struct svga_compile_key * key,struct svga_shader_variant ** out_variant)957 svga_compile_shader(struct svga_context *svga,
958                     struct svga_shader *shader,
959                     const struct svga_compile_key *key,
960                     struct svga_shader_variant **out_variant)
961 {
962    struct svga_shader_variant *variant = NULL;
963    enum pipe_error ret = PIPE_ERROR;
964 
965    if (shader->type == PIPE_SHADER_IR_TGSI) {
966       variant = svga_tgsi_compile_shader(svga, shader, key);
967    } else {
968       debug_printf("Unexpected nir shader\n");
969       assert(0);
970    }
971 
972    if (variant == NULL) {
973       if (shader->get_dummy_shader != NULL) {
974          debug_printf("Failed to compile shader, using dummy shader.\n");
975          variant = shader->get_dummy_shader(svga, shader, key);
976       }
977    }
978    else if (svga_shader_too_large(svga, variant)) {
979       /* too big, use shader */
980       if (shader->get_dummy_shader != NULL) {
981          debug_printf("Shader too large (%u bytes), using dummy shader.\n",
982                       (unsigned)(variant->nr_tokens
983                                  * sizeof(variant->tokens[0])));
984 
985          /* Free the too-large variant */
986          svga_destroy_shader_variant(svga, variant);
987 
988          /* Use simple pass-through shader instead */
989          variant = shader->get_dummy_shader(svga, shader, key);
990       }
991    }
992 
993    if (variant == NULL)
994       return PIPE_ERROR;
995 
996    ret = svga_define_shader(svga, variant);
997    if (ret != PIPE_OK) {
998       svga_destroy_shader_variant(svga, variant);
999       return ret;
1000    }
1001 
1002    *out_variant = variant;
1003 
1004    /* insert variant at head of linked list */
1005    variant->next = shader->variants;
1006    shader->variants = variant;
1007 
1008    return PIPE_OK;
1009 }
1010