/*
 * Copyright (c) 2008-2024 Broadcom. All Rights Reserved.
 * The term “Broadcom” refers to Broadcom Inc.
 * and/or its subsidiaries.
 * SPDX-License-Identifier: MIT
 */

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
#include "svga_tgsi.h"
#include "svga_resource_texture.h"
#include "VGPU10ShaderTokens.h"

#include "compiler/nir/nir.h"
#include "compiler/glsl/gl_nir.h"
#include "nir/nir_to_tgsi.h"


/**
 * This bit isn't really used anywhere.  It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
 * is used.
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}
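
/*
 * Illustrative example (not part of the driver): for a shader whose
 * inputs are GENERIC[0] and GENERIC[2], the function above returns a
 * mask with bits 0 and 2 set:
 *
 *    uint64_t mask = svga_get_generic_inputs_mask(info);
 *    assert(mask == 0x5);
 */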


/**
 * Scan the shader info to return a bitmask of the written generic
 * outputs; a written TGSI_SEMANTIC_FOG output is folded in as
 * FOG_GENERIC_BIT.
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}


/**
 * Given a mask of used generic variables (as returned by the above functions)
 * fill in a table which maps those indexes to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 * GENERIC[3] are used.  Since texcoord[0] is reserved, the mapping starts
 * at 1 and the remap_table will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining entries are initialized to -1 and are assigned lazily
 * by svga_remap_generic_index() as needed.
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note texcoord[0] is reserved so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generic_mask */
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}
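
/*
 * Worked example (illustrative only): with generics_mask = 0xa
 * (binary 1010, i.e. GENERIC[1] and GENERIC[3] used), the loop visits
 * bit 1 and then bit 3, and since count starts at 1 (texcoord[0] is
 * reserved):
 *
 *    int8_t table[MAX_GENERIC_VARYING];
 *    svga_remap_generics(0xa, table);
 *    assert(table[1] == 1 && table[3] == 2);  // all other entries == -1
 */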


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer.  If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output.  Just allocate
 * the next free value in that case.  Alternately, we could cull
 * VS instructions that write to that register, or replace the register
 * with a dummy temp register.
 * XXX TODO: we should do one of the latter, as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input.  Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}
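
/*
 * Continuing the example above (illustrative only): with table[1] == 1,
 * table[3] == 2 and every other entry == -1,
 *
 *    svga_remap_generic_index(table, 3);  // returns 2 (already mapped)
 *    svga_remap_generic_index(table, 5);  // max entry is 2, so this
 *                                         // assigns table[5] = 3 and
 *                                         // returns 3
 */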

static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_W,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_000X[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXX[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXX1[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};
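
/*
 * The tables above are indexed by a view's per-channel swizzle to yield
 * the effective swizzle recorded in the shader key.  For example
 * (illustrative only):
 *
 *    assert(copy_alpha[PIPE_SWIZZLE_W] == PIPE_SWIZZLE_W);  // identity
 *    assert(set_alpha[PIPE_SWIZZLE_W] == PIPE_SWIZZLE_1);   // alpha forced to 1
 */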


static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
{
   if (util_format_is_unorm(format))
      return VGPU10_RETURN_TYPE_UNORM;
   else if (util_format_is_snorm(format))
      return VGPU10_RETURN_TYPE_SNORM;
   else if (util_format_is_pure_uint(format))
      return VGPU10_RETURN_TYPE_UINT;
   else if (util_format_is_pure_sint(format))
      return VGPU10_RETURN_TYPE_SINT;
   else if (util_format_is_float(format))
      return VGPU10_RETURN_TYPE_FLOAT;
   else
      return VGPU10_RETURN_TYPE_MAX;
}
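
/*
 * For instance (illustrative only):
 *
 *    vgpu10_return_type(PIPE_FORMAT_R8G8B8A8_UNORM); // VGPU10_RETURN_TYPE_UNORM
 *    vgpu10_return_type(PIPE_FORMAT_R32_UINT);       // VGPU10_RETURN_TYPE_UINT
 *    vgpu10_return_type(PIPE_FORMAT_R32_FLOAT);      // VGPU10_RETURN_TYPE_FLOAT
 */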


/**
 * A helper function to return true if the specified format
 * is supported by the sample_c instruction.
 */
static bool
isValidSampleCFormat(enum pipe_format format)
{
   return util_format_is_depth_or_stencil(format);
}


/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state.  This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader_type,
                            const struct svga_shader *shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;
   unsigned sampler_slots = 0;

   assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views doesn't match,
    * loop over the larger of the two counts.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
                            svga->curr.num_samplers[shader_type]);

   if (!shader->info.uses_samplers)
      key->num_textures = 0;

   key->num_samplers = 0;

   /* Set sampler_state_mapping only if GL43 is supported and either
    * the number of samplers exceeds the SVGA limit or the sampler state
    * mapping env var is set.
    */
   bool sampler_state_mapping =
      svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);

   key->sampler_state_mapping =
      key->num_textures && sampler_state_mapping ? 1 : 0;

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
      const struct svga_sampler_state
         *sampler = svga->curr.sampler[shader_type][i];

      if (view) {
         assert(view->texture);

         enum pipe_texture_target target = view->target;
         assert(target < (1 << 4)); /* texture_target:4 */

         key->tex[i].target = target;
         key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
         key->tex[i].sampler_view = 1;

         /* 1D/2D array textures with one slice and cube map array textures
          * with one cube are treated as non-arrays by the SVGA3D device.
          * Set the is_array flag only if we know that we have more than 1
          * element.  This will be used to select shader instruction/resource
          * types during shader translation.
          */
         switch (target) {
         case PIPE_TEXTURE_1D_ARRAY:
         case PIPE_TEXTURE_2D_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 1;
            break;
         case PIPE_TEXTURE_CUBE_ARRAY:
            key->tex[i].is_array = view->texture->array_size > 6;
            break;
         default:
            ; /* nothing / silence compiler warning */
         }

         assert(view->texture->nr_samples < (1 << 5)); /* 5-bit field */
         key->tex[i].num_samples = view->texture->nr_samples;

         const enum pipe_swizzle *swizzle_tab;
         if (target == PIPE_BUFFER) {
            SVGA3dSurfaceFormat svga_format;
            unsigned tf_flags;

            assert(view->texture->target == PIPE_BUFFER);

            /* Apply any special swizzle mask for the view format if needed */

            svga_translate_texture_buffer_view_format(view->format,
                                                      &svga_format, &tf_flags);
            if (tf_flags & TF_000X)
               swizzle_tab = set_000X;
            else if (tf_flags & TF_XXXX)
               swizzle_tab = set_XXXX;
            else if (tf_flags & TF_XXX1)
               swizzle_tab = set_XXX1;
            else if (tf_flags & TF_XXXY)
               swizzle_tab = set_XXXY;
            else
               swizzle_tab = copy_alpha;
         }
         else {
            /* If we have a non-alpha view into an svga3d surface with an
             * alpha channel, then explicitly set the alpha channel to 1
             * when sampling.  Note that we need to check the
             * actual device format to cover also imported surface cases.
             */
            swizzle_tab =
               (!util_format_has_alpha(view->format) &&
                svga_texture_device_format_has_alpha(view->texture)) ?
               set_alpha : copy_alpha;

            if (view->texture->format == PIPE_FORMAT_DXT1_RGB ||
                view->texture->format == PIPE_FORMAT_DXT1_SRGB)
               swizzle_tab = set_alpha;

            if (view->format == PIPE_FORMAT_X24S8_UINT ||
                view->format == PIPE_FORMAT_X32_S8X24_UINT)
               swizzle_tab = set_YYYY;

            /* Save the compare function as we need to handle
             * depth compare in the shader.
             */
            key->tex[i].compare_mode = sampler->compare_mode;
            key->tex[i].compare_func = sampler->compare_func;

            /* Set the compare_in_shader bit if the view format
             * is not a supported format for shadow compare.
             * In this case, we'll do the comparison in the shader.
             */
            if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
                !isValidSampleCFormat(view->format)) {
               key->tex[i].compare_in_shader = true;
            }
         }

         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
      }
      else {
         key->tex[i].sampler_view = 0;
      }

      if (sampler) {
         if (!sampler->normalized_coords) {
            if (view) {
               assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
               key->tex[i].width_height_idx = idx++;
            }
            key->tex[i].unnormalized = true;
            ++key->num_unnormalized_coords;

            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
               key->tex[i].texel_bias = true;
            }
         }

         if (!sampler_state_mapping) {
            /* Use the same index if sampler state mapping is not supported */
            key->tex[i].sampler_index = i;
            key->num_samplers = i + 1;
         }
         else {

            /* The current samplers list can have redundant entries.
             * To keep the number of bound samplers within the max
             * limit supported by SVGA, we'll recreate the list with
             * unique sampler state objects only.
             */

            /* Check to see if this sampler is already on the list.
             * If so, set the sampler index of this sampler to the
             * same sampler index.
             */
            for (unsigned j = 0; j <= i; j++) {
               if (svga->curr.sampler[shader_type][j] == sampler) {

                  if (!(sampler_slots & (1 << j))) {

                     /* if this sampler is not added to the new list yet,
                      * set its sampler index to the next sampler index,
                      * increment the sampler count, and mark this
                      * sampler as added to the list.
                      */

                     unsigned next_index =
                        MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1);

                     key->tex[i].sampler_index = next_index;
                     key->num_samplers = next_index + 1;

                     if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
                        /* reserve one slot for the alternate sampler */
                        key->num_samplers++;
                     }

                     sampler_slots |= (1 << j);
                  }
                  else {
                     key->tex[i].sampler_index = key->tex[j].sampler_index;
                  }
                  break;
               }
            }
         }
      }
   }

   if (svga_have_gl43(svga)) {

      /* Save info about which constant buffers are to be viewed
       * as srv raw buffers in the shader key.
       */
      if (shader->info.const_buffers_declared &
          svga->state.raw_constbufs[shader_type]) {
         key->raw_constbufs = svga->state.raw_constbufs[shader_type] &
                              shader->info.const_buffers_declared;
      }

      /* beginning index for srv for raw constant buffers */
      key->srv_raw_constbuf_index = PIPE_MAX_SAMPLERS;

      if (shader->info.uses_images || shader->info.uses_hw_atomic ||
          shader->info.uses_shader_buffers) {

         /* Save the uavSpliceIndex which is the index used for the first uav
          * in the draw pipeline.  For compute, uavSpliceIndex is always 0.
          */
         if (shader_type != PIPE_SHADER_COMPUTE)
            key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;

         unsigned uav_splice_index = key->uav_splice_index;

         /* Also get the texture data type to be used in the uav declaration */
         const struct svga_image_view *cur_image_view =
            &svga->curr.image_views[shader_type][0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
              i++, cur_image_view++) {

            struct pipe_resource *resource = cur_image_view->desc.resource;

            if (resource) {
               key->images[i].return_type =
                  svga_get_texture_datatype(cur_image_view->desc.format);

               key->images[i].is_array = resource->array_size > 1;

               /* Save the image resource target in the shader key because
                * for single layer image view, the resource target in the
                * tgsi shader is changed to a different texture target.
                */
               key->images[i].resource_target = resource->target;
               if (resource->target == PIPE_TEXTURE_3D ||
                   resource->target == PIPE_TEXTURE_1D_ARRAY ||
                   resource->target == PIPE_TEXTURE_2D_ARRAY ||
                   resource->target == PIPE_TEXTURE_CUBE ||
                   resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
                  key->images[i].is_single_layer =
                     cur_image_view->desc.u.tex.first_layer ==
                     cur_image_view->desc.u.tex.last_layer;
               }

               key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
            }
            else
               key->images[i].uav_index = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_sbuf =
            &svga->curr.shader_buffers[shader_type][0];

         /* Save info about which shader buffers are to be viewed
          * as srv raw buffers in the shader key.
          */
         if (shader->info.shader_buffers_declared &
             svga->state.raw_shaderbufs[shader_type]) {
            key->raw_shaderbufs = svga->state.raw_shaderbufs[shader_type] &
                                  shader->info.shader_buffers_declared;
            key->srv_raw_shaderbuf_index = key->srv_raw_constbuf_index +
                                           SVGA_MAX_CONST_BUFS;
         }

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
              i++, cur_sbuf++) {

            if (cur_sbuf->resource && (!(key->raw_shaderbufs & (1 << i))))
               key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
            else
               key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         const struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];

         for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
              i++, cur_buf++) {

            if (cur_buf->resource)
               key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
            else
               key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
         }

         key->image_size_used = shader->info.uses_image_size;
      }

   }

   key->clamp_vertex_color = svga->curr.rast ?
      svga->curr.rast->templ.clamp_vertex_color : 0;
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}
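
/*
 * Typical caller pattern (sketch; the state-emit code elsewhere in this
 * driver follows this shape):
 *
 *    variant = svga_search_shader_key(shader, &key);
 *    if (!variant) {
 *       ret = svga_compile_shader(svga, shader, &key, &variant);
 *       if (ret != PIPE_OK)
 *          return ret;
 *    }
 */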

/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}

/**
 * Helper function to define a gb shader for a non-vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * Kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, variant->type,
                                           variant->tokens, codeLen);

   svga->hud.shader_mem_used += codeLen;

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;
   unsigned len = codeLen + variant->signatureLen;

   /**
    * Shaders in a VGPU10-enabled device reside in the device COTable.
    * The SVGA driver will allocate an integer ID for the shader and
    * issue DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, variant->type,
                                           variant->tokens, codeLen,
                                           variant->signature,
                                           variant->signatureLen);

   svga->hud.shader_mem_used += len;

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader & DXBindShader
    * commands in the same command buffer.  So let's send both
    * commands in one command reservation.  If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, variant->type,
                                           len);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}

/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant contains the shader tokens, etc.  The variant->id field
 *                will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                variant->type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}


/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind (may be NULL to unbind).
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS ||
          type == SVGA3D_SHADERTYPE_HS ||
          type == SVGA3D_SHADERTYPE_DS ||
          type == SVGA3D_SHADERTYPE_CS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}


struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga, enum pipe_shader_type type)
{
   struct svga_shader_variant *variant;

   switch (type) {
   case PIPE_SHADER_FRAGMENT:
      variant = CALLOC(1, sizeof(struct svga_fs_variant));
      break;
   case PIPE_SHADER_GEOMETRY:
      variant = CALLOC(1, sizeof(struct svga_gs_variant));
      break;
   case PIPE_SHADER_VERTEX:
      variant = CALLOC(1, sizeof(struct svga_vs_variant));
      break;
   case PIPE_SHADER_TESS_EVAL:
      variant = CALLOC(1, sizeof(struct svga_tes_variant));
      break;
   case PIPE_SHADER_TESS_CTRL:
      variant = CALLOC(1, sizeof(struct svga_tcs_variant));
      break;
   case PIPE_SHADER_COMPUTE:
      variant = CALLOC(1, sizeof(struct svga_cs_variant));
      break;
   default:
      return NULL;
   }

   if (variant) {
      variant->type = svga_shader_type(type);
      svga->hud.num_shaders++;
   }
   return variant;
}


void
svga_destroy_shader_variant(struct svga_context *svga,
                            struct svga_shader_variant *variant)
{
   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         SVGA_RETRY(svga, SVGA3D_DestroyShader(svga->swc, variant->id,
                                               variant->type));
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE(variant->signature);
   FREE((unsigned *)variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;
}

/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged-in.  Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader.  This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;
      svga->rebind.flags.tcs = 0;
      svga->rebind.flags.tes = 0;

      return PIPE_OK;
   }

   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   if (svga->rebind.flags.tcs && hw->tcs && hw->tcs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tcs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tcs = 0;

   if (svga->rebind.flags.tes && hw->tes && hw->tes->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->tes->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.tes = 0;

   return PIPE_OK;
}


/**
 * Helper function to create a shader object.
 */
struct svga_shader *
svga_create_shader(struct pipe_context *pipe,
                   const struct pipe_shader_state *templ,
                   enum pipe_shader_type stage,
                   unsigned shader_structlen)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_shader *shader = CALLOC(1, shader_structlen);
   nir_shader *nir = (nir_shader *)templ->ir.nir;

   if (shader == NULL)
      return NULL;

   shader->id = svga->debug.shader_id++;
   shader->stage = stage;

   if (templ->type == PIPE_SHADER_IR_NIR) {
      /* nir_to_tgsi requires lowered images */
      NIR_PASS_V(nir, gl_nir_lower_images, false);
   }
   shader->tokens = pipe_shader_state_to_tgsi_tokens(pipe->screen, templ);
   shader->type = PIPE_SHADER_IR_TGSI;

   /* Collect basic info of the shader */
   svga_tgsi_scan_shader(shader);

   /* check for any stream output declarations */
   if (templ->stream_output.num_outputs) {
      shader->stream_output = svga_create_stream_output(svga, shader,
                                                        &templ->stream_output);
   }

   return shader;
}


/**
 * Helper function to compile a shader.
 * Depending on the shader IR type, it calls the corresponding
 * compile shader function.
 */
enum pipe_error
svga_compile_shader(struct svga_context *svga,
                    struct svga_shader *shader,
                    const struct svga_compile_key *key,
                    struct svga_shader_variant **out_variant)
{
   struct svga_shader_variant *variant = NULL;
   enum pipe_error ret = PIPE_ERROR;

   if (shader->type == PIPE_SHADER_IR_TGSI) {
      variant = svga_tgsi_compile_shader(svga, shader, key);
   } else {
      debug_printf("Unexpected nir shader\n");
      assert(0);
   }

   if (variant == NULL) {
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Failed to compile shader, using dummy shader.\n");
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }
   else if (svga_shader_too_large(svga, variant)) {
      /* too big, use dummy shader */
      if (shader->get_dummy_shader != NULL) {
         debug_printf("Shader too large (%u bytes), using dummy shader.\n",
                      (unsigned)(variant->nr_tokens
                                 * sizeof(variant->tokens[0])));

         /* Free the too-large variant */
         svga_destroy_shader_variant(svga, variant);

         /* Use simple pass-through shader instead */
         variant = shader->get_dummy_shader(svga, shader, key);
      }
   }

   if (variant == NULL)
      return PIPE_ERROR;

   ret = svga_define_shader(svga, variant);
   if (ret != PIPE_OK) {
      svga_destroy_shader_variant(svga, variant);
      return ret;
   }

   *out_variant = variant;

   /* insert variant at head of linked list */
   variant->next = shader->variants;
   shader->variants = variant;

   return PIPE_OK;
}
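
/*
 * Lifecycle sketch (illustrative): a shader object created with
 * svga_create_shader() is compiled into per-key variants on demand,
 * and each variant is defined to the device before use:
 *
 *    struct svga_shader_variant *variant =
 *       svga_search_shader_key(shader, &key);
 *    if (!variant)
 *       svga_compile_shader(svga, shader, &key, &variant);
 *    svga_set_shader(svga, variant->type, variant);
 *
 * Variants are eventually released with svga_destroy_shader_variant().
 */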