1 /*
2 * Copyright (c) 1998-2024 Broadcom. All Rights Reserved.
3 * The term “Broadcom” refers to Broadcom Inc.
4 * and/or its subsidiaries.
5 * SPDX-License-Identifier: MIT
6 */
7
8 /**
9 * @file svga_tgsi_vgpu10.c
10 *
11 * TGSI -> VGPU10 shader translation.
12 *
13 * \author Mingcheng Chen
14 * \author Brian Paul
15 */
16
17 #include "util/compiler.h"
18 #include "pipe/p_shader_tokens.h"
19 #include "pipe/p_defines.h"
20 #include "tgsi/tgsi_dump.h"
21 #include "tgsi/tgsi_info.h"
22 #include "tgsi/tgsi_parse.h"
23 #include "tgsi/tgsi_scan.h"
24 #include "tgsi/tgsi_strings.h"
25 #include "tgsi/tgsi_two_side.h"
26 #include "tgsi/tgsi_aa_point.h"
27 #include "tgsi/tgsi_util.h"
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "util/u_bitmask.h"
31 #include "util/u_debug.h"
32 #include "util/u_pstipple.h"
33
34 #include "svga_context.h"
35 #include "svga_debug.h"
36 #include "svga_link.h"
37 #include "svga_shader.h"
38 #include "svga_tgsi.h"
39
40 #include "VGPU10ShaderTokens.h"
41
42
43 #define INVALID_INDEX 99999
44 #define MAX_INTERNAL_TEMPS 4
45 #define MAX_SYSTEM_VALUES 4
46 #define MAX_IMMEDIATE_COUNT \
47 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
48 #define MAX_TEMP_ARRAYS 64 /* Enough? */
49
50 /**
51 * Clipping is complicated. There's four different cases which we
52 * handle during VS/GS shader translation:
53 */
/**
 * Clipping is complicated.  There's four different cases which we
 * handle during VS/GS shader translation (see determine_clipping_mode()):
 */
enum clipping_mode
{
   CLIP_NONE,      /**< No clipping enabled */
   CLIP_LEGACY,    /**< The shader has no clipping declarations or code but
                    * one or more user-defined clip planes are enabled.  We
                    * generate extra code to emit clip distances.
                    */
   CLIP_DISTANCE,  /**< The shader already declares clip distance output
                    * registers and has code to write to them.
                    */
   CLIP_VERTEX     /**< The shader declares a clip vertex output register and
                    * has code that writes to the register.  We convert the
                    * clipvertex position into one or more clip distances.
                    */
};
69
70
71 /* Shader signature info */
72 struct svga_shader_signature
73 {
74 SVGA3dDXShaderSignatureHeader header;
75 SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
76 SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
77 SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
78 };
79
80 static inline void
set_shader_signature_entry(SVGA3dDXShaderSignatureEntry * e,unsigned index,SVGA3dDXSignatureSemanticName sgnName,unsigned mask,SVGA3dDXSignatureRegisterComponentType compType,SVGA3dDXSignatureMinPrecision minPrecision)81 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
82 unsigned index,
83 SVGA3dDXSignatureSemanticName sgnName,
84 unsigned mask,
85 SVGA3dDXSignatureRegisterComponentType compType,
86 SVGA3dDXSignatureMinPrecision minPrecision)
87 {
88 e->registerIndex = index;
89 e->semanticName = sgnName;
90 e->mask = mask;
91 e->componentType = compType;
92 e->minPrecision = minPrecision;
93 };
94
/**
 * Table mapping TGSI semantic names to SVGA signature semantic names.
 * Indexed by enum tgsi_semantic, so the entry order must track that enum;
 * the spot-check asserts in map_tgsi_semantic_to_sgn_name() guard against
 * skew.  TGSI semantics with no SVGA counterpart map to
 * SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED.
 */
static const SVGA3dDXSignatureSemanticName
tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
   SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
};
143
144
145 /**
146 * Map tgsi semantic name to SVGA signature semantic name
147 */
148 static inline SVGA3dDXSignatureSemanticName
map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)149 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
150 {
151 assert(name < TGSI_SEMANTIC_COUNT);
152
153 /* Do a few asserts here to spot check the mapping */
154 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
155 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
156 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
157 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
158 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
159 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
160
161 return tgsi_semantic_to_sgn_name[name];
162 }
163
/* Whether, and in what state, the current instruction needs to be
 * emitted again (used for raw-buffer instruction re-emission).
 */
enum reemit_mode {
   REEMIT_FALSE = 0,
   REEMIT_TRUE = 1,
   REEMIT_IN_PROGRESS = 2
};
169
/* Describes a temporary register used to hold a raw (constant) buffer
 * source operand; see raw_buf_tmp[] in struct svga_shader_emitter_v10.
 */
struct svga_raw_buf_tmp {
   bool indirect;
   unsigned buffer_index:8;
   unsigned element_index:8;
   unsigned element_rel:8;
};
176
/**
 * All the state needed while translating one TGSI shader into a
 * VGPU10 token stream.  One of these is allocated per translation
 * (see alloc_emitter()/free_emitter()).
 */
struct svga_shader_emitter_v10
{
   /* The token output buffer */
   unsigned size;             /**< allocated buffer size in bytes */
   char *buf;                 /**< start of the output buffer */
   char *ptr;                 /**< current write position in the buffer */

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;             /**< PIPE_SHADER_x */
   unsigned version;          /**< Either 40, 41, 50 or 51 at this time */

   unsigned cur_tgsi_token;   /**< current tgsi token position */
   unsigned inst_start_token;
   bool discard_instruction;  /**< throw away current instruction? */
   bool reemit_instruction;   /**< reemit current instruction */
   bool reemit_tgsi_instruction;  /**< reemit current tgsi instruction */
   bool skip_instruction;     /**< skip current instruction */
   bool use_sampler_state_mapping; /* use sampler state mapping */
   enum reemit_mode reemit_rawbuf_instruction;

   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   double (*immediates_dbl)[2];
   unsigned num_immediates;   /**< Number of immediates emitted */
   unsigned common_immediate_pos[20];  /**< literals for common immediates */
   unsigned num_common_immediates;
   unsigned num_immediates_emitted;
   unsigned num_new_immediates;  /** pending immediates to be declared */
   unsigned immediates_block_start_token;
   unsigned immediates_block_next_token;

   unsigned num_outputs;      /**< include any extra outputs */
                              /**  The first extra output is reserved for
                               *   non-adjusted vertex position for
                               *   stream output purpose
                               */

   /* Temporary Registers */
   unsigned num_shader_temps; /**< num of temps used by original shader */
   unsigned internal_temp_count;  /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
      bool initialized;
   } temp_map[VGPU10_MAX_TEMPS];  /**< arrayId, element */

   unsigned initialize_temp_index;

   /** Number of constants used by original shader for each constant buffer.
    * The size should probably always match with that of svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Raw constant buffers */
   unsigned raw_buf_srv_start_index;  /* starting srv index for raw buffers */
   unsigned raw_bufs;                 /* raw buffers bitmask */
   unsigned raw_buf_tmp_index;        /* starting temp index for raw buffers */
   unsigned raw_buf_cur_tmp_index;    /* current temp index for raw buffers */
   struct svga_raw_buf_tmp raw_buf_tmp[3];  /* temporaries for raw buf source */

   /* Samplers */
   unsigned num_samplers;
   bool sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
   uint8_t sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
   uint8_t sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */

   /* Images */
   unsigned num_images;
   unsigned image_mask;
   struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
   unsigned image_size_index;  /* starting index to cbuf for image size */

   /* Shader buffers */
   unsigned num_shader_bufs;
   unsigned raw_shaderbuf_srv_start_index;  /* starting srv index for raw shaderbuf */
   uint64_t raw_shaderbufs;                 /* raw shader buffers bitmask */

   /* HW atomic buffers */
   unsigned num_atomic_bufs;
   unsigned atomic_bufs_mask;
   unsigned max_atomic_counter_index;
   VGPU10_OPCODE_TYPE cur_atomic_opcode;    /* current atomic opcode */

   bool uav_declared;  /* True if uav is declared */

   /* Index Range declaration */
   struct {
      unsigned start_index;
      unsigned count;
      bool required;
      unsigned operandType;
      unsigned size;
      unsigned dim;
   } index_range;

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   uint8_t output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   uint8_t system_value_indexes[MAX_SYSTEM_VALUES];

   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
      unsigned prescale_scale_index, prescale_trans_index;
      unsigned num_prescale;  /* number of prescale factor in const buf */
      unsigned viewport_index;
      unsigned need_prescale:1;
      unsigned have_prescale:1;
   } vposition;

   /* Shader limits */
   unsigned max_vs_inputs;
   unsigned max_vs_outputs;
   unsigned max_gs_inputs;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      unsigned vertex_id_bias_index;
      unsigned vertex_id_sys_index;
      unsigned vertex_id_tmp_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned num_color_outputs;
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index; /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;
      unsigned pstipple_sampler_state_index;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */

      unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */

      unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
      unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */

      /** TGSI index of sample mask input sys value */
      unsigned sample_mask_in_sys_index;

      /* layer */
      unsigned layer_input_index;  /**< TGSI index of layer */
      unsigned layer_imm_index;    /**< immediate for default layer 0 */

      bool forceEarlyDepthStencil;  /**< true if Early Depth stencil test is enabled */
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type;  /**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology;  /**< VGPU10 primitive topology */
      unsigned input_size;         /**< size of input arrays */
      unsigned prim_id_index;      /**< primitive id register index */
      unsigned max_out_vertices;   /**< maximum number of output vertices */
      unsigned invocations;
      unsigned invocation_id_sys_index;

      unsigned viewport_index_out_index;
      unsigned viewport_index_tmp_index;
   } gs;

   /* For tessellation control shaders only */
   struct {
      unsigned vertices_per_patch_index;  /**< vertices_per_patch system value index */
      unsigned imm_index;                 /**< immediate for tcs */
      unsigned invocation_id_sys_index;   /**< invocation id */
      unsigned invocation_id_tmp_index;
      unsigned instruction_token_pos;     /* token pos for the first instruction */
      unsigned control_point_input_index; /* control point input register index */
      unsigned control_point_addr_index;  /* control point input address register */
      unsigned control_point_out_index;   /* control point output register index */
      unsigned control_point_tmp_index;   /* control point temporary register */
      unsigned control_point_out_count;   /* control point output count */
      bool control_point_phase;           /* true if in control point phase */
      bool fork_phase_add_signature;      /* true if needs to add signature in fork phase */
      unsigned patch_generic_out_count;   /* per-patch generic output count */
      unsigned patch_generic_out_index;   /* per-patch generic output register index*/
      unsigned patch_generic_tmp_index;   /* per-patch generic temporary register index*/
      unsigned prim_id_index;             /* primitive id */
      struct {
         unsigned out_index;   /* real tessinner output register */
         unsigned temp_index;  /* tessinner temp register */
         unsigned tgsi_index;  /* tgsi tessinner output register */
      } inner;
      struct {
         unsigned out_index;   /* real tessouter output register */
         unsigned temp_index;  /* tessouter temp register */
         unsigned tgsi_index;  /* tgsi tessouter output register */
      } outer;
   } tcs;

   /* For tessellation evaluation shaders only */
   struct {
      enum mesa_prim prim_mode;
      enum pipe_tess_spacing spacing;
      bool vertices_order_cw;
      bool point_mode;
      unsigned tesscoord_sys_index;
      unsigned swizzle_max;
      unsigned prim_id_index;  /* primitive id */
      struct {
         unsigned in_index;    /* real tessinner input register */
         unsigned temp_index;  /* tessinner temp register */
         unsigned tgsi_index;  /* tgsi tessinner input register */
      } inner;
      struct {
         unsigned in_index;    /* real tessouter input register */
         unsigned temp_index;  /* tessouter temp register */
         unsigned tgsi_index;  /* tgsi tessouter input register */
      } outer;
   } tes;

   /* For compute shaders only */
   struct {
      unsigned block_width;      /* thread group size in x dimension */
      unsigned block_height;     /* thread group size in y dimension */
      unsigned block_depth;      /* thread group size in z dimension */
      unsigned thread_id_index;  /* thread id tgsi index */
      unsigned block_id_index;   /* block id tgsi index */
      bool shared_memory_declared;  /* set if shared memory is declared */
      struct {
         unsigned tgsi_index;  /* grid size tgsi index */
         unsigned imm_index;   /* grid size imm index */
      } grid_size;
   } cs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode;
   unsigned clip_dist_out_index;  /**< clip distance output register index */
   unsigned clip_dist_tmp_index;  /**< clip distance temporary register */
   unsigned clip_dist_so_index;   /**< clip distance shadow copy */

   /** Index of temporary holding the clipvertex coordinate */
   unsigned clip_vertex_out_index;  /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index;  /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   bool constant_color_output;

   bool uses_flat_interp;

   unsigned reserved_token;  /* index to the reserved token */
   bool uses_precise_qualifier;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /** Which texture units are doing shadow comparison in the shader code */
   unsigned shadow_compare_units;

   /* VS/TCS/TES/GS/FS Linkage info */
   struct shader_linkage linkage;
   struct tgsi_shader_info *prevShaderInfo;

   /* Shader signature */
   struct svga_shader_signature signature;

   bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */

   /* For util_debug_message */
   struct util_debug_callback svga_debug_callback;

   /* current loop depth in shader */
   unsigned current_loop_depth;
};
474
475
476 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
477 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
478 static bool emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
479 static bool emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
480 static bool emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
481 static bool emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
482 static bool emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
483 static bool emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
484 static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
485 static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
486 static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
487 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
488
489 static bool
490 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
491
492 static bool
493 emit_vertex(struct svga_shader_emitter_v10 *emit,
494 const struct tgsi_full_instruction *inst);
495
496 static bool
497 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
498 unsigned inst_number,
499 const struct tgsi_full_instruction *inst);
500
501 static void
502 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
503 VGPU10_OPCODE_TYPE opcodeType,
504 VGPU10_OPERAND_TYPE operandType,
505 VGPU10_OPERAND_INDEX_DIMENSION dim,
506 unsigned index, unsigned size,
507 VGPU10_SYSTEM_NAME name,
508 VGPU10_OPERAND_NUM_COMPONENTS numComp,
509 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
510 unsigned usageMask,
511 VGPU10_INTERPOLATION_MODE interpMode,
512 bool addSignature,
513 SVGA3dDXSignatureSemanticName sgnName);
514
515 static bool
516 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
517 unsigned inst_number,
518 const struct tgsi_full_instruction *inst);
519
520 static void
521 create_temp_array(struct svga_shader_emitter_v10 *emit,
522 unsigned arrayID, unsigned first, unsigned count,
523 unsigned startIndex);
524
525 static char err_buf[128];
526
527 static bool
expand(struct svga_shader_emitter_v10 * emit)528 expand(struct svga_shader_emitter_v10 *emit)
529 {
530 char *new_buf;
531 unsigned newsize = emit->size * 2;
532
533 if (emit->buf != err_buf)
534 new_buf = REALLOC(emit->buf, emit->size, newsize);
535 else
536 new_buf = NULL;
537
538 if (!new_buf) {
539 emit->ptr = err_buf;
540 emit->buf = err_buf;
541 emit->size = sizeof(err_buf);
542 return false;
543 }
544
545 emit->size = newsize;
546 emit->ptr = new_buf + (emit->ptr - emit->buf);
547 emit->buf = new_buf;
548 return true;
549 }
550
551 /**
552 * Create and initialize a new svga_shader_emitter_v10 object.
553 */
554 static struct svga_shader_emitter_v10 *
alloc_emitter(void)555 alloc_emitter(void)
556 {
557 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
558
559 if (!emit)
560 return NULL;
561
562 /* to initialize the output buffer */
563 emit->size = 512;
564 if (!expand(emit)) {
565 FREE(emit);
566 return NULL;
567 }
568 return emit;
569 }
570
571 /**
572 * Free an svga_shader_emitter_v10 object.
573 */
574 static void
free_emitter(struct svga_shader_emitter_v10 * emit)575 free_emitter(struct svga_shader_emitter_v10 *emit)
576 {
577 assert(emit);
578 FREE(emit->buf); /* will be NULL if translation succeeded */
579 FREE(emit);
580 }
581
582 static inline bool
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)583 reserve(struct svga_shader_emitter_v10 *emit,
584 unsigned nr_dwords)
585 {
586 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
587 if (!expand(emit))
588 return false;
589 }
590
591 return true;
592 }
593
594 static bool
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)595 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
596 {
597 if (!reserve(emit, 1))
598 return false;
599
600 *(uint32 *)emit->ptr = dword;
601 emit->ptr += sizeof dword;
602 return true;
603 }
604
605 static bool
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)606 emit_dwords(struct svga_shader_emitter_v10 *emit,
607 const uint32 *dwords,
608 unsigned nr)
609 {
610 if (!reserve(emit, nr))
611 return false;
612
613 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
614 emit->ptr += nr * sizeof *dwords;
615 return true;
616 }
617
618 /** Return the number of tokens in the emitter's buffer */
619 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)620 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
621 {
622 return (emit->ptr - emit->buf) / sizeof(unsigned);
623 }
624
625
626 /**
627 * Check for register overflow. If we overflow we'll set an
628 * error flag. This function can be called for register declarations
629 * or use as src/dst instruction operands.
630 * \param type register type. One of VGPU10_OPERAND_TYPE_x
631 or VGPU10_OPCODE_DCL_x
632 * \param index the register index
633 */
634 static void
check_register_index(struct svga_shader_emitter_v10 * emit,unsigned operandType,unsigned index)635 check_register_index(struct svga_shader_emitter_v10 *emit,
636 unsigned operandType, unsigned index)
637 {
638 bool overflow_before = emit->register_overflow;
639
640 switch (operandType) {
641 case VGPU10_OPERAND_TYPE_TEMP:
642 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
643 case VGPU10_OPCODE_DCL_TEMPS:
644 if (index >= VGPU10_MAX_TEMPS) {
645 emit->register_overflow = true;
646 }
647 break;
648 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
649 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
650 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
651 emit->register_overflow = true;
652 }
653 break;
654 case VGPU10_OPERAND_TYPE_INPUT:
655 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
656 case VGPU10_OPCODE_DCL_INPUT:
657 case VGPU10_OPCODE_DCL_INPUT_SGV:
658 case VGPU10_OPCODE_DCL_INPUT_SIV:
659 case VGPU10_OPCODE_DCL_INPUT_PS:
660 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
661 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
662 if ((emit->unit == PIPE_SHADER_VERTEX &&
663 index >= emit->max_vs_inputs) ||
664 (emit->unit == PIPE_SHADER_GEOMETRY &&
665 index >= emit->max_gs_inputs) ||
666 (emit->unit == PIPE_SHADER_FRAGMENT &&
667 index >= VGPU10_MAX_FS_INPUTS) ||
668 (emit->unit == PIPE_SHADER_TESS_CTRL &&
669 index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
670 (emit->unit == PIPE_SHADER_TESS_EVAL &&
671 index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
672 emit->register_overflow = true;
673 }
674 break;
675 case VGPU10_OPERAND_TYPE_OUTPUT:
676 case VGPU10_OPCODE_DCL_OUTPUT:
677 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
678 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
679 /* Note: we are skipping two output indices in tcs for
680 * tessinner/outer levels. Implementation will not exceed
681 * number of output count but it allows index to go beyond
682 * VGPU11_MAX_HS_OUTPUTS.
683 * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
684 */
685 if ((emit->unit == PIPE_SHADER_VERTEX &&
686 index >= emit->max_vs_outputs) ||
687 (emit->unit == PIPE_SHADER_GEOMETRY &&
688 index >= VGPU10_MAX_GS_OUTPUTS) ||
689 (emit->unit == PIPE_SHADER_FRAGMENT &&
690 index >= VGPU10_MAX_FS_OUTPUTS) ||
691 (emit->unit == PIPE_SHADER_TESS_CTRL &&
692 index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
693 (emit->unit == PIPE_SHADER_TESS_EVAL &&
694 index >= VGPU11_MAX_DS_OUTPUTS)) {
695 emit->register_overflow = true;
696 }
697 break;
698 case VGPU10_OPERAND_TYPE_SAMPLER:
699 case VGPU10_OPCODE_DCL_SAMPLER:
700 if (index >= VGPU10_MAX_SAMPLERS) {
701 emit->register_overflow = true;
702 }
703 break;
704 case VGPU10_OPERAND_TYPE_RESOURCE:
705 case VGPU10_OPCODE_DCL_RESOURCE:
706 if (index >= VGPU10_MAX_RESOURCES) {
707 emit->register_overflow = true;
708 }
709 break;
710 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
711 if (index >= MAX_IMMEDIATE_COUNT) {
712 emit->register_overflow = true;
713 }
714 break;
715 case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
716 case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
717 case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
718 case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
719 case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
720 case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
721 case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
722 case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
723 /* nothing */
724 break;
725 default:
726 assert(0);
727 ; /* nothing */
728 }
729
730 if (emit->register_overflow && !overflow_before) {
731 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
732 operandType, index);
733 }
734 }
735
736
737 /**
738 * Examine misc state to determine the clipping mode.
739 */
740 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)741 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
742 {
743 /* num_written_clipdistance in the shader info for tessellation
744 * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
745 * is not defined for this shader. So we go through all the output declarations
746 * to set the num_written_clipdistance. This is just to determine the
747 * clipping mode.
748 */
749 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
750 unsigned i;
751 for (i = 0; i < emit->info.num_outputs; i++) {
752 if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
753 emit->info.num_written_clipdistance =
754 4 * (emit->info.output_semantic_index[i] + 1);
755 }
756 }
757 }
758
759 if (emit->info.num_written_clipdistance > 0) {
760 emit->clip_mode = CLIP_DISTANCE;
761 }
762 else if (emit->info.writes_clipvertex) {
763 emit->clip_mode = CLIP_VERTEX;
764 }
765 else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
766 /*
767 * Only the last shader in the vertex processing stage needs to
768 * handle the legacy clip mode.
769 */
770 emit->clip_mode = CLIP_LEGACY;
771 }
772 else {
773 emit->clip_mode = CLIP_NONE;
774 }
775 }
776
777
778 /**
779 * For clip distance register declarations and clip distance register
780 * writes we need to mask the declaration usage or instruction writemask
781 * (respectively) against the set of the really-enabled clipping planes.
782 *
783 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
784 * has a VS that writes to all 8 clip distance registers, but the plane enable
785 * flags are a subset of that.
786 *
787 * This function is used to apply the plane enable flags to the register
788 * declaration or instruction writemask.
789 *
790 * \param writemask the declaration usage mask or instruction writemask
791 * \param clip_reg_index which clip plane register is being declared/written.
792 * The legal values are 0 and 1 (two clip planes per
793 * register, for a total of 8 clip planes)
794 */
795 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)796 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
797 unsigned writemask, unsigned clip_reg_index)
798 {
799 unsigned shift;
800
801 assert(clip_reg_index < 2);
802
803 /* four clip planes per clip register: */
804 shift = clip_reg_index * 4;
805 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
806
807 return writemask;
808 }
809
810
811 /**
812 * Translate gallium shader type into VGPU10 type.
813 */
814 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)815 translate_shader_type(unsigned type)
816 {
817 switch (type) {
818 case PIPE_SHADER_VERTEX:
819 return VGPU10_VERTEX_SHADER;
820 case PIPE_SHADER_GEOMETRY:
821 return VGPU10_GEOMETRY_SHADER;
822 case PIPE_SHADER_FRAGMENT:
823 return VGPU10_PIXEL_SHADER;
824 case PIPE_SHADER_TESS_CTRL:
825 return VGPU10_HULL_SHADER;
826 case PIPE_SHADER_TESS_EVAL:
827 return VGPU10_DOMAIN_SHADER;
828 case PIPE_SHADER_COMPUTE:
829 return VGPU10_COMPUTE_SHADER;
830 default:
831 assert(!"Unexpected shader type");
832 return VGPU10_VERTEX_SHADER;
833 }
834 }
835
836
837 /**
838 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
839 * Note: we only need to translate the opcodes for "simple" instructions,
840 * as seen below. All other opcodes are handled/translated specially.
841 */
842 static VGPU10_OPCODE_TYPE
translate_opcode(enum tgsi_opcode opcode)843 translate_opcode(enum tgsi_opcode opcode)
844 {
845 switch (opcode) {
846 case TGSI_OPCODE_MOV:
847 return VGPU10_OPCODE_MOV;
848 case TGSI_OPCODE_MUL:
849 return VGPU10_OPCODE_MUL;
850 case TGSI_OPCODE_ADD:
851 return VGPU10_OPCODE_ADD;
852 case TGSI_OPCODE_DP3:
853 return VGPU10_OPCODE_DP3;
854 case TGSI_OPCODE_DP4:
855 return VGPU10_OPCODE_DP4;
856 case TGSI_OPCODE_MIN:
857 return VGPU10_OPCODE_MIN;
858 case TGSI_OPCODE_MAX:
859 return VGPU10_OPCODE_MAX;
860 case TGSI_OPCODE_MAD:
861 return VGPU10_OPCODE_MAD;
862 case TGSI_OPCODE_SQRT:
863 return VGPU10_OPCODE_SQRT;
864 case TGSI_OPCODE_FRC:
865 return VGPU10_OPCODE_FRC;
866 case TGSI_OPCODE_FLR:
867 return VGPU10_OPCODE_ROUND_NI;
868 case TGSI_OPCODE_FSEQ:
869 return VGPU10_OPCODE_EQ;
870 case TGSI_OPCODE_FSGE:
871 return VGPU10_OPCODE_GE;
872 case TGSI_OPCODE_FSNE:
873 return VGPU10_OPCODE_NE;
874 case TGSI_OPCODE_DDX:
875 return VGPU10_OPCODE_DERIV_RTX;
876 case TGSI_OPCODE_DDY:
877 return VGPU10_OPCODE_DERIV_RTY;
878 case TGSI_OPCODE_RET:
879 return VGPU10_OPCODE_RET;
880 case TGSI_OPCODE_DIV:
881 return VGPU10_OPCODE_DIV;
882 case TGSI_OPCODE_IDIV:
883 return VGPU10_OPCODE_VMWARE;
884 case TGSI_OPCODE_DP2:
885 return VGPU10_OPCODE_DP2;
886 case TGSI_OPCODE_BRK:
887 return VGPU10_OPCODE_BREAK;
888 case TGSI_OPCODE_IF:
889 return VGPU10_OPCODE_IF;
890 case TGSI_OPCODE_ELSE:
891 return VGPU10_OPCODE_ELSE;
892 case TGSI_OPCODE_ENDIF:
893 return VGPU10_OPCODE_ENDIF;
894 case TGSI_OPCODE_CEIL:
895 return VGPU10_OPCODE_ROUND_PI;
896 case TGSI_OPCODE_I2F:
897 return VGPU10_OPCODE_ITOF;
898 case TGSI_OPCODE_NOT:
899 return VGPU10_OPCODE_NOT;
900 case TGSI_OPCODE_TRUNC:
901 return VGPU10_OPCODE_ROUND_Z;
902 case TGSI_OPCODE_SHL:
903 return VGPU10_OPCODE_ISHL;
904 case TGSI_OPCODE_AND:
905 return VGPU10_OPCODE_AND;
906 case TGSI_OPCODE_OR:
907 return VGPU10_OPCODE_OR;
908 case TGSI_OPCODE_XOR:
909 return VGPU10_OPCODE_XOR;
910 case TGSI_OPCODE_CONT:
911 return VGPU10_OPCODE_CONTINUE;
912 case TGSI_OPCODE_EMIT:
913 return VGPU10_OPCODE_EMIT;
914 case TGSI_OPCODE_ENDPRIM:
915 return VGPU10_OPCODE_CUT;
916 case TGSI_OPCODE_BGNLOOP:
917 return VGPU10_OPCODE_LOOP;
918 case TGSI_OPCODE_ENDLOOP:
919 return VGPU10_OPCODE_ENDLOOP;
920 case TGSI_OPCODE_ENDSUB:
921 return VGPU10_OPCODE_RET;
922 case TGSI_OPCODE_NOP:
923 return VGPU10_OPCODE_NOP;
924 case TGSI_OPCODE_END:
925 return VGPU10_OPCODE_RET;
926 case TGSI_OPCODE_F2I:
927 return VGPU10_OPCODE_FTOI;
928 case TGSI_OPCODE_IMAX:
929 return VGPU10_OPCODE_IMAX;
930 case TGSI_OPCODE_IMIN:
931 return VGPU10_OPCODE_IMIN;
932 case TGSI_OPCODE_UDIV:
933 case TGSI_OPCODE_UMOD:
934 case TGSI_OPCODE_MOD:
935 return VGPU10_OPCODE_UDIV;
936 case TGSI_OPCODE_IMUL_HI:
937 return VGPU10_OPCODE_IMUL;
938 case TGSI_OPCODE_INEG:
939 return VGPU10_OPCODE_INEG;
940 case TGSI_OPCODE_ISHR:
941 return VGPU10_OPCODE_ISHR;
942 case TGSI_OPCODE_ISGE:
943 return VGPU10_OPCODE_IGE;
944 case TGSI_OPCODE_ISLT:
945 return VGPU10_OPCODE_ILT;
946 case TGSI_OPCODE_F2U:
947 return VGPU10_OPCODE_FTOU;
948 case TGSI_OPCODE_UADD:
949 return VGPU10_OPCODE_IADD;
950 case TGSI_OPCODE_U2F:
951 return VGPU10_OPCODE_UTOF;
952 case TGSI_OPCODE_UCMP:
953 return VGPU10_OPCODE_MOVC;
954 case TGSI_OPCODE_UMAD:
955 return VGPU10_OPCODE_UMAD;
956 case TGSI_OPCODE_UMAX:
957 return VGPU10_OPCODE_UMAX;
958 case TGSI_OPCODE_UMIN:
959 return VGPU10_OPCODE_UMIN;
960 case TGSI_OPCODE_UMUL:
961 case TGSI_OPCODE_UMUL_HI:
962 return VGPU10_OPCODE_UMUL;
963 case TGSI_OPCODE_USEQ:
964 return VGPU10_OPCODE_IEQ;
965 case TGSI_OPCODE_USGE:
966 return VGPU10_OPCODE_UGE;
967 case TGSI_OPCODE_USHR:
968 return VGPU10_OPCODE_USHR;
969 case TGSI_OPCODE_USLT:
970 return VGPU10_OPCODE_ULT;
971 case TGSI_OPCODE_USNE:
972 return VGPU10_OPCODE_INE;
973 case TGSI_OPCODE_SWITCH:
974 return VGPU10_OPCODE_SWITCH;
975 case TGSI_OPCODE_CASE:
976 return VGPU10_OPCODE_CASE;
977 case TGSI_OPCODE_DEFAULT:
978 return VGPU10_OPCODE_DEFAULT;
979 case TGSI_OPCODE_ENDSWITCH:
980 return VGPU10_OPCODE_ENDSWITCH;
981 case TGSI_OPCODE_FSLT:
982 return VGPU10_OPCODE_LT;
983 case TGSI_OPCODE_ROUND:
984 return VGPU10_OPCODE_ROUND_NE;
985 /* Begin SM5 opcodes */
986 case TGSI_OPCODE_F2D:
987 return VGPU10_OPCODE_FTOD;
988 case TGSI_OPCODE_D2F:
989 return VGPU10_OPCODE_DTOF;
990 case TGSI_OPCODE_DMUL:
991 return VGPU10_OPCODE_DMUL;
992 case TGSI_OPCODE_DADD:
993 return VGPU10_OPCODE_DADD;
994 case TGSI_OPCODE_DMAX:
995 return VGPU10_OPCODE_DMAX;
996 case TGSI_OPCODE_DMIN:
997 return VGPU10_OPCODE_DMIN;
998 case TGSI_OPCODE_DSEQ:
999 return VGPU10_OPCODE_DEQ;
1000 case TGSI_OPCODE_DSGE:
1001 return VGPU10_OPCODE_DGE;
1002 case TGSI_OPCODE_DSLT:
1003 return VGPU10_OPCODE_DLT;
1004 case TGSI_OPCODE_DSNE:
1005 return VGPU10_OPCODE_DNE;
1006 case TGSI_OPCODE_IBFE:
1007 return VGPU10_OPCODE_IBFE;
1008 case TGSI_OPCODE_UBFE:
1009 return VGPU10_OPCODE_UBFE;
1010 case TGSI_OPCODE_BFI:
1011 return VGPU10_OPCODE_BFI;
1012 case TGSI_OPCODE_BREV:
1013 return VGPU10_OPCODE_BFREV;
1014 case TGSI_OPCODE_POPC:
1015 return VGPU10_OPCODE_COUNTBITS;
1016 case TGSI_OPCODE_LSB:
1017 return VGPU10_OPCODE_FIRSTBIT_LO;
1018 case TGSI_OPCODE_IMSB:
1019 return VGPU10_OPCODE_FIRSTBIT_SHI;
1020 case TGSI_OPCODE_UMSB:
1021 return VGPU10_OPCODE_FIRSTBIT_HI;
1022 case TGSI_OPCODE_INTERP_CENTROID:
1023 return VGPU10_OPCODE_EVAL_CENTROID;
1024 case TGSI_OPCODE_INTERP_SAMPLE:
1025 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
1026 case TGSI_OPCODE_BARRIER:
1027 return VGPU10_OPCODE_SYNC;
1028 case TGSI_OPCODE_DFMA:
1029 return VGPU10_OPCODE_DFMA;
1030 case TGSI_OPCODE_FMA:
1031 return VGPU10_OPCODE_MAD;
1032
1033 /* DX11.1 Opcodes */
1034 case TGSI_OPCODE_DDIV:
1035 return VGPU10_OPCODE_DDIV;
1036 case TGSI_OPCODE_DRCP:
1037 return VGPU10_OPCODE_DRCP;
1038 case TGSI_OPCODE_D2I:
1039 return VGPU10_OPCODE_DTOI;
1040 case TGSI_OPCODE_D2U:
1041 return VGPU10_OPCODE_DTOU;
1042 case TGSI_OPCODE_I2D:
1043 return VGPU10_OPCODE_ITOD;
1044 case TGSI_OPCODE_U2D:
1045 return VGPU10_OPCODE_UTOD;
1046
1047 case TGSI_OPCODE_SAMPLE_POS:
1048 /* Note: we never actually get this opcode because there's no GLSL
1049 * function to query multisample resource sample positions. There's
1050 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
1051 * position of the current sample in the render target.
1052 */
1053 FALLTHROUGH;
1054 case TGSI_OPCODE_SAMPLE_INFO:
1055 /* NOTE: we never actually get this opcode because the GLSL compiler
1056 * implements the gl_NumSamples variable with a simple constant in the
1057 * constant buffer.
1058 */
1059 FALLTHROUGH;
1060 default:
1061 assert(!"Unexpected TGSI opcode in translate_opcode()");
1062 return VGPU10_OPCODE_NOP;
1063 }
1064 }
1065
1066
1067 /**
1068 * Translate a TGSI register file type into a VGPU10 operand type.
1069 * \param array is the TGSI_FILE_TEMPORARY register an array?
1070 */
1071 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,bool array)1072 translate_register_file(enum tgsi_file_type file, bool array)
1073 {
1074 switch (file) {
1075 case TGSI_FILE_CONSTANT:
1076 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1077 case TGSI_FILE_INPUT:
1078 return VGPU10_OPERAND_TYPE_INPUT;
1079 case TGSI_FILE_OUTPUT:
1080 return VGPU10_OPERAND_TYPE_OUTPUT;
1081 case TGSI_FILE_TEMPORARY:
1082 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1083 : VGPU10_OPERAND_TYPE_TEMP;
1084 case TGSI_FILE_IMMEDIATE:
1085 /* all immediates are 32-bit values at this time so
1086 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1087 */
1088 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1089 case TGSI_FILE_SAMPLER:
1090 return VGPU10_OPERAND_TYPE_SAMPLER;
1091 case TGSI_FILE_SYSTEM_VALUE:
1092 return VGPU10_OPERAND_TYPE_INPUT;
1093
1094 /* XXX TODO more cases to finish */
1095
1096 default:
1097 assert(!"Bad tgsi register file!");
1098 return VGPU10_OPERAND_TYPE_NULL;
1099 }
1100 }
1101
1102
1103 /**
1104 * Emit a null dst register
1105 */
1106 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)1107 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1108 {
1109 VGPU10OperandToken0 operand;
1110
1111 operand.value = 0;
1112 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1113 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1114
1115 emit_dword(emit, operand.value);
1116 }
1117
1118
1119 /**
1120 * If the given register is a temporary, return the array ID.
1121 * Else return zero.
1122 */
1123 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1124 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1125 enum tgsi_file_type file, unsigned index)
1126 {
1127 if (file == TGSI_FILE_TEMPORARY) {
1128 return emit->temp_map[index].arrayId;
1129 }
1130 else {
1131 return 0;
1132 }
1133 }
1134
1135
1136 /**
1137 * If the given register is a temporary, convert the index from a TGSI
1138 * TEMPORARY index to a VGPU10 temp index.
1139 */
1140 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1141 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1142 enum tgsi_file_type file, unsigned index)
1143 {
1144 if (file == TGSI_FILE_TEMPORARY) {
1145 return emit->temp_map[index].index;
1146 }
1147 else {
1148 return index;
1149 }
1150 }
1151
1152
1153 /**
1154 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1155 * Note: the operandType field must already be initialized.
1156 * \param file the register file being accessed
1157 * \param indirect using indirect addressing of the register file?
1158 * \param index2D if true, 2-D indexing is being used (const or temp registers)
1159 * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
1160 */
1161 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,bool indirect,bool index2D,bool indirect2D)1162 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1163 VGPU10OperandToken0 operand0,
1164 enum tgsi_file_type file,
1165 bool indirect,
1166 bool index2D, bool indirect2D)
1167 {
1168 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1169 VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1170
1171 /*
1172 * Compute index dimensions
1173 */
1174 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1175 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1176 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1177 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1178 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1179 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1180 /* there's no swizzle for in-line immediates */
1181 indexDim = VGPU10_OPERAND_INDEX_0D;
1182 assert(operand0.selectionMode == 0);
1183 }
1184 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1185 indexDim = VGPU10_OPERAND_INDEX_0D;
1186 }
1187 else {
1188 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1189 }
1190
1191 /*
1192 * Compute index representation(s) (immediate vs relative).
1193 */
1194 if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1195 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1196 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1197
1198 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1199 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1200 }
1201 else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1202 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1203 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1204
1205 index1Rep = 0;
1206 }
1207 else {
1208 index0Rep = 0;
1209 index1Rep = 0;
1210 }
1211
1212 operand0.indexDimension = indexDim;
1213 operand0.index0Representation = index0Rep;
1214 operand0.index1Representation = index1Rep;
1215
1216 return operand0;
1217 }
1218
1219
1220 /**
1221 * Emit the operand for expressing an address register for indirect indexing.
1222 * Note that the address register is really just a temp register.
1223 * \param addr_reg_index which address register to use
1224 */
1225 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)1226 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1227 unsigned addr_reg_index)
1228 {
1229 unsigned tmp_reg_index;
1230 VGPU10OperandToken0 operand0;
1231
1232 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1233
1234 tmp_reg_index = emit->address_reg_index[addr_reg_index];
1235
1236 /* operand0 is a simple temporary register, selecting one component */
1237 operand0.value = 0;
1238 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1239 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1240 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1241 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1242 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1243 operand0.swizzleX = 0;
1244 operand0.swizzleY = 1;
1245 operand0.swizzleZ = 2;
1246 operand0.swizzleW = 3;
1247
1248 emit_dword(emit, operand0.value);
1249 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1250 }
1251
1252
/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 *
 * In many cases we substitute the declared output register with a
 * temporary register (or a special 0-D VGPU10 operand) so that helper
 * code emitted later (e.g. post_helper(), emit_clip_distance_instructions())
 * can read back or post-process the written value.
 *
 * \param emit the emitter context
 * \param reg the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const bool indirect = reg->Register.Indirect;
   unsigned tempArrayId = get_temp_array_id(emit, file, index);
   bool index2d = reg->Register.Dimension || tempArrayId > 0;
   VGPU10OperandToken0 operand0;

   /* Writing to a temp marks it initialized; this feeds
    * need_temp_reg_initialization().
    */
   if (file == TGSI_FILE_TEMPORARY) {
      emit->temp_map[index].initialized = true;
   }

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY ||
          emit->unit == PIPE_SHADER_TESS_EVAL) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_COLOR &&
                  emit->key.clamp_vertex_color) {

            /* set the saturate modifier of the instruction
             * to clamp the vertex color.
             */
            VGPU10OpcodeToken0 *token =
               (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
            token->saturate = true;
         }
         else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
                  emit->gs.viewport_index_out_index != INVALID_INDEX) {
            /* GS viewport index is redirected to a temporary */
            file = TGSI_FILE_TEMPORARY;
            index = emit->gs.viewport_index_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register: a 0-D single-component
             * operand with no register index, so emit it and return early.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
            /* Fragment sample mask output: also a 0-D operand */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read it
             * in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
      else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         if (index == emit->tcs.inner.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with a temp.  We store it in a
             * temporary for now so that it can be stored into the
             * appropriate registers by post_helper() in the patch
             * constant phase.
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction = true;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.inner.temp_index;
            }
         }
         else if (index == emit->tcs.outer.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with a temp (same as the inner
             * tess level handling above).
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction = true;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.outer.temp_index;
            }
         }
         else if (index >= emit->tcs.patch_generic_out_index &&
                  index < (emit->tcs.patch_generic_out_index +
                           emit->tcs.patch_generic_out_count)) {
            if (emit->tcs.control_point_phase) {
               /* Discard writing into generic patch constant outputs in
                  control point phase */
               emit->discard_instruction = true;
            }
            else {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.patch_generic_tmp_index +
                          (index - emit->tcs.patch_generic_out_index);
                  /**
                   * Temporaries for patch constant data can be done
                   * as indexable temporaries.
                   */
                  tempArrayId = get_temp_array_id(emit, file, index);
                  index2d = tempArrayId > 0;

                  emit->reemit_instruction = false;
               }
               else {
                  /* If per-patch outputs are read in the shader, we
                   * reemit the instruction and store results in temporaries
                   * during the patch constant phase. */
                  if (emit->info.reads_perpatch_outputs) {
                     emit->reemit_instruction = true;
                  }
               }
            }
         }
         else if (reg->Register.Dimension) {
            /* Only control point outputs are declared 2D in tgsi */
            if (emit->tcs.control_point_phase) {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  index2d = false;
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.control_point_tmp_index +
                          (index - emit->tcs.control_point_out_index);
                  emit->reemit_instruction = false;
               }
               else {
                  /* The mapped control point outputs are 1-D */
                  index2d = false;
                  if (emit->info.reads_pervertex_outputs) {
                     /* If per-vertex outputs are read in the shader, we
                      * reemit the instruction and store results in
                      * temporaries during the control point phase. */
                     emit->reemit_instruction = true;
                  }
               }

               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                   emit->clip_dist_tmp_index != INVALID_INDEX) {
                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
                   * We store the clip distance in a temporary first, then
                   * we'll copy it to the shadow copy and to CLIPDIST with the
                   * enabled planes mask in emit_clip_distance_instructions().
                   */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_dist_tmp_index + sem_index;
               }
               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
                  /* replace the CLIPVERTEX output register with a temporary */
                  assert(emit->clip_mode == CLIP_VERTEX);
                  assert(sem_index == 0);
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_vertex_tmp_index;
               }
            }
            else {
               /* Discard writing into control point outputs in
                  patch constant phase */
               emit->discard_instruction = true;
            }
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, false);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}
1510
1511
1512 /**
1513 * Check if temporary register needs to be initialize when
1514 * shader is not using indirect addressing for temporary and uninitialized
1515 * temporary is not used in loop. In these two scenarios, we cannot
1516 * determine if temporary is initialized or not.
1517 */
1518 static bool
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1519 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1520 unsigned index)
1521 {
1522 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1523 && emit->current_loop_depth == 0) {
1524 if (!emit->temp_map[index].initialized &&
1525 emit->temp_map[index].index < emit->num_shader_temps) {
1526 return true;
1527 }
1528 }
1529
1530 return false;
1531 }
1532
1533
1534 /**
1535 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1536 * In quite a few cases, we do register substitution. For example, if
1537 * the TGSI register is the front/back-face register, we replace that with
1538 * a temp register containing a value we computed earlier.
1539 */
1540 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1541 emit_src_register(struct svga_shader_emitter_v10 *emit,
1542 const struct tgsi_full_src_register *reg)
1543 {
1544 enum tgsi_file_type file = reg->Register.File;
1545 unsigned index = reg->Register.Index;
1546 bool indirect = reg->Register.Indirect;
1547 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1548 bool index2d = (reg->Register.Dimension ||
1549 tempArrayId > 0 ||
1550 file == TGSI_FILE_CONSTANT);
1551 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1552 bool indirect2d = reg->Dimension.Indirect;
1553 unsigned swizzleX = reg->Register.SwizzleX;
1554 unsigned swizzleY = reg->Register.SwizzleY;
1555 unsigned swizzleZ = reg->Register.SwizzleZ;
1556 unsigned swizzleW = reg->Register.SwizzleW;
1557 const bool absolute = reg->Register.Absolute;
1558 const bool negate = reg->Register.Negate;
1559 VGPU10OperandToken0 operand0;
1560 VGPU10OperandToken1 operand1;
1561
1562 operand0.value = operand1.value = 0;
1563
1564 if (emit->unit == PIPE_SHADER_FRAGMENT){
1565 if (file == TGSI_FILE_INPUT) {
1566 if (index == emit->fs.face_input_index) {
1567 /* Replace INPUT[FACE] with TEMP[FACE] */
1568 file = TGSI_FILE_TEMPORARY;
1569 index = emit->fs.face_tmp_index;
1570 }
1571 else if (index == emit->fs.fragcoord_input_index) {
1572 /* Replace INPUT[POSITION] with TEMP[POSITION] */
1573 file = TGSI_FILE_TEMPORARY;
1574 index = emit->fs.fragcoord_tmp_index;
1575 }
1576 else if (index == emit->fs.layer_input_index) {
1577 /* Replace INPUT[LAYER] with zero.x */
1578 file = TGSI_FILE_IMMEDIATE;
1579 index = emit->fs.layer_imm_index;
1580 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1581 }
1582 else {
1583 /* We remap fragment shader inputs to that FS input indexes
1584 * match up with VS/GS output indexes.
1585 */
1586 index = emit->linkage.input_map[index];
1587 }
1588 }
1589 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1590 if (index == emit->fs.sample_pos_sys_index) {
1591 assert(emit->version >= 41);
1592 /* Current sample position is in a temp register */
1593 file = TGSI_FILE_TEMPORARY;
1594 index = emit->fs.sample_pos_tmp_index;
1595 }
1596 else if (index == emit->fs.sample_mask_in_sys_index) {
1597 /* Emitted as vCoverage0.x */
1598 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1599 * elements where s is the maximum number of color samples supported
1600 * by the implementation.
1601 */
1602 operand0.value = 0;
1603 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1604 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1605 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1606 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1607 emit_dword(emit, operand0.value);
1608 return;
1609 }
1610 else {
1611 /* Map the TGSI system value to a VGPU10 input register */
1612 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1613 file = TGSI_FILE_INPUT;
1614 index = emit->system_value_indexes[index];
1615 }
1616 }
1617 }
1618 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1619 if (file == TGSI_FILE_INPUT) {
1620 if (index == emit->gs.prim_id_index) {
1621 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1622 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1623 }
1624 index = emit->linkage.input_map[index];
1625 }
1626 else if (file == TGSI_FILE_SYSTEM_VALUE &&
1627 index == emit->gs.invocation_id_sys_index) {
1628 /* Emitted as vGSInstanceID0.x */
1629 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1630 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1631 index = 0;
1632 }
1633 }
1634 else if (emit->unit == PIPE_SHADER_VERTEX) {
1635 if (file == TGSI_FILE_INPUT) {
1636 /* if input is adjusted... */
1637 if ((emit->key.vs.adjust_attrib_w_1 |
1638 emit->key.vs.adjust_attrib_itof |
1639 emit->key.vs.adjust_attrib_utof |
1640 emit->key.vs.attrib_is_bgra |
1641 emit->key.vs.attrib_puint_to_snorm |
1642 emit->key.vs.attrib_puint_to_uscaled |
1643 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1644 file = TGSI_FILE_TEMPORARY;
1645 index = emit->vs.adjusted_input[index];
1646 }
1647 }
1648 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1649 if (index == emit->vs.vertex_id_sys_index &&
1650 emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1651 file = TGSI_FILE_TEMPORARY;
1652 index = emit->vs.vertex_id_tmp_index;
1653 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1654 }
1655 else {
1656 /* Map the TGSI system value to a VGPU10 input register */
1657 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1658 file = TGSI_FILE_INPUT;
1659 index = emit->system_value_indexes[index];
1660 }
1661 }
1662 }
1663 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1664
1665 if (file == TGSI_FILE_SYSTEM_VALUE) {
1666 if (index == emit->tcs.vertices_per_patch_index) {
1667 /**
1668 * if source register is the system value for vertices_per_patch,
1669 * replace it with the immediate.
1670 */
1671 file = TGSI_FILE_IMMEDIATE;
1672 index = emit->tcs.imm_index;
1673 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1674 }
1675 else if (index == emit->tcs.invocation_id_sys_index) {
1676 if (emit->tcs.control_point_phase) {
1677 /**
1678 * Emitted as vOutputControlPointID.x
1679 */
1680 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1681 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1682 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1683 operand0.mask = 0;
1684 emit_dword(emit, operand0.value);
1685 return;
1686 }
1687 else {
1688 /* There is no control point ID input declaration in
1689 * the patch constant phase in hull shader.
1690 * Since for now we are emitting all instructions in
1691 * the patch constant phase, we are replacing the
1692 * control point ID reference with the immediate 0.
1693 */
1694 file = TGSI_FILE_IMMEDIATE;
1695 index = emit->tcs.imm_index;
1696 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1697 }
1698 }
1699 else if (index == emit->tcs.prim_id_index) {
1700 /**
1701 * Emitted as vPrim.x
1702 */
1703 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1704 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1705 index = 0;
1706 }
1707 }
1708 else if (file == TGSI_FILE_INPUT) {
1709 index = emit->linkage.input_map[index];
1710 if (!emit->tcs.control_point_phase) {
1711 /* Emitted as vicp */
1712 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1713 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1714 assert(reg->Register.Dimension);
1715 }
1716 }
1717 else if (file == TGSI_FILE_OUTPUT) {
1718 if ((index >= emit->tcs.patch_generic_out_index &&
1719 index < (emit->tcs.patch_generic_out_index +
1720 emit->tcs.patch_generic_out_count)) ||
1721 index == emit->tcs.inner.tgsi_index ||
1722 index == emit->tcs.outer.tgsi_index) {
1723 if (emit->tcs.control_point_phase) {
1724 emit->discard_instruction = true;
1725 }
1726 else {
1727 /* Device doesn't allow reading from output so
1728 * use corresponding temporary register as source */
1729 file = TGSI_FILE_TEMPORARY;
1730 if (index == emit->tcs.inner.tgsi_index) {
1731 index = emit->tcs.inner.temp_index;
1732 }
1733 else if (index == emit->tcs.outer.tgsi_index) {
1734 index = emit->tcs.outer.temp_index;
1735 }
1736 else {
1737 index = emit->tcs.patch_generic_tmp_index +
1738 (index - emit->tcs.patch_generic_out_index);
1739 }
1740
1741 /**
1742 * Temporaries for patch constant data can be done
1743 * as indexable temporaries.
1744 */
1745 tempArrayId = get_temp_array_id(emit, file, index);
1746 index2d = tempArrayId > 0;
1747 index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1748 }
1749 }
1750 else if (index2d) {
1751 if (emit->tcs.control_point_phase) {
1752 /* Device doesn't allow reading from output so
1753 * use corresponding temporary register as source */
1754 file = TGSI_FILE_TEMPORARY;
1755 index2d = false;
1756 index = emit->tcs.control_point_tmp_index +
1757 (index - emit->tcs.control_point_out_index);
1758 }
1759 else {
1760 emit->discard_instruction = true;
1761 }
1762 }
1763 }
1764 }
1765 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1766 if (file == TGSI_FILE_SYSTEM_VALUE) {
1767 if (index == emit->tes.tesscoord_sys_index) {
1768 /**
1769 * Emitted as vDomain
1770 */
1771 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1772 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1773 index = 0;
1774
1775 /* Make sure swizzles are of those components allowed according
1776 * to the tessellator domain.
1777 */
1778 swizzleX = MIN2(swizzleX, emit->tes.swizzle_max);
1779 swizzleY = MIN2(swizzleY, emit->tes.swizzle_max);
1780 swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max);
1781 swizzleW = MIN2(swizzleW, emit->tes.swizzle_max);
1782 }
1783 else if (index == emit->tes.inner.tgsi_index) {
1784 file = TGSI_FILE_TEMPORARY;
1785 index = emit->tes.inner.temp_index;
1786 }
1787 else if (index == emit->tes.outer.tgsi_index) {
1788 file = TGSI_FILE_TEMPORARY;
1789 index = emit->tes.outer.temp_index;
1790 }
1791 else if (index == emit->tes.prim_id_index) {
1792 /**
1793 * Emitted as vPrim.x
1794 */
1795 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1796 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1797 index = 0;
1798 }
1799
1800 }
1801 else if (file == TGSI_FILE_INPUT) {
1802 if (index2d) {
1803 /* 2D input is emitted as vcp (input control point). */
1804 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1805 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1806
1807 /* index specifies the element index and is remapped
1808 * to align with the tcs output index.
1809 */
1810 index = emit->linkage.input_map[index];
1811
1812 assert(index2 < emit->key.tes.vertices_per_patch);
1813 }
1814 else {
1815 if (index < emit->key.tes.tessfactor_index)
1816 /* index specifies the generic patch index.
1817 * Remapped to match up with the tcs output index.
1818 */
1819 index = emit->linkage.input_map[index];
1820
1821 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1822 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1823 }
1824 }
1825 }
1826 else if (emit->unit == PIPE_SHADER_COMPUTE) {
1827 if (file == TGSI_FILE_SYSTEM_VALUE) {
1828 if (index == emit->cs.thread_id_index) {
1829 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1830 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
1831 index = 0;
1832 } else if (index == emit->cs.block_id_index) {
1833 operand0.value = 0;
1834 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1835 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
1836 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1837 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1838 operand0.swizzleX = swizzleX;
1839 operand0.swizzleY = swizzleY;
1840 operand0.swizzleZ = swizzleZ;
1841 operand0.swizzleW = swizzleW;
1842 emit_dword(emit, operand0.value);
1843 return;
1844 } else if (index == emit->cs.grid_size.tgsi_index) {
1845 file = TGSI_FILE_IMMEDIATE;
1846 index = emit->cs.grid_size.imm_index;
1847 }
1848 }
1849 }
1850
1851 if (file == TGSI_FILE_ADDRESS) {
1852 index = emit->address_reg_index[index];
1853 file = TGSI_FILE_TEMPORARY;
1854 }
1855
1856 if (file == TGSI_FILE_CONSTANT) {
1857 /**
1858 * If this constant buffer is to be bound as srv raw buffer,
1859 * then we have to load the constant to a temp first before
1860 * it can be used as a source in the instruction.
1861 * This is accomplished in two passes. The first pass is to
1862 * identify if there is any constbuf to rawbuf translation.
1863 * If there isn't, emit the instruction as usual.
1864 * If there is, then we save the constant buffer reference info,
1865 * and then instead of emitting the instruction at the end
1866 * of the instruction, it will trigger a second pass of parsing
1867 * this instruction. Before it starts the parsing, it will
1868 * load the referenced raw buffer elements to temporaries.
1869 * Then it will emit the instruction that replaces the
1870 * constant buffer replaces with the corresponding temporaries.
1871 */
1872 if (emit->raw_bufs & (1 << index2)) {
1873 if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
1874 unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
1875
1876 emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
1877
1878 /* Save whether the element index is indirect indexing */
1879 emit->raw_buf_tmp[tmpIdx].indirect = indirect;
1880
1881 /* If it is indirect index, save the temporary
1882 * address index, otherwise, save the immediate index.
1883 */
1884 if (indirect) {
1885 emit->raw_buf_tmp[tmpIdx].element_index =
1886 emit->address_reg_index[reg->Indirect.Index];
1887 emit->raw_buf_tmp[tmpIdx].element_rel =
1888 reg->Register.Index;
1889 }
1890 else {
1891 emit->raw_buf_tmp[tmpIdx].element_index = index;
1892 emit->raw_buf_tmp[tmpIdx].element_rel = 0;
1893 }
1894
1895 emit->raw_buf_cur_tmp_index++;
1896 emit->reemit_rawbuf_instruction = REEMIT_TRUE;
1897 emit->discard_instruction = true;
1898 emit->reemit_tgsi_instruction = true;
1899 }
1900 else {
1901 /* In the reemitting process, replace the constant buffer
1902 * reference with temporary.
1903 */
1904 file = TGSI_FILE_TEMPORARY;
1905 index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
1906 index2d = false;
1907 indirect = false;
1908 emit->raw_buf_cur_tmp_index++;
1909 }
1910 }
1911 }
1912
1913 if (file == TGSI_FILE_TEMPORARY) {
1914 if (need_temp_reg_initialization(emit, index)) {
1915 emit->initialize_temp_index = index;
1916 emit->discard_instruction = true;
1917 }
1918 }
1919
1920 if (operand0.value == 0) {
1921 /* if operand0 was not set above for a special case, do the general
1922 * case now.
1923 */
1924 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1925 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1926 }
1927 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1928 index2d, indirect2d);
1929
1930 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1931 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1932 /* there's no swizzle for in-line immediates */
1933 if (swizzleX == swizzleY &&
1934 swizzleX == swizzleZ &&
1935 swizzleX == swizzleW) {
1936 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1937 }
1938 else {
1939 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1940 }
1941
1942 operand0.swizzleX = swizzleX;
1943 operand0.swizzleY = swizzleY;
1944 operand0.swizzleZ = swizzleZ;
1945 operand0.swizzleW = swizzleW;
1946
1947 if (absolute || negate) {
1948 operand0.extended = 1;
1949 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1950 if (absolute && !negate)
1951 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1952 if (!absolute && negate)
1953 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1954 if (absolute && negate)
1955 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1956 }
1957 }
1958
1959 check_register_index(emit, operand0.operandType, index);
1960
1961 /* Emit the operand tokens */
1962 emit_dword(emit, operand0.value);
1963 if (operand0.extended)
1964 emit_dword(emit, operand1.value);
1965
1966 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1967 /* Emit the four float/int in-line immediate values */
1968 unsigned *c;
1969 assert(index < ARRAY_SIZE(emit->immediates));
1970 assert(file == TGSI_FILE_IMMEDIATE);
1971 assert(swizzleX < 4);
1972 assert(swizzleY < 4);
1973 assert(swizzleZ < 4);
1974 assert(swizzleW < 4);
1975 c = (unsigned *) emit->immediates[index];
1976 emit_dword(emit, c[swizzleX]);
1977 emit_dword(emit, c[swizzleY]);
1978 emit_dword(emit, c[swizzleZ]);
1979 emit_dword(emit, c[swizzleW]);
1980 }
1981 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1982 /* Emit the register index(es) */
1983 if (index2d) {
1984 emit_dword(emit, index2);
1985
1986 if (indirect2d) {
1987 emit_indirect_register(emit, reg->DimIndirect.Index);
1988 }
1989 }
1990
1991 emit_dword(emit, remap_temp_index(emit, file, index));
1992
1993 if (indirect) {
1994 assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
1995 emit_indirect_register(emit, reg->Indirect.Index);
1996 }
1997 }
1998 }
1999
2000
2001 /**
2002 * Emit a resource operand (for use with a SAMPLE instruction).
2003 */
2004 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)2005 emit_resource_register(struct svga_shader_emitter_v10 *emit,
2006 unsigned resource_number)
2007 {
2008 VGPU10OperandToken0 operand0;
2009
2010 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
2011
2012 /* init */
2013 operand0.value = 0;
2014
2015 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
2016 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2017 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2018 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2019 operand0.swizzleX = VGPU10_COMPONENT_X;
2020 operand0.swizzleY = VGPU10_COMPONENT_Y;
2021 operand0.swizzleZ = VGPU10_COMPONENT_Z;
2022 operand0.swizzleW = VGPU10_COMPONENT_W;
2023
2024 emit_dword(emit, operand0.value);
2025 emit_dword(emit, resource_number);
2026 }
2027
2028
2029 /**
2030 * Emit a sampler operand (for use with a SAMPLE instruction).
2031 */
2032 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned unit)2033 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
2034 unsigned unit)
2035 {
2036 VGPU10OperandToken0 operand0;
2037 unsigned sampler_number;
2038
2039 sampler_number = emit->key.tex[unit].sampler_index;
2040
2041 if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
2042 sampler_number++;
2043
2044 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
2045
2046 /* init */
2047 operand0.value = 0;
2048
2049 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2050 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2051
2052 emit_dword(emit, operand0.value);
2053 emit_dword(emit, sampler_number);
2054 }
2055
2056
2057 /**
2058 * Emit an operand which reads the IS_FRONT_FACING register.
2059 */
2060 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)2061 emit_face_register(struct svga_shader_emitter_v10 *emit)
2062 {
2063 VGPU10OperandToken0 operand0;
2064 unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
2065
2066 /* init */
2067 operand0.value = 0;
2068
2069 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
2070 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2071 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
2072 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2073
2074 operand0.swizzleX = VGPU10_COMPONENT_X;
2075 operand0.swizzleY = VGPU10_COMPONENT_X;
2076 operand0.swizzleZ = VGPU10_COMPONENT_X;
2077 operand0.swizzleW = VGPU10_COMPONENT_X;
2078
2079 emit_dword(emit, operand0.value);
2080 emit_dword(emit, index);
2081 }
2082
2083
2084 /**
2085 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
2086 * instruction.
2087 */
2088 static void
emit_rasterizer_register(struct svga_shader_emitter_v10 * emit)2089 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
2090 {
2091 VGPU10OperandToken0 operand0;
2092
2093 /* init */
2094 operand0.value = 0;
2095
2096 /* No register index for rasterizer index (there's only one) */
2097 operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
2098 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2099 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2100 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2101 operand0.swizzleX = VGPU10_COMPONENT_X;
2102 operand0.swizzleY = VGPU10_COMPONENT_Y;
2103 operand0.swizzleZ = VGPU10_COMPONENT_Z;
2104 operand0.swizzleW = VGPU10_COMPONENT_W;
2105
2106 emit_dword(emit, operand0.value);
2107 }
2108
2109
/**
 * Emit tokens for the "stream" register used by the
 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
 */
static void
emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
{
   VGPU10OperandToken0 operand0;

   /* init */
   operand0.value = 0;

   /* Stream operand: 1-D indexed (the stream number) with no data
    * components.
    */
   operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}
2130
2131
2132 /**
2133 * Emit the token for a VGPU10 opcode, with precise parameter.
2134 * \param saturate clamp result to [0,1]?
2135 */
2136 static void
emit_opcode_precise(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,bool precise)2137 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
2138 unsigned vgpu10_opcode, bool saturate, bool precise)
2139 {
2140 VGPU10OpcodeToken0 token0;
2141
2142 token0.value = 0; /* init all fields to zero */
2143 token0.opcodeType = vgpu10_opcode;
2144 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2145 token0.saturate = saturate;
2146
2147 /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
2148 * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
2149 */
2150 token0.preciseValues = precise && emit->version >= 50;
2151
2152 emit_dword(emit, token0.value);
2153
2154 emit->uses_precise_qualifier |= token0.preciseValues;
2155 }
2156
2157
/**
 * Emit the token for a VGPU10 opcode.
 * Convenience wrapper around emit_opcode_precise() with precise=false.
 * \param saturate  clamp result to [0,1]?
 */
static void
emit_opcode(struct svga_shader_emitter_v10 *emit,
            unsigned vgpu10_opcode, bool saturate)
{
   emit_opcode_precise(emit, vgpu10_opcode, saturate, false);
}
2168
2169
2170 /**
2171 * Emit the token for a VGPU10 resinfo instruction.
2172 * \param modifier return type modifier, _uint or _rcpFloat.
2173 * TODO: We may want to remove this parameter if it will
2174 * only ever be used as _uint.
2175 */
2176 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)2177 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2178 VGPU10_RESINFO_RETURN_TYPE modifier)
2179 {
2180 VGPU10OpcodeToken0 token0;
2181
2182 token0.value = 0; /* init all fields to zero */
2183 token0.opcodeType = VGPU10_OPCODE_RESINFO;
2184 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2185 token0.resinfoReturnType = modifier;
2186
2187 emit_dword(emit, token0.value);
2188 }
2189
2190
2191 /**
2192 * Emit opcode tokens for a texture sample instruction. Texture instructions
2193 * can be rather complicated (texel offsets, etc) so we have this specialized
2194 * function.
2195 */
2196 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,const int offsets[3])2197 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2198 unsigned vgpu10_opcode, bool saturate,
2199 const int offsets[3])
2200 {
2201 VGPU10OpcodeToken0 token0;
2202 VGPU10OpcodeToken1 token1;
2203
2204 token0.value = 0; /* init all fields to zero */
2205 token0.opcodeType = vgpu10_opcode;
2206 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2207 token0.saturate = saturate;
2208
2209 if (offsets[0] || offsets[1] || offsets[2]) {
2210 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2211 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2212 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2213 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2214 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2215 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2216
2217 token0.extended = 1;
2218 token1.value = 0;
2219 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2220 token1.offsetU = offsets[0];
2221 token1.offsetV = offsets[1];
2222 token1.offsetW = offsets[2];
2223 }
2224
2225 emit_dword(emit, token0.value);
2226 if (token0.extended) {
2227 emit_dword(emit, token1.value);
2228 }
2229 }
2230
2231
2232 /**
2233 * Emit a DISCARD opcode token.
2234 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2235 * Otherwise, we'll discard the fragment if the X component is 0.
2236 */
2237 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,bool nonzero)2238 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, bool nonzero)
2239 {
2240 VGPU10OpcodeToken0 opcode0;
2241
2242 opcode0.value = 0;
2243 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2244 if (nonzero)
2245 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2246
2247 emit_dword(emit, opcode0.value);
2248 }
2249
2250
/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 * Records where the instruction starts so end_emit_instruction() can
 * later patch the instructionLength field (and discard the instruction
 * if needed).
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   /* A zero start token means no instruction is currently open */
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}
2264
2265
/**
 * We need to call this after we emit the last token of a VGPU10 instruction.
 * This function patches in the opcode token's instructionLength field.
 * If emit->discard_instruction was set (e.g. because a temp still needs
 * initialization or a raw-buffer reemit pass is pending), the whole
 * instruction emitted since begin_emit_instruction() is dropped instead.
 */
static void
end_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   unsigned inst_length;

   /* begin_emit_instruction() must have been called first */
   assert(emit->inst_start_token > 0);

   if (emit->discard_instruction) {
      /* Back up the emit->ptr to where this instruction started so
       * that we discard the current instruction.
       */
      emit->ptr = (char *) (tokens + emit->inst_start_token);
   }
   else {
      /* Compute instruction length and patch that into the start of
       * the instruction.
       */
      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;

      assert(inst_length > 0);

      tokens[emit->inst_start_token].instructionLength = inst_length;
   }

   emit->inst_start_token = 0; /* reset to zero for error checking */
   emit->discard_instruction = false;
}
2298
2299
2300 /**
2301 * Return index for a free temporary register.
2302 */
2303 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)2304 get_temp_index(struct svga_shader_emitter_v10 *emit)
2305 {
2306 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2307 return emit->num_shader_temps + emit->internal_temp_count++;
2308 }
2309
2310
/**
 * Release the temporaries which were generated by get_temp_index().
 * Simply resets the internal-temp counter; the register slots are reused
 * by subsequent instructions.
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}
2319
2320
2321 /**
2322 * Create a tgsi_full_src_register.
2323 */
2324 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)2325 make_src_reg(enum tgsi_file_type file, unsigned index)
2326 {
2327 struct tgsi_full_src_register reg;
2328
2329 memset(®, 0, sizeof(reg));
2330 reg.Register.File = file;
2331 reg.Register.Index = index;
2332 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2333 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2334 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2335 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2336 return reg;
2337 }
2338
2339
2340 /**
2341 * Create a tgsi_full_src_register with a swizzle such that all four
2342 * vector components have the same scalar value.
2343 */
2344 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2345 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2346 {
2347 struct tgsi_full_src_register reg;
2348
2349 assert(component >= TGSI_SWIZZLE_X);
2350 assert(component <= TGSI_SWIZZLE_W);
2351
2352 memset(®, 0, sizeof(reg));
2353 reg.Register.File = file;
2354 reg.Register.Index = index;
2355 reg.Register.SwizzleX =
2356 reg.Register.SwizzleY =
2357 reg.Register.SwizzleZ =
2358 reg.Register.SwizzleW = component;
2359 return reg;
2360 }
2361
2362
/**
 * Create a tgsi_full_src_register for a temporary.
 * Convenience wrapper around make_src_reg() with TGSI_FILE_TEMPORARY.
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}
2371
2372
/**
 * Create a tgsi_full_src_register for a constant.
 * Convenience wrapper around make_src_reg() with TGSI_FILE_CONSTANT.
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}
2381
2382
/**
 * Create a tgsi_full_src_register for an immediate constant.
 * Convenience wrapper around make_src_reg() with TGSI_FILE_IMMEDIATE.
 */
static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
}
2391
2392
2393 /**
2394 * Create a tgsi_full_dst_register.
2395 */
2396 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)2397 make_dst_reg(enum tgsi_file_type file, unsigned index)
2398 {
2399 struct tgsi_full_dst_register reg;
2400
2401 memset(®, 0, sizeof(reg));
2402 reg.Register.File = file;
2403 reg.Register.Index = index;
2404 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2405 return reg;
2406 }
2407
2408
/**
 * Create a tgsi_full_dst_register for a temporary.
 * Convenience wrapper around make_dst_reg() with TGSI_FILE_TEMPORARY.
 */
static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)
{
   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
}
2417
2418
/**
 * Create a tgsi_full_dst_register for an output.
 * Convenience wrapper around make_dst_reg() with TGSI_FILE_OUTPUT.
 */
static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)
{
   return make_dst_reg(TGSI_FILE_OUTPUT, index);
}
2427
2428
2429 /**
2430 * Create negated tgsi_full_src_register.
2431 */
2432 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2433 negate_src(const struct tgsi_full_src_register *reg)
2434 {
2435 struct tgsi_full_src_register neg = *reg;
2436 neg.Register.Negate = !reg->Register.Negate;
2437 return neg;
2438 }
2439
2440 /**
2441 * Create absolute value of a tgsi_full_src_register.
2442 */
2443 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2444 absolute_src(const struct tgsi_full_src_register *reg)
2445 {
2446 struct tgsi_full_src_register absolute = *reg;
2447 absolute.Register.Absolute = 1;
2448 return absolute;
2449 }
2450
2451
2452 /** Return the named swizzle term from the src register */
2453 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2454 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2455 {
2456 switch (term) {
2457 case TGSI_SWIZZLE_X:
2458 return reg->Register.SwizzleX;
2459 case TGSI_SWIZZLE_Y:
2460 return reg->Register.SwizzleY;
2461 case TGSI_SWIZZLE_Z:
2462 return reg->Register.SwizzleZ;
2463 case TGSI_SWIZZLE_W:
2464 return reg->Register.SwizzleW;
2465 default:
2466 assert(!"Bad swizzle");
2467 return TGSI_SWIZZLE_X;
2468 }
2469 }
2470
2471
2472 /**
2473 * Create swizzled tgsi_full_src_register.
2474 */
2475 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2476 swizzle_src(const struct tgsi_full_src_register *reg,
2477 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2478 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2479 {
2480 struct tgsi_full_src_register swizzled = *reg;
2481 /* Note: we swizzle the current swizzle */
2482 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2483 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2484 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2485 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2486 return swizzled;
2487 }
2488
2489
2490 /**
2491 * Create swizzled tgsi_full_src_register where all the swizzle
2492 * terms are the same.
2493 */
2494 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2495 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2496 {
2497 struct tgsi_full_src_register swizzled = *reg;
2498 /* Note: we swizzle the current swizzle */
2499 swizzled.Register.SwizzleX =
2500 swizzled.Register.SwizzleY =
2501 swizzled.Register.SwizzleZ =
2502 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2503 return swizzled;
2504 }
2505
2506
2507 /**
2508 * Create new tgsi_full_dst_register with writemask.
2509 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
2510 */
2511 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2512 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2513 {
2514 struct tgsi_full_dst_register masked = *reg;
2515 masked.Register.WriteMask = mask;
2516 return masked;
2517 }
2518
2519
2520 /**
2521 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2522 */
2523 static bool
same_swizzle_terms(const struct tgsi_full_src_register * reg)2524 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2525 {
2526 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2527 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2528 reg->Register.SwizzleZ == reg->Register.SwizzleW);
2529 }
2530
2531
2532 /**
2533 * Search the vector for the value 'x' and return its position.
2534 */
2535 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2536 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2537 union tgsi_immediate_data x)
2538 {
2539 unsigned i;
2540 for (i = 0; i < 4; i++) {
2541 if (vec[i].Int == x.Int)
2542 return i;
2543 }
2544 return -1;
2545 }
2546
2547
2548 /**
2549 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2550 */
2551 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2552 find_immediate(struct svga_shader_emitter_v10 *emit,
2553 union tgsi_immediate_data x, unsigned startIndex)
2554 {
2555 const unsigned endIndex = emit->num_immediates;
2556 unsigned i;
2557
2558 assert(emit->num_immediates_emitted > 0);
2559
2560 /* Search immediates for x, y, z, w */
2561 for (i = startIndex; i < endIndex; i++) {
2562 if (x.Int == emit->immediates[i][0].Int ||
2563 x.Int == emit->immediates[i][1].Int ||
2564 x.Int == emit->immediates[i][2].Int ||
2565 x.Int == emit->immediates[i][3].Int) {
2566 return i;
2567 }
2568 }
2569 /* immediate not declared yet */
2570 return -1;
2571 }
2572
2573
/**
 * As above, but search for a double[2] pair.
 * Returns the index of the immediate holding {x, y}, or -1 (after
 * asserting) if it was never pre-declared.
 */
static int
find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
                   double x, double y)
{
   const unsigned endIndex = emit->num_immediates;
   unsigned i;

   assert(emit->num_immediates_emitted > 0);

   /* Search immediates for the {x, y} double pair */
   for (i = 0; i < endIndex; i++) {
      if (x == emit->immediates_dbl[i][0] &&
          y == emit->immediates_dbl[i][1]) {
         return i;
      }
   }
   /* Should never try to use an immediate value that wasn't pre-declared */
   assert(!"find_immediate_dbl() failed!");
   return -1;
}
2597
2598
2599
/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data[4] value.
 * Note: the values must have been previously declared/allocated in
 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same
 * vec4 immediate.
 */
static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
                     const union tgsi_immediate_data imm[4])
{
   struct tgsi_full_src_register reg;
   unsigned i;

   /* Try each common-immediate vec4 as a candidate start point; a vec4
    * containing imm[0] might not contain the other three components, so
    * we may need to keep searching from the next position.
    */
   for (i = 0; i < emit->num_common_immediates; i++) {
      /* search for first component value */
      int immpos = find_immediate(emit, imm[0], i);
      int x, y, z, w;

      /* imm[0] must have been pre-declared somewhere */
      assert(immpos >= 0);

      /* find remaining components within the immediate vector */
      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);

      if (x >=0 && y >= 0 && z >= 0 && w >= 0) {
         /* found them all: select each via the matching swizzle term */
         memset(&reg, 0, sizeof(reg));
         reg.Register.File = TGSI_FILE_IMMEDIATE;
         reg.Register.Index = immpos;
         reg.Register.SwizzleX = x;
         reg.Register.SwizzleY = y;
         reg.Register.SwizzleZ = z;
         reg.Register.SwizzleW = w;
         return reg;
      }
      /* else, keep searching */
   }

   assert(!"Failed to find immediate register!");

   /* Just return IMM[0].xxxx */
   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   return reg;
}
2648
2649
2650 /**
2651 * Return a tgsi_full_src_register for an immediate/literal
2652 * union tgsi_immediate_data value of the form {value, value, value, value}.
2653 * \sa make_immediate_reg_4() regarding allowed values.
2654 */
2655 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2656 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2657 union tgsi_immediate_data value)
2658 {
2659 struct tgsi_full_src_register reg;
2660 int immpos = find_immediate(emit, value, 0);
2661
2662 assert(immpos >= 0);
2663
2664 memset(®, 0, sizeof(reg));
2665 reg.Register.File = TGSI_FILE_IMMEDIATE;
2666 reg.Register.Index = immpos;
2667 reg.Register.SwizzleX =
2668 reg.Register.SwizzleY =
2669 reg.Register.SwizzleZ =
2670 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2671
2672 return reg;
2673 }
2674
2675
2676 /**
2677 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2678 * \sa make_immediate_reg_4() regarding allowed values.
2679 */
2680 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2681 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2682 float x, float y, float z, float w)
2683 {
2684 union tgsi_immediate_data imm[4];
2685 imm[0].Float = x;
2686 imm[1].Float = y;
2687 imm[2].Float = z;
2688 imm[3].Float = w;
2689 return make_immediate_reg_4(emit, imm);
2690 }
2691
2692
2693 /**
2694 * Return a tgsi_full_src_register for an immediate/literal float value
2695 * of the form {value, value, value, value}.
2696 * \sa make_immediate_reg_4() regarding allowed values.
2697 */
2698 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)2699 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2700 {
2701 union tgsi_immediate_data imm;
2702 imm.Float = value;
2703 return make_immediate_reg(emit, imm);
2704 }
2705
2706
2707 /**
2708 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2709 */
2710 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2711 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2712 int x, int y, int z, int w)
2713 {
2714 union tgsi_immediate_data imm[4];
2715 imm[0].Int = x;
2716 imm[1].Int = y;
2717 imm[2].Int = z;
2718 imm[3].Int = w;
2719 return make_immediate_reg_4(emit, imm);
2720 }
2721
2722
2723 /**
2724 * Return a tgsi_full_src_register for an immediate/literal int value
2725 * of the form {value, value, value, value}.
2726 * \sa make_immediate_reg_4() regarding allowed values.
2727 */
2728 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)2729 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2730 {
2731 union tgsi_immediate_data imm;
2732 imm.Int = value;
2733 return make_immediate_reg(emit, imm);
2734 }
2735
2736
2737 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2738 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2739 {
2740 struct tgsi_full_src_register reg;
2741 int immpos = find_immediate_dbl(emit, value, value);
2742
2743 assert(immpos >= 0);
2744
2745 memset(®, 0, sizeof(reg));
2746 reg.Register.File = TGSI_FILE_IMMEDIATE;
2747 reg.Register.Index = immpos;
2748 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2749 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2750 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2751 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2752
2753 return reg;
2754 }
2755
2756
2757 /**
2758 * Allocate space for a union tgsi_immediate_data[4] immediate.
2759 * \return the index/position of the immediate.
2760 */
2761 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2762 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2763 const union tgsi_immediate_data imm[4])
2764 {
2765 unsigned n = emit->num_immediates++;
2766 assert(n < ARRAY_SIZE(emit->immediates));
2767 emit->immediates[n][0] = imm[0];
2768 emit->immediates[n][1] = imm[1];
2769 emit->immediates[n][2] = imm[2];
2770 emit->immediates[n][3] = imm[3];
2771 return n;
2772 }
2773
2774
2775 /**
2776 * Allocate space for a float[4] immediate.
2777 * \return the index/position of the immediate.
2778 */
2779 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2780 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2781 float x, float y, float z, float w)
2782 {
2783 union tgsi_immediate_data imm[4];
2784 imm[0].Float = x;
2785 imm[1].Float = y;
2786 imm[2].Float = z;
2787 imm[3].Float = w;
2788 return alloc_immediate_4(emit, imm);
2789 }
2790
2791
2792 /**
2793 * Allocate space for an int[4] immediate.
2794 * \return the index/position of the immediate.
2795 */
2796 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2797 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2798 int x, int y, int z, int w)
2799 {
2800 union tgsi_immediate_data imm[4];
2801 imm[0].Int = x;
2802 imm[1].Int = y;
2803 imm[2].Int = z;
2804 imm[3].Int = w;
2805 return alloc_immediate_4(emit, imm);
2806 }
2807
2808
2809 /**
2810 * Add a new immediate after the immediate block has been declared.
2811 * Any new immediates will be appended to the immediate block after the
2812 * shader has been parsed.
2813 * \return the index/position of the immediate.
2814 */
2815 static unsigned
add_immediate_int(struct svga_shader_emitter_v10 * emit,int x)2816 add_immediate_int(struct svga_shader_emitter_v10 *emit, int x)
2817 {
2818 union tgsi_immediate_data imm[4];
2819 imm[0].Int = x;
2820 imm[1].Int = x+1;
2821 imm[2].Int = x+2;
2822 imm[3].Int = x+3;
2823
2824 unsigned immpos = alloc_immediate_4(emit, imm);
2825 emit->num_new_immediates++;
2826
2827 return immpos;
2828 }
2829
2830
2831 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2832 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2833 double x, double y)
2834 {
2835 unsigned n = emit->num_immediates++;
2836 assert(!emit->num_immediates_emitted);
2837 assert(n < ARRAY_SIZE(emit->immediates));
2838 emit->immediates_dbl[n][0] = x;
2839 emit->immediates_dbl[n][1] = y;
2840 return n;
2841
2842 }
2843
2844
2845 /**
2846 * Allocate a shader input to store a system value.
2847 */
2848 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2849 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2850 {
2851 const unsigned n = emit->linkage.input_map_max + 1 + index;
2852 assert(index < ARRAY_SIZE(emit->system_value_indexes));
2853 emit->system_value_indexes[index] = n;
2854 return n;
2855 }
2856
2857
2858 /**
2859 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2860 */
2861 static bool
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2862 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2863 const struct tgsi_full_immediate *imm)
2864 {
2865 /* We don't actually emit any code here. We just save the
2866 * immediate values and emit them later.
2867 */
2868 alloc_immediate_4(emit, imm->u);
2869 return true;
2870 }
2871
2872
2873 /**
2874 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2875 * containing all the immediate values previously allocated
2876 * with alloc_immediate_4().
2877 */
2878 static bool
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)2879 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2880 {
2881 VGPU10OpcodeToken0 token;
2882
2883 assert(!emit->num_immediates_emitted);
2884
2885 token.value = 0;
2886 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2887 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2888
2889 emit->immediates_block_start_token =
2890 (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2891
2892 /* Note: no begin/end_emit_instruction() calls */
2893 emit_dword(emit, token.value);
2894 emit_dword(emit, 2 + 4 * emit->num_immediates);
2895 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2896
2897 emit->num_immediates_emitted = emit->num_immediates;
2898
2899 emit->immediates_block_next_token =
2900 (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2901
2902 return true;
2903 }
2904
2905
/**
 * Reemit the immediate constant buffer block to include the new
 * immediates that are allocated after the block is declared. Those
 * immediates are used as constant indices to constant buffers.
 *
 * The already-emitted token stream is opened up in place: everything after
 * the immediates block is shifted down by 4 dwords per new immediate, the
 * new values are copied into the gap, and the block's length dword is
 * patched.  All byte/token arithmetic below assumes one immediate occupies
 * 4 dwords (one per channel).
 */
static bool
reemit_immediates_block(struct svga_shader_emitter_v10 *emit)
{
   unsigned num_tokens = emit_get_num_tokens(emit);
   unsigned num_new_immediates = emit->num_new_immediates;

   /* Reserve room for the new immediates.  Note: this may grow/move
    * emit->buf, so all pointers into the buffer are computed afterwards.
    */
   if (!reserve(emit, 4 * num_new_immediates))
      return false;

   /* Move the tokens after the immediates block to make room for the
    * new immediates.
    */
   VGPU10ProgramToken *tokens = (VGPU10ProgramToken *)emit->buf;
   char *next = (char *) (tokens + emit->immediates_block_next_token);
   char *new_next = (char *) (tokens + emit->immediates_block_next_token +
                              num_new_immediates * 4);

   char *end = emit->ptr;
   unsigned len = end - next;
   /* memmove, not memcpy: source and destination overlap */
   memmove(new_next, next, len);

   /* Append the new immediates to the end of the immediates block.
    * The block's length dword sits one token after the block start
    * (right after the CUSTOMDATA opcode token); bump it by the number
    * of dwords being added.
    */
   char *start = (char *) (tokens + emit->immediates_block_start_token+1);
   unsigned immediates_block_size = *(uint32 *)start;

   /* The new immediates begin right after the last one emitted originally */
   char *new_immediates = (char *)&emit->immediates[emit->num_immediates_emitted][0];
   *(uint32 *)start = immediates_block_size + 4 * num_new_immediates;
   memcpy(next, new_immediates, 4 * num_new_immediates * sizeof(uint32));

   /* Advance the write pointer past the shifted tail */
   emit->ptr = (char *) (tokens + num_tokens + 4 * num_new_immediates);

   return true;
}
2945
2946
2947
2948 /**
2949 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2950 * interpolation mode.
2951 * \return a VGPU10_INTERPOLATION_x value
2952 */
2953 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)2954 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2955 enum tgsi_interpolate_mode interp,
2956 enum tgsi_interpolate_loc interpolate_loc)
2957 {
2958 if (interp == TGSI_INTERPOLATE_COLOR) {
2959 interp = emit->key.fs.flatshade ?
2960 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2961 }
2962
2963 switch (interp) {
2964 case TGSI_INTERPOLATE_CONSTANT:
2965 return VGPU10_INTERPOLATION_CONSTANT;
2966 case TGSI_INTERPOLATE_LINEAR:
2967 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2968 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2969 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2970 emit->version >= 41) {
2971 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2972 } else {
2973 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2974 }
2975 break;
2976 case TGSI_INTERPOLATE_PERSPECTIVE:
2977 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2978 return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2979 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2980 emit->version >= 41) {
2981 return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2982 } else {
2983 return VGPU10_INTERPOLATION_LINEAR;
2984 }
2985 break;
2986 default:
2987 assert(!"Unexpected interpolation mode");
2988 return VGPU10_INTERPOLATION_CONSTANT;
2989 }
2990 }
2991
2992
/**
 * Translate a TGSI property to VGPU10.
 * Don't emit any instructions yet, only need to gather the primitive property
 * information. The output primitive topology might be changed later. The
 * final property instructions will be emitted as part of the pre-helper code.
 * \return true (properties never fail; unknown ones are only logged)
 */
static bool
emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
                     const struct tgsi_full_property *prop)
{
   /* GS input primitive type, indexed by MESA_PRIM_x.  Primitives a GS
    * cannot take as input (quads, quad strips, polygons) map to UNDEFINED.
    */
   static const VGPU10_PRIMITIVE primType[] = {
      VGPU10_PRIMITIVE_POINT, /* MESA_PRIM_POINTS */
      VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINES */
      VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_QUADS */
      VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_POLYGON */
      VGPU10_PRIMITIVE_LINE_ADJ, /* MESA_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_LINE_ADJ, /* MESA_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TRIANGLE_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* GS output topology, indexed by MESA_PRIM_x. */
   static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
      VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* MESA_PRIM_POINTS */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* MESA_PRIM_LINES */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* MESA_PRIM_LINE_LOOP */
      VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* MESA_PRIM_LINE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* MESA_PRIM_TRIANGLES */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_FAN */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_QUADS */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_QUAD_STRIP */
      VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_POLYGON */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* MESA_PRIM_LINES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* MESA_PRIM_LINE_STRIP_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */
      VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
   };

   /* Number of input vertices per primitive, indexed by VGPU10_PRIMITIVE_x. */
   static const unsigned inputArraySize[] = {
      0, /* VGPU10_PRIMITIVE_UNDEFINED */
      1, /* VGPU10_PRIMITIVE_POINT */
      2, /* VGPU10_PRIMITIVE_LINE */
      3, /* VGPU10_PRIMITIVE_TRIANGLE */
      0,
      0,
      4, /* VGPU10_PRIMITIVE_LINE_ADJ */
      6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
   };

   switch (prop->Property.PropertyName) {
   case TGSI_PROPERTY_GS_INPUT_PRIM:
      /* Record GS input primitive type and derived input array size */
      assert(prop->u[0].Data < ARRAY_SIZE(primType));
      emit->gs.prim_type = primType[prop->u[0].Data];
      assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
      emit->gs.input_size = inputArraySize[emit->gs.prim_type];
      break;

   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
      /* Record GS output topology (may be changed later, see above) */
      assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
      emit->gs.prim_topology = primTopology[prop->u[0].Data];
      assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
      break;

   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
      emit->gs.max_out_vertices = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_GS_INVOCATIONS:
      emit->gs.invocations = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
   case TGSI_PROPERTY_NEXT_SHADER:
   case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
      /* no-op */
      break;

   case TGSI_PROPERTY_TCS_VERTICES_OUT:
      /* This info is already captured in the shader key */
      break;

   /* Tessellation evaluation shader properties */
   case TGSI_PROPERTY_TES_PRIM_MODE:
      emit->tes.prim_mode = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_TES_SPACING:
      emit->tes.spacing = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
      emit->tes.vertices_order_cw = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_TES_POINT_MODE:
      emit->tes.point_mode = prop->u[0].Data;
      break;

   /* Compute shader thread-group dimensions */
   case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
      emit->cs.block_width = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
      emit->cs.block_height = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
      emit->cs.block_depth = prop->u[0].Data;
      break;

   case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
      emit->fs.forceEarlyDepthStencil = true;
      break;

   default:
      /* Unknown properties are only logged, never an error */
      debug_printf("Unexpected TGSI property %s\n",
                   tgsi_property_names[prop->Property.PropertyName]);
   }

   return true;
}
3119
3120
3121 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)3122 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
3123 VGPU10OpcodeToken0 opcode0, unsigned nData,
3124 unsigned data)
3125 {
3126 begin_emit_instruction(emit);
3127 emit_dword(emit, opcode0.value);
3128 if (nData)
3129 emit_dword(emit, data);
3130 end_emit_instruction(emit);
3131 }
3132
3133
3134 /**
3135 * Emit property instructions
3136 */
3137 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)3138 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
3139 {
3140 VGPU10OpcodeToken0 opcode0;
3141
3142 assert(emit->unit == PIPE_SHADER_GEOMETRY);
3143
3144 /* emit input primitive type declaration */
3145 opcode0.value = 0;
3146 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
3147 opcode0.primitive = emit->gs.prim_type;
3148 emit_property_instruction(emit, opcode0, 0, 0);
3149
3150 /* emit max output vertices */
3151 opcode0.value = 0;
3152 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
3153 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
3154
3155 if (emit->version >= 50 && emit->gs.invocations > 0) {
3156 opcode0.value = 0;
3157 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
3158 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
3159 }
3160 }
3161
3162
3163 /**
3164 * A helper function to declare tessellator domain in a hull shader or
3165 * in the domain shader.
3166 */
3167 static void
emit_tessellator_domain(struct svga_shader_emitter_v10 * emit,enum mesa_prim prim_mode)3168 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
3169 enum mesa_prim prim_mode)
3170 {
3171 VGPU10OpcodeToken0 opcode0;
3172
3173 opcode0.value = 0;
3174 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
3175 switch (prim_mode) {
3176 case MESA_PRIM_QUADS:
3177 case MESA_PRIM_LINES:
3178 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
3179 break;
3180 case MESA_PRIM_TRIANGLES:
3181 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
3182 break;
3183 default:
3184 debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
3185 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
3186 }
3187 begin_emit_instruction(emit);
3188 emit_dword(emit, opcode0.value);
3189 end_emit_instruction(emit);
3190 }
3191
3192
3193 /**
3194 * Emit domain shader declarations.
3195 */
3196 static void
emit_domain_shader_declarations(struct svga_shader_emitter_v10 * emit)3197 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
3198 {
3199 VGPU10OpcodeToken0 opcode0;
3200
3201 assert(emit->unit == PIPE_SHADER_TESS_EVAL);
3202
3203 /* Emit the input control point count */
3204 assert(emit->key.tes.vertices_per_patch >= 0 &&
3205 emit->key.tes.vertices_per_patch <= 32);
3206
3207 opcode0.value = 0;
3208 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3209 opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
3210 begin_emit_instruction(emit);
3211 emit_dword(emit, opcode0.value);
3212 end_emit_instruction(emit);
3213
3214 emit_tessellator_domain(emit, emit->tes.prim_mode);
3215
3216 /* Specify a max for swizzles of the domain point according to the
3217 * tessellator domain type.
3218 */
3219 emit->tes.swizzle_max = emit->tes.prim_mode == MESA_PRIM_TRIANGLES ?
3220 TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y;
3221 }
3222
3223
/**
 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
 * to implement some instructions. We pre-allocate those values here
 * in the immediate constant buffer.
 *
 * The allocation order matters: common_immediate_pos[] records the pool
 * index of each allocation so later code can reference them.  Each entry
 * is only allocated when the shader info / key shows it will be needed.
 */
static void
alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
{
   unsigned n = 0;

   /* {0, 1, 0.5, -1}: the most commonly used float constants */
   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);

   /* LIT clamps the specular exponent to [-128, 128] */
   if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
   }

   /* Small integer constants */
   emit->common_immediate_pos[n++] =
      alloc_immediate_int4(emit, 0, 1, 2, -1);

   emit->common_immediate_pos[n++] =
      alloc_immediate_int4(emit, 3, 4, 5, 6);

   /* 31 = bit position of the MSB for 32-bit most-significant-bit ops */
   if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
       emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 31, 0, 0, 0);
   }

   /* 32 = bit width, used by the bitfield extract/insert ops */
   if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
       emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
       emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 32, 0, 0, 0);
   }

   /* Constants for the packed-uint -> snorm/uscaled/sscaled vertex
    * attribute conversions selected by the VS key.
    */
   if (emit->key.vs.attrib_puint_to_snorm) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
   }

   if (emit->key.vs.attrib_puint_to_uscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   }

   if (emit->key.vs.attrib_puint_to_sscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 12, 2, 0);

      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 30, 0, 0);
   }

   /* Index vectors {i, i+1, i+2, i+3} for selecting among multiple
    * prescale factors.
    */
   if (emit->vposition.num_prescale > 1) {
      unsigned i;
      for (i = 0; i < emit->vposition.num_prescale; i+=4) {
         emit->common_immediate_pos[n++] =
            alloc_immediate_int4(emit, i, i+1, i+2, i+3);
      }
   }

   /* Alias the immediate pool as double[2] pairs for the allocations below */
   emit->immediates_dbl = (double (*)[2]) emit->immediates;

   if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_double2(emit, -1.0, -1.0);
   }

   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
       emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_double2(emit, 0.0, 0.0);
      emit->common_immediate_pos[n++] =
         alloc_immediate_double2(emit, 1.0, 1.0);
   }

   /* INTERP_OFFSET offsets are clamped to [-16, 16] (presumably the
    * hardware limit -- see the opcode's emit code).
    */
   if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
   }

   /* Mid-function sanity check before the remaining allocations */
   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));

   unsigned i;

   /* One shared epsilon immediate if any sampler needs a texel bias */
   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
      if (emit->key.tex[i].texel_bias) {
         /* Replace 0.0f if more immediate float value is needed */
         emit->common_immediate_pos[n++] =
            alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
         break;
      }
   }

   /** TODO: allocate immediates for all possible element byte offset?
    */
   if (emit->raw_bufs) {
      unsigned i;
      /* allocates {7,8,9,10} and {11,12,13,14} */
      for (i = 7; i < 12; i+=4) {
         emit->common_immediate_pos[n++] =
            alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
      }
   }

   /* Index vector for indirectly-addressed images/buffers; the loop
    * bound makes this a single {7,8,9,10} allocation.
    */
   if (emit->info.indirect_files &
       (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
      unsigned i;
      for (i = 7; i < 8; i+=4) {
         emit->common_immediate_pos[n++] =
            alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
      }
   }

   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
   emit->num_common_immediates = n;
}
3342
3343
/**
 * Emit hull shader declarations: control point counts, tessellator
 * domain/output-primitive/partitioning, then the common declarations
 * (constants, samplers, resources, images, buffers) and the immediate
 * constant buffer block.  The declaration order follows the VGPU10
 * hull shader layout.
 */
static void
emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;

   /* Emit the input control point count */
   assert(emit->key.tcs.vertices_per_patch > 0 &&
          emit->key.tcs.vertices_per_patch <= 32);

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
   opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   end_emit_instruction(emit);

   /* Emit the output control point count */
   assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
   opcode0.controlPointCount = emit->key.tcs.vertices_out;
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   end_emit_instruction(emit);

   /* Emit tessellator domain */
   emit_tessellator_domain(emit, emit->key.tcs.prim_mode);

   /* Emit tessellator output primitive: points, lines, or triangles
    * with the winding selected by the key.
    */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
   if (emit->key.tcs.point_mode) {
      opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
   }
   else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) {
      opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
   }
   else {
      assert(emit->key.tcs.prim_mode == MESA_PRIM_QUADS ||
             emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES);

      /* Note: CW in the key maps to CCW here (and vice versa) */
      if (emit->key.tcs.vertices_order_cw)
         opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
      else
         opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
   }
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   end_emit_instruction(emit);

   /* Emit tessellator partitioning */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
   switch (emit->key.tcs.spacing) {
   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
      break;
   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
      break;
   case PIPE_TESS_SPACING_EQUAL:
      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
      break;
   default:
      debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
      opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
   }
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   end_emit_instruction(emit);

   /* Pre-allocate frequently used immediates before declaring resources */
   alloc_common_immediates(emit);

   /* Declare constant registers */
   emit_constant_declaration(emit);

   /* Declare samplers and resources */
   emit_sampler_declarations(emit);
   emit_resource_declarations(emit);

   /* Declare images */
   emit_image_declarations(emit);

   /* Declare shader buffers */
   emit_shader_buf_declarations(emit);

   /* Declare atomic buffers */
   emit_atomic_buf_declarations(emit);

   /* {N, N, N, 0} where N = input vertices per patch, used by TCS code */
   int nVertices = emit->key.tcs.vertices_per_patch;
   emit->tcs.imm_index =
      alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);

   /* Now, emit the constant block containing all the immediates
    * declared by shader, as well as the extra ones seen above.
    */
   emit_vgpu10_immediates_block(emit);

}
3447
3448
3449 /**
3450 * A helper function to determine if control point phase is needed.
3451 * Returns TRUE if there is control point output.
3452 */
3453 static bool
needs_control_point_phase(struct svga_shader_emitter_v10 * emit)3454 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3455 {
3456 unsigned i;
3457
3458 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3459
3460 /* If output control point count does not match the input count,
3461 * we need a control point phase to explicitly set the output control
3462 * points.
3463 */
3464 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3465 emit->key.tcs.vertices_out)
3466 return true;
3467
3468 for (i = 0; i < emit->info.num_outputs; i++) {
3469 switch (emit->info.output_semantic_name[i]) {
3470 case TGSI_SEMANTIC_PATCH:
3471 case TGSI_SEMANTIC_TESSOUTER:
3472 case TGSI_SEMANTIC_TESSINNER:
3473 break;
3474 default:
3475 return true;
3476 }
3477 }
3478 return false;
3479 }
3480
3481
3482 /**
3483 * A helper function to add shader signature for passthrough control point
3484 * phase. This signature is also generated for passthrough control point
3485 * phase from HLSL compiler and is needed by Metal Renderer.
3486 */
3487 static void
emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 * emit)3488 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3489 {
3490 struct svga_shader_signature *sgn = &emit->signature;
3491 SVGA3dDXShaderSignatureEntry *sgnEntry;
3492 unsigned i;
3493
3494 for (i = 0; i < emit->info.num_inputs; i++) {
3495 unsigned index = emit->linkage.input_map[i];
3496 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3497
3498 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3499
3500 set_shader_signature_entry(sgnEntry, index,
3501 tgsi_semantic_to_sgn_name[sem_name],
3502 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3503 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3504 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3505
3506 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3507
3508 set_shader_signature_entry(sgnEntry, i,
3509 tgsi_semantic_to_sgn_name[sem_name],
3510 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3511 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3512 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3513 }
3514 }
3515
3516
3517 /**
3518 * A helper function to emit an instruction to start the control point phase
3519 * in the hull shader.
3520 */
3521 static void
emit_control_point_phase_instruction(struct svga_shader_emitter_v10 * emit)3522 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3523 {
3524 VGPU10OpcodeToken0 opcode0;
3525
3526 opcode0.value = 0;
3527 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3528 begin_emit_instruction(emit);
3529 emit_dword(emit, opcode0.value);
3530 end_emit_instruction(emit);
3531 }
3532
3533
/**
 * Start the hull shader control point phase.
 * \return true if a control point phase was actually started (the caller
 *         should then translate the control point instructions); false if
 *         the phase was skipped (passthrough or empty).
 */
static bool
emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
{
   /* If there is no control point output, skip the control point phase. */
   if (!needs_control_point_phase(emit)) {
      if (!emit->key.tcs.vertices_out) {
         /**
          * If the tcs does not explicitly generate any control point output
          * and the tes does not use any input control point, then
          * emit an empty control point phase with zero output control
          * point count.
          */
         emit_control_point_phase_instruction(emit);

         /**
          * Since this is an empty control point phase, we will need to
          * add input signatures when we parse the tcs again in the
          * patch constant phase.
          */
         emit->tcs.fork_phase_add_signature = true;
      }
      else {
         /**
          * Before skipping the control point phase, add the signature for
          * the passthrough control point.
          */
         emit_passthrough_control_point_signature(emit);
      }
      return false;
   }

   /* Start the control point phase in the hull shader */
   emit_control_point_phase_instruction(emit);

   /* Declare the output control point ID */
   if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
      /* Add invocation id declaration if it does not exist */
      emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
   }

   /* Declare vOutputControlPointID as a 0-component input */
   emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                          VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
                          VGPU10_OPERAND_INDEX_0D,
                          0, 1,
                          VGPU10_NAME_UNDEFINED,
                          VGPU10_OPERAND_0_COMPONENT, 0,
                          0,
                          VGPU10_INTERPOLATION_CONSTANT, true,
                          SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);

   /* Declare the primitive ID input if the shader uses it */
   if (emit->tcs.prim_id_index != INVALID_INDEX) {
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
                             VGPU10_OPERAND_INDEX_0D,
                             0, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_0_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             0,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
   }

   return true;
}
3602
3603
/**
 * Start the hull shader patch constant phase and
 * do the second pass of the tcs translation and emit
 * the relevant declarations and instructions for this phase.
 *
 * \param parse  the TGSI parser; its position is rewound to the first
 *               instruction token so the shader body is re-parsed.
 * \return true on success, false if instruction emission failed.
 */
static bool
emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
                                      struct tgsi_parse_context *parse)
{
   unsigned inst_number = 0;
   bool ret = true;
   VGPU10OpcodeToken0 opcode0;

   emit->skip_instruction = false;

   /* Start the patch constant phase */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   end_emit_instruction(emit);

   /* Set the current phase to patch constant phase */
   emit->tcs.control_point_phase = false;

   /* Declare the primitive ID input if the shader uses it */
   if (emit->tcs.prim_id_index != INVALID_INDEX) {
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
                             VGPU10_OPERAND_INDEX_0D,
                             0, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_0_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             0,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
   }

   /* Emit declarations for this phase.  An index range declaration is
    * required (SM5) whenever inputs/outputs are indirectly addressed.
    */
   emit->index_range.required =
      emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
   emit_tcs_input_declarations(emit);

   /* Emit any pending input index range declaration */
   if (emit->index_range.start_index != INVALID_INDEX) {
      emit_index_range_declaration(emit);
   }

   emit->index_range.required =
      emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
   emit_tcs_output_declarations(emit);

   /* Emit any pending output index range declaration */
   if (emit->index_range.start_index != INVALID_INDEX) {
      emit_index_range_declaration(emit);
   }
   emit->index_range.required = false;

   emit_temporaries_declaration(emit);

   /* Reset the token position to the first instruction token
    * in preparation for the second pass of the shader
    */
   parse->Position = emit->tcs.instruction_token_pos;

   while (!tgsi_parse_end_of_tokens(parse)) {
      tgsi_parse_token(parse);

      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
      ret = emit_vgpu10_instruction(emit, inst_number++,
                                    &parse->FullToken.FullInstruction);

      /* Usually this applies to TCS only. If shader is reading output of
       * patch constant in fork phase, we should reemit all instructions
       * which are writing into output of patch constant in fork phase
       * to store results into temporaries.
       */
      assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
      if (emit->reemit_instruction) {
         assert(emit->unit == PIPE_SHADER_TESS_CTRL);
         ret = emit_vgpu10_instruction(emit, inst_number,
                                       &parse->FullToken.FullInstruction);
      } else if (emit->reemit_rawbuf_instruction) {
         ret = emit_rawbuf_instruction(emit, inst_number,
                                       &parse->FullToken.FullInstruction);
      }

      if (!ret)
         return false;
   }

   return true;
}
3695
3696
3697 /**
3698 * Emit the thread group declaration for compute shader.
3699 */
3700 static void
emit_compute_shader_declarations(struct svga_shader_emitter_v10 * emit)3701 emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
3702 {
3703 VGPU10OpcodeToken0 opcode0;
3704
3705 opcode0.value = 0;
3706 opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
3707 begin_emit_instruction(emit);
3708 emit_dword(emit, opcode0.value);
3709 emit_dword(emit, emit->cs.block_width);
3710 emit_dword(emit, emit->cs.block_height);
3711 emit_dword(emit, emit->cs.block_depth);
3712 end_emit_instruction(emit);
3713 }
3714
3715
3716 /**
3717 * Emit index range declaration.
3718 */
3719 static bool
emit_index_range_declaration(struct svga_shader_emitter_v10 * emit)3720 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3721 {
3722 if (emit->version < 50)
3723 return true;
3724
3725 assert(emit->index_range.start_index != INVALID_INDEX);
3726 assert(emit->index_range.count != 0);
3727 assert(emit->index_range.required);
3728 assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3729 assert(emit->index_range.dim != 0);
3730 assert(emit->index_range.size != 0);
3731
3732 VGPU10OpcodeToken0 opcode0;
3733 VGPU10OperandToken0 operand0;
3734
3735 opcode0.value = 0;
3736 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3737
3738 operand0.value = 0;
3739 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3740 operand0.indexDimension = emit->index_range.dim;
3741 operand0.operandType = emit->index_range.operandType;
3742 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3743 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3744
3745 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3746 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3747
3748 begin_emit_instruction(emit);
3749 emit_dword(emit, opcode0.value);
3750 emit_dword(emit, operand0.value);
3751
3752 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3753 emit_dword(emit, emit->index_range.size);
3754 emit_dword(emit, emit->index_range.start_index);
3755 emit_dword(emit, emit->index_range.count);
3756 }
3757 else {
3758 emit_dword(emit, emit->index_range.start_index);
3759 emit_dword(emit, emit->index_range.count);
3760 }
3761
3762 end_emit_instruction(emit);
3763
3764 /* Reset fields in emit->index_range struct except
3765 * emit->index_range.required which will be reset afterwards
3766 */
3767 emit->index_range.count = 0;
3768 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3769 emit->index_range.start_index = INVALID_INDEX;
3770 emit->index_range.size = 0;
3771 emit->index_range.dim = 0;
3772
3773 return true;
3774 }
3775
3776
3777 /**
3778 * Emit a vgpu10 declaration "instruction".
3779 * \param index the register index
3780 * \param size array size of the operand. In most cases, it is 1,
3781 * but for inputs to geometry shader, the array size varies
3782 * depending on the primitive type.
3783 */
3784 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)3785 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3786 VGPU10OpcodeToken0 opcode0,
3787 VGPU10OperandToken0 operand0,
3788 VGPU10NameToken name_token,
3789 unsigned index, unsigned size)
3790 {
3791 assert(opcode0.opcodeType);
3792 assert(operand0.mask ||
3793 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3794 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3795 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3796 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3797 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3798 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3799 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3800 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3801
3802 begin_emit_instruction(emit);
3803 emit_dword(emit, opcode0.value);
3804
3805 emit_dword(emit, operand0.value);
3806
3807 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3808 /* Next token is the index of the register to declare */
3809 emit_dword(emit, index);
3810 }
3811 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3812 /* Next token is the size of the register */
3813 emit_dword(emit, size);
3814
3815 /* Followed by the index of the register */
3816 emit_dword(emit, index);
3817 }
3818
3819 if (name_token.value) {
3820 emit_dword(emit, name_token.value);
3821 }
3822
3823 end_emit_instruction(emit);
3824 }
3825
3826
/**
 * Emit the declaration for a shader input.
 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
 * \param dim index dimension
 * \param index the input register index
 * \param size array size of the operand. In most cases, it is 1,
 *             but for inputs to geometry shader, the array size varies
 *             depending on the primitive type. For tessellation control
 *             shader, the array size is the vertex count per patch.
 * \param name one of VGPU10_NAME_x
 * \param numComp number of components
 * \param selMode component selection mode
 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode interpolation mode
 * \param addSignature if true, also record this input in the shader
 *                     signature (input or patch constant section)
 * \param sgnName semantic name used for the signature entry
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       VGPU10_OPCODE_TYPE opcodeType,
                       VGPU10_OPERAND_TYPE operandType,
                       VGPU10_OPERAND_INDEX_DIMENSION dim,
                       unsigned index, unsigned size,
                       VGPU10_SYSTEM_NAME name,
                       VGPU10_OPERAND_NUM_COMPONENTS numComp,
                       VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
                       unsigned usageMask,
                       VGPU10_INTERPOLATION_MODE interpMode,
                       bool addSignature,
                       SVGA3dDXSignatureSemanticName sgnName)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* Sanity-check the arguments against the set of values this
    * translator is prepared to emit.
    */
   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
          operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);

   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE ||
          name == VGPU10_NAME_SAMPLE_INDEX ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);

   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);

   check_register_index(emit, opcodeType, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);

   if (addSignature) {
      struct svga_shader_signature *sgn = &emit->signature;
      if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
         /* Set patch constant signature */
         SVGA3dDXShaderSignatureEntry *sgnEntry =
            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
         set_shader_signature_entry(sgnEntry, index,
                                    sgnName, usageMask,
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);

      } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
                 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
         /* Set input signature */
         SVGA3dDXShaderSignatureEntry *sgnEntry =
            &sgn->inputs[sgn->header.numInputSignatures++];
         set_shader_signature_entry(sgnEntry, index,
                                    sgnName, usageMask,
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
      }
   }

   /* Accumulate contiguous input registers into emit->index_range so
    * that a single dcl_index_range can be emitted for each run of
    * dynamically-indexed registers.
    */
   if (emit->index_range.required) {
      /* Here, index_range declaration is only applicable for opcodeType
       * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
       * for operandType VGPU10_OPERAND_TYPE_INPUT,
       * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
       * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
       */
      if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
           opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
          (operandType != VGPU10_OPERAND_TYPE_INPUT &&
           operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
           operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
         /* Not eligible: flush any pending range before returning. */
         if (emit->index_range.start_index != INVALID_INDEX) {
            emit_index_range_declaration(emit);
         }
         return;
      }

      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
         /* Need record new index_range */
         emit->index_range.count = 1;
         emit->index_range.operandType = operandType;
         emit->index_range.start_index = index;
         emit->index_range.size = size;
         emit->index_range.dim = dim;
      }
      else if (index !=
               (emit->index_range.start_index + emit->index_range.count) ||
               emit->index_range.operandType != operandType) {
         /* Input index is not contiguous with index range or operandType is
          * different from index range's operandType. We need to emit current
          * index_range first and then start recording next index range.
          */
         emit_index_range_declaration(emit);

         emit->index_range.count = 1;
         emit->index_range.operandType = operandType;
         emit->index_range.start_index = index;
         emit->index_range.size = size;
         emit->index_range.dim = dim;
      }
      else if (emit->index_range.operandType == operandType) {
         /* Since input index is contiguous with index range and operandType
          * is same as index range's operandType, increment index range count.
          */
         emit->index_range.count++;
      }
   }
}
3994
3995
3996 /**
3997 * Emit the declaration for a shader output.
3998 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
3999 * \param index the output register index
4000 * \param name one of VGPU10_NAME_x
4001 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
4002 */
4003 static void
emit_output_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE type,unsigned index,VGPU10_SYSTEM_NAME name,unsigned writemask,bool addSignature,SVGA3dDXSignatureSemanticName sgnName)4004 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
4005 VGPU10_OPCODE_TYPE type, unsigned index,
4006 VGPU10_SYSTEM_NAME name,
4007 unsigned writemask,
4008 bool addSignature,
4009 SVGA3dDXSignatureSemanticName sgnName)
4010 {
4011 VGPU10OpcodeToken0 opcode0;
4012 VGPU10OperandToken0 operand0;
4013 VGPU10NameToken name_token;
4014
4015 assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
4016 assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
4017 type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
4018 type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
4019 assert(name == VGPU10_NAME_UNDEFINED ||
4020 name == VGPU10_NAME_POSITION ||
4021 name == VGPU10_NAME_PRIMITIVE_ID ||
4022 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
4023 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
4024 name == VGPU10_NAME_CLIP_DISTANCE);
4025
4026 check_register_index(emit, type, index);
4027
4028 opcode0.value = operand0.value = name_token.value = 0;
4029
4030 opcode0.opcodeType = type;
4031 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4032 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4033 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4034 operand0.mask = writemask;
4035 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4036 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4037
4038 name_token.name = name;
4039
4040 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4041
4042 /* Capture output signature */
4043 if (addSignature) {
4044 struct svga_shader_signature *sgn = &emit->signature;
4045 SVGA3dDXShaderSignatureEntry *sgnEntry =
4046 &sgn->outputs[sgn->header.numOutputSignatures++];
4047 set_shader_signature_entry(sgnEntry, index,
4048 sgnName, writemask,
4049 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4050 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4051 }
4052
4053 if (emit->index_range.required) {
4054 /* Here, index_range declaration is only applicable for opcodeType
4055 * VGPU10_OPCODE_DCL_OUTPUT and for operandType
4056 * VGPU10_OPERAND_TYPE_OUTPUT.
4057 */
4058 if (type != VGPU10_OPCODE_DCL_OUTPUT) {
4059 if (emit->index_range.start_index != INVALID_INDEX) {
4060 emit_index_range_declaration(emit);
4061 }
4062 return;
4063 }
4064
4065 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
4066 /* Need record new index_range */
4067 emit->index_range.count = 1;
4068 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4069 emit->index_range.start_index = index;
4070 emit->index_range.size = 1;
4071 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4072 }
4073 else if (index !=
4074 (emit->index_range.start_index + emit->index_range.count)) {
4075 /* Output index is not contiguous with index range. We need to
4076 * emit current index_range first and then start recording next
4077 * index range.
4078 */
4079 emit_index_range_declaration(emit);
4080
4081 emit->index_range.count = 1;
4082 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4083 emit->index_range.start_index = index;
4084 emit->index_range.size = 1;
4085 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4086 }
4087 else {
4088 /* Since output index is contiguous with index range, increment
4089 * index range count.
4090 */
4091 emit->index_range.count++;
4092 }
4093 }
4094 }
4095
4096
4097 /**
4098 * Emit the declaration for the fragment depth output.
4099 */
4100 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)4101 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
4102 {
4103 VGPU10OpcodeToken0 opcode0;
4104 VGPU10OperandToken0 operand0;
4105 VGPU10NameToken name_token;
4106
4107 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4108
4109 opcode0.value = operand0.value = name_token.value = 0;
4110
4111 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4112 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
4113 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
4114 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4115 operand0.mask = 0;
4116
4117 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4118 }
4119
4120
4121 /**
4122 * Emit the declaration for the fragment sample mask/coverage output.
4123 */
4124 static void
emit_samplemask_output_declaration(struct svga_shader_emitter_v10 * emit)4125 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
4126 {
4127 VGPU10OpcodeToken0 opcode0;
4128 VGPU10OperandToken0 operand0;
4129 VGPU10NameToken name_token;
4130
4131 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4132 assert(emit->version >= 41);
4133
4134 opcode0.value = operand0.value = name_token.value = 0;
4135
4136 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4137 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
4138 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
4139 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4140 operand0.mask = 0;
4141
4142 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4143 }
4144
4145
4146 /**
4147 * Emit output declarations for fragment shader.
4148 */
4149 static void
emit_fs_output_declarations(struct svga_shader_emitter_v10 * emit)4150 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
4151 {
4152 unsigned int i;
4153
4154 for (i = 0; i < emit->info.num_outputs; i++) {
4155 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
4156 const enum tgsi_semantic semantic_name =
4157 emit->info.output_semantic_name[i];
4158 const unsigned semantic_index = emit->info.output_semantic_index[i];
4159 unsigned index = i;
4160
4161 if (semantic_name == TGSI_SEMANTIC_COLOR) {
4162 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
4163
4164 emit->fs.color_out_index[semantic_index] = index;
4165
4166 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
4167 index + 1);
4168
4169 /* The semantic index is the shader's color output/buffer index */
4170 emit_output_declaration(emit,
4171 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
4172 VGPU10_NAME_UNDEFINED,
4173 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4174 true,
4175 map_tgsi_semantic_to_sgn_name(semantic_name));
4176
4177 if (semantic_index == 0) {
4178 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
4179 /* Emit declarations for the additional color outputs
4180 * for broadcasting.
4181 */
4182 unsigned j;
4183 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
4184 /* Allocate a new output index */
4185 unsigned idx = emit->info.num_outputs + j - 1;
4186 emit->fs.color_out_index[j] = idx;
4187 emit_output_declaration(emit,
4188 VGPU10_OPCODE_DCL_OUTPUT, idx,
4189 VGPU10_NAME_UNDEFINED,
4190 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4191 true,
4192 map_tgsi_semantic_to_sgn_name(semantic_name));
4193 emit->info.output_semantic_index[idx] = j;
4194 }
4195
4196 emit->fs.num_color_outputs =
4197 emit->key.fs.write_color0_to_n_cbufs;
4198 }
4199 }
4200 }
4201 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4202 /* Fragment depth output */
4203 emit_fragdepth_output_declaration(emit);
4204 }
4205 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
4206 /* Sample mask output */
4207 emit_samplemask_output_declaration(emit);
4208 }
4209 else {
4210 assert(!"Bad output semantic name");
4211 }
4212 }
4213 }
4214
4215
4216 /**
4217 * Emit common output declaration for vertex processing.
4218 */
4219 static void
emit_vertex_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned writemask,bool addSignature)4220 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
4221 unsigned index, unsigned writemask,
4222 bool addSignature)
4223 {
4224 const enum tgsi_semantic semantic_name =
4225 emit->info.output_semantic_name[index];
4226 const unsigned semantic_index = emit->info.output_semantic_index[index];
4227 unsigned name, type;
4228 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4229
4230 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
4231 emit->unit != PIPE_SHADER_COMPUTE);
4232
4233 switch (semantic_name) {
4234 case TGSI_SEMANTIC_POSITION:
4235 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4236 /* position will be declared in control point only */
4237 assert(emit->tcs.control_point_phase);
4238 type = VGPU10_OPCODE_DCL_OUTPUT;
4239 name = VGPU10_NAME_UNDEFINED;
4240 emit_output_declaration(emit, type, index, name, final_mask, true,
4241 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4242 return;
4243 }
4244 else {
4245 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4246 name = VGPU10_NAME_POSITION;
4247 }
4248 /* Save the index of the vertex position output register */
4249 emit->vposition.out_index = index;
4250 break;
4251 case TGSI_SEMANTIC_CLIPDIST:
4252 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4253 name = VGPU10_NAME_CLIP_DISTANCE;
4254 /* save the starting index of the clip distance output register */
4255 if (semantic_index == 0)
4256 emit->clip_dist_out_index = index;
4257 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
4258 if (final_mask == 0x0)
4259 return; /* discard this do-nothing declaration */
4260 break;
4261 case TGSI_SEMANTIC_CLIPVERTEX:
4262 type = VGPU10_OPCODE_DCL_OUTPUT;
4263 name = VGPU10_NAME_UNDEFINED;
4264 emit->clip_vertex_out_index = index;
4265 break;
4266 default:
4267 /* generic output */
4268 type = VGPU10_OPCODE_DCL_OUTPUT;
4269 name = VGPU10_NAME_UNDEFINED;
4270 }
4271
4272 emit_output_declaration(emit, type, index, name, final_mask, addSignature,
4273 map_tgsi_semantic_to_sgn_name(semantic_name));
4274 }
4275
4276
4277 /**
4278 * Emit declaration for outputs in vertex shader.
4279 */
4280 static void
emit_vs_output_declarations(struct svga_shader_emitter_v10 * emit)4281 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
4282 {
4283 unsigned i;
4284 for (i = 0; i < emit->info.num_outputs; i++) {
4285 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4286 }
4287 }
4288
4289
4290 /**
4291 * A helper function to determine the writemask for an output
4292 * for the specified stream.
4293 */
4294 static unsigned
output_writemask_for_stream(unsigned stream,uint8_t output_streams,uint8_t output_usagemask)4295 output_writemask_for_stream(unsigned stream, uint8_t output_streams,
4296 uint8_t output_usagemask)
4297 {
4298 unsigned i;
4299 unsigned writemask = 0;
4300
4301 for (i = 0; i < 4; i++) {
4302 if ((output_streams & 0x3) == stream)
4303 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
4304 output_streams >>= 2;
4305 }
4306 return writemask & output_usagemask;
4307 }
4308
4309
/**
 * Emit declaration for outputs in geometry shader.
 *
 * Emits per-stream DCL_STREAM (SM5 only) and output-topology
 * declarations, then the per-output register declarations for each
 * stream, and finally builds the output signature in a separate pass.
 */
static void
emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;
   VGPU10OpcodeToken0 opcode0;
   unsigned numStreamsSupported = 1;
   int s;

   /* Multiple GS streams are an SM5 (vgpu10.1+) feature */
   if (emit->version >= 50) {
      numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
   }

   /**
    * Start emitting from the last stream first, so we end with
    * stream 0, so any of the auxiliary output declarations will
    * go to stream 0.
    */
   for (s = numStreamsSupported-1; s >= 0; s--) {

      if (emit->info.num_stream_output_components[s] == 0)
         continue;

      if (emit->version >= 50) {
         /* DCL_STREAM stream */
         begin_emit_instruction(emit);
         emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, false);
         emit_stream_register(emit, s);
         end_emit_instruction(emit);
      }

      /* emit output primitive topology declaration */
      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
      opcode0.primitiveTopology = emit->gs.prim_topology;
      emit_property_instruction(emit, opcode0, 0, 0);

      for (i = 0; i < emit->info.num_outputs; i++) {
         unsigned writemask;

         /* find out the writemask for this stream */
         writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
                                                 emit->output_usage_mask[i]);

         if (writemask) {
            enum tgsi_semantic semantic_name =
               emit->info.output_semantic_name[i];

            /* TODO: Still need to take care of a special case where a
             * single varying spans across multiple output registers.
             */
            switch(semantic_name) {
            case TGSI_SEMANTIC_PRIMID:
               /* primitive id: system-generated value output */
               emit_output_declaration(emit,
                                       VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
                                       VGPU10_NAME_PRIMITIVE_ID,
                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                       false,
                                       map_tgsi_semantic_to_sgn_name(semantic_name));
               break;
            case TGSI_SEMANTIC_LAYER:
               /* render target array index: x component only */
               emit_output_declaration(emit,
                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
                                       VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
                                       false,
                                       map_tgsi_semantic_to_sgn_name(semantic_name));
               break;
            case TGSI_SEMANTIC_VIEWPORT_INDEX:
               /* viewport array index: x component only; remember the
                * register for later use by the viewport-index code.
                */
               emit_output_declaration(emit,
                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
                                       VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
                                       false,
                                       map_tgsi_semantic_to_sgn_name(semantic_name));
               emit->gs.viewport_index_out_index = i;
               break;
            default:
               emit_vertex_output_declaration(emit, i, writemask, false);
            }
         }
      }
   }

   /* For geometry shader outputs, it is possible the same register is
    * declared multiple times for different streams. So to avoid
    * redundant signature entries, geometry shader output signature is done
    * outside of the declaration.
    */
   struct svga_shader_signature *sgn = &emit->signature;
   SVGA3dDXShaderSignatureEntry *sgnEntry;

   for (i = 0; i < emit->info.num_outputs; i++) {
      if (emit->output_usage_mask[i]) {
         enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];

         sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
         set_shader_signature_entry(sgnEntry, i,
                                    map_tgsi_semantic_to_sgn_name(sem_name),
                                    emit->output_usage_mask[i],
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
      }
   }
}
4417
4418
4419 /**
4420 * Emit the declaration for the tess inner/outer output.
4421 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4422 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4423 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4424 */
4425 static void
emit_tesslevel_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned opcodeType,unsigned operandType,VGPU10_SYSTEM_NAME name,SVGA3dDXSignatureSemanticName sgnName)4426 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4427 unsigned index, unsigned opcodeType,
4428 unsigned operandType, VGPU10_SYSTEM_NAME name,
4429 SVGA3dDXSignatureSemanticName sgnName)
4430 {
4431 VGPU10OpcodeToken0 opcode0;
4432 VGPU10OperandToken0 operand0;
4433 VGPU10NameToken name_token;
4434
4435 assert(emit->version >= 50);
4436 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4437 (emit->key.tcs.prim_mode == MESA_PRIM_LINES &&
4438 name == VGPU10_NAME_UNDEFINED));
4439 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4440
4441 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4442 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4443
4444 opcode0.value = operand0.value = name_token.value = 0;
4445
4446 opcode0.opcodeType = opcodeType;
4447 operand0.operandType = operandType;
4448 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4449 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4450 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4451 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4452 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4453
4454 name_token.name = name;
4455 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4456
4457 /* Capture patch constant signature */
4458 struct svga_shader_signature *sgn = &emit->signature;
4459 SVGA3dDXShaderSignatureEntry *sgnEntry =
4460 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4461 set_shader_signature_entry(sgnEntry, index,
4462 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4463 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4464 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4465 }
4466
4467
4468 /**
4469 * Emit output declarations for tessellation control shader.
4470 */
4471 static void
emit_tcs_output_declarations(struct svga_shader_emitter_v10 * emit)4472 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4473 {
4474 unsigned int i;
4475 unsigned outputIndex = emit->num_outputs;
4476 struct svga_shader_signature *sgn = &emit->signature;
4477
4478 /**
4479 * Initialize patch_generic_out_count so it won't be counted twice
4480 * since this function is called twice, one for control point phase
4481 * and another time for patch constant phase.
4482 */
4483 emit->tcs.patch_generic_out_count = 0;
4484
4485 for (i = 0; i < emit->info.num_outputs; i++) {
4486 unsigned index = i;
4487 const enum tgsi_semantic semantic_name =
4488 emit->info.output_semantic_name[i];
4489
4490 switch (semantic_name) {
4491 case TGSI_SEMANTIC_TESSINNER:
4492 emit->tcs.inner.tgsi_index = i;
4493
4494 /* skip per-patch output declarations in control point phase */
4495 if (emit->tcs.control_point_phase)
4496 break;
4497
4498 emit->tcs.inner.out_index = outputIndex;
4499 switch (emit->key.tcs.prim_mode) {
4500 case MESA_PRIM_QUADS:
4501 emit_tesslevel_declaration(emit, outputIndex++,
4502 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4503 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4504 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4505
4506 emit_tesslevel_declaration(emit, outputIndex++,
4507 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4508 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4509 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4510 break;
4511 case MESA_PRIM_TRIANGLES:
4512 emit_tesslevel_declaration(emit, outputIndex++,
4513 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4514 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4515 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4516 break;
4517 case MESA_PRIM_LINES:
4518 break;
4519 default:
4520 debug_printf("Unsupported primitive type");
4521 }
4522 break;
4523
4524 case TGSI_SEMANTIC_TESSOUTER:
4525 emit->tcs.outer.tgsi_index = i;
4526
4527 /* skip per-patch output declarations in control point phase */
4528 if (emit->tcs.control_point_phase)
4529 break;
4530
4531 emit->tcs.outer.out_index = outputIndex;
4532 switch (emit->key.tcs.prim_mode) {
4533 case MESA_PRIM_QUADS:
4534 for (int j = 0; j < 4; j++) {
4535 emit_tesslevel_declaration(emit, outputIndex++,
4536 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4537 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4538 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4539 }
4540 break;
4541 case MESA_PRIM_TRIANGLES:
4542 for (int j = 0; j < 3; j++) {
4543 emit_tesslevel_declaration(emit, outputIndex++,
4544 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4545 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4546 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4547 }
4548 break;
4549 case MESA_PRIM_LINES:
4550 for (int j = 0; j < 2; j++) {
4551 emit_tesslevel_declaration(emit, outputIndex++,
4552 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4553 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4554 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4555 }
4556 break;
4557 default:
4558 debug_printf("Unsupported primitive type");
4559 }
4560 break;
4561
4562 case TGSI_SEMANTIC_PATCH:
4563 if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4564 emit->tcs.patch_generic_out_index= i;
4565 emit->tcs.patch_generic_out_count++;
4566
4567 /* skip per-patch output declarations in control point phase */
4568 if (emit->tcs.control_point_phase)
4569 break;
4570
4571 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4572 VGPU10_NAME_UNDEFINED,
4573 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4574 false,
4575 map_tgsi_semantic_to_sgn_name(semantic_name));
4576
4577 SVGA3dDXShaderSignatureEntry *sgnEntry =
4578 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4579 set_shader_signature_entry(sgnEntry, index,
4580 map_tgsi_semantic_to_sgn_name(semantic_name),
4581 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4582 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4583 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4584
4585 break;
4586
4587 default:
4588 /* save the starting index of control point outputs */
4589 if (emit->tcs.control_point_out_index == INVALID_INDEX)
4590 emit->tcs.control_point_out_index = i;
4591 emit->tcs.control_point_out_count++;
4592
4593 /* skip control point output declarations in patch constant phase */
4594 if (!emit->tcs.control_point_phase)
4595 break;
4596
4597 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4598 true);
4599
4600 }
4601 }
4602
4603 if (emit->tcs.control_point_phase) {
4604 /**
4605 * Add missing control point output in control point phase.
4606 */
4607 if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4608 /* use register index after tessellation factors */
4609 switch (emit->key.tcs.prim_mode) {
4610 case MESA_PRIM_QUADS:
4611 emit->tcs.control_point_out_index = outputIndex + 6;
4612 break;
4613 case MESA_PRIM_TRIANGLES:
4614 emit->tcs.control_point_out_index = outputIndex + 4;
4615 break;
4616 default:
4617 emit->tcs.control_point_out_index = outputIndex + 2;
4618 break;
4619 }
4620 emit->tcs.control_point_out_count++;
4621 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4622 emit->tcs.control_point_out_index,
4623 VGPU10_NAME_POSITION,
4624 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4625 true,
4626 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4627
4628 /* If tcs does not output any control point output,
4629 * we can end the hull shader control point phase here
4630 * after emitting the default control point output.
4631 */
4632 emit->skip_instruction = true;
4633 }
4634 }
4635 else {
4636 if (emit->tcs.outer.out_index == INVALID_INDEX) {
4637 /* since the TCS did not declare out outer tess level output register,
4638 * we declare it here for patch constant phase only.
4639 */
4640 emit->tcs.outer.out_index = outputIndex;
4641 if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
4642 for (int i = 0; i < 4; i++) {
4643 emit_tesslevel_declaration(emit, outputIndex++,
4644 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4645 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4646 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4647 }
4648 }
4649 else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
4650 for (int i = 0; i < 3; i++) {
4651 emit_tesslevel_declaration(emit, outputIndex++,
4652 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4653 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4654 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4655 }
4656 }
4657 }
4658
4659 if (emit->tcs.inner.out_index == INVALID_INDEX) {
4660 /* since the TCS did not declare out inner tess level output register,
4661 * we declare it here
4662 */
4663 emit->tcs.inner.out_index = outputIndex;
4664 if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
4665 emit_tesslevel_declaration(emit, outputIndex++,
4666 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4667 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4668 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4669 emit_tesslevel_declaration(emit, outputIndex++,
4670 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4671 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4672 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4673 }
4674 else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
4675 emit_tesslevel_declaration(emit, outputIndex++,
4676 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4677 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4678 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4679 }
4680 }
4681 }
4682 emit->num_outputs = outputIndex;
4683 }
4684
4685
4686 /**
4687 * Emit output declarations for tessellation evaluation shader.
4688 */
4689 static void
emit_tes_output_declarations(struct svga_shader_emitter_v10 * emit)4690 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4691 {
4692 unsigned int i;
4693
4694 for (i = 0; i < emit->info.num_outputs; i++) {
4695 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4696 }
4697 }
4698
4699
4700 /**
4701 * Emit the declaration for a system value input/output.
4702 */
4703 static void
emit_system_value_declaration(struct svga_shader_emitter_v10 * emit,enum tgsi_semantic semantic_name,unsigned index)4704 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4705 enum tgsi_semantic semantic_name, unsigned index)
4706 {
4707 switch (semantic_name) {
4708 case TGSI_SEMANTIC_INSTANCEID:
4709 index = alloc_system_value_index(emit, index);
4710 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4711 VGPU10_OPERAND_TYPE_INPUT,
4712 VGPU10_OPERAND_INDEX_1D,
4713 index, 1,
4714 VGPU10_NAME_INSTANCE_ID,
4715 VGPU10_OPERAND_4_COMPONENT,
4716 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4717 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4718 VGPU10_INTERPOLATION_UNDEFINED, true,
4719 map_tgsi_semantic_to_sgn_name(semantic_name));
4720 break;
4721 case TGSI_SEMANTIC_VERTEXID:
4722 emit->vs.vertex_id_sys_index = index;
4723 index = alloc_system_value_index(emit, index);
4724 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4725 VGPU10_OPERAND_TYPE_INPUT,
4726 VGPU10_OPERAND_INDEX_1D,
4727 index, 1,
4728 VGPU10_NAME_VERTEX_ID,
4729 VGPU10_OPERAND_4_COMPONENT,
4730 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4731 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4732 VGPU10_INTERPOLATION_UNDEFINED, true,
4733 map_tgsi_semantic_to_sgn_name(semantic_name));
4734 break;
4735 case TGSI_SEMANTIC_SAMPLEID:
4736 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4737 emit->fs.sample_id_sys_index = index;
4738 index = alloc_system_value_index(emit, index);
4739 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4740 VGPU10_OPERAND_TYPE_INPUT,
4741 VGPU10_OPERAND_INDEX_1D,
4742 index, 1,
4743 VGPU10_NAME_SAMPLE_INDEX,
4744 VGPU10_OPERAND_4_COMPONENT,
4745 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4746 VGPU10_OPERAND_4_COMPONENT_MASK_X,
4747 VGPU10_INTERPOLATION_CONSTANT, true,
4748 map_tgsi_semantic_to_sgn_name(semantic_name));
4749 break;
4750 case TGSI_SEMANTIC_SAMPLEPOS:
4751 /* This system value contains the position of the current sample
4752 * when using per-sample shading. We implement this by calling
4753 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4754 * index as the argument. See emit_sample_position_instructions().
4755 */
4756 assert(emit->version >= 41);
4757 emit->fs.sample_pos_sys_index = index;
4758 index = alloc_system_value_index(emit, index);
4759 break;
4760 case TGSI_SEMANTIC_INVOCATIONID:
4761 /* Note: invocation id input is mapped to different register depending
4762 * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4763 * In TCS, it will be mapped to vOutputControlPointID#.
4764 * Since in both cases, the mapped name is unique rather than
4765 * just a generic input name ("v#"), so there is no need to remap
4766 * the index value.
4767 */
4768 assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4769 emit->unit == PIPE_SHADER_TESS_CTRL);
4770 assert(emit->version >= 50);
4771
4772 if (emit->unit == PIPE_SHADER_GEOMETRY) {
4773 emit->gs.invocation_id_sys_index = index;
4774 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4775 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4776 VGPU10_OPERAND_INDEX_0D,
4777 index, 1,
4778 VGPU10_NAME_UNDEFINED,
4779 VGPU10_OPERAND_0_COMPONENT,
4780 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4781 0,
4782 VGPU10_INTERPOLATION_UNDEFINED, true,
4783 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4784 } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4785 /* The emission of the control point id will be done
4786 * in the control point phase in emit_hull_shader_control_point_phase().
4787 */
4788 emit->tcs.invocation_id_sys_index = index;
4789 }
4790 break;
4791 case TGSI_SEMANTIC_SAMPLEMASK:
4792 /* Note: the PS sample mask input has a unique name ("vCoverage#")
4793 * rather than just a generic input name ("v#") so no need to remap the
4794 * index value.
4795 */
4796 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4797 assert(emit->version >= 50);
4798 emit->fs.sample_mask_in_sys_index = index;
4799 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4800 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4801 VGPU10_OPERAND_INDEX_0D,
4802 index, 1,
4803 VGPU10_NAME_UNDEFINED,
4804 VGPU10_OPERAND_1_COMPONENT,
4805 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4806 0,
4807 VGPU10_INTERPOLATION_CONSTANT, true,
4808 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4809 break;
4810 case TGSI_SEMANTIC_TESSCOORD:
4811 assert(emit->version >= 50);
4812
4813 unsigned usageMask = 0;
4814
4815 if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
4816 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4817 }
4818 else if (emit->tes.prim_mode == MESA_PRIM_LINES ||
4819 emit->tes.prim_mode == MESA_PRIM_QUADS) {
4820 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4821 }
4822
4823 emit->tes.tesscoord_sys_index = index;
4824 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4825 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4826 VGPU10_OPERAND_INDEX_0D,
4827 index, 1,
4828 VGPU10_NAME_UNDEFINED,
4829 VGPU10_OPERAND_4_COMPONENT,
4830 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4831 usageMask,
4832 VGPU10_INTERPOLATION_UNDEFINED, true,
4833 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4834 break;
4835 case TGSI_SEMANTIC_TESSINNER:
4836 assert(emit->version >= 50);
4837 emit->tes.inner.tgsi_index = index;
4838 break;
4839 case TGSI_SEMANTIC_TESSOUTER:
4840 assert(emit->version >= 50);
4841 emit->tes.outer.tgsi_index = index;
4842 break;
4843 case TGSI_SEMANTIC_VERTICESIN:
4844 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4845 assert(emit->version >= 50);
4846
4847 /* save the system value index */
4848 emit->tcs.vertices_per_patch_index = index;
4849 break;
4850 case TGSI_SEMANTIC_PRIMID:
4851 assert(emit->version >= 50);
4852 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4853 emit->tcs.prim_id_index = index;
4854 }
4855 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4856 emit->tes.prim_id_index = index;
4857 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4858 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4859 VGPU10_OPERAND_INDEX_0D,
4860 index, 1,
4861 VGPU10_NAME_UNDEFINED,
4862 VGPU10_OPERAND_0_COMPONENT,
4863 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4864 0,
4865 VGPU10_INTERPOLATION_UNDEFINED, true,
4866 map_tgsi_semantic_to_sgn_name(semantic_name));
4867 }
4868 break;
4869 case TGSI_SEMANTIC_THREAD_ID:
4870 assert(emit->unit >= PIPE_SHADER_COMPUTE);
4871 assert(emit->version >= 50);
4872 emit->cs.thread_id_index = index;
4873 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4874 VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
4875 VGPU10_OPERAND_INDEX_0D,
4876 index, 1,
4877 VGPU10_NAME_UNDEFINED,
4878 VGPU10_OPERAND_4_COMPONENT,
4879 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4880 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4881 VGPU10_INTERPOLATION_UNDEFINED, true,
4882 map_tgsi_semantic_to_sgn_name(semantic_name));
4883 break;
4884 case TGSI_SEMANTIC_BLOCK_ID:
4885 assert(emit->unit >= PIPE_SHADER_COMPUTE);
4886 assert(emit->version >= 50);
4887 emit->cs.block_id_index = index;
4888 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4889 VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
4890 VGPU10_OPERAND_INDEX_0D,
4891 index, 1,
4892 VGPU10_NAME_UNDEFINED,
4893 VGPU10_OPERAND_4_COMPONENT,
4894 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4895 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4896 VGPU10_INTERPOLATION_UNDEFINED, true,
4897 map_tgsi_semantic_to_sgn_name(semantic_name));
4898 break;
4899 case TGSI_SEMANTIC_GRID_SIZE:
4900 assert(emit->unit == PIPE_SHADER_COMPUTE);
4901 assert(emit->version >= 50);
4902 emit->cs.grid_size.tgsi_index = index;
4903 break;
4904 default:
4905 debug_printf("unexpected system value semantic index %u / %s\n",
4906 semantic_name, tgsi_semantic_names[semantic_name]);
4907 }
4908 }
4909
4910 /**
4911 * Translate a TGSI declaration to VGPU10.
4912 */
4913 static bool
emit_vgpu10_declaration(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_declaration * decl)4914 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4915 const struct tgsi_full_declaration *decl)
4916 {
4917 switch (decl->Declaration.File) {
4918 case TGSI_FILE_INPUT:
4919 /* do nothing - see emit_input_declarations() */
4920 return true;
4921
4922 case TGSI_FILE_OUTPUT:
4923 assert(decl->Range.First == decl->Range.Last);
4924 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4925 return true;
4926
4927 case TGSI_FILE_TEMPORARY:
4928 /* Don't declare the temps here. Just keep track of how many
4929 * and emit the declaration later.
4930 */
4931 if (decl->Declaration.Array) {
4932 /* Indexed temporary array. Save the start index of the array
4933 * and the size of the array.
4934 */
4935 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4936 assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4937
4938 /* Save this array so we can emit the declaration for it later */
4939 create_temp_array(emit, arrayID, decl->Range.First,
4940 decl->Range.Last - decl->Range.First + 1,
4941 decl->Range.First);
4942 }
4943
4944 /* for all temps, indexed or not, keep track of highest index */
4945 emit->num_shader_temps = MAX2(emit->num_shader_temps,
4946 decl->Range.Last + 1);
4947 return true;
4948
4949 case TGSI_FILE_CONSTANT:
4950 /* Don't declare constants here. Just keep track and emit later. */
4951 {
4952 unsigned constbuf = 0, num_consts;
4953 if (decl->Declaration.Dimension) {
4954 constbuf = decl->Dim.Index2D;
4955 }
4956 /* We throw an assertion here when, in fact, the shader should never
4957 * have linked due to constbuf index out of bounds, so we shouldn't
4958 * have reached here.
4959 */
4960 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4961
4962 num_consts = MAX2(emit->num_shader_consts[constbuf],
4963 decl->Range.Last + 1);
4964
4965 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4966 debug_printf("Warning: constant buffer is declared to size [%u]"
4967 " but [%u] is the limit.\n",
4968 num_consts,
4969 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4970 emit->register_overflow = true;
4971 }
4972 /* The linker doesn't enforce the max UBO size so we clamp here */
4973 emit->num_shader_consts[constbuf] =
4974 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4975 }
4976 return true;
4977
4978 case TGSI_FILE_IMMEDIATE:
4979 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4980 return false;
4981
4982 case TGSI_FILE_SYSTEM_VALUE:
4983 emit_system_value_declaration(emit, decl->Semantic.Name,
4984 decl->Range.First);
4985 return true;
4986
4987 case TGSI_FILE_SAMPLER:
4988 /* Don't declare samplers here. Just keep track and emit later. */
4989 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4990 return true;
4991
4992 #if 0
4993 case TGSI_FILE_RESOURCE:
4994 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4995 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4996 assert(!"TGSI_FILE_RESOURCE not handled yet");
4997 return false;
4998 #endif
4999
5000 case TGSI_FILE_ADDRESS:
5001 emit->num_address_regs = MAX2(emit->num_address_regs,
5002 decl->Range.Last + 1);
5003 return true;
5004
5005 case TGSI_FILE_SAMPLER_VIEW:
5006 {
5007 unsigned unit = decl->Range.First;
5008 assert(decl->Range.First == decl->Range.Last);
5009 emit->sampler_target[unit] = decl->SamplerView.Resource;
5010
5011 /* Note: we can ignore YZW return types for now */
5012 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
5013 emit->sampler_view[unit] = true;
5014 }
5015 return true;
5016
5017 case TGSI_FILE_IMAGE:
5018 {
5019 unsigned unit = decl->Range.First;
5020 assert(decl->Range.First == decl->Range.Last);
5021 assert(unit < PIPE_MAX_SHADER_IMAGES);
5022 emit->image[unit] = decl->Image;
5023 emit->image_mask |= 1 << unit;
5024 emit->num_images++;
5025 }
5026 return true;
5027
5028 case TGSI_FILE_HW_ATOMIC:
5029 /* Declare the atomic buffer if it is not already declared. */
5030 if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
5031 emit->num_atomic_bufs++;
5032 emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
5033 }
5034
5035 /* Remember the maximum atomic counter index encountered */
5036 emit->max_atomic_counter_index =
5037 MAX2(emit->max_atomic_counter_index, decl->Range.Last);
5038 return true;
5039
5040 case TGSI_FILE_MEMORY:
5041 /* Record memory has been used. */
5042 if (emit->unit == PIPE_SHADER_COMPUTE &&
5043 decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
5044 emit->cs.shared_memory_declared = true;
5045 }
5046
5047 return true;
5048
5049 case TGSI_FILE_BUFFER:
5050 assert(emit->version >= 50);
5051 emit->num_shader_bufs++;
5052 return true;
5053
5054 default:
5055 assert(!"Unexpected type of declaration");
5056 return false;
5057 }
5058 }
5059
5060
5061 /**
5062 * Emit input declarations for fragment shader.
5063 */
5064 static void
emit_fs_input_declarations(struct svga_shader_emitter_v10 * emit)5065 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
5066 {
5067 unsigned i;
5068
5069 for (i = 0; i < emit->linkage.num_inputs; i++) {
5070 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5071 unsigned usage_mask = emit->info.input_usage_mask[i];
5072 unsigned index = emit->linkage.input_map[i];
5073 unsigned type, interpolationMode, name;
5074 unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
5075
5076 if (usage_mask == 0)
5077 continue; /* register is not actually used */
5078
5079 if (semantic_name == TGSI_SEMANTIC_POSITION) {
5080 /* fragment position input */
5081 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5082 interpolationMode = VGPU10_INTERPOLATION_LINEAR;
5083 name = VGPU10_NAME_POSITION;
5084 if (usage_mask & TGSI_WRITEMASK_W) {
5085 /* we need to replace use of 'w' with '1/w' */
5086 emit->fs.fragcoord_input_index = i;
5087 }
5088 }
5089 else if (semantic_name == TGSI_SEMANTIC_FACE) {
5090 /* fragment front-facing input */
5091 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5092 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5093 name = VGPU10_NAME_IS_FRONT_FACE;
5094 emit->fs.face_input_index = i;
5095 }
5096 else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5097 /* primitive ID */
5098 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5099 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5100 name = VGPU10_NAME_PRIMITIVE_ID;
5101 }
5102 else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
5103 /* sample index / ID */
5104 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5105 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5106 name = VGPU10_NAME_SAMPLE_INDEX;
5107 }
5108 else if (semantic_name == TGSI_SEMANTIC_LAYER) {
5109 /* render target array index */
5110 if (emit->key.fs.layer_to_zero) {
5111 /**
5112 * The shader from the previous stage does not write to layer,
5113 * so reading the layer index in fragment shader should return 0.
5114 */
5115 emit->fs.layer_input_index = i;
5116 continue;
5117 } else {
5118 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5119 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5120 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
5121 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5122 }
5123 }
5124 else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
5125 /* viewport index */
5126 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5127 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5128 name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
5129 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5130 }
5131 else {
5132 /* general fragment input */
5133 type = VGPU10_OPCODE_DCL_INPUT_PS;
5134 interpolationMode =
5135 translate_interpolation(emit,
5136 emit->info.input_interpolate[i],
5137 emit->info.input_interpolate_loc[i]);
5138
5139 /* keeps track if flat interpolation mode is being used */
5140 emit->uses_flat_interp = emit->uses_flat_interp ||
5141 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
5142
5143 name = VGPU10_NAME_UNDEFINED;
5144 }
5145
5146 emit_input_declaration(emit, type,
5147 VGPU10_OPERAND_TYPE_INPUT,
5148 VGPU10_OPERAND_INDEX_1D, index, 1,
5149 name,
5150 VGPU10_OPERAND_4_COMPONENT,
5151 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5152 mask,
5153 interpolationMode, true,
5154 map_tgsi_semantic_to_sgn_name(semantic_name));
5155 }
5156 }
5157
5158
5159 /**
5160 * Emit input declarations for vertex shader.
5161 */
5162 static void
emit_vs_input_declarations(struct svga_shader_emitter_v10 * emit)5163 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
5164 {
5165 unsigned i;
5166
5167 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
5168 unsigned usage_mask = emit->info.input_usage_mask[i];
5169 unsigned index = i;
5170
5171 if (usage_mask == 0)
5172 continue; /* register is not actually used */
5173
5174 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5175 VGPU10_OPERAND_TYPE_INPUT,
5176 VGPU10_OPERAND_INDEX_1D, index, 1,
5177 VGPU10_NAME_UNDEFINED,
5178 VGPU10_OPERAND_4_COMPONENT,
5179 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5180 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5181 VGPU10_INTERPOLATION_UNDEFINED, true,
5182 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5183 }
5184 }
5185
5186
5187 /**
5188 * Emit input declarations for geometry shader.
5189 */
5190 static void
emit_gs_input_declarations(struct svga_shader_emitter_v10 * emit)5191 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
5192 {
5193 unsigned i;
5194
5195 for (i = 0; i < emit->info.num_inputs; i++) {
5196 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5197 unsigned usage_mask = emit->info.input_usage_mask[i];
5198 unsigned index = emit->linkage.input_map[i];
5199 unsigned opcodeType, operandType;
5200 unsigned numComp, selMode;
5201 unsigned name;
5202 unsigned dim;
5203
5204 if (usage_mask == 0)
5205 continue; /* register is not actually used */
5206
5207 opcodeType = VGPU10_OPCODE_DCL_INPUT;
5208 operandType = VGPU10_OPERAND_TYPE_INPUT;
5209 numComp = VGPU10_OPERAND_4_COMPONENT;
5210 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
5211 name = VGPU10_NAME_UNDEFINED;
5212
5213 /* all geometry shader inputs are two dimensional except
5214 * gl_PrimitiveID
5215 */
5216 dim = VGPU10_OPERAND_INDEX_2D;
5217
5218 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5219 /* Primitive ID */
5220 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
5221 dim = VGPU10_OPERAND_INDEX_0D;
5222 numComp = VGPU10_OPERAND_0_COMPONENT;
5223 selMode = 0;
5224
5225 /* also save the register index so we can check for
5226 * primitive id when emit src register. We need to modify the
5227 * operand type, index dimension when emit primitive id src reg.
5228 */
5229 emit->gs.prim_id_index = i;
5230 }
5231 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
5232 /* vertex position input */
5233 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
5234 name = VGPU10_NAME_POSITION;
5235 }
5236
5237 emit_input_declaration(emit, opcodeType, operandType,
5238 dim, index,
5239 emit->gs.input_size,
5240 name,
5241 numComp, selMode,
5242 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5243 VGPU10_INTERPOLATION_UNDEFINED, true,
5244 map_tgsi_semantic_to_sgn_name(semantic_name));
5245 }
5246 }
5247
5248
5249 /**
5250 * Emit input declarations for tessellation control shader.
5251 */
5252 static void
emit_tcs_input_declarations(struct svga_shader_emitter_v10 * emit)5253 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
5254 {
5255 unsigned i;
5256 unsigned size = emit->key.tcs.vertices_per_patch;
5257 bool addSignature = true;
5258
5259 if (!emit->tcs.control_point_phase)
5260 addSignature = emit->tcs.fork_phase_add_signature;
5261
5262 for (i = 0; i < emit->info.num_inputs; i++) {
5263 unsigned usage_mask = emit->info.input_usage_mask[i];
5264 unsigned index = emit->linkage.input_map[i];
5265 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5266 VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
5267 VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
5268 SVGA3dDXSignatureSemanticName sgn_name =
5269 map_tgsi_semantic_to_sgn_name(semantic_name);
5270
5271 if (semantic_name == TGSI_SEMANTIC_POSITION ||
5272 index == emit->linkage.position_index) {
5273 /* save the input control point index for later use */
5274 emit->tcs.control_point_input_index = i;
5275 }
5276 else if (usage_mask == 0) {
5277 continue; /* register is not actually used */
5278 }
5279 else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
5280 /* The shadow copy is being used here. So set the signature name
5281 * to UNDEFINED.
5282 */
5283 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5284 }
5285
5286 /* input control points in the patch constant phase are emitted in the
5287 * vicp register rather than the v register.
5288 */
5289 if (!emit->tcs.control_point_phase) {
5290 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5291 }
5292
5293 /* Tessellation control shader inputs are two dimensional.
5294 * The array size is determined by the patch vertex count.
5295 */
5296 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5297 operandType,
5298 VGPU10_OPERAND_INDEX_2D,
5299 index, size, name,
5300 VGPU10_OPERAND_4_COMPONENT,
5301 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5302 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5303 VGPU10_INTERPOLATION_UNDEFINED,
5304 addSignature, sgn_name);
5305 }
5306
5307 if (emit->tcs.control_point_phase) {
5308
5309 /* Also add an address register for the indirection to the
5310 * input control points
5311 */
5312 emit->tcs.control_point_addr_index = emit->num_address_regs++;
5313 }
5314 }
5315
5316
5317 static void
emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 * emit)5318 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
5319 {
5320
5321 /* In tcs, tess factors are emitted as extra outputs.
5322 * The starting register index for the tess factors is captured
5323 * in the compile key.
5324 */
5325 unsigned inputIndex = emit->key.tes.tessfactor_index;
5326
5327 if (emit->tes.prim_mode == MESA_PRIM_QUADS) {
5328 if (emit->key.tes.need_tessouter) {
5329 emit->tes.outer.in_index = inputIndex;
5330 for (int i = 0; i < 4; i++) {
5331 emit_tesslevel_declaration(emit, inputIndex++,
5332 VGPU10_OPCODE_DCL_INPUT_SIV,
5333 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5334 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
5335 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
5336 }
5337 }
5338
5339 if (emit->key.tes.need_tessinner) {
5340 emit->tes.inner.in_index = inputIndex;
5341 emit_tesslevel_declaration(emit, inputIndex++,
5342 VGPU10_OPCODE_DCL_INPUT_SIV,
5343 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5344 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
5345 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
5346
5347 emit_tesslevel_declaration(emit, inputIndex++,
5348 VGPU10_OPCODE_DCL_INPUT_SIV,
5349 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5350 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
5351 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
5352 }
5353 }
5354 else if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
5355 if (emit->key.tes.need_tessouter) {
5356 emit->tes.outer.in_index = inputIndex;
5357 for (int i = 0; i < 3; i++) {
5358 emit_tesslevel_declaration(emit, inputIndex++,
5359 VGPU10_OPCODE_DCL_INPUT_SIV,
5360 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5361 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5362 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5363 }
5364 }
5365
5366 if (emit->key.tes.need_tessinner) {
5367 emit->tes.inner.in_index = inputIndex;
5368 emit_tesslevel_declaration(emit, inputIndex++,
5369 VGPU10_OPCODE_DCL_INPUT_SIV,
5370 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5371 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5372 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5373 }
5374 }
5375 else if (emit->tes.prim_mode == MESA_PRIM_LINES) {
5376 if (emit->key.tes.need_tessouter) {
5377 emit->tes.outer.in_index = inputIndex;
5378 emit_tesslevel_declaration(emit, inputIndex++,
5379 VGPU10_OPCODE_DCL_INPUT_SIV,
5380 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5381 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5382 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5383
5384 emit_tesslevel_declaration(emit, inputIndex++,
5385 VGPU10_OPCODE_DCL_INPUT_SIV,
5386 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5387 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5388 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5389 }
5390 }
5391 }
5392
5393
5394 /**
5395 * Emit input declarations for tessellation evaluation shader.
5396 */
5397 static void
emit_tes_input_declarations(struct svga_shader_emitter_v10 * emit)5398 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5399 {
5400 unsigned i;
5401
5402 for (i = 0; i < emit->info.num_inputs; i++) {
5403 unsigned usage_mask = emit->info.input_usage_mask[i];
5404 unsigned index = emit->linkage.input_map[i];
5405 unsigned size;
5406 const enum tgsi_semantic semantic_name =
5407 emit->info.input_semantic_name[i];
5408 SVGA3dDXSignatureSemanticName sgn_name;
5409 VGPU10_OPERAND_TYPE operandType;
5410 VGPU10_OPERAND_INDEX_DIMENSION dim;
5411
5412 if (usage_mask == 0)
5413 usage_mask = 1; /* at least set usage mask to one */
5414
5415 if (semantic_name == TGSI_SEMANTIC_PATCH) {
5416 operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5417 dim = VGPU10_OPERAND_INDEX_1D;
5418 size = 1;
5419 sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5420 }
5421 else {
5422 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5423 dim = VGPU10_OPERAND_INDEX_2D;
5424 size = emit->key.tes.vertices_per_patch;
5425 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5426 }
5427
5428 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5429 dim, index, size, VGPU10_NAME_UNDEFINED,
5430 VGPU10_OPERAND_4_COMPONENT,
5431 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5432 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5433 VGPU10_INTERPOLATION_UNDEFINED,
5434 true, sgn_name);
5435 }
5436
5437 emit_tessfactor_input_declarations(emit);
5438
5439 /* DX spec requires DS input controlpoint/patch-constant signatures to match
5440 * the HS output controlpoint/patch-constant signatures exactly.
5441 * Add missing input declarations even if they are not used in the shader.
5442 */
5443 if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5444 struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5445 for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5446
5447 /* If a tcs output does not have a corresponding input register in
5448 * tes, add one.
5449 */
5450 if (emit->linkage.prevShader.output_map[i] >
5451 emit->linkage.input_map_max) {
5452 const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5453
5454 if (sem_name == TGSI_SEMANTIC_PATCH) {
5455 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5456 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5457 VGPU10_OPERAND_INDEX_1D,
5458 i, 1, VGPU10_NAME_UNDEFINED,
5459 VGPU10_OPERAND_4_COMPONENT,
5460 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5461 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5462 VGPU10_INTERPOLATION_UNDEFINED,
5463 true,
5464 map_tgsi_semantic_to_sgn_name(sem_name));
5465
5466 } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5467 sem_name != TGSI_SEMANTIC_TESSOUTER) {
5468 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5469 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5470 VGPU10_OPERAND_INDEX_2D,
5471 i, emit->key.tes.vertices_per_patch,
5472 VGPU10_NAME_UNDEFINED,
5473 VGPU10_OPERAND_4_COMPONENT,
5474 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5475 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5476 VGPU10_INTERPOLATION_UNDEFINED,
5477 true,
5478 map_tgsi_semantic_to_sgn_name(sem_name));
5479 }
5480 /* tessellation factors are taken care of in
5481 * emit_tessfactor_input_declarations().
5482 */
5483 }
5484 }
5485 }
5486 }
5487
5488
5489 /**
5490 * Emit all input declarations.
5491 */
5492 static bool
emit_input_declarations(struct svga_shader_emitter_v10 * emit)5493 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5494 {
5495 emit->index_range.required =
5496 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
5497
5498 switch (emit->unit) {
5499 case PIPE_SHADER_FRAGMENT:
5500 emit_fs_input_declarations(emit);
5501 break;
5502 case PIPE_SHADER_GEOMETRY:
5503 emit_gs_input_declarations(emit);
5504 break;
5505 case PIPE_SHADER_VERTEX:
5506 emit_vs_input_declarations(emit);
5507 break;
5508 case PIPE_SHADER_TESS_CTRL:
5509 emit_tcs_input_declarations(emit);
5510 break;
5511 case PIPE_SHADER_TESS_EVAL:
5512 emit_tes_input_declarations(emit);
5513 break;
5514 case PIPE_SHADER_COMPUTE:
5515 //XXX emit_cs_input_declarations(emit);
5516 break;
5517 default:
5518 assert(0);
5519 }
5520
5521 if (emit->index_range.start_index != INVALID_INDEX) {
5522 emit_index_range_declaration(emit);
5523 }
5524 emit->index_range.required = false;
5525 return true;
5526 }
5527
5528
5529 /**
5530 * Emit all output declarations.
5531 */
5532 static bool
emit_output_declarations(struct svga_shader_emitter_v10 * emit)5533 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5534 {
5535 emit->index_range.required =
5536 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
5537
5538 switch (emit->unit) {
5539 case PIPE_SHADER_FRAGMENT:
5540 emit_fs_output_declarations(emit);
5541 break;
5542 case PIPE_SHADER_GEOMETRY:
5543 emit_gs_output_declarations(emit);
5544 break;
5545 case PIPE_SHADER_VERTEX:
5546 emit_vs_output_declarations(emit);
5547 break;
5548 case PIPE_SHADER_TESS_CTRL:
5549 emit_tcs_output_declarations(emit);
5550 break;
5551 case PIPE_SHADER_TESS_EVAL:
5552 emit_tes_output_declarations(emit);
5553 break;
5554 case PIPE_SHADER_COMPUTE:
5555 //XXX emit_cs_output_declarations(emit);
5556 break;
5557 default:
5558 assert(0);
5559 }
5560
5561 if (emit->vposition.so_index != INVALID_INDEX &&
5562 emit->vposition.out_index != INVALID_INDEX) {
5563
5564 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5565
5566 /* Emit the declaration for the non-adjusted vertex position
5567 * for stream output purpose
5568 */
5569 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5570 emit->vposition.so_index,
5571 VGPU10_NAME_UNDEFINED,
5572 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5573 true,
5574 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5575 }
5576
5577 if (emit->clip_dist_so_index != INVALID_INDEX &&
5578 emit->clip_dist_out_index != INVALID_INDEX) {
5579
5580 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5581
5582 /* Emit the declaration for the clip distance shadow copy which
5583 * will be used for stream output purpose and for clip distance
5584 * varying variable. Note all clip distances
5585 * will be written regardless of the enabled clipping planes.
5586 */
5587 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5588 emit->clip_dist_so_index,
5589 VGPU10_NAME_UNDEFINED,
5590 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5591 true,
5592 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5593
5594 if (emit->info.num_written_clipdistance > 4) {
5595 /* for the second clip distance register, each handles 4 planes */
5596 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5597 emit->clip_dist_so_index + 1,
5598 VGPU10_NAME_UNDEFINED,
5599 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5600 true,
5601 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5602 }
5603 }
5604
5605 if (emit->index_range.start_index != INVALID_INDEX) {
5606 emit_index_range_declaration(emit);
5607 }
5608 emit->index_range.required = false;
5609 return true;
5610 }
5611
5612
5613 /**
5614 * A helper function to create a temporary indexable array
5615 * and initialize the corresponding entries in the temp_map array.
5616 */
5617 static void
create_temp_array(struct svga_shader_emitter_v10 * emit,unsigned arrayID,unsigned first,unsigned count,unsigned startIndex)5618 create_temp_array(struct svga_shader_emitter_v10 *emit,
5619 unsigned arrayID, unsigned first, unsigned count,
5620 unsigned startIndex)
5621 {
5622 unsigned i, tempIndex = startIndex;
5623
5624 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5625 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5626 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5627
5628 emit->temp_arrays[arrayID].start = first;
5629 emit->temp_arrays[arrayID].size = count;
5630
5631 /* Fill in the temp_map entries for this temp array */
5632 for (i = 0; i < count; i++, tempIndex++) {
5633 emit->temp_map[tempIndex].arrayId = arrayID;
5634 emit->temp_map[tempIndex].index = i;
5635 }
5636 }
5637
5638
/**
 * Emit the declaration for the temporary registers.
 *
 * This allocates all of the driver-internal temps on top of the
 * shader's own temps (so the allocation order below is significant:
 * each `total_temps++` hands out the next VGPU10 register index),
 * builds the TGSI->VGPU10 temp_map remapping table, and finally emits
 * the dcl_temps / dcl_indexableTemp declarations.
 */
static bool
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      create_temp_array(emit, 1, 0, total_temps, 0);
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   /* Allocate extra temps for clip distance or clip vertex.
    */
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* We need to write the clip distance to a temporary register
       * first. Then it will be copied to the shadow copy for
       * the clip distance varying variable and stream output purpose.
       * It will also be copied to the actual CLIPDIST register
       * according to the enabled clip planes
       */
      emit->clip_dist_tmp_index = total_temps++;
      if (emit->info.num_written_clipdistance > 4)
         total_temps++; /* second clip register */
   }
   else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
      /* If the current shader is in the last vertex processing stage,
       * We need to convert the TGSI CLIPVERTEX output to one or more
       * clip distances. Allocate a temp reg for the clipvertex here.
       */
      assert(emit->info.writes_clipvertex > 0);
      emit->clip_vertex_tmp_index = total_temps;
      total_temps++;
   }

   /* Temp used to (re)compute the vertex id (VS only). */
   if (emit->info.uses_vertexid) {
      assert(emit->unit == PIPE_SHADER_VERTEX);
      emit->vs.vertex_id_tmp_index = total_temps++;
   }

   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      /* Temp to hold the position before prescale/viewport/clip-plane
       * adjustment or for the stream-output shadow copy.
       */
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      /* Temps holding the prescale scale/translation factors. */
      if (emit->vposition.need_prescale) {
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* One temp per vertex attribute that needs format fix-up
          * (w=1, int->float, BGRA swizzle, packed-uint conversions, ...).
          */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }
      else if (emit->unit == PIPE_SHADER_GEOMETRY) {
         if (emit->key.gs.writes_viewport_index)
            emit->gs.viewport_index_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
         /* Allocate a temp for the sample position */
         emit->fs.sample_pos_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      /* Prescale temps for the TES (the last vertex stage when no GS). */
      if (emit->vposition.need_prescale) {
         emit->vposition.tmp_index = total_temps++;
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      /* Temps to gather the inner/outer tess factors. */
      if (emit->tes.inner.tgsi_index) {
         emit->tes.inner.temp_index = total_temps;
         total_temps += 1;
      }

      if (emit->tes.outer.tgsi_index) {
         emit->tes.outer.temp_index = total_temps;
         total_temps += 1;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      /* Inner/outer tess factor temps are only needed in the
       * patch-constant phase.
       */
      if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.inner.temp_index = total_temps;
            total_temps += 1;
         }
      }
      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.outer.temp_index = total_temps;
            total_temps += 1;
         }
      }

      if (emit->tcs.control_point_phase &&
          emit->info.reads_pervertex_outputs) {
         /* Shadow copies of the per-vertex outputs so they can be read
          * back within the control point phase.
          */
         emit->tcs.control_point_tmp_index = total_temps;
         total_temps += emit->tcs.control_point_out_count;
      }
      else if (!emit->tcs.control_point_phase &&
               emit->info.reads_perpatch_outputs) {

         /* If there is indirect access to the patch constant outputs
          * in the control point phase, then an indexable temporary array
          * will be created for these patch constant outputs.
          * Note, indirect access can only be applicable to
          * patch constant outputs in the control point phase.
          */
         if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
            unsigned arrayID =
               emit->num_temp_arrays ? emit->num_temp_arrays : 1;
            create_temp_array(emit, arrayID, 0,
                              emit->tcs.patch_generic_out_count, total_temps);
         }
         emit->tcs.patch_generic_tmp_index = total_temps;
         total_temps += emit->tcs.patch_generic_out_count;
      }

      emit->tcs.invocation_id_tmp_index = total_temps++;
   }

   if (emit->raw_bufs) {
      /**
       * Add 3 more temporaries if we need to translate constant buffer
       * to srv raw buffer. Since we need to load the value to a temporary
       * before it can be used as a source. There could be three source
       * register in an instruction.
       */
      emit->raw_buf_tmp_index = total_temps;
      total_temps+=3;
   }

   /* Each TGSI address register is implemented with a temp. */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes. Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      /* arrayId 0 means "plain" (non-indexable) temp; renumber those
       * consecutively.  Entries belonging to arrays keep the mapping
       * set up by create_temp_array().
       */
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   /* Debug dump of the final temp mapping (disabled). */
   if (0) {
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u -> array %u index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays. Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return true;
}
5912
5913
5914 static bool
emit_rawbuf_declaration(struct svga_shader_emitter_v10 * emit,unsigned index)5915 emit_rawbuf_declaration(struct svga_shader_emitter_v10 *emit,
5916 unsigned index)
5917 {
5918 VGPU10OpcodeToken0 opcode1;
5919 VGPU10OperandToken0 operand1;
5920
5921 opcode1.value = 0;
5922 opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
5923 opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
5924
5925 operand1.value = 0;
5926 operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
5927 operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5928 operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
5929 operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5930
5931 begin_emit_instruction(emit);
5932 emit_dword(emit, opcode1.value);
5933 emit_dword(emit, operand1.value);
5934 emit_dword(emit, index);
5935 end_emit_instruction(emit);
5936
5937 return true;
5938 }
5939
5940
/**
 * Emit the constant buffer declarations.
 *
 * Constant buffer 0 holds the shader's own constants followed by the
 * driver's "extra" constants (prescale factors, clip planes, texcoord
 * scales, ...).  The slot allocation order below is a contract: it must
 * exactly match the order used when filling the buffer in
 * svga_state_constants.c.
 */
static bool
emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   unsigned total_consts, i;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */

   /* Operand template shared by all constant buffer declarations:
    * 2D-indexed (buffer slot, element count) with identity swizzle.
    */
   operand0.value = 0;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   /**
    * Emit declaration for constant buffer [0].  We also allocate
    * room for the extra constants here.
    */
   total_consts = emit->num_shader_consts[0];

   /* Now, allocate constant slots for the "extra" constants.
    * Note: it's critical that these extra constant locations
    * exactly match what's emitted by the "extra" constants code
    * in svga_state_constants.c
    */

   /* Vertex position scale/translation */
   if (emit->vposition.need_prescale) {
      emit->vposition.prescale_cbuf_index = total_consts;
      /* Two slots (scale + translation) per prescale factor. */
      total_consts += (2 * emit->vposition.num_prescale);
   }

   if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->key.vs.undo_viewport) {
         emit->vs.viewport_index = total_consts++;
      }
      if (emit->key.vs.need_vertex_id_bias) {
         emit->vs.vertex_id_bias_index = total_consts++;
      }
   }

   /* user-defined clip planes */
   if (emit->key.clip_plane_enable) {
      unsigned n = util_bitcount(emit->key.clip_plane_enable);
      assert(emit->unit != PIPE_SHADER_FRAGMENT &&
             emit->unit != PIPE_SHADER_COMPUTE);
      /* One constant slot per enabled clip plane. */
      for (i = 0; i < n; i++) {
         emit->clip_plane_const[i] = total_consts++;
      }
   }

   for (i = 0; i < emit->num_samplers; i++) {

      if (emit->key.tex[i].sampler_view) {
         /* Texcoord scale factors for RECT textures */
         if (emit->key.tex[i].unnormalized) {
            emit->texcoord_scale_index[i] = total_consts++;
         }

         /* Texture buffer sizes */
         if (emit->key.tex[i].target == PIPE_BUFFER) {
            emit->texture_buffer_size_index[i] = total_consts++;
         }
      }
   }
   /* One slot per image for the image-size queries. */
   if (emit->key.image_size_used) {
      emit->image_size_index = total_consts;
      total_consts += emit->num_images;
   }

   if (total_consts > 0) {
      /* Clamp to the device limit and flag the overflow so the shader
       * can be reported as invalid rather than emitting bad tokens.
       */
      if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         debug_printf("Warning: Too many constants [%u] declared in constant"
                      " buffer 0. %u is the limit.\n",
                      total_consts,
                      VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
         total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
         emit->register_overflow = true;
      }
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, 0); /* which const buffer slot */
      emit_dword(emit, total_consts);
      end_emit_instruction(emit);
   }

   /* Declare remaining constant buffers (UBOs) */

   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
      if (emit->num_shader_consts[i] > 0) {
         if (emit->raw_bufs & (1 << i)) {
            /* UBO declared as srv raw buffer */
            emit_rawbuf_declaration(emit, i + emit->raw_buf_srv_start_index);
         }
         else {

            /* UBO declared as const buffer */
            begin_emit_instruction(emit);
            emit_dword(emit, opcode0.value);
            emit_dword(emit, operand0.value);
            emit_dword(emit, i); /* which const buffer slot */
            emit_dword(emit, emit->num_shader_consts[i]);
            end_emit_instruction(emit);
         }
      }
   }

   return true;
}
6061
6062
6063 /**
6064 * Emit declarations for samplers.
6065 */
6066 static bool
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)6067 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
6068 {
6069 unsigned i;
6070
6071 for (i = 0; i < emit->key.num_samplers; i++) {
6072
6073 VGPU10OpcodeToken0 opcode0;
6074 VGPU10OperandToken0 operand0;
6075
6076 opcode0.value = 0;
6077 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
6078 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
6079
6080 operand0.value = 0;
6081 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6082 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
6083 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6084 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6085
6086 begin_emit_instruction(emit);
6087 emit_dword(emit, opcode0.value);
6088 emit_dword(emit, operand0.value);
6089 emit_dword(emit, i);
6090 end_emit_instruction(emit);
6091 }
6092
6093 return true;
6094 }
6095
6096
6097 /**
6098 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6099 */
6100 static unsigned
pipe_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,bool is_array,bool is_uav)6101 pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
6102 unsigned num_samples,
6103 bool is_array,
6104 bool is_uav)
6105 {
6106 switch (target) {
6107 case PIPE_BUFFER:
6108 return VGPU10_RESOURCE_DIMENSION_BUFFER;
6109 case PIPE_TEXTURE_1D:
6110 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6111 case PIPE_TEXTURE_2D:
6112 return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
6113 VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6114 case PIPE_TEXTURE_RECT:
6115 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6116 case PIPE_TEXTURE_3D:
6117 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6118 case PIPE_TEXTURE_CUBE:
6119 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6120 case PIPE_TEXTURE_1D_ARRAY:
6121 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6122 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6123 case PIPE_TEXTURE_2D_ARRAY:
6124 if (num_samples > 2 && is_array)
6125 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
6126 else if (is_array)
6127 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
6128 else
6129 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6130 case PIPE_TEXTURE_CUBE_ARRAY:
6131 return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6132 (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6133 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6134 default:
6135 assert(!"Unexpected resource type");
6136 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6137 }
6138 }
6139
6140
6141 /**
6142 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6143 */
6144 static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,bool is_array,bool is_uav)6145 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
6146 unsigned num_samples,
6147 bool is_array,
6148 bool is_uav)
6149 {
6150 if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
6151 target = TGSI_TEXTURE_2D;
6152 }
6153 else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
6154 target = TGSI_TEXTURE_2D_ARRAY;
6155 }
6156
6157 switch (target) {
6158 case TGSI_TEXTURE_BUFFER:
6159 return VGPU10_RESOURCE_DIMENSION_BUFFER;
6160 case TGSI_TEXTURE_1D:
6161 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6162 case TGSI_TEXTURE_2D:
6163 case TGSI_TEXTURE_RECT:
6164 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6165 case TGSI_TEXTURE_3D:
6166 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6167 case TGSI_TEXTURE_CUBE:
6168 case TGSI_TEXTURE_SHADOWCUBE:
6169 return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6170 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6171 case TGSI_TEXTURE_SHADOW1D:
6172 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6173 case TGSI_TEXTURE_SHADOW2D:
6174 case TGSI_TEXTURE_SHADOWRECT:
6175 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6176 case TGSI_TEXTURE_1D_ARRAY:
6177 case TGSI_TEXTURE_SHADOW1D_ARRAY:
6178 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6179 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6180 case TGSI_TEXTURE_2D_ARRAY:
6181 case TGSI_TEXTURE_SHADOW2D_ARRAY:
6182 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
6183 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6184 case TGSI_TEXTURE_2D_MSAA:
6185 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6186 case TGSI_TEXTURE_2D_ARRAY_MSAA:
6187 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
6188 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6189 case TGSI_TEXTURE_CUBE_ARRAY:
6190 return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6191 (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6192 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6193 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
6194 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
6195 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6196 default:
6197 assert(!"Unexpected resource type");
6198 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6199 }
6200 }
6201
6202
6203 /**
6204 * Given a tgsi_return_type, return true iff it is an integer type.
6205 */
6206 static bool
is_integer_type(enum tgsi_return_type type)6207 is_integer_type(enum tgsi_return_type type)
6208 {
6209 switch (type) {
6210 case TGSI_RETURN_TYPE_SINT:
6211 case TGSI_RETURN_TYPE_UINT:
6212 return true;
6213 case TGSI_RETURN_TYPE_FLOAT:
6214 case TGSI_RETURN_TYPE_UNORM:
6215 case TGSI_RETURN_TYPE_SNORM:
6216 return false;
6217 case TGSI_RETURN_TYPE_COUNT:
6218 default:
6219 assert(!"is_integer_type: Unknown tgsi_return_type");
6220 return false;
6221 }
6222 }
6223
6224
6225 /**
6226 * Emit declarations for resources.
6227 * XXX When we're sure that all TGSI shaders will be generated with
6228 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
6229 * rework this code.
6230 */
6231 static bool
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)6232 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
6233 {
6234 unsigned i;
6235
6236 /* Emit resource decl for each sampler */
6237 for (i = 0; i < emit->num_samplers; i++) {
6238 if (!(emit->info.samplers_declared & (1 << i)))
6239 continue;
6240
6241 VGPU10OpcodeToken0 opcode0;
6242 VGPU10OperandToken0 operand0;
6243 VGPU10ResourceReturnTypeToken return_type;
6244 VGPU10_RESOURCE_RETURN_TYPE rt;
6245
6246 opcode0.value = 0;
6247 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
6248 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6249 opcode0.resourceDimension =
6250 tgsi_texture_to_resource_dimension(emit->sampler_target[i],
6251 emit->key.tex[i].num_samples,
6252 emit->key.tex[i].is_array,
6253 false);
6254 }
6255 else {
6256 opcode0.resourceDimension =
6257 pipe_texture_to_resource_dimension(emit->key.tex[i].target,
6258 emit->key.tex[i].num_samples,
6259 emit->key.tex[i].is_array,
6260 false);
6261 }
6262 opcode0.sampleCount = emit->key.tex[i].num_samples;
6263 operand0.value = 0;
6264 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6265 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
6266 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6267 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6268
6269 #if 1
6270 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
6271 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
6272 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
6273 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
6274 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
6275 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
6276 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
6277 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6278 rt = emit->sampler_return_type[i] + 1;
6279 }
6280 else {
6281 rt = emit->key.tex[i].sampler_return_type;
6282 }
6283 #else
6284 switch (emit->sampler_return_type[i]) {
6285 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
6286 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
6287 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
6288 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
6289 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
6290 case TGSI_RETURN_TYPE_COUNT:
6291 default:
6292 rt = VGPU10_RETURN_TYPE_FLOAT;
6293 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
6294 }
6295 #endif
6296
6297 return_type.value = 0;
6298 return_type.component0 = rt;
6299 return_type.component1 = rt;
6300 return_type.component2 = rt;
6301 return_type.component3 = rt;
6302
6303 begin_emit_instruction(emit);
6304 emit_dword(emit, opcode0.value);
6305 emit_dword(emit, operand0.value);
6306 emit_dword(emit, i);
6307 emit_dword(emit, return_type.value);
6308 end_emit_instruction(emit);
6309 }
6310
6311 return true;
6312 }
6313
6314
/**
 * Emit instruction to declare uav for the shader image
 *
 * Walks the declared images (num_images entries within image_mask),
 * emitting one dcl_uav_typed per distinct uav index; images sharing a
 * uav index are declared only once.  The declared-uav mask is merged
 * into emit->uav_declared at the end.
 */
static void
emit_image_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i = 0;
   unsigned unit = 0;
   unsigned uav_mask = 0;

   /* Emit uav decl for each image */
   for (i = 0; i < emit->num_images; i++, unit++) {

      /* Find the unit index of the next declared image.
       * NOTE(review): assumes image_mask always has at least num_images
       * bits set; otherwise this scan would run past the mask — confirm
       * the invariant is maintained by the image-declaration code.
       */
      while (!(emit->image_mask & (1 << unit))) {
         unit++;
      }

      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;
      VGPU10ResourceReturnTypeToken return_type;

      /* If the corresponding uav for the image is already declared,
       * skip this image declaration.
       */
      if (uav_mask & (1 << emit->key.images[unit].uav_index))
         continue;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
      opcode0.uavResourceDimension =
         tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
                                            0, emit->key.images[unit].is_array,
                                            true);

      /* A single-layer view of a 3D texture is still declared as 3D. */
      if (emit->key.images[unit].is_single_layer &&
          emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
         opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
      }

      /* Declare the uav as global coherent if the shader includes memory
       * barrier instructions.
       */
      opcode0.globallyCoherent =
         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;

      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

      /* Same return type for all four components; the key's return type
       * uses the TGSI enum, hence the +1 offset to the VGPU10 enum.
       */
      return_type.value = 0;
      return_type.component0 =
         return_type.component1 =
         return_type.component2 =
         return_type.component3 = emit->key.images[unit].return_type + 1;

      assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, emit->key.images[unit].uav_index);
      emit_dword(emit, return_type.value);
      end_emit_instruction(emit);

      /* Mark the uav is already declared */
      uav_mask |= 1 << emit->key.images[unit].uav_index;
   }

   emit->uav_declared |= uav_mask;
}
6388
6389
6390 /**
6391 * Emit instruction to declare uav for the shader buffer
6392 */
6393 static void
emit_shader_buf_declarations(struct svga_shader_emitter_v10 * emit)6394 emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
6395 {
6396 unsigned i;
6397 unsigned uav_mask = 0;
6398
6399 /* Emit uav decl for each shader buffer */
6400 for (i = 0; i < emit->num_shader_bufs; i++) {
6401 VGPU10OpcodeToken0 opcode0;
6402 VGPU10OperandToken0 operand0;
6403
6404 if (emit->raw_shaderbufs & (1 << i)) {
6405 emit_rawbuf_declaration(emit, i + emit->raw_shaderbuf_srv_start_index);
6406 continue;
6407 }
6408
6409 /* If the corresponding uav for the shader buf is already declared,
6410 * skip this shader buffer declaration.
6411 */
6412 if (uav_mask & (1 << emit->key.shader_buf_uav_index[i]))
6413 continue;
6414
6415 opcode0.value = 0;
6416 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6417
6418 /* Declare the uav as global coherent if the shader includes memory
6419 * barrier instructions.
6420 */
6421 opcode0.globallyCoherent =
6422 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6423
6424 operand0.value = 0;
6425 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6426 operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6427 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6428 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6429
6430 assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
6431 begin_emit_instruction(emit);
6432 emit_dword(emit, opcode0.value);
6433 emit_dword(emit, operand0.value);
6434 emit_dword(emit, emit->key.shader_buf_uav_index[i]);
6435 end_emit_instruction(emit);
6436
6437 /* Mark the uav is already declared */
6438 uav_mask |= 1 << emit->key.shader_buf_uav_index[i];
6439 }
6440
6441 emit->uav_declared |= uav_mask;
6442 }
6443
6444
6445 /**
6446 * Emit instruction to declare thread group shared memory(tgsm) for shared memory
6447 */
6448 static void
emit_memory_declarations(struct svga_shader_emitter_v10 * emit)6449 emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
6450 {
6451 if (emit->cs.shared_memory_declared) {
6452 VGPU10OpcodeToken0 opcode0;
6453 VGPU10OperandToken0 operand0;
6454
6455 opcode0.value = 0;
6456 opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
6457
6458 /* Declare the uav as global coherent if the shader includes memory
6459 * barrier instructions.
6460 */
6461 opcode0.globallyCoherent =
6462 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6463
6464 operand0.value = 0;
6465 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6466 operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
6467 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6468 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6469
6470 begin_emit_instruction(emit);
6471 emit_dword(emit, opcode0.value);
6472 emit_dword(emit, operand0.value);
6473
6474 /* Current state tracker only declares one shared memory for GLSL.
6475 * Use index 0 for this shared memory.
6476 */
6477 emit_dword(emit, 0);
6478 emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
6479 end_emit_instruction(emit);
6480 }
6481 }
6482
6483
6484 /**
6485 * Emit instruction to declare uav for atomic buffers
6486 */
6487 static void
emit_atomic_buf_declarations(struct svga_shader_emitter_v10 * emit)6488 emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
6489 {
6490 unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
6491 unsigned uav_mask = 0;
6492
6493 /* Emit uav decl for each atomic buffer */
6494 while (atomic_bufs_mask) {
6495 unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
6496 unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
6497
6498 /* If the corresponding uav for the shader buf is already declared,
6499 * skip this shader buffer declaration.
6500 */
6501 if (uav_mask & (1 << uav_index))
6502 continue;
6503
6504 VGPU10OpcodeToken0 opcode0;
6505 VGPU10OperandToken0 operand0;
6506
6507 assert(uav_index != SVGA3D_INVALID_ID);
6508
6509 opcode0.value = 0;
6510 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6511 opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
6512
6513 /* Declare the uav as global coherent if the shader includes memory
6514 * barrier instructions.
6515 */
6516 opcode0.globallyCoherent =
6517 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6518 opcode0.uavHasCounter = 1;
6519
6520 operand0.value = 0;
6521 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6522 operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6523 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6524 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6525
6526 begin_emit_instruction(emit);
6527 emit_dword(emit, opcode0.value);
6528 emit_dword(emit, operand0.value);
6529 emit_dword(emit, uav_index);
6530 end_emit_instruction(emit);
6531
6532 /* Mark the uav is already declared */
6533 uav_mask |= 1 << uav_index;
6534 }
6535
6536 emit->uav_declared |= uav_mask;
6537
6538 /* Allocate immediates to be used for index to the atomic buffers */
6539 unsigned j = 0;
6540 for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
6541 alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6542 }
6543
6544 /* Allocate immediates for the atomic counter index */
6545 for (; j <= emit->max_atomic_counter_index; j+=4) {
6546 alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6547 }
6548 }
6549
6550
6551 /**
6552 * Emit instruction with n=1, 2 or 3 source registers.
6553 */
6554 static void
emit_instruction_opn(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3,bool saturate,bool precise)6555 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
6556 unsigned opcode,
6557 const struct tgsi_full_dst_register *dst,
6558 const struct tgsi_full_src_register *src1,
6559 const struct tgsi_full_src_register *src2,
6560 const struct tgsi_full_src_register *src3,
6561 bool saturate, bool precise)
6562 {
6563 begin_emit_instruction(emit);
6564 emit_opcode_precise(emit, opcode, saturate, precise);
6565 emit_dst_register(emit, dst);
6566 emit_src_register(emit, src1);
6567 if (src2) {
6568 emit_src_register(emit, src2);
6569 }
6570 if (src3) {
6571 emit_src_register(emit, src3);
6572 }
6573 end_emit_instruction(emit);
6574 }
6575
6576 static void
emit_instruction_op1(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6577 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
6578 unsigned opcode,
6579 const struct tgsi_full_dst_register *dst,
6580 const struct tgsi_full_src_register *src)
6581 {
6582 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, false, false);
6583 }
6584
6585 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2)6586 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
6587 VGPU10_OPCODE_TYPE opcode,
6588 const struct tgsi_full_dst_register *dst,
6589 const struct tgsi_full_src_register *src1,
6590 const struct tgsi_full_src_register *src2)
6591 {
6592 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, false, false);
6593 }
6594
6595 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3)6596 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
6597 VGPU10_OPCODE_TYPE opcode,
6598 const struct tgsi_full_dst_register *dst,
6599 const struct tgsi_full_src_register *src1,
6600 const struct tgsi_full_src_register *src2,
6601 const struct tgsi_full_src_register *src3)
6602 {
6603 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, false, false);
6604 }
6605
6606 static void
emit_instruction_op0(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode)6607 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
6608 VGPU10_OPCODE_TYPE opcode)
6609 {
6610 begin_emit_instruction(emit);
6611 emit_opcode(emit, opcode, false);
6612 end_emit_instruction(emit);
6613 }
6614
6615 /**
6616 * Tessellation inner/outer levels needs to be store into its
6617 * appropriate registers depending on prim_mode.
6618 */
6619 static void
store_tesslevels(struct svga_shader_emitter_v10 * emit)6620 store_tesslevels(struct svga_shader_emitter_v10 *emit)
6621 {
6622 int i;
6623
6624 /* tessellation levels are required input/out in hull shader.
6625 * emitting the inner/outer tessellation levels, either from
6626 * values provided in tcs or fallback default values which is 1.0
6627 */
6628 if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
6629 struct tgsi_full_src_register temp_src;
6630
6631 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6632 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6633 else
6634 temp_src = make_immediate_reg_float(emit, 1.0f);
6635
6636 for (i = 0; i < 2; i++) {
6637 struct tgsi_full_src_register src =
6638 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6639 struct tgsi_full_dst_register dst =
6640 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
6641 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6642 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6643 }
6644
6645 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6646 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6647 else
6648 temp_src = make_immediate_reg_float(emit, 1.0f);
6649
6650 for (i = 0; i < 4; i++) {
6651 struct tgsi_full_src_register src =
6652 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6653 struct tgsi_full_dst_register dst =
6654 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6655 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6656 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6657 }
6658 }
6659 else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
6660 struct tgsi_full_src_register temp_src;
6661
6662 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6663 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6664 else
6665 temp_src = make_immediate_reg_float(emit, 1.0f);
6666
6667 struct tgsi_full_src_register src =
6668 scalar_src(&temp_src, TGSI_SWIZZLE_X);
6669 struct tgsi_full_dst_register dst =
6670 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6671 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6672 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6673
6674 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6675 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6676 else
6677 temp_src = make_immediate_reg_float(emit, 1.0f);
6678
6679 for (i = 0; i < 3; i++) {
6680 struct tgsi_full_src_register src =
6681 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6682 struct tgsi_full_dst_register dst =
6683 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6684 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6685 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6686 }
6687 }
6688 else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) {
6689 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6690 struct tgsi_full_src_register temp_src =
6691 make_src_temp_reg(emit->tcs.outer.temp_index);
6692 for (i = 0; i < 2; i++) {
6693 struct tgsi_full_src_register src =
6694 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6695 struct tgsi_full_dst_register dst =
6696 make_dst_reg(TGSI_FILE_OUTPUT,
6697 emit->tcs.outer.out_index + i);
6698 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6699 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6700 }
6701 }
6702 }
6703 else {
6704 debug_printf("Unsupported primitive type");
6705 }
6706 }
6707
6708
6709 /**
6710 * Emit the actual clip distance instructions to be used for clipping
6711 * by copying the clip distance from the temporary registers to the
6712 * CLIPDIST registers written with the enabled planes mask.
6713 * Also copy the clip distance from the temporary to the clip distance
6714 * shadow copy register which will be referenced by the input shader
6715 */
6716 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)6717 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6718 {
6719 struct tgsi_full_src_register tmp_clip_dist_src;
6720 struct tgsi_full_dst_register clip_dist_dst;
6721
6722 unsigned i;
6723 unsigned clip_plane_enable = emit->key.clip_plane_enable;
6724 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6725 int num_written_clipdist = emit->info.num_written_clipdistance;
6726
6727 assert(emit->clip_dist_out_index != INVALID_INDEX);
6728 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6729
6730 /**
6731 * Temporary reset the temporary clip dist register index so
6732 * that the copy to the real clip dist register will not
6733 * attempt to copy to the temporary register again
6734 */
6735 emit->clip_dist_tmp_index = INVALID_INDEX;
6736
6737 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6738
6739 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6740
6741 /**
6742 * copy to the shadow copy for use by varying variable and
6743 * stream output. All clip distances
6744 * will be written regardless of the enabled clipping planes.
6745 */
6746 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6747 emit->clip_dist_so_index + i);
6748
6749 /* MOV clip_dist_so, tmp_clip_dist */
6750 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6751 &tmp_clip_dist_src);
6752
6753 /**
6754 * copy those clip distances to enabled clipping planes
6755 * to CLIPDIST registers for clipping
6756 */
6757 if (clip_plane_enable & 0xf) {
6758 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6759 emit->clip_dist_out_index + i);
6760 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6761
6762 /* MOV CLIPDIST, tmp_clip_dist */
6763 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6764 &tmp_clip_dist_src);
6765 }
6766 /* four clip planes per clip register */
6767 clip_plane_enable >>= 4;
6768 }
6769 /**
6770 * set the temporary clip dist register index back to the
6771 * temporary index for the next vertex
6772 */
6773 emit->clip_dist_tmp_index = clip_dist_tmp_index;
6774 }
6775
6776 /* Declare clip distance output registers for user-defined clip planes
6777 * or the TGSI_CLIPVERTEX output.
6778 */
6779 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)6780 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6781 {
6782 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6783 unsigned index = emit->num_outputs;
6784 unsigned plane_mask;
6785
6786 assert(emit->unit != PIPE_SHADER_FRAGMENT);
6787 assert(num_clip_planes <= 8);
6788
6789 if (emit->clip_mode != CLIP_LEGACY &&
6790 emit->clip_mode != CLIP_VERTEX) {
6791 return;
6792 }
6793
6794 if (num_clip_planes == 0)
6795 return;
6796
6797 /* Convert clip vertex to clip distances only in the last vertex stage */
6798 if (!emit->key.last_vertex_stage)
6799 return;
6800
6801 /* Declare one or two clip output registers. The number of components
6802 * in the mask reflects the number of clip planes. For example, if 5
6803 * clip planes are needed, we'll declare outputs similar to:
6804 * dcl_output_siv o2.xyzw, clip_distance
6805 * dcl_output_siv o3.x, clip_distance
6806 */
6807 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6808
6809 plane_mask = (1 << num_clip_planes) - 1;
6810 if (plane_mask & 0xf) {
6811 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6812 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6813 VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6814 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6815 emit->num_outputs++;
6816 }
6817 if (plane_mask & 0xf0) {
6818 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6819 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6820 VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6821 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6822 emit->num_outputs++;
6823 }
6824 }
6825
6826
6827 /**
6828 * Emit the instructions for writing to the clip distance registers
6829 * to handle legacy/automatic clip planes.
6830 * For each clip plane, the distance is the dot product of the vertex
6831 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6832 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6833 * output registers already declared.
6834 */
6835 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)6836 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6837 unsigned vpos_tmp_index)
6838 {
6839 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6840
6841 assert(emit->clip_mode == CLIP_LEGACY);
6842 assert(num_clip_planes <= 8);
6843
6844 assert(emit->unit == PIPE_SHADER_VERTEX ||
6845 emit->unit == PIPE_SHADER_GEOMETRY ||
6846 emit->unit == PIPE_SHADER_TESS_EVAL);
6847
6848 for (i = 0; i < num_clip_planes; i++) {
6849 struct tgsi_full_dst_register dst;
6850 struct tgsi_full_src_register plane_src, vpos_src;
6851 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6852 unsigned comp = i % 4;
6853 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6854
6855 /* create dst, src regs */
6856 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6857 dst = writemask_dst(&dst, writemask);
6858
6859 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6860 vpos_src = make_src_temp_reg(vpos_tmp_index);
6861
6862 /* DP4 clip_dist, plane, vpos */
6863 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6864 &plane_src, &vpos_src);
6865 }
6866 }
6867
6868
6869 /**
6870 * Emit the instructions for computing the clip distance results from
6871 * the clip vertex temporary.
6872 * For each clip plane, the distance is the dot product of the clip vertex
6873 * position (found in a temp reg) and the clip plane coefficients.
6874 */
6875 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)6876 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6877 {
6878 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6879 unsigned i;
6880 struct tgsi_full_dst_register dst;
6881 struct tgsi_full_src_register clipvert_src;
6882 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6883
6884 assert(emit->unit == PIPE_SHADER_VERTEX ||
6885 emit->unit == PIPE_SHADER_GEOMETRY ||
6886 emit->unit == PIPE_SHADER_TESS_EVAL);
6887
6888 assert(emit->clip_mode == CLIP_VERTEX);
6889
6890 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6891
6892 for (i = 0; i < num_clip; i++) {
6893 struct tgsi_full_src_register plane_src;
6894 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6895 unsigned comp = i % 4;
6896 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6897
6898 /* create dst, src regs */
6899 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6900 dst = writemask_dst(&dst, writemask);
6901
6902 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6903
6904 /* DP4 clip_dist, plane, vpos */
6905 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6906 &plane_src, &clipvert_src);
6907 }
6908
6909 /* copy temporary clip vertex register to the clip vertex register */
6910
6911 assert(emit->clip_vertex_out_index != INVALID_INDEX);
6912
6913 /**
6914 * temporary reset the temporary clip vertex register index so
6915 * that copy to the clip vertex register will not attempt
6916 * to copy to the temporary register again
6917 */
6918 emit->clip_vertex_tmp_index = INVALID_INDEX;
6919
6920 /* MOV clip_vertex, clip_vertex_tmp */
6921 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6922 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6923 &dst, &clipvert_src);
6924
6925 /**
6926 * set the temporary clip vertex register index back to the
6927 * temporary index for the next vertex
6928 */
6929 emit->clip_vertex_tmp_index = clip_vertex_tmp;
6930 }
6931
6932 /**
6933 * Emit code to convert RGBA to BGRA
6934 */
6935 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6936 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6937 const struct tgsi_full_dst_register *dst,
6938 const struct tgsi_full_src_register *src)
6939 {
6940 struct tgsi_full_src_register bgra_src =
6941 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6942
6943 begin_emit_instruction(emit);
6944 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
6945 emit_dst_register(emit, dst);
6946 emit_src_register(emit, &bgra_src);
6947 end_emit_instruction(emit);
6948 }
6949
6950
6951 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6952 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6953 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6954 const struct tgsi_full_dst_register *dst,
6955 const struct tgsi_full_src_register *src)
6956 {
6957 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6958 struct tgsi_full_src_register two =
6959 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6960 struct tgsi_full_src_register neg_two =
6961 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6962
6963 unsigned val_tmp = get_temp_index(emit);
6964 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6965 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6966
6967 unsigned bias_tmp = get_temp_index(emit);
6968 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6969 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6970
6971 /* val = src * 2.0 */
6972 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6973
6974 /* bias = src > 0.5 */
6975 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6976
6977 /* bias = bias & -2.0 */
6978 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6979 &bias_src, &neg_two);
6980
6981 /* dst = val + bias */
6982 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6983 &val_src, &bias_src);
6984
6985 free_temp_indexes(emit);
6986 }
6987
6988
6989 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6990 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6991 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6992 const struct tgsi_full_dst_register *dst,
6993 const struct tgsi_full_src_register *src)
6994 {
6995 struct tgsi_full_src_register scale =
6996 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6997
6998 /* dst = src * scale */
6999 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
7000 }
7001
7002
7003 /** Convert from R32_UINT to 10_10_10_2_sscaled */
7004 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)7005 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
7006 const struct tgsi_full_dst_register *dst,
7007 const struct tgsi_full_src_register *src)
7008 {
7009 struct tgsi_full_src_register lshift =
7010 make_immediate_reg_int4(emit, 22, 12, 2, 0);
7011 struct tgsi_full_src_register rshift =
7012 make_immediate_reg_int4(emit, 22, 22, 22, 30);
7013
7014 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
7015
7016 unsigned tmp = get_temp_index(emit);
7017 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7018 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7019
7020 /*
7021 * r = (pixel << 22) >> 22; # signed int in [511, -512]
7022 * g = (pixel << 12) >> 22; # signed int in [511, -512]
7023 * b = (pixel << 2) >> 22; # signed int in [511, -512]
7024 * a = (pixel << 0) >> 30; # signed int in [1, -2]
7025 * dst = i_to_f(r,g,b,a); # convert to float
7026 */
7027 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
7028 &src_xxxx, &lshift);
7029 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
7030 &tmp_src, &rshift);
7031 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
7032
7033 free_temp_indexes(emit);
7034 }
7035
7036
7037 /**
7038 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
7039 */
7040 static bool
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7041 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
7042 const struct tgsi_full_instruction *inst)
7043 {
7044 unsigned index = inst->Dst[0].Register.Index;
7045 struct tgsi_full_dst_register dst;
7046 VGPU10_OPCODE_TYPE opcode;
7047
7048 assert(index < MAX_VGPU10_ADDR_REGS);
7049 dst = make_dst_temp_reg(emit->address_reg_index[index]);
7050 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
7051
7052 /* ARL dst, s0
7053 * Translates into:
7054 * FTOI address_tmp, s0
7055 *
7056 * UARL dst, s0
7057 * Translates into:
7058 * MOV address_tmp, s0
7059 */
7060 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
7061 opcode = VGPU10_OPCODE_FTOI;
7062 else
7063 opcode = VGPU10_OPCODE_MOV;
7064
7065 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
7066
7067 return true;
7068 }
7069
7070
7071 /**
7072 * Emit code for TGSI_OPCODE_CAL instruction.
7073 */
7074 static bool
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7075 emit_cal(struct svga_shader_emitter_v10 *emit,
7076 const struct tgsi_full_instruction *inst)
7077 {
7078 unsigned label = inst->Label.Label;
7079 VGPU10OperandToken0 operand;
7080 operand.value = 0;
7081 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
7082
7083 begin_emit_instruction(emit);
7084 emit_dword(emit, operand.value);
7085 emit_dword(emit, label);
7086 end_emit_instruction(emit);
7087
7088 return true;
7089 }
7090
7091
7092 /**
7093 * Emit code for TGSI_OPCODE_IABS instruction.
7094 */
7095 static bool
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7096 emit_iabs(struct svga_shader_emitter_v10 *emit,
7097 const struct tgsi_full_instruction *inst)
7098 {
7099 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
7100 * dst.y = (src0.y < 0) ? -src0.y : src0.y
7101 * dst.z = (src0.z < 0) ? -src0.z : src0.z
7102 * dst.w = (src0.w < 0) ? -src0.w : src0.w
7103 *
7104 * Translates into
7105 * IMAX dst, src, neg(src)
7106 */
7107 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
7108 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
7109 &inst->Src[0], &neg_src);
7110
7111 return true;
7112 }
7113
7114
7115 /**
7116 * Emit code for TGSI_OPCODE_CMP instruction.
7117 */
7118 static bool
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7119 emit_cmp(struct svga_shader_emitter_v10 *emit,
7120 const struct tgsi_full_instruction *inst)
7121 {
7122 /* dst.x = (src0.x < 0) ? src1.x : src2.x
7123 * dst.y = (src0.y < 0) ? src1.y : src2.y
7124 * dst.z = (src0.z < 0) ? src1.z : src2.z
7125 * dst.w = (src0.w < 0) ? src1.w : src2.w
7126 *
7127 * Translates into
7128 * LT tmp, src0, 0.0
7129 * MOVC dst, tmp, src1, src2
7130 */
7131 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7132 unsigned tmp = get_temp_index(emit);
7133 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7134 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7135
7136 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
7137 &inst->Src[0], &zero, NULL, false,
7138 inst->Instruction.Precise);
7139 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
7140 &tmp_src, &inst->Src[1], &inst->Src[2],
7141 inst->Instruction.Saturate, false);
7142
7143 free_temp_indexes(emit);
7144
7145 return true;
7146 }
7147
7148
7149 /**
7150 * Emit code for TGSI_OPCODE_DST instruction.
7151 */
7152 static bool
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7153 emit_dst(struct svga_shader_emitter_v10 *emit,
7154 const struct tgsi_full_instruction *inst)
7155 {
7156 /*
7157 * dst.x = 1
7158 * dst.y = src0.y * src1.y
7159 * dst.z = src0.z
7160 * dst.w = src1.w
7161 */
7162
7163 struct tgsi_full_src_register s0_yyyy =
7164 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7165 struct tgsi_full_src_register s0_zzzz =
7166 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
7167 struct tgsi_full_src_register s1_yyyy =
7168 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
7169 struct tgsi_full_src_register s1_wwww =
7170 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
7171
7172 /*
7173 * If dst and either src0 and src1 are the same we need
7174 * to create a temporary for it and insert a extra move.
7175 */
7176 unsigned tmp_move = get_temp_index(emit);
7177 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7178 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7179
7180 /* MOV dst.x, 1.0 */
7181 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7182 struct tgsi_full_dst_register dst_x =
7183 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7184 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7185
7186 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7187 }
7188
7189 /* MUL dst.y, s0.y, s1.y */
7190 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7191 struct tgsi_full_dst_register dst_y =
7192 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7193
7194 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
7195 &s1_yyyy, NULL, inst->Instruction.Saturate,
7196 inst->Instruction.Precise);
7197 }
7198
7199 /* MOV dst.z, s0.z */
7200 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7201 struct tgsi_full_dst_register dst_z =
7202 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7203
7204 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7205 &dst_z, &s0_zzzz, NULL, NULL,
7206 inst->Instruction.Saturate,
7207 inst->Instruction.Precise);
7208 }
7209
7210 /* MOV dst.w, s1.w */
7211 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7212 struct tgsi_full_dst_register dst_w =
7213 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7214
7215 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7216 &dst_w, &s1_wwww, NULL, NULL,
7217 inst->Instruction.Saturate,
7218 inst->Instruction.Precise);
7219 }
7220
7221 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7222 free_temp_indexes(emit);
7223
7224 return true;
7225 }
7226
7227
7228 /**
7229 * A helper function to return the stream index as specified in
7230 * the immediate register
7231 */
7232 static inline unsigned
find_stream_index(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7233 find_stream_index(struct svga_shader_emitter_v10 *emit,
7234 const struct tgsi_full_src_register *src)
7235 {
7236 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
7237 }
7238
7239
7240 /**
7241 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
7242 */
7243 static bool
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7244 emit_endprim(struct svga_shader_emitter_v10 *emit,
7245 const struct tgsi_full_instruction *inst)
7246 {
7247 assert(emit->unit == PIPE_SHADER_GEOMETRY);
7248
7249 begin_emit_instruction(emit);
7250 if (emit->version >= 50) {
7251 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
7252
7253 if (emit->info.num_stream_output_components[streamIndex] == 0) {
7254 /**
7255 * If there is no output for this stream, discard this instruction.
7256 */
7257 emit->discard_instruction = true;
7258 }
7259 else {
7260 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, false);
7261 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
7262 emit_stream_register(emit, streamIndex);
7263 }
7264 }
7265 else {
7266 emit_opcode(emit, VGPU10_OPCODE_CUT, false);
7267 }
7268 end_emit_instruction(emit);
7269 return true;
7270 }
7271
7272
7273 /**
7274 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
7275 */
7276 static bool
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7277 emit_ex2(struct svga_shader_emitter_v10 *emit,
7278 const struct tgsi_full_instruction *inst)
7279 {
7280 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
7281 * while VGPU10 computes four values.
7282 *
7283 * dst = EX2(src):
7284 * dst.xyzw = 2.0 ^ src.x
7285 */
7286
7287 struct tgsi_full_src_register src_xxxx =
7288 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7289 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7290
7291 /* EXP tmp, s0.xxxx */
7292 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
7293 NULL, NULL,
7294 inst->Instruction.Saturate,
7295 inst->Instruction.Precise);
7296
7297 return true;
7298 }
7299
7300
7301 /**
7302 * Emit code for TGSI_OPCODE_EXP instruction.
7303 */
7304 static bool
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7305 emit_exp(struct svga_shader_emitter_v10 *emit,
7306 const struct tgsi_full_instruction *inst)
7307 {
7308 /*
7309 * dst.x = 2 ^ floor(s0.x)
7310 * dst.y = s0.x - floor(s0.x)
7311 * dst.z = 2 ^ s0.x
7312 * dst.w = 1.0
7313 */
7314
7315 struct tgsi_full_src_register src_xxxx =
7316 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7317 unsigned tmp = get_temp_index(emit);
7318 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7319 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7320
7321 /*
7322 * If dst and src are the same we need to create
7323 * a temporary for it and insert a extra move.
7324 */
7325 unsigned tmp_move = get_temp_index(emit);
7326 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7327 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7328
7329 /* only use X component of temp reg */
7330 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7331 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7332
7333 /* ROUND_NI tmp.x, s0.x */
7334 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
7335 &src_xxxx); /* round to -infinity */
7336
7337 /* EXP dst.x, tmp.x */
7338 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7339 struct tgsi_full_dst_register dst_x =
7340 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7341
7342 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
7343 NULL, NULL,
7344 inst->Instruction.Saturate,
7345 inst->Instruction.Precise);
7346 }
7347
7348 /* ADD dst.y, s0.x, -tmp */
7349 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7350 struct tgsi_full_dst_register dst_y =
7351 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7352 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
7353
7354 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
7355 &neg_tmp_src, NULL,
7356 inst->Instruction.Saturate,
7357 inst->Instruction.Precise);
7358 }
7359
7360 /* EXP dst.z, s0.x */
7361 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7362 struct tgsi_full_dst_register dst_z =
7363 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7364
7365 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
7366 NULL, NULL,
7367 inst->Instruction.Saturate,
7368 inst->Instruction.Precise);
7369 }
7370
7371 /* MOV dst.w, 1.0 */
7372 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7373 struct tgsi_full_dst_register dst_w =
7374 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7375 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7376
7377 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7378 }
7379
7380 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7381
7382 free_temp_indexes(emit);
7383
7384 return true;
7385 }
7386
7387
7388 /**
7389 * Emit code for TGSI_OPCODE_IF instruction.
7390 */
7391 static bool
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7392 emit_if(struct svga_shader_emitter_v10 *emit,
7393 const struct tgsi_full_src_register *src)
7394 {
7395 VGPU10OpcodeToken0 opcode0;
7396
7397 /* The src register should be a scalar */
7398 assert(src->Register.SwizzleX == src->Register.SwizzleY &&
7399 src->Register.SwizzleX == src->Register.SwizzleZ &&
7400 src->Register.SwizzleX == src->Register.SwizzleW);
7401
7402 /* The only special thing here is that we need to set the
7403 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
7404 * src.x is non-zero.
7405 */
7406 opcode0.value = 0;
7407 opcode0.opcodeType = VGPU10_OPCODE_IF;
7408 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
7409
7410 begin_emit_instruction(emit);
7411 emit_dword(emit, opcode0.value);
7412 emit_src_register(emit, src);
7413 end_emit_instruction(emit);
7414
7415 return true;
7416 }
7417
7418
7419 /**
7420 * Emit code for conditional discard instruction (discard fragment if any of
7421 * the register components are negative).
7422 */
7423 static bool
emit_cond_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7424 emit_cond_discard(struct svga_shader_emitter_v10 *emit,
7425 const struct tgsi_full_instruction *inst)
7426 {
7427 unsigned tmp = get_temp_index(emit);
7428 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7429 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7430
7431 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7432
7433 struct tgsi_full_dst_register tmp_dst_x =
7434 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7435 struct tgsi_full_src_register tmp_src_xxxx =
7436 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7437
7438 /* tmp = src[0] < 0.0 */
7439 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
7440
7441 if (!same_swizzle_terms(&inst->Src[0])) {
7442 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
7443 * logically OR the swizzle terms. Most uses of this conditional
7444 * discard instruction only test one channel so it's good to
7445 * avoid these extra steps.
7446 */
7447 struct tgsi_full_src_register tmp_src_yyyy =
7448 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
7449 struct tgsi_full_src_register tmp_src_zzzz =
7450 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
7451 struct tgsi_full_src_register tmp_src_wwww =
7452 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
7453
7454 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7455 &tmp_src_yyyy);
7456 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7457 &tmp_src_zzzz);
7458 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7459 &tmp_src_wwww);
7460 }
7461
7462 begin_emit_instruction(emit);
7463 emit_discard_opcode(emit, true); /* discard if src0.x is non-zero */
7464 emit_src_register(emit, &tmp_src_xxxx);
7465 end_emit_instruction(emit);
7466
7467 free_temp_indexes(emit);
7468
7469 return true;
7470 }
7471
7472
7473 /**
7474 * Emit code for the unconditional discard instruction.
7475 */
7476 static bool
emit_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7477 emit_discard(struct svga_shader_emitter_v10 *emit,
7478 const struct tgsi_full_instruction *inst)
7479 {
7480 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7481
7482 /* DISCARD if 0.0 is zero */
7483 begin_emit_instruction(emit);
7484 emit_discard_opcode(emit, false);
7485 emit_src_register(emit, &zero);
7486 end_emit_instruction(emit);
7487
7488 return true;
7489 }
7490
7491
7492 /**
7493 * Emit code for TGSI_OPCODE_LG2 instruction.
7494 */
7495 static bool
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7496 emit_lg2(struct svga_shader_emitter_v10 *emit,
7497 const struct tgsi_full_instruction *inst)
7498 {
7499 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
7500 * while VGPU10 computes four values.
7501 *
7502 * dst = LG2(src):
7503 * dst.xyzw = log2(src.x)
7504 */
7505
7506 struct tgsi_full_src_register src_xxxx =
7507 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7508 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7509
7510 /* LOG tmp, s0.xxxx */
7511 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7512 &inst->Dst[0], &src_xxxx, NULL, NULL,
7513 inst->Instruction.Saturate,
7514 inst->Instruction.Precise);
7515
7516 return true;
7517 }
7518
7519
7520 /**
7521 * Emit code for TGSI_OPCODE_LIT instruction.
7522 */
7523 static bool
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7524 emit_lit(struct svga_shader_emitter_v10 *emit,
7525 const struct tgsi_full_instruction *inst)
7526 {
7527 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7528
7529 /*
7530 * If dst and src are the same we need to create
7531 * a temporary for it and insert a extra move.
7532 */
7533 unsigned tmp_move = get_temp_index(emit);
7534 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7535 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7536
7537 /*
7538 * dst.x = 1
7539 * dst.y = max(src.x, 0)
7540 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
7541 * dst.w = 1
7542 */
7543
7544 /* MOV dst.x, 1.0 */
7545 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7546 struct tgsi_full_dst_register dst_x =
7547 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7548 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7549 }
7550
7551 /* MOV dst.w, 1.0 */
7552 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7553 struct tgsi_full_dst_register dst_w =
7554 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7555 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7556 }
7557
7558 /* MAX dst.y, src.x, 0.0 */
7559 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7560 struct tgsi_full_dst_register dst_y =
7561 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7562 struct tgsi_full_src_register zero =
7563 make_immediate_reg_float(emit, 0.0f);
7564 struct tgsi_full_src_register src_xxxx =
7565 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7566 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7567
7568 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
7569 &zero, NULL, inst->Instruction.Saturate, false);
7570 }
7571
7572 /*
7573 * tmp1 = clamp(src.w, -128, 128);
7574 * MAX tmp1, src.w, -128
7575 * MIN tmp1, tmp1, 128
7576 *
7577 * tmp2 = max(tmp2, 0);
7578 * MAX tmp2, src.y, 0
7579 *
7580 * tmp1 = pow(tmp2, tmp1);
7581 * LOG tmp2, tmp2
7582 * MUL tmp1, tmp2, tmp1
7583 * EXP tmp1, tmp1
7584 *
7585 * tmp1 = (src.w == 0) ? 1 : tmp1;
7586 * EQ tmp2, 0, src.w
7587 * MOVC tmp1, tmp2, 1.0, tmp1
7588 *
7589 * dst.z = (0 < src.x) ? tmp1 : 0;
7590 * LT tmp2, 0, src.x
7591 * MOVC dst.z, tmp2, tmp1, 0.0
7592 */
7593 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7594 struct tgsi_full_dst_register dst_z =
7595 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7596
7597 unsigned tmp1 = get_temp_index(emit);
7598 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7599 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7600 unsigned tmp2 = get_temp_index(emit);
7601 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7602 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7603
7604 struct tgsi_full_src_register src_xxxx =
7605 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7606 struct tgsi_full_src_register src_yyyy =
7607 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7608 struct tgsi_full_src_register src_wwww =
7609 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
7610
7611 struct tgsi_full_src_register zero =
7612 make_immediate_reg_float(emit, 0.0f);
7613 struct tgsi_full_src_register lowerbound =
7614 make_immediate_reg_float(emit, -128.0f);
7615 struct tgsi_full_src_register upperbound =
7616 make_immediate_reg_float(emit, 128.0f);
7617
7618 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
7619 &lowerbound);
7620 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
7621 &upperbound);
7622 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
7623 &zero);
7624
7625 /* POW tmp1, tmp2, tmp1 */
7626 /* LOG tmp2, tmp2 */
7627 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
7628
7629 /* MUL tmp1, tmp2, tmp1 */
7630 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
7631 &tmp1_src);
7632
7633 /* EXP tmp1, tmp1 */
7634 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
7635
7636 /* EQ tmp2, 0, src.w */
7637 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
7638 /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
7639 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
7640 &tmp2_src, &one, &tmp1_src);
7641
7642 /* LT tmp2, 0, src.x */
7643 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
7644 /* MOVC dst.z, tmp2, tmp1, 0.0 */
7645 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
7646 &tmp2_src, &tmp1_src, &zero);
7647 }
7648
7649 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7650 free_temp_indexes(emit);
7651
7652 return true;
7653 }
7654
7655
7656 /**
7657 * Emit Level Of Detail Query (LODQ) instruction.
7658 */
7659 static bool
emit_lodq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7660 emit_lodq(struct svga_shader_emitter_v10 *emit,
7661 const struct tgsi_full_instruction *inst)
7662 {
7663 const uint unit = inst->Src[1].Register.Index;
7664
7665 assert(emit->version >= 41);
7666
7667 /* LOD dst, coord, resource, sampler */
7668 begin_emit_instruction(emit);
7669 emit_opcode(emit, VGPU10_OPCODE_LOD, false);
7670 emit_dst_register(emit, &inst->Dst[0]);
7671 emit_src_register(emit, &inst->Src[0]); /* coord */
7672 emit_resource_register(emit, unit);
7673 emit_sampler_register(emit, unit);
7674 end_emit_instruction(emit);
7675
7676 return true;
7677 }
7678
7679
7680 /**
7681 * Emit code for TGSI_OPCODE_LOG instruction.
7682 */
7683 static bool
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7684 emit_log(struct svga_shader_emitter_v10 *emit,
7685 const struct tgsi_full_instruction *inst)
7686 {
7687 /*
7688 * dst.x = floor(lg2(abs(s0.x)))
7689 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7690 * dst.z = lg2(abs(s0.x))
7691 * dst.w = 1.0
7692 */
7693
7694 struct tgsi_full_src_register src_xxxx =
7695 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7696 unsigned tmp = get_temp_index(emit);
7697 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7698 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7699 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7700
7701 /* only use X component of temp reg */
7702 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7703 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7704
7705 /* LOG tmp.x, abs(s0.x) */
7706 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7707 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7708 }
7709
7710 /* MOV dst.z, tmp.x */
7711 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7712 struct tgsi_full_dst_register dst_z =
7713 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7714
7715 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7716 &dst_z, &tmp_src, NULL, NULL,
7717 inst->Instruction.Saturate, false);
7718 }
7719
7720 /* FLR tmp.x, tmp.x */
7721 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7722 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7723 }
7724
7725 /* MOV dst.x, tmp.x */
7726 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7727 struct tgsi_full_dst_register dst_x =
7728 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7729
7730 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7731 &dst_x, &tmp_src, NULL, NULL,
7732 inst->Instruction.Saturate, false);
7733 }
7734
7735 /* EXP tmp.x, tmp.x */
7736 /* DIV dst.y, abs(s0.x), tmp.x */
7737 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7738 struct tgsi_full_dst_register dst_y =
7739 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7740
7741 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7742 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7743 &tmp_src, NULL, inst->Instruction.Saturate, false);
7744 }
7745
7746 /* MOV dst.w, 1.0 */
7747 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7748 struct tgsi_full_dst_register dst_w =
7749 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7750 struct tgsi_full_src_register one =
7751 make_immediate_reg_float(emit, 1.0f);
7752
7753 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7754 }
7755
7756 free_temp_indexes(emit);
7757
7758 return true;
7759 }
7760
7761
7762 /**
7763 * Emit code for TGSI_OPCODE_LRP instruction.
7764 */
7765 static bool
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7766 emit_lrp(struct svga_shader_emitter_v10 *emit,
7767 const struct tgsi_full_instruction *inst)
7768 {
7769 /* dst = LRP(s0, s1, s2):
7770 * dst = s0 * (s1 - s2) + s2
7771 * Translates into:
7772 * SUB tmp, s1, s2; tmp = s1 - s2
7773 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2
7774 */
7775 unsigned tmp = get_temp_index(emit);
7776 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7777 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7778 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7779
7780 /* ADD tmp, s1, -s2 */
7781 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7782 &inst->Src[1], &neg_src2, NULL, false,
7783 inst->Instruction.Precise);
7784
7785 /* MAD dst, s1, tmp, s3 */
7786 emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7787 &inst->Src[0], &src_tmp, &inst->Src[2],
7788 inst->Instruction.Saturate,
7789 inst->Instruction.Precise);
7790
7791 free_temp_indexes(emit);
7792
7793 return true;
7794 }
7795
7796
7797 /**
7798 * Emit code for TGSI_OPCODE_POW instruction.
7799 */
7800 static bool
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7801 emit_pow(struct svga_shader_emitter_v10 *emit,
7802 const struct tgsi_full_instruction *inst)
7803 {
7804 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7805 * src1.x while VGPU10 computes four values.
7806 *
7807 * dst = POW(src0, src1):
7808 * dst.xyzw = src0.x ^ src1.x
7809 */
7810 unsigned tmp = get_temp_index(emit);
7811 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7812 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7813 struct tgsi_full_src_register src0_xxxx =
7814 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7815 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7816 struct tgsi_full_src_register src1_xxxx =
7817 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7818 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7819
7820 /* LOG tmp, s0.xxxx */
7821 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7822 &tmp_dst, &src0_xxxx, NULL, NULL,
7823 false, inst->Instruction.Precise);
7824
7825 /* MUL tmp, tmp, s1.xxxx */
7826 emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7827 &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7828 false, inst->Instruction.Precise);
7829
7830 /* EXP tmp, s0.xxxx */
7831 emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7832 &inst->Dst[0], &tmp_src, NULL, NULL,
7833 inst->Instruction.Saturate,
7834 inst->Instruction.Precise);
7835
7836 /* free tmp */
7837 free_temp_indexes(emit);
7838
7839 return true;
7840 }
7841
7842
7843 /**
7844 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7845 */
7846 static bool
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7847 emit_rcp(struct svga_shader_emitter_v10 *emit,
7848 const struct tgsi_full_instruction *inst)
7849 {
7850 if (emit->version >= 50) {
7851 /* use new RCP instruction. But VGPU10_OPCODE_RCP is component-wise
7852 * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7853 * to manipulate the src register's swizzle.
7854 */
7855 struct tgsi_full_src_register src = inst->Src[0];
7856 src.Register.SwizzleY =
7857 src.Register.SwizzleZ =
7858 src.Register.SwizzleW = src.Register.SwizzleX;
7859
7860 begin_emit_instruction(emit);
7861 emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7862 inst->Instruction.Saturate,
7863 inst->Instruction.Precise);
7864 emit_dst_register(emit, &inst->Dst[0]);
7865 emit_src_register(emit, &src);
7866 end_emit_instruction(emit);
7867 }
7868 else {
7869 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7870
7871 unsigned tmp = get_temp_index(emit);
7872 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7873 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7874
7875 struct tgsi_full_dst_register tmp_dst_x =
7876 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7877 struct tgsi_full_src_register tmp_src_xxxx =
7878 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7879
7880 /* DIV tmp.x, 1.0, s0 */
7881 emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7882 &tmp_dst_x, &one, &inst->Src[0], NULL,
7883 false, inst->Instruction.Precise);
7884
7885 /* MOV dst, tmp.xxxx */
7886 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7887 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7888 inst->Instruction.Saturate,
7889 inst->Instruction.Precise);
7890
7891 free_temp_indexes(emit);
7892 }
7893
7894 return true;
7895 }
7896
7897
7898 /**
7899 * Emit code for TGSI_OPCODE_RSQ instruction.
7900 */
7901 static bool
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7902 emit_rsq(struct svga_shader_emitter_v10 *emit,
7903 const struct tgsi_full_instruction *inst)
7904 {
7905 /* dst = RSQ(src):
7906 * dst.xyzw = 1 / sqrt(src.x)
7907 * Translates into:
7908 * RSQ tmp, src.x
7909 * MOV dst, tmp.xxxx
7910 */
7911
7912 unsigned tmp = get_temp_index(emit);
7913 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7914 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7915
7916 struct tgsi_full_dst_register tmp_dst_x =
7917 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7918 struct tgsi_full_src_register tmp_src_xxxx =
7919 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7920
7921 /* RSQ tmp, src.x */
7922 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7923 &tmp_dst_x, &inst->Src[0], NULL, NULL,
7924 false, inst->Instruction.Precise);
7925
7926 /* MOV dst, tmp.xxxx */
7927 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7928 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7929 inst->Instruction.Saturate,
7930 inst->Instruction.Precise);
7931
7932 /* free tmp */
7933 free_temp_indexes(emit);
7934
7935 return true;
7936 }
7937
7938
7939 /**
7940 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7941 */
7942 static bool
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7943 emit_seq(struct svga_shader_emitter_v10 *emit,
7944 const struct tgsi_full_instruction *inst)
7945 {
7946 /* dst = SEQ(s0, s1):
7947 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
7948 * Translates into:
7949 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7950 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7951 */
7952 unsigned tmp = get_temp_index(emit);
7953 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7954 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7955 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7956 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7957
7958 /* EQ tmp, s0, s1 */
7959 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7960 &inst->Src[1]);
7961
7962 /* MOVC dst, tmp, one, zero */
7963 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7964 &one, &zero);
7965
7966 free_temp_indexes(emit);
7967
7968 return true;
7969 }
7970
7971
7972 /**
7973 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7974 */
7975 static bool
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7976 emit_sge(struct svga_shader_emitter_v10 *emit,
7977 const struct tgsi_full_instruction *inst)
7978 {
7979 /* dst = SGE(s0, s1):
7980 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
7981 * Translates into:
7982 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7983 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7984 */
7985 unsigned tmp = get_temp_index(emit);
7986 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7987 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7988 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7989 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7990
7991 /* GE tmp, s0, s1 */
7992 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7993 &inst->Src[1]);
7994
7995 /* MOVC dst, tmp, one, zero */
7996 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7997 &one, &zero);
7998
7999 free_temp_indexes(emit);
8000
8001 return true;
8002 }
8003
8004
8005 /**
8006 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
8007 */
8008 static bool
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8009 emit_sgt(struct svga_shader_emitter_v10 *emit,
8010 const struct tgsi_full_instruction *inst)
8011 {
8012 /* dst = SGT(s0, s1):
8013 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
8014 * Translates into:
8015 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
8016 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8017 */
8018 unsigned tmp = get_temp_index(emit);
8019 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8020 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8021 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8022 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8023
8024 /* LT tmp, s1, s0 */
8025 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
8026 &inst->Src[0]);
8027
8028 /* MOVC dst, tmp, one, zero */
8029 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8030 &one, &zero);
8031
8032 free_temp_indexes(emit);
8033
8034 return true;
8035 }
8036
8037
8038 /**
8039 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
8040 */
8041 static bool
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8042 emit_sincos(struct svga_shader_emitter_v10 *emit,
8043 const struct tgsi_full_instruction *inst)
8044 {
8045 unsigned tmp = get_temp_index(emit);
8046 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8047 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8048
8049 struct tgsi_full_src_register tmp_src_xxxx =
8050 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
8051 struct tgsi_full_dst_register tmp_dst_x =
8052 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8053
8054 begin_emit_instruction(emit);
8055 emit_opcode(emit, VGPU10_OPCODE_SINCOS, false);
8056
8057 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
8058 {
8059 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
8060 emit_null_dst_register(emit); /* second destination register */
8061 }
8062 else {
8063 emit_null_dst_register(emit);
8064 emit_dst_register(emit, &tmp_dst_x);
8065 }
8066
8067 emit_src_register(emit, &inst->Src[0]);
8068 end_emit_instruction(emit);
8069
8070 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
8071 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
8072 inst->Instruction.Saturate,
8073 inst->Instruction.Precise);
8074
8075 free_temp_indexes(emit);
8076
8077 return true;
8078 }
8079
8080
8081 /**
8082 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
8083 */
8084 static bool
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8085 emit_sle(struct svga_shader_emitter_v10 *emit,
8086 const struct tgsi_full_instruction *inst)
8087 {
8088 /* dst = SLE(s0, s1):
8089 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
8090 * Translates into:
8091 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
8092 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8093 */
8094 unsigned tmp = get_temp_index(emit);
8095 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8096 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8097 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8098 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8099
8100 /* GE tmp, s1, s0 */
8101 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
8102 &inst->Src[0]);
8103
8104 /* MOVC dst, tmp, one, zero */
8105 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8106 &one, &zero);
8107
8108 free_temp_indexes(emit);
8109
8110 return true;
8111 }
8112
8113
8114 /**
8115 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
8116 */
8117 static bool
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8118 emit_slt(struct svga_shader_emitter_v10 *emit,
8119 const struct tgsi_full_instruction *inst)
8120 {
8121 /* dst = SLT(s0, s1):
8122 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
8123 * Translates into:
8124 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
8125 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8126 */
8127 unsigned tmp = get_temp_index(emit);
8128 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8129 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8130 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8131 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8132
8133 /* LT tmp, s0, s1 */
8134 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
8135 &inst->Src[1]);
8136
8137 /* MOVC dst, tmp, one, zero */
8138 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8139 &one, &zero);
8140
8141 free_temp_indexes(emit);
8142
8143 return true;
8144 }
8145
8146
8147 /**
8148 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
8149 */
8150 static bool
emit_sne(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8151 emit_sne(struct svga_shader_emitter_v10 *emit,
8152 const struct tgsi_full_instruction *inst)
8153 {
8154 /* dst = SNE(s0, s1):
8155 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
8156 * Translates into:
8157 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp)
8158 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8159 */
8160 unsigned tmp = get_temp_index(emit);
8161 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8162 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8163 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8164 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8165
8166 /* NE tmp, s0, s1 */
8167 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
8168 &inst->Src[1]);
8169
8170 /* MOVC dst, tmp, one, zero */
8171 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8172 &one, &zero);
8173
8174 free_temp_indexes(emit);
8175
8176 return true;
8177 }
8178
8179
8180 /**
8181 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
8182 */
8183 static bool
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8184 emit_ssg(struct svga_shader_emitter_v10 *emit,
8185 const struct tgsi_full_instruction *inst)
8186 {
8187 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
8188 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
8189 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
8190 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
8191 * Translates into:
8192 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
8193 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
8194 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
8195 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
8196 */
8197 struct tgsi_full_src_register zero =
8198 make_immediate_reg_float(emit, 0.0f);
8199 struct tgsi_full_src_register one =
8200 make_immediate_reg_float(emit, 1.0f);
8201 struct tgsi_full_src_register neg_one =
8202 make_immediate_reg_float(emit, -1.0f);
8203
8204 unsigned tmp1 = get_temp_index(emit);
8205 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8206 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8207
8208 unsigned tmp2 = get_temp_index(emit);
8209 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8210 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8211
8212 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
8213 &zero);
8214 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
8215 &neg_one, &zero);
8216 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
8217 &inst->Src[0]);
8218 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
8219 &one, &tmp2_src);
8220
8221 free_temp_indexes(emit);
8222
8223 return true;
8224 }
8225
8226
8227 /**
8228 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
8229 */
8230 static bool
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8231 emit_issg(struct svga_shader_emitter_v10 *emit,
8232 const struct tgsi_full_instruction *inst)
8233 {
8234 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
8235 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
8236 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
8237 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
8238 * Translates into:
8239 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
8240 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
8241 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
8242 */
8243 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8244
8245 unsigned tmp1 = get_temp_index(emit);
8246 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8247 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8248
8249 unsigned tmp2 = get_temp_index(emit);
8250 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8251 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8252
8253 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
8254
8255 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
8256 &inst->Src[0], &zero);
8257 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
8258 &zero, &inst->Src[0]);
8259 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
8260 &tmp1_src, &neg_tmp2);
8261
8262 free_temp_indexes(emit);
8263
8264 return true;
8265 }
8266
8267
8268 /**
8269 * Emit a comparison instruction. The dest register will get
8270 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
8271 */
8272 static void
emit_comparison(struct svga_shader_emitter_v10 * emit,SVGA3dCmpFunc func,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src0,const struct tgsi_full_src_register * src1)8273 emit_comparison(struct svga_shader_emitter_v10 *emit,
8274 SVGA3dCmpFunc func,
8275 const struct tgsi_full_dst_register *dst,
8276 const struct tgsi_full_src_register *src0,
8277 const struct tgsi_full_src_register *src1)
8278 {
8279 struct tgsi_full_src_register immediate;
8280 VGPU10OpcodeToken0 opcode0;
8281 bool swapSrc = false;
8282
8283 /* Sanity checks for svga vs. gallium enums */
8284 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
8285 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
8286
8287 opcode0.value = 0;
8288
8289 switch (func) {
8290 case SVGA3D_CMP_NEVER:
8291 immediate = make_immediate_reg_int(emit, 0);
8292 /* MOV dst, {0} */
8293 begin_emit_instruction(emit);
8294 emit_dword(emit, VGPU10_OPCODE_MOV);
8295 emit_dst_register(emit, dst);
8296 emit_src_register(emit, &immediate);
8297 end_emit_instruction(emit);
8298 return;
8299 case SVGA3D_CMP_ALWAYS:
8300 immediate = make_immediate_reg_int(emit, -1);
8301 /* MOV dst, {-1} */
8302 begin_emit_instruction(emit);
8303 emit_dword(emit, VGPU10_OPCODE_MOV);
8304 emit_dst_register(emit, dst);
8305 emit_src_register(emit, &immediate);
8306 end_emit_instruction(emit);
8307 return;
8308 case SVGA3D_CMP_LESS:
8309 opcode0.opcodeType = VGPU10_OPCODE_LT;
8310 break;
8311 case SVGA3D_CMP_EQUAL:
8312 opcode0.opcodeType = VGPU10_OPCODE_EQ;
8313 break;
8314 case SVGA3D_CMP_LESSEQUAL:
8315 opcode0.opcodeType = VGPU10_OPCODE_GE;
8316 swapSrc = true;
8317 break;
8318 case SVGA3D_CMP_GREATER:
8319 opcode0.opcodeType = VGPU10_OPCODE_LT;
8320 swapSrc = true;
8321 break;
8322 case SVGA3D_CMP_NOTEQUAL:
8323 opcode0.opcodeType = VGPU10_OPCODE_NE;
8324 break;
8325 case SVGA3D_CMP_GREATEREQUAL:
8326 opcode0.opcodeType = VGPU10_OPCODE_GE;
8327 break;
8328 default:
8329 assert(!"Unexpected comparison mode");
8330 opcode0.opcodeType = VGPU10_OPCODE_EQ;
8331 }
8332
8333 begin_emit_instruction(emit);
8334 emit_dword(emit, opcode0.value);
8335 emit_dst_register(emit, dst);
8336 if (swapSrc) {
8337 emit_src_register(emit, src1);
8338 emit_src_register(emit, src0);
8339 }
8340 else {
8341 emit_src_register(emit, src0);
8342 emit_src_register(emit, src1);
8343 }
8344 end_emit_instruction(emit);
8345 }
8346
8347
8348 /**
8349 * Get texel/address offsets for a texture instruction.
8350 */
8351 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])8352 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
8353 const struct tgsi_full_instruction *inst, int offsets[3])
8354 {
8355 if (inst->Texture.NumOffsets == 1) {
8356 /* According to OpenGL Shader Language spec the offsets are only
8357 * fetched from a previously-declared immediate/literal.
8358 */
8359 const struct tgsi_texture_offset *off = inst->TexOffsets;
8360 const unsigned index = off[0].Index;
8361 const unsigned swizzleX = off[0].SwizzleX;
8362 const unsigned swizzleY = off[0].SwizzleY;
8363 const unsigned swizzleZ = off[0].SwizzleZ;
8364 const union tgsi_immediate_data *imm = emit->immediates[index];
8365
8366 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
8367
8368 offsets[0] = imm[swizzleX].Int;
8369 offsets[1] = imm[swizzleY].Int;
8370 offsets[2] = imm[swizzleZ].Int;
8371 }
8372 else {
8373 offsets[0] = offsets[1] = offsets[2] = 0;
8374 }
8375 }
8376
8377
8378 /**
8379 * Set up the coordinate register for texture sampling.
8380 * When we're sampling from a RECT texture we have to scale the
8381 * unnormalized coordinate to a normalized coordinate.
8382 * We do that by multiplying the coordinate by an "extra" constant.
8383 * An alternative would be to use the RESINFO instruction to query the
8384 * texture's size.
8385 */
8386 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)8387 setup_texcoord(struct svga_shader_emitter_v10 *emit,
8388 unsigned unit,
8389 const struct tgsi_full_src_register *coord)
8390 {
8391 if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
8392 unsigned scale_index = emit->texcoord_scale_index[unit];
8393 unsigned tmp = get_temp_index(emit);
8394 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8395 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8396 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
8397
8398 if (emit->key.tex[unit].texel_bias) {
8399 /* to fix texture coordinate rounding issue, 0.0001 offset is
8400 * been added. This fixes piglit test fbo-blit-scaled-linear. */
8401 struct tgsi_full_src_register offset =
8402 make_immediate_reg_float(emit, 0.0001f);
8403
8404 /* ADD tmp, coord, offset */
8405 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
8406 coord, &offset);
8407 /* MUL tmp, tmp, scale */
8408 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8409 &tmp_src, &scale_src);
8410 }
8411 else {
8412 /* MUL tmp, coord, const[] */
8413 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8414 coord, &scale_src);
8415 }
8416 return tmp_src;
8417 }
8418 else {
8419 /* use texcoord as-is */
8420 return *coord;
8421 }
8422 }
8423
8424
8425 /**
8426 * For SAMPLE_C instructions, emit the extra src register which indicates
8427 * the reference/comparision value.
8428 */
8429 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)8430 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
8431 enum tgsi_texture_type target,
8432 const struct tgsi_full_src_register *coord)
8433 {
8434 struct tgsi_full_src_register coord_src_ref;
8435 int component;
8436
8437 assert(tgsi_is_shadow_target(target));
8438
8439 component = tgsi_util_get_shadow_ref_src_index(target) % 4;
8440 assert(component >= 0);
8441
8442 coord_src_ref = scalar_src(coord, component);
8443
8444 emit_src_register(emit, &coord_src_ref);
8445 }
8446
8447
8448 /**
8449 * Info for implementing texture swizzles.
8450 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
8451 * functions use this to encapsulate the extra steps needed to perform
8452 * a texture swizzle, or shadow/depth comparisons.
8453 * The shadow/depth comparison is only done here if for the cases where
8454 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
8455 */
struct tex_swizzle_info
{
   bool swizzled;           /**< any non-identity R/G/B/A swizzle term? */
   bool shadow_compare;     /**< doing the depth compare in the shader? */
   unsigned unit;           /**< texture unit; only set when swizzled or shadow_compare */
   enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */
   struct tgsi_full_src_register tmp_src; /**< temp reg holding the raw SAMPLE result */
   struct tgsi_full_dst_register tmp_dst; /**< same temp reg, as a destination */
   const struct tgsi_full_dst_register *inst_dst; /**< the instruction's original dst */
   const struct tgsi_full_src_register *coord_src; /**< the instruction's coord source */
};
8467
8468
8469 /**
8470 * Do setup for handling texture swizzles or shadow compares.
8471 * \param unit the texture unit
8472 * \param inst the TGSI texture instruction
8473 * \param shadow_compare do shadow/depth comparison?
8474 * \param swz returns the swizzle info
8475 */
8476 static void
begin_tex_swizzle(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_instruction * inst,bool shadow_compare,struct tex_swizzle_info * swz)8477 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8478 unsigned unit,
8479 const struct tgsi_full_instruction *inst,
8480 bool shadow_compare,
8481 struct tex_swizzle_info *swz)
8482 {
8483 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
8484 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
8485 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
8486 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
8487
8488 swz->shadow_compare = shadow_compare;
8489 swz->texture_target = inst->Texture.Texture;
8490
8491 if (swz->swizzled || shadow_compare) {
8492 /* Allocate temp register for the result of the SAMPLE instruction
8493 * and the source of the MOV/compare/swizzle instructions.
8494 */
8495 unsigned tmp = get_temp_index(emit);
8496 swz->tmp_src = make_src_temp_reg(tmp);
8497 swz->tmp_dst = make_dst_temp_reg(tmp);
8498
8499 swz->unit = unit;
8500 }
8501 swz->inst_dst = &inst->Dst[0];
8502 swz->coord_src = &inst->Src[0];
8503
8504 emit->shadow_compare_units |= shadow_compare << unit;
8505 }
8506
8507
8508 /**
8509 * Returns the register to put the SAMPLE instruction results into.
8510 * This will either be the original instruction dst reg (if no swizzle
8511 * and no shadow comparison) or a temporary reg if there is a swizzle.
8512 */
8513 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)8514 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
8515 {
8516 return (swz->swizzled || swz->shadow_compare)
8517 ? &swz->tmp_dst : swz->inst_dst;
8518 }
8519
8520
8521 /**
8522 * This emits the MOV instruction that actually implements a texture swizzle
8523 * and/or shadow comparison.
8524 */
static void
end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                const struct tex_swizzle_info *swz)
{
   if (swz->shadow_compare) {
      /* Emit extra instructions to compare the fetched texel value against
       * a texture coordinate component.  The result of the comparison
       * is 0.0 or 1.0.
       */
      struct tgsi_full_src_register coord_src;
      struct tgsi_full_src_register texel_src =
         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      /* convert gallium comparison func to SVGA comparison func
       * (relies on the STATIC_ASSERTs in emit_comparison: SVGA3D_CMP_x
       * == PIPE_FUNC_x + 1)
       */
      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;

      /* which coord component holds the shadow reference value */
      int component =
         tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
      assert(component >= 0);
      coord_src = scalar_src(swz->coord_src, component);

      /* COMPARE tmp, coord, texel -- yields 0 or ~0 per component */
      emit_comparison(emit, compare_func,
                      &swz->tmp_dst, &coord_src, &texel_src);

      /* AND dest, tmp, {1.0} -- masks ~0 down to float 1.0 */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_AND, false);
      if (swz->swizzled) {
         /* swizzle still to come: keep the result in the temp */
         emit_dst_register(emit, &swz->tmp_dst);
      }
      else {
         emit_dst_register(emit, swz->inst_dst);
      }
      emit_src_register(emit, &swz->tmp_src);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   if (swz->swizzled) {
      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
      unsigned writemask_0 = 0, writemask_1 = 0;
      /* integer textures need integer 0/1 constants below */
      bool int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);

      /* Swizzle w/out zero/one terms (those are patched in afterwards) */
      struct tgsi_full_src_register src_swizzled =
         swizzle_src(&swz->tmp_src,
                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);

      /* MOV dst, color(tmp).<swizzle> */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                           swz->inst_dst, &src_swizzled);

      /* handle swizzle zero terms */
      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
                     ((swz_a == PIPE_SWIZZLE_0) << 3));
      writemask_0 &= swz->inst_dst->Register.WriteMask;

      if (writemask_0) {
         struct tgsi_full_src_register zero = int_tex ?
            make_immediate_reg_int(emit, 0) :
            make_immediate_reg_float(emit, 0.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_0);

         /* MOV dst.writemask_0, {0,0,0,0} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
      }

      /* handle swizzle one terms */
      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
                     ((swz_a == PIPE_SWIZZLE_1) << 3));
      writemask_1 &= swz->inst_dst->Register.WriteMask;

      if (writemask_1) {
         struct tgsi_full_src_register one = int_tex ?
            make_immediate_reg_int(emit, 1) :
            make_immediate_reg_float(emit, 1.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_1);

         /* MOV dst.writemask_1, {1,1,1,1} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
      }
   }
}
8622
8623
8624 /**
8625 * Emit code for TGSI_OPCODE_SAMPLE instruction.
8626 */
8627 static bool
emit_sample(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8628 emit_sample(struct svga_shader_emitter_v10 *emit,
8629 const struct tgsi_full_instruction *inst)
8630 {
8631 const unsigned resource_unit = inst->Src[1].Register.Index;
8632 const unsigned sampler_unit = inst->Src[2].Register.Index;
8633 struct tgsi_full_src_register coord;
8634 int offsets[3];
8635 struct tex_swizzle_info swz_info;
8636
8637 begin_tex_swizzle(emit, sampler_unit, inst, false, &swz_info);
8638
8639 get_texel_offsets(emit, inst, offsets);
8640
8641 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
8642
8643 /* SAMPLE dst, coord(s0), resource, sampler */
8644 begin_emit_instruction(emit);
8645
8646 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
8647 * with LOD=0. But our virtual GPU accepts this as-is.
8648 */
8649 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
8650 inst->Instruction.Saturate, offsets);
8651 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8652 emit_src_register(emit, &coord);
8653 emit_resource_register(emit, resource_unit);
8654 emit_sampler_register(emit, sampler_unit);
8655 end_emit_instruction(emit);
8656
8657 end_tex_swizzle(emit, &swz_info);
8658
8659 free_temp_indexes(emit);
8660
8661 return true;
8662 }
8663
8664
8665 /**
8666 * Check if a texture instruction is valid.
8667 * An example of an invalid texture instruction is doing shadow comparison
8668 * with an integer-valued texture.
8669 * If we detect an invalid texture instruction, we replace it with:
8670 * MOV dst, {1,1,1,1};
8671 * \return TRUE if valid, FALSE if invalid.
8672 */
8673 static bool
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8674 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8675 const struct tgsi_full_instruction *inst)
8676 {
8677 const unsigned unit = inst->Src[1].Register.Index;
8678 const enum tgsi_texture_type target = inst->Texture.Texture;
8679 bool valid = true;
8680
8681 if (tgsi_is_shadow_target(target) &&
8682 is_integer_type(emit->sampler_return_type[unit])) {
8683 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8684 valid = false;
8685 }
8686 /* XXX might check for other conditions in the future here */
8687
8688 if (!valid) {
8689 /* emit a MOV dst, {1,1,1,1} instruction. */
8690 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8691 begin_emit_instruction(emit);
8692 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
8693 emit_dst_register(emit, &inst->Dst[0]);
8694 emit_src_register(emit, &one);
8695 end_emit_instruction(emit);
8696 }
8697
8698 return valid;
8699 }
8700
8701
8702 /**
8703 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8704 */
8705 static bool
emit_tex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8706 emit_tex(struct svga_shader_emitter_v10 *emit,
8707 const struct tgsi_full_instruction *inst)
8708 {
8709 const uint unit = inst->Src[1].Register.Index;
8710 const enum tgsi_texture_type target = inst->Texture.Texture;
8711 VGPU10_OPCODE_TYPE opcode;
8712 struct tgsi_full_src_register coord;
8713 int offsets[3];
8714 struct tex_swizzle_info swz_info;
8715 bool compare_in_shader;
8716
8717 /* check that the sampler returns a float */
8718 if (!is_valid_tex_instruction(emit, inst))
8719 return true;
8720
8721 compare_in_shader = tgsi_is_shadow_target(target) &&
8722 emit->key.tex[unit].compare_in_shader;
8723
8724 begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8725
8726 get_texel_offsets(emit, inst, offsets);
8727
8728 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8729
8730 /* SAMPLE dst, coord(s0), resource, sampler */
8731 begin_emit_instruction(emit);
8732
8733 if (tgsi_is_shadow_target(target) && !compare_in_shader)
8734 opcode = VGPU10_OPCODE_SAMPLE_C;
8735 else
8736 opcode = VGPU10_OPCODE_SAMPLE;
8737
8738 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8739 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8740 emit_src_register(emit, &coord);
8741 emit_resource_register(emit, unit);
8742 emit_sampler_register(emit, unit);
8743 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8744 emit_tex_compare_refcoord(emit, target, &coord);
8745 }
8746 end_emit_instruction(emit);
8747
8748 end_tex_swizzle(emit, &swz_info);
8749
8750 free_temp_indexes(emit);
8751
8752 return true;
8753 }
8754
8755 /**
8756 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8757 */
static bool
emit_tg4(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register src;
   struct tgsi_full_src_register offset_src, sampler, ref;
   int offsets[3];

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return true;

   if (emit->version >= 50) {
      /* SM5 path: native GATHER4 with a per-instruction component select.
       * The component to gather comes from Src[1], an immediate.
       */
      unsigned target = inst->Texture.Texture;
      int index = inst->Src[1].Register.Index;
      const union tgsi_immediate_data *imm = emit->immediates[index];
      int select_comp = imm[inst->Src[1].Register.SwizzleX].Int;
      unsigned select_swizzle = PIPE_SWIZZLE_X;

      /* Map the selected component through the unit's view swizzle. */
      if (!tgsi_is_shadow_target(target)) {
         switch (select_comp) {
         case 0:
            select_swizzle = emit->key.tex[unit].swizzle_r;
            break;
         case 1:
            select_swizzle = emit->key.tex[unit].swizzle_g;
            break;
         case 2:
            select_swizzle = emit->key.tex[unit].swizzle_b;
            break;
         case 3:
            select_swizzle = emit->key.tex[unit].swizzle_a;
            break;
         default:
            assert(!"Unexpected component in texture gather swizzle");
         }
      }
      else {
         select_swizzle = emit->key.tex[unit].swizzle_r;
      }

      /* A constant 0/1 swizzle term needs no gather at all: just MOV the
       * constant into the destination.
       */
      if (select_swizzle == PIPE_SWIZZLE_1) {
         src = make_immediate_reg_float(emit, 1.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         return true;
      }
      else if (select_swizzle == PIPE_SWIZZLE_0) {
         src = make_immediate_reg_float(emit, 0.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         return true;
      }

      src = setup_texcoord(emit, unit, &inst->Src[0]);

      /* GATHER4 dst, coord, resource, sampler */
      /* GATHER4_C dst, coord, resource, sampler ref */
      /* GATHER4_PO dst, coord, offset resource, sampler */
      /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
      begin_emit_instruction(emit);
      if (inst->Texture.NumOffsets == 1) {
         /* _PO variants take a programmable offset operand */
         if (tgsi_is_shadow_target(target)) {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
                        inst->Instruction.Saturate);
         }
         else {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
                        inst->Instruction.Saturate);
         }
      }
      else {
         if (tgsi_is_shadow_target(target)) {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
                        inst->Instruction.Saturate);
         }
         else {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4,
                        inst->Instruction.Saturate);
         }
      }

      emit_dst_register(emit, &inst->Dst[0]);
      emit_src_register(emit, &src);
      if (inst->Texture.NumOffsets == 1) {
         /* offset */
         offset_src = make_src_reg(inst->TexOffsets[0].File,
                                   inst->TexOffsets[0].Index);
         offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
                                  inst->TexOffsets[0].SwizzleY,
                                  inst->TexOffsets[0].SwizzleZ,
                                  TGSI_SWIZZLE_W);
         emit_src_register(emit, &offset_src);
      }

      /* resource */
      emit_resource_register(emit, unit);

      /* sampler -- the gather component select rides on the sampler
       * register's swizzle
       */
      sampler = make_src_reg(TGSI_FILE_SAMPLER,
                             emit->key.tex[unit].sampler_index);
      sampler.Register.SwizzleX =
      sampler.Register.SwizzleY =
      sampler.Register.SwizzleZ =
      sampler.Register.SwizzleW = select_swizzle;
      emit_src_register(emit, &sampler);

      if (tgsi_is_shadow_target(target)) {
         /* ref */
         if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
            /* cube-array: the reference value lives in Src[1].x */
            ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
            emit_tex_compare_refcoord(emit, target, &ref);
         }
         else {
            emit_tex_compare_refcoord(emit, target, &src);
         }
      }

      end_emit_instruction(emit);
      free_temp_indexes(emit);
   }
   else {
      /* Only a single channel is supported in SM4_1 and we report
       * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
       * Only the 0th component will be gathered.
       */
      switch (emit->key.tex[unit].swizzle_r) {
      case PIPE_SWIZZLE_X:
         get_texel_offsets(emit, inst, offsets);
         src = setup_texcoord(emit, unit, &inst->Src[0]);

         /* Gather dst, coord, resource, sampler */
         begin_emit_instruction(emit);
         emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
                            inst->Instruction.Saturate, offsets);
         emit_dst_register(emit, &inst->Dst[0]);
         emit_src_register(emit, &src);
         emit_resource_register(emit, unit);

         /* sampler */
         sampler = make_src_reg(TGSI_FILE_SAMPLER,
                                emit->key.tex[unit].sampler_index);
         sampler.Register.SwizzleX =
         sampler.Register.SwizzleY =
         sampler.Register.SwizzleZ =
         sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
         emit_src_register(emit, &sampler);

         end_emit_instruction(emit);
         break;
      case PIPE_SWIZZLE_W:
      case PIPE_SWIZZLE_1:
         /* constant 1.0 result */
         src = make_immediate_reg_float(emit, 1.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         break;
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_0:
      default:
         /* constant 0.0 result */
         src = make_immediate_reg_float(emit, 0.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         break;
      }
   }

   return true;
}
8924
8925
8926
8927 /**
8928 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8929 */
8930 static bool
emit_tex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8931 emit_tex2(struct svga_shader_emitter_v10 *emit,
8932 const struct tgsi_full_instruction *inst)
8933 {
8934 const uint unit = inst->Src[2].Register.Index;
8935 unsigned target = inst->Texture.Texture;
8936 struct tgsi_full_src_register coord, ref;
8937 int offsets[3];
8938 struct tex_swizzle_info swz_info;
8939 VGPU10_OPCODE_TYPE opcode;
8940 bool compare_in_shader;
8941
8942 /* check that the sampler returns a float */
8943 if (!is_valid_tex_instruction(emit, inst))
8944 return true;
8945
8946 compare_in_shader = emit->key.tex[unit].compare_in_shader;
8947 if (compare_in_shader)
8948 opcode = VGPU10_OPCODE_SAMPLE;
8949 else
8950 opcode = VGPU10_OPCODE_SAMPLE_C;
8951
8952 begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8953
8954 get_texel_offsets(emit, inst, offsets);
8955
8956 coord = setup_texcoord(emit, unit, &inst->Src[0]);
8957 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8958
8959 /* SAMPLE_C dst, coord, resource, sampler, ref */
8960 begin_emit_instruction(emit);
8961 emit_sample_opcode(emit, opcode,
8962 inst->Instruction.Saturate, offsets);
8963 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8964 emit_src_register(emit, &coord);
8965 emit_resource_register(emit, unit);
8966 emit_sampler_register(emit, unit);
8967 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8968 emit_tex_compare_refcoord(emit, target, &ref);
8969 }
8970 end_emit_instruction(emit);
8971
8972 end_tex_swizzle(emit, &swz_info);
8973
8974 free_temp_indexes(emit);
8975
8976 return true;
8977 }
8978
8979
8980 /**
8981 * Emit code for TGSI_OPCODE_TXP (projective texture)
8982 */
8983 static bool
emit_txp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8984 emit_txp(struct svga_shader_emitter_v10 *emit,
8985 const struct tgsi_full_instruction *inst)
8986 {
8987 const uint unit = inst->Src[1].Register.Index;
8988 const enum tgsi_texture_type target = inst->Texture.Texture;
8989 VGPU10_OPCODE_TYPE opcode;
8990 int offsets[3];
8991 unsigned tmp = get_temp_index(emit);
8992 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8993 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8994 struct tgsi_full_src_register src0_wwww =
8995 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8996 struct tgsi_full_src_register coord;
8997 struct tex_swizzle_info swz_info;
8998 bool compare_in_shader;
8999
9000 /* check that the sampler returns a float */
9001 if (!is_valid_tex_instruction(emit, inst))
9002 return true;
9003
9004 compare_in_shader = tgsi_is_shadow_target(target) &&
9005 emit->key.tex[unit].compare_in_shader;
9006
9007 begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
9008
9009 get_texel_offsets(emit, inst, offsets);
9010
9011 coord = setup_texcoord(emit, unit, &inst->Src[0]);
9012
9013 /* DIV tmp, coord, coord.wwww */
9014 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
9015 &coord, &src0_wwww);
9016
9017 /* SAMPLE dst, coord(tmp), resource, sampler */
9018 begin_emit_instruction(emit);
9019
9020 if (tgsi_is_shadow_target(target) && !compare_in_shader)
9021 /* NOTE: for non-fragment shaders, we should use
9022 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
9023 */
9024 opcode = VGPU10_OPCODE_SAMPLE_C;
9025 else
9026 opcode = VGPU10_OPCODE_SAMPLE;
9027
9028 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9029 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9030 emit_src_register(emit, &tmp_src); /* projected coord */
9031 emit_resource_register(emit, unit);
9032 emit_sampler_register(emit, unit);
9033 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
9034 emit_tex_compare_refcoord(emit, target, &tmp_src);
9035 }
9036 end_emit_instruction(emit);
9037
9038 end_tex_swizzle(emit, &swz_info);
9039
9040 free_temp_indexes(emit);
9041
9042 return true;
9043 }
9044
9045
9046 /**
9047 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
9048 */
9049 static bool
emit_txd(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9050 emit_txd(struct svga_shader_emitter_v10 *emit,
9051 const struct tgsi_full_instruction *inst)
9052 {
9053 const uint unit = inst->Src[3].Register.Index;
9054 const enum tgsi_texture_type target = inst->Texture.Texture;
9055 int offsets[3];
9056 struct tgsi_full_src_register coord;
9057 struct tex_swizzle_info swz_info;
9058
9059 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9060 &swz_info);
9061
9062 get_texel_offsets(emit, inst, offsets);
9063
9064 coord = setup_texcoord(emit, unit, &inst->Src[0]);
9065
9066 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
9067 begin_emit_instruction(emit);
9068 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
9069 inst->Instruction.Saturate, offsets);
9070 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9071 emit_src_register(emit, &coord);
9072 emit_resource_register(emit, unit);
9073 emit_sampler_register(emit, unit);
9074 emit_src_register(emit, &inst->Src[1]); /* Xderiv */
9075 emit_src_register(emit, &inst->Src[2]); /* Yderiv */
9076 end_emit_instruction(emit);
9077
9078 end_tex_swizzle(emit, &swz_info);
9079
9080 free_temp_indexes(emit);
9081
9082 return true;
9083 }
9084
9085
/**
 * Emit code for TGSI_OPCODE_TXF (texel fetch).
 *
 * Emits VGPU10 LD_MS for multisample targets (with the sample index taken
 * from src0.w) or plain LD otherwise.
 */
static bool
emit_txf(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Src[1] is the sampler view; its register index selects the unit */
   const uint unit = inst->Src[1].Register.Index;
   /* Use the MSAA path only if the target is MSAA and the bound texture
    * actually has more than one sample.
    */
   const bool msaa = tgsi_is_msaa_target(inst->Texture.Texture)
      && emit->key.tex[unit].num_samples > 1;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   /* TXF is never a shadow/comparison fetch, hence 'false' */
   begin_tex_swizzle(emit, unit, inst, false, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   if (msaa) {
      assert(emit->key.tex[unit].num_samples > 1);

      /* Fetch one sample from an MSAA texture */
      struct tgsi_full_src_register sampleIndex =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      /* LD_MS dst, coord(s0), resource, sampleIndex */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      emit_src_register(emit, &sampleIndex);
      end_emit_instruction(emit);
   }
   else {
      /* Fetch one texel specified by integer coordinate */
      /* LD dst, coord(s0), resource */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      end_emit_instruction(emit);
   }

   /* Emit any swizzle fix-up code and release swizzle temporaries */
   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
9137
9138
/**
 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
 *
 * Translates to VGPU10 SAMPLE_L (for TXL) or SAMPLE_B (for TXB/TXB2).
 */
static bool
emit_txl_txb(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   const enum tgsi_texture_type target = inst->Texture.Texture;
   VGPU10_OPCODE_TYPE opcode;
   unsigned unit;
   int offsets[3];
   struct tgsi_full_src_register coord, lod_bias;
   struct tex_swizzle_info swz_info;

   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);

   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      /* TXB2: the bias is src1.x and the sampler comes from src2 */
      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
      unit = inst->Src[2].Register.Index;
   }
   else {
      /* TXL/TXB: the LOD or bias is src0.w and the sampler is src1 */
      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      unit = inst->Src[1].Register.Index;
   }

   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
   begin_emit_instruction(emit);
   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      opcode = VGPU10_OPCODE_SAMPLE_L;
   }
   else {
      opcode = VGPU10_OPCODE_SAMPLE_B;
   }
   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &lod_bias);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
9196
9197
9198 /**
9199 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
9200 */
9201 static bool
emit_txl2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9202 emit_txl2(struct svga_shader_emitter_v10 *emit,
9203 const struct tgsi_full_instruction *inst)
9204 {
9205 unsigned target = inst->Texture.Texture;
9206 unsigned opcode, unit;
9207 int offsets[3];
9208 struct tgsi_full_src_register coord, lod;
9209 struct tex_swizzle_info swz_info;
9210
9211 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
9212
9213 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9214 unit = inst->Src[2].Register.Index;
9215
9216 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9217 &swz_info);
9218
9219 get_texel_offsets(emit, inst, offsets);
9220
9221 coord = setup_texcoord(emit, unit, &inst->Src[0]);
9222
9223 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
9224 begin_emit_instruction(emit);
9225 opcode = VGPU10_OPCODE_SAMPLE_L;
9226 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9227 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9228 emit_src_register(emit, &coord);
9229 emit_resource_register(emit, unit);
9230 emit_sampler_register(emit, unit);
9231 emit_src_register(emit, &lod);
9232 end_emit_instruction(emit);
9233
9234 end_tex_swizzle(emit, &swz_info);
9235
9236 free_temp_indexes(emit);
9237
9238 return true;
9239 }
9240
9241
9242 /**
9243 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
9244 */
9245 static bool
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9246 emit_txq(struct svga_shader_emitter_v10 *emit,
9247 const struct tgsi_full_instruction *inst)
9248 {
9249 const uint unit = inst->Src[1].Register.Index;
9250
9251 if (emit->key.tex[unit].target == PIPE_BUFFER) {
9252 /* RESINFO does not support querying texture buffers, so we instead
9253 * store texture buffer sizes in shader constants, then copy them to
9254 * implement TXQ instead of emitting RESINFO.
9255 * MOV dst, const[texture_buffer_size_index[unit]]
9256 */
9257 struct tgsi_full_src_register size_src =
9258 make_src_const_reg(emit->texture_buffer_size_index[unit]);
9259 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
9260 } else {
9261 /* RESINFO dst, srcMipLevel, resource */
9262 begin_emit_instruction(emit);
9263 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
9264 emit_dst_register(emit, &inst->Dst[0]);
9265 emit_src_register(emit, &inst->Src[0]);
9266 emit_resource_register(emit, unit);
9267 end_emit_instruction(emit);
9268 }
9269
9270 free_temp_indexes(emit);
9271
9272 return true;
9273 }
9274
9275
9276 /**
9277 * Does this opcode produce a double-precision result?
9278 * XXX perhaps move this to a TGSI utility.
9279 */
9280 static bool
opcode_has_dbl_dst(unsigned opcode)9281 opcode_has_dbl_dst(unsigned opcode)
9282 {
9283 switch (opcode) {
9284 case TGSI_OPCODE_F2D:
9285 case TGSI_OPCODE_DABS:
9286 case TGSI_OPCODE_DADD:
9287 case TGSI_OPCODE_DFRAC:
9288 case TGSI_OPCODE_DMAX:
9289 case TGSI_OPCODE_DMIN:
9290 case TGSI_OPCODE_DMUL:
9291 case TGSI_OPCODE_DNEG:
9292 case TGSI_OPCODE_I2D:
9293 case TGSI_OPCODE_U2D:
9294 case TGSI_OPCODE_DFMA:
9295 // XXX more TBD
9296 return true;
9297 default:
9298 return false;
9299 }
9300 }
9301
9302
9303 /**
9304 * Does this opcode use double-precision source registers?
9305 */
9306 static bool
opcode_has_dbl_src(unsigned opcode)9307 opcode_has_dbl_src(unsigned opcode)
9308 {
9309 switch (opcode) {
9310 case TGSI_OPCODE_D2F:
9311 case TGSI_OPCODE_DABS:
9312 case TGSI_OPCODE_DADD:
9313 case TGSI_OPCODE_DFRAC:
9314 case TGSI_OPCODE_DMAX:
9315 case TGSI_OPCODE_DMIN:
9316 case TGSI_OPCODE_DMUL:
9317 case TGSI_OPCODE_DNEG:
9318 case TGSI_OPCODE_D2I:
9319 case TGSI_OPCODE_D2U:
9320 case TGSI_OPCODE_DFMA:
9321 case TGSI_OPCODE_DSLT:
9322 case TGSI_OPCODE_DSGE:
9323 case TGSI_OPCODE_DSEQ:
9324 case TGSI_OPCODE_DSNE:
9325 case TGSI_OPCODE_DRCP:
9326 case TGSI_OPCODE_DSQRT:
9327 case TGSI_OPCODE_DMAD:
9328 case TGSI_OPCODE_DLDEXP:
9329 case TGSI_OPCODE_DRSQ:
9330 case TGSI_OPCODE_DTRUNC:
9331 case TGSI_OPCODE_DCEIL:
9332 case TGSI_OPCODE_DFLR:
9333 case TGSI_OPCODE_DROUND:
9334 case TGSI_OPCODE_DSSG:
9335 return true;
9336 default:
9337 return false;
9338 }
9339 }
9340
9341
/**
 * Check that the swizzle for reading from a double-precision register
 * is valid. If not valid, move the source to a temporary register first.
 *
 * A double occupies a pair of 32-bit components, so each half of the
 * swizzle must select a whole pair: xy or zw.
 *
 * \return the original source, or a temp-register copy with an
 *         identity swizzle.  Any temp allocated here is released by
 *         the caller via free_temp_indexes().
 */
static struct tgsi_full_src_register
check_double_src(struct svga_shader_emitter_v10 *emit,
                 const struct tgsi_full_src_register *reg)
{
   struct tgsi_full_src_register src;

   /* First swizzle pair (X,Y) must be xy or zw, and likewise for the
    * second pair (Z,W).
    */
   if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
         reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
        (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
         reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
       ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
         reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
        (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
         reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
      src = *reg;
   } else {
      /* move the src to a temporary to fix the swizzle */
      unsigned tmp = get_temp_index(emit);
      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
      src = tmp_src;

      /* The temporary index will be released in the caller */
   }
   return src;
}
9373
/**
 * Check that the writemask for a double-precision instruction is valid.
 * Debug-build sanity check only; emits no code.
 */
static void
check_double_dst_writemask(const struct tgsi_full_instruction *inst)
{
   ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_DABS:
   case TGSI_OPCODE_DADD:
   case TGSI_OPCODE_DFRAC:
   case TGSI_OPCODE_DNEG:
   case TGSI_OPCODE_DMAD:
   case TGSI_OPCODE_DMAX:
   case TGSI_OPCODE_DMIN:
   case TGSI_OPCODE_DMUL:
   case TGSI_OPCODE_DRCP:
   case TGSI_OPCODE_DSQRT:
   case TGSI_OPCODE_F2D:
   case TGSI_OPCODE_DFMA:
      /* Double results occupy component pairs, so only whole xy and/or
       * zw pairs may be written.
       */
      assert(writemask == TGSI_WRITEMASK_XYZW ||
             writemask == TGSI_WRITEMASK_XY ||
             writemask == TGSI_WRITEMASK_ZW);
      break;
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSNE:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_D2U:
      /* Write to 1 or 2 components only */
      assert(util_bitcount(writemask) <= 2);
      break;
   default:
      /* XXX this list may be incomplete */
      ;
   }
}
9413
9414
9415 /**
9416 * Double-precision absolute value.
9417 */
9418 static bool
emit_dabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9419 emit_dabs(struct svga_shader_emitter_v10 *emit,
9420 const struct tgsi_full_instruction *inst)
9421 {
9422 assert(emit->version >= 50);
9423
9424 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9425 check_double_dst_writemask(inst);
9426
9427 struct tgsi_full_src_register abs_src = absolute_src(&src);
9428
9429 /* DMOV dst, |src| */
9430 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
9431
9432 free_temp_indexes(emit);
9433 return true;
9434 }
9435
9436
9437 /**
9438 * Double-precision negation
9439 */
9440 static bool
emit_dneg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9441 emit_dneg(struct svga_shader_emitter_v10 *emit,
9442 const struct tgsi_full_instruction *inst)
9443 {
9444 assert(emit->version >= 50);
9445 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9446 check_double_dst_writemask(inst);
9447
9448 struct tgsi_full_src_register neg_src = negate_src(&src);
9449
9450 /* DMOV dst, -src */
9451 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
9452
9453 free_temp_indexes(emit);
9454 return true;
9455 }
9456
9457
/**
 * Double-precision multiply-add.
 * SM5 has no DMAD opcode, so implement it with DMUL + DADD.
 */
static bool
emit_dmad(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);
   /* Fix up source swizzles for double reads if necessary */
   struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
   struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
   struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
   check_double_dst_writemask(inst);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /* DMUL tmp, src[0], src[1] */
   emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
                        &tmp_dst, &src0, &src1, NULL,
                        false, inst->Instruction.Precise);

   /* DADD dst, tmp, src[2] */
   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
                        &inst->Dst[0], &tmp_src, &src2, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);
   free_temp_indexes(emit);

   return true;
}
9488
9489
/**
 * Double precision reciprocal square root.
 * Emitted as the VMware-specific DRSQ sub-opcode of VGPU10_OPCODE_VMWARE.
 */
static bool
emit_drsq(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_dst_register *dst,
          const struct tgsi_full_src_register *src)
{
   assert(emit->version >= 50);

   VGPU10OpcodeToken0 token0;
   /* Fix up the source swizzle for double reads if necessary */
   struct tgsi_full_src_register dsrc = check_double_src(emit, src);

   begin_emit_instruction(emit);

   /* Build the VMware extension opcode token by hand */
   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_VMWARE;
   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
   emit_dword(emit, token0.value);
   emit_dst_register(emit, dst);
   emit_src_register(emit, &dsrc);
   end_emit_instruction(emit);

   free_temp_indexes(emit);

   return true;
}
9517
9518
/**
 * There is no SM5 opcode for double precision square root.
 * It will be implemented with DRSQ.
 * dst = src * DRSQ(src)
 *
 * A zero source is special-cased (see below) so that DRSQ(0) == INF
 * does not produce INF * 0 in the result.
 */
static bool
emit_dsqrt(struct svga_shader_emitter_v10 *emit,
           const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);

   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);

   /* temporary register to hold the source */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);

   /* temporary register to hold the DEQ result */
   unsigned tmp_cond = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
   struct tgsi_full_dst_register tmp_cond_dst_xy =
      writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
   struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
   /* Doubles occupy component pairs, so replicate the xy pair */
   struct tgsi_full_src_register tmp_cond_src_xy =
      swizzle_src(&tmp_cond_src,
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);

   /* The reciprocal square root of zero yields INF.
    * So if the source is 0, we replace it with 1 in the tmp register.
    * The later multiplication of zero in the original source will yield 0
    * in the result.
    */

   /* tmp1 = (src == 0) ? 1 : src;
    * EQ tmp1, 0, src
    * MOVC tmp, tmp1, 1.0, src
    */
   struct tgsi_full_src_register zero =
      make_immediate_reg_double(emit, 0);

   struct tgsi_full_src_register one =
      make_immediate_reg_double(emit, 1.0);

   emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
                        &zero, &src);
   emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
                        &tmp_cond_src_xy, &one, &src);

   struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);

   /* DRSQ tmp_rsq, tmp */
   emit_drsq(emit, &tmp_rsq_dst, &tmp_src);

   /* DMUL dst, tmp_rsq, src[0] */
   emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
                        &tmp_rsq_src, &src);

   free_temp_indexes(emit);

   return true;
}
9583
9584
/**
 * glsl-nir path does not lower DTRUNC, so we need to
 * add the translation here.
 *
 * frac = DFRAC(src)
 * tmp = src - frac
 * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
 */
static bool
emit_dtrunc(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);

   /* Fix up the source swizzle for double reads if necessary */
   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);

   /* frac = DFRAC(src) */
   unsigned frac_index = get_temp_index(emit);
   struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
   struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);

   /* DFRC is a VMware-extension sub-opcode; build the token by hand */
   VGPU10OpcodeToken0 token0;
   begin_emit_instruction(emit);
   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_VMWARE;
   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
   emit_dword(emit, token0.value);
   emit_dst_register(emit, &frac_dst);
   emit_src_register(emit, &src);
   end_emit_instruction(emit);

   /* tmp = src - frac */
   unsigned tmp_index = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
   struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
                        &tmp_dst, &src, &negate_frac_src, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* cond = frac==0 */
   unsigned cond_index = get_temp_index(emit);
   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
   struct tgsi_full_src_register zero =
      make_immediate_reg_double(emit, 0);

   /* Only use one or two components for double opcode */
   cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);

   emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
                        &cond_dst, &frac_src, &zero, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* tmp2 = cond ? 0 : 1 */
   unsigned tmp2_index = get_temp_index(emit);
   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
   /* Replicate the xy pair of the condition for the double select */
   struct tgsi_full_src_register cond_src_xy =
      swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
   struct tgsi_full_src_register one =
      make_immediate_reg_double(emit, 1.0);

   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
                        &tmp2_dst, &cond_src_xy, &zero, &one,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* tmp2 = tmp + tmp2 */
   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
                        &tmp2_dst, &tmp_src, &tmp2_src, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* cond = src>=0 (cond_dst reused; overwrites the frac==0 result) */
   emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
                        &cond_dst, &src, &zero, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* dst = cond ? tmp : tmp2 */
   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
                        &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   free_temp_indexes(emit);
   return true;
}
9671
9672
/**
 * Emit an interpolate-at-offset operation via VGPU10 EVAL_SNAPPED.
 * src0 is the attribute to interpolate, src1.xy is the offset.
 */
static bool
emit_interp_offset(struct svga_shader_emitter_v10 *emit,
                   const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);

   /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
    * where (0,0) is the center of the pixel.  We need to translate that
    * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
    * Also need to flip the Y axis (I think).
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst_xy =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
   /* scale factors: +16 for X, -16 for Y (axis flip) */
   struct tgsi_full_src_register const16 =
      make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);

   /* MUL tmp.xy, src1, {16, -16, 0, 0} */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
                        &tmp_dst_xy, &inst->Src[1], &const16);

   /* FTOI tmp.xy, tmp */
   emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);

   /* EVAL_SNAPPED dst, src0, tmp */
   emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
                        &inst->Dst[0], &inst->Src[0], &tmp_src);

   free_temp_indexes(emit);

   return true;
}
9707
9708
/**
 * Emit a simple instruction (like ADD, MUL, MIN, etc).
 *
 * Handles double-precision operand fix-ups and tracks loop nesting
 * depth for BGNLOOP/ENDLOOP as a side effect.
 */
static bool
emit_simple(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
   unsigned i;

   struct tgsi_full_src_register src[3];

   /* Keep the emitter's loop-depth counter in sync */
   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
      emit->current_loop_depth++;
   }
   else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
      emit->current_loop_depth--;
   }

   /* Double sources may need a swizzle fix-up via a temporary */
   for (i = 0; i < op->num_src; i++) {
      if (dbl_src)
         src[i] = check_double_src(emit, &inst->Src[i]);
      else
         src[i] = inst->Src[i];
   }

   begin_emit_instruction(emit);
   emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
                       inst->Instruction.Saturate,
                       inst->Instruction.Precise);
   for (i = 0; i < op->num_dst; i++) {
      if (dbl_dst) {
         /* debug sanity check on the destination writemask */
         check_double_dst_writemask(inst);
      }
      emit_dst_register(emit, &inst->Dst[i]);
   }
   for (i = 0; i < op->num_src; i++) {
      emit_src_register(emit, &src[i]);
   }
   end_emit_instruction(emit);

   free_temp_indexes(emit);
   return true;
}
9756
9757
/**
 * Emit MSB instruction (like IMSB, UMSB).
 *
 * GLSL returns the index starting from the LSB;
 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
 * To get correct location as per glsl from SM5 device, we should
 * return (31 - index) if returned index is not -1.
 */
static bool
emit_msb(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];

   /* The fix-up below reads the dst back as a source, which only works
    * for non-OUTPUT files.
    */
   assert(index_dst->Register.File != TGSI_FILE_OUTPUT);

   /* Source register aliasing the instruction's destination */
   struct tgsi_full_src_register index_src =
      make_src_reg(index_dst->Register.File, index_dst->Register.Index);
   struct tgsi_full_src_register imm31 =
      make_immediate_reg_int(emit, 31);
   imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register neg_one =
      make_immediate_reg_int(emit, -1);
   neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   const struct tgsi_full_dst_register tmp_dst =
      make_dst_temp_reg(tmp);
   const struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register tmp_src_x =
       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
   int writemask = TGSI_WRITEMASK_X;
   int src_swizzle = TGSI_SWIZZLE_X;
   int dst_writemask = index_dst->Register.WriteMask;

   /* Emit the raw SM5 firstbit instruction first */
   emit_simple(emit, inst);

   /* index conversion from SM5 to GLSL: per enabled dst component,
    * if (index != -1) index = 31 - index
    */
   while (writemask & dst_writemask) {
      struct tgsi_full_src_register index_src_comp =
         scalar_src(&index_src, src_swizzle);
      struct tgsi_full_dst_register index_dst_comp =
         writemask_dst(index_dst, writemask);

      /* check if index_src_comp != -1 */
      emit_instruction_op2(emit, VGPU10_OPCODE_INE,
                           &tmp_dst_x, &index_src_comp, &neg_one);

      /* if */
      emit_if(emit, &tmp_src_x);

      index_src_comp = negate_src(&index_src_comp);
      /* SUB DST, IMM{31}, DST (expressed as IADD with negated src) */
      emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
                           &index_dst_comp, &imm31, &index_src_comp);

      /* endif */
      emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

      /* advance to the next component */
      writemask = writemask << 1;
      src_swizzle = src_swizzle + 1;
   }
   free_temp_indexes(emit);
   return true;
}
9823
9824
/**
 * Emit a BFE instruction (like UBFE, IBFE).
 * tgsi representation:
 * U/IBFE dst, value, offset, width
 * SM5 representation:
 * U/IBFE dst, width, offset, value
 * Note: SM5 has width & offset range (0-31);
 * whereas GLSL has width & offset range (0-32)
 */
static bool
emit_bfe(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
   zero = scalar_src(&zero, TGSI_SWIZZLE_X);

   unsigned tmp1 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
   const struct tgsi_full_dst_register cond1_dst_x =
      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond1_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);

   unsigned tmp2 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
   const struct tgsi_full_dst_register cond2_dst_x =
      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond2_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);

   /**
    * In SM5, when width = 32 and offset = 0, it returns 0.
    * On the other hand GLSL, expects value to be copied as it is, to dst.
    */

   /* cond1 = (width == 32) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond1_dst_x, &inst->Src[2], &imm32);

   /* cond2 = (offset == 0) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond2_dst_x, &inst->Src[1], &zero);

   /* cond2 = cond1 & cond2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
                        &cond2_src_x,
                        &cond1_src_x);
   /* IF */
   emit_if(emit, &cond2_src_x);

   /* full-width extract: just copy the value through */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &inst->Src[0]);

   /* ELSE */
   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);

   /* U/IBFE dst, width, offset, value */
   emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
                        &inst->Src[2], &inst->Src[1], &inst->Src[0]);

   /* ENDIF */
   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

   free_temp_indexes(emit);
   return true;
}
9894
9895
/**
 * Emit BFI instruction
 * tgsi representation:
 * BFI dst, base, insert, offset, width
 * SM5 representation:
 * BFI dst, width, offset, insert, base
 * Note: SM5 has width & offset range (0-31);
 * whereas GLSL has width & offset range (0-32)
 */
static bool
emit_bfi(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);

   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
   zero = scalar_src(&zero, TGSI_SWIZZLE_X);

   unsigned tmp1 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
   const struct tgsi_full_dst_register cond1_dst_x =
      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond1_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);

   unsigned tmp2 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
   const struct tgsi_full_dst_register cond2_dst_x =
      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond2_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);

   /**
    * In SM5, when width = 32 and offset = 0, it returns 0.
    * On the other hand GLSL, expects insert to be copied as it is, to dst.
    */

   /* cond1 = (width == 32) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond1_dst_x, &inst->Src[3], &imm32);

   /* cond2 = (offset == 0) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond2_dst_x, &inst->Src[2], &zero);

   /* cond2 = cond1 & cond2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND,
                        &cond2_dst_x, &cond2_src_x, &cond1_src_x);

   /* if */
   emit_if(emit, &cond2_src_x);

   /* full-width insert: just copy 'insert' through */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &inst->Src[1]);

   /* else */
   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);

   /* BFI dst, width, offset, insert, base */
   begin_emit_instruction(emit);
   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
   emit_dst_register(emit, &inst->Dst[0]);
   emit_src_register(emit, &inst->Src[3]);
   emit_src_register(emit, &inst->Src[2]);
   emit_src_register(emit, &inst->Src[1]);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   /* endif */
   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

   free_temp_indexes(emit);
   return true;
}
9972
9973
9974 /**
9975 * We only special case the MOV instruction to try to detect constant
9976 * color writes in the fragment shader.
9977 */
9978 static bool
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9979 emit_mov(struct svga_shader_emitter_v10 *emit,
9980 const struct tgsi_full_instruction *inst)
9981 {
9982 const struct tgsi_full_src_register *src = &inst->Src[0];
9983 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9984
9985 if (emit->unit == PIPE_SHADER_FRAGMENT &&
9986 dst->Register.File == TGSI_FILE_OUTPUT &&
9987 dst->Register.Index == 0 &&
9988 src->Register.File == TGSI_FILE_CONSTANT &&
9989 !src->Register.Indirect) {
9990 emit->constant_color_output = true;
9991 }
9992
9993 return emit_simple(emit, inst);
9994 }
9995
9996
9997 /**
9998 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9999 * where TGSI only uses one dest register.
10000 */
10001 static bool
emit_simple_1dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned dst_count,unsigned dst_index)10002 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
10003 const struct tgsi_full_instruction *inst,
10004 unsigned dst_count,
10005 unsigned dst_index)
10006 {
10007 const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10008 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
10009 unsigned i;
10010
10011 begin_emit_instruction(emit);
10012 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
10013
10014 for (i = 0; i < dst_count; i++) {
10015 if (i == dst_index) {
10016 emit_dst_register(emit, &inst->Dst[0]);
10017 } else {
10018 emit_null_dst_register(emit);
10019 }
10020 }
10021
10022 for (i = 0; i < op->num_src; i++) {
10023 emit_src_register(emit, &inst->Src[i]);
10024 }
10025 end_emit_instruction(emit);
10026
10027 return true;
10028 }
10029
10030
/**
 * Emit a vmware specific VGPU10 instruction.
 *
 * \param subopcode  the VMware extension sub-opcode, encoded in the
 *                   vmwareOpcodeType field of the opcode token
 */
static bool
emit_vmware(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst,
            VGPU10_VMWARE_OPCODE_TYPE subopcode)
{
   VGPU10OpcodeToken0 token0;
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
   unsigned i;
   struct tgsi_full_src_register src[3];

   /* Double sources may need a swizzle fix-up via a temporary */
   for (i = 0; i < op->num_src; i++) {
      if (dbl_src)
         src[i] = check_double_src(emit, &inst->Src[i]);
      else
         src[i] = inst->Src[i];
   }

   begin_emit_instruction(emit);

   /* Non-zero sub-opcodes require SM5 (version >= 50) support */
   assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);

   /* Build the VMware extension opcode token by hand */
   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_VMWARE;
   token0.vmwareOpcodeType = subopcode;
   emit_dword(emit, token0.value);

   if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
      /* IDIV only uses the first dest register. */
      emit_dst_register(emit, &inst->Dst[0]);
      emit_null_dst_register(emit);
   } else {
      for (i = 0; i < op->num_dst; i++) {
         if (dbl_dst) {
            /* debug sanity check on the destination writemask */
            check_double_dst_writemask(inst);
         }
         emit_dst_register(emit, &inst->Dst[i]);
      }
   }

   for (i = 0; i < op->num_src; i++) {
      emit_src_register(emit, &src[i]);
   }
   end_emit_instruction(emit);

   free_temp_indexes(emit);
   return true;
}
10084
10085 /**
10086 * Emit a memory register
10087 */
10088
typedef enum {
   MEM_STORE = 0,       /**< write: operand uses a component mask */
   MEM_LOAD = 1,        /**< read: operand uses the source swizzle */
   MEM_ATOMIC_COUNTER   /**< atomic op: zero-component operand */
} memory_op;
10094
10095 static void
emit_memory_register(struct svga_shader_emitter_v10 * emit,memory_op mem_op,const struct tgsi_full_instruction * inst,unsigned regIndex,unsigned writemask)10096 emit_memory_register(struct svga_shader_emitter_v10 *emit,
10097 memory_op mem_op,
10098 const struct tgsi_full_instruction *inst,
10099 unsigned regIndex, unsigned writemask)
10100 {
10101 VGPU10OperandToken0 operand0;
10102 unsigned resIndex = 0;
10103
10104 operand0.value = 0;
10105 operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
10106 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10107 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10108
10109 switch (mem_op) {
10110 case MEM_ATOMIC_COUNTER:
10111 {
10112 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10113 resIndex = inst->Src[regIndex].Register.Index;
10114 break;
10115 }
10116 case MEM_STORE:
10117 {
10118 const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex];
10119
10120 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10121 operand0.mask = writemask;
10122 resIndex = reg->Register.Index;
10123 break;
10124 }
10125 case MEM_LOAD:
10126 {
10127 const struct tgsi_full_src_register *reg = &inst->Src[regIndex];
10128
10129 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10130 operand0.swizzleX = reg->Register.SwizzleX;
10131 operand0.swizzleY = reg->Register.SwizzleY;
10132 operand0.swizzleZ = reg->Register.SwizzleZ;
10133 operand0.swizzleW = reg->Register.SwizzleW;
10134 resIndex = reg->Register.Index;
10135 break;
10136 }
10137 default:
10138 assert(!"Unexpected memory opcode");
10139 break;
10140 }
10141
10142 emit_dword(emit, operand0.value);
10143 emit_dword(emit, resIndex);
10144 }
10145
10146
typedef enum {
   UAV_STORE = 0,   /**< UAV write: component-mask operand */
   UAV_LOAD = 1,    /**< UAV read: identity XYZW swizzle operand */
   UAV_ATOMIC = 2,  /**< atomic op: zero-component operand */
   UAV_RESQ = 3,    /**< resource size query: identity XYZW swizzle operand */
} UAV_OP;
10153
10154
10155 /**
10156 * Emit a uav register
10157 * \param uav_index index of resource register
10158 * \param uav_op UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode
10159 * \param resourceType resource file type
10160 * \param writemask resource writemask
10161 */
10162
10163 static void
emit_uav_register(struct svga_shader_emitter_v10 * emit,unsigned res_index,UAV_OP uav_op,enum tgsi_file_type resourceType,unsigned writemask)10164 emit_uav_register(struct svga_shader_emitter_v10 *emit,
10165 unsigned res_index, UAV_OP uav_op,
10166 enum tgsi_file_type resourceType, unsigned writemask)
10167 {
10168 VGPU10OperandToken0 operand0;
10169 unsigned uav_index = INVALID_INDEX;
10170
10171 operand0.value = 0;
10172 operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
10173 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10174 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10175
10176 switch (resourceType) {
10177 case TGSI_FILE_IMAGE:
10178 uav_index = emit->key.images[res_index].uav_index;
10179 break;
10180 case TGSI_FILE_BUFFER:
10181 uav_index = emit->key.shader_buf_uav_index[res_index];
10182 break;
10183 case TGSI_FILE_HW_ATOMIC:
10184 uav_index = emit->key.atomic_buf_uav_index[res_index];
10185 break;
10186 default:
10187 assert(0);
10188 }
10189
10190 switch (uav_op) {
10191 case UAV_ATOMIC:
10192 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10193 break;
10194
10195 case UAV_STORE:
10196 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10197 operand0.mask = writemask;
10198 break;
10199
10200 case UAV_LOAD:
10201 case UAV_RESQ:
10202 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10203 operand0.swizzleX = VGPU10_COMPONENT_X;
10204 operand0.swizzleY = VGPU10_COMPONENT_Y;
10205 operand0.swizzleZ = VGPU10_COMPONENT_Z;
10206 operand0.swizzleW = VGPU10_COMPONENT_W;
10207 break;
10208
10209 default:
10210 break;
10211 }
10212
10213 emit_dword(emit, operand0.value);
10214 emit_dword(emit, uav_index);
10215 }
10216
10217
10218 /**
10219 * A helper function to emit the uav address.
10220 * For memory, buffer, and image resource, it is set to the specified address.
10221 * For HW atomic counter, the address is the sum of the address offset and the
10222 * offset into the HW atomic buffer as specified by the register index.
10223 * It is also possible to specify the counter index as an indirect address.
10224 * And in this case, the uav address will be the sum of the address offset and the
10225 * counter index specified in the indirect address.
10226 */
10227 static
10228 struct tgsi_full_src_register
emit_uav_addr_offset(struct svga_shader_emitter_v10 * emit,enum tgsi_file_type resourceType,unsigned resourceIndex,unsigned resourceIndirect,unsigned resourceIndirectIndex,const struct tgsi_full_src_register * addr_reg)10229 emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
10230 enum tgsi_file_type resourceType,
10231 unsigned resourceIndex,
10232 unsigned resourceIndirect,
10233 unsigned resourceIndirectIndex,
10234 const struct tgsi_full_src_register *addr_reg)
10235 {
10236 unsigned addr_tmp;
10237 struct tgsi_full_dst_register addr_dst;
10238 struct tgsi_full_src_register addr_src;
10239 struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
10240 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
10241
10242 addr_tmp = get_temp_index(emit);
10243 addr_dst = make_dst_temp_reg(addr_tmp);
10244 addr_src = make_src_temp_reg(addr_tmp);
10245
10246 /* specified address offset */
10247 if (addr_reg)
10248 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
10249 else
10250 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero);
10251
10252 /* For HW atomic counter, we need to find the index to the
10253 * HW atomic buffer.
10254 */
10255 if (resourceType == TGSI_FILE_HW_ATOMIC) {
10256 if (resourceIndirect) {
10257
10258 /**
10259 * uav addr offset = counter layout offset +
10260 * counter indirect index address + address offset
10261 */
10262
10263 /* counter layout offset */
10264 struct tgsi_full_src_register layout_offset;
10265 layout_offset =
10266 make_immediate_reg_int(emit, resourceIndex);
10267
10268 /* counter layout offset + address offset */
10269 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10270 &addr_src, &layout_offset);
10271
10272 /* counter indirect index address */
10273 unsigned indirect_addr =
10274 emit->address_reg_index[resourceIndirectIndex];
10275
10276 struct tgsi_full_src_register indirect_addr_src =
10277 make_src_temp_reg(indirect_addr);
10278
10279 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10280
10281 /* counter layout offset + address offset + counter indirect address */
10282 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10283 &addr_src, &indirect_addr_src);
10284
10285 } else {
10286 struct tgsi_full_src_register index_src;
10287
10288 index_src = make_immediate_reg_int(emit, resourceIndex);
10289
10290 /* uav addr offset = counter index address + address offset */
10291 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst,
10292 &addr_src, &index_src);
10293 }
10294
10295 /* HW atomic buffer is declared as raw buffer, so the buffer address is
10296 * the byte offset, so we need to multiple the counter addr offset by 4.
10297 */
10298 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
10299 &addr_src, &two);
10300 }
10301 else if (resourceType == TGSI_FILE_IMAGE) {
10302 if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
10303 && emit->key.images[resourceIndex].is_single_layer) {
10304
10305 struct tgsi_full_dst_register addr_dst_z =
10306 writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
10307
10308 /* For non-layered 3D texture image view, we have to make sure the z
10309 * component of the address offset is set to 0.
10310 */
10311 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
10312 &zero);
10313 }
10314 }
10315
10316 return addr_src;
10317 }
10318
10319
10320
10321 /**
10322 * A helper function to expand indirect indexing to uav resource
10323 * by looping through the resource array, compare the indirect index and
10324 * emit the instruction for each resource in the array.
10325 */
10326 static void
loop_instruction(unsigned index,unsigned count,struct tgsi_full_src_register * addr_index,void (* fb)(struct svga_shader_emitter_v10 *,const struct tgsi_full_instruction *,unsigned),struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10327 loop_instruction(unsigned index, unsigned count,
10328 struct tgsi_full_src_register *addr_index,
10329 void (*fb)(struct svga_shader_emitter_v10 *,
10330 const struct tgsi_full_instruction *, unsigned),
10331 struct svga_shader_emitter_v10 *emit,
10332 const struct tgsi_full_instruction *inst)
10333 {
10334 if (count == 0)
10335 return;
10336
10337 if (index > 0) {
10338 /* ELSE */
10339 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10340 }
10341
10342 struct tgsi_full_src_register index_src =
10343 make_immediate_reg_int(emit, index);
10344
10345 unsigned tmp_index = get_temp_index(emit);
10346 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10347 struct tgsi_full_src_register tmp_src_x =
10348 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10349 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
10350
10351 /* IEQ tmp, addr_tmp_index, index */
10352 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
10353 addr_index, &index_src);
10354
10355 /* IF tmp */
10356 emit_if(emit, &tmp_src_x);
10357
10358 free_temp_indexes(emit);
10359
10360 (*fb)(emit, inst, index);
10361
10362 loop_instruction(index+1, count-1, addr_index, fb, emit, inst);
10363
10364 /* ENDIF */
10365 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10366 }
10367
10368
10369 /**
10370 * A helper function to emit the load instruction.
10371 */
10372 static void
emit_load_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10373 emit_load_instruction(struct svga_shader_emitter_v10 *emit,
10374 const struct tgsi_full_instruction *inst,
10375 unsigned resourceIndex)
10376 {
10377 VGPU10OpcodeToken0 token0;
10378 struct tgsi_full_src_register addr_src;
10379 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10380
10381 /* Resolve the resource address for this resource first */
10382 addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
10383 inst->Src[0].Register.Indirect,
10384 inst->Src[0].Indirect.Index,
10385 &inst->Src[1]);
10386
10387 /* LOAD resource, address, src */
10388 begin_emit_instruction(emit);
10389
10390 token0.value = 0;
10391
10392 if (resourceType == TGSI_FILE_MEMORY ||
10393 resourceType == TGSI_FILE_BUFFER ||
10394 resourceType == TGSI_FILE_HW_ATOMIC) {
10395 token0.opcodeType = VGPU10_OPCODE_LD_RAW;
10396 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10397 }
10398 else {
10399 token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10400 }
10401
10402 token0.saturate = inst->Instruction.Saturate,
10403 emit_dword(emit, token0.value);
10404
10405 emit_dst_register(emit, &inst->Dst[0]);
10406 emit_src_register(emit, &addr_src);
10407
10408 if (resourceType == TGSI_FILE_MEMORY) {
10409 emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
10410 } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
10411 emit_uav_register(emit, inst->Src[0].Dimension.Index,
10412 UAV_LOAD, inst->Src[0].Register.File, 0);
10413 } else if (resourceType == TGSI_FILE_BUFFER) {
10414 if (emit->raw_shaderbufs & (1 << resourceIndex))
10415 emit_resource_register(emit, resourceIndex +
10416 emit->raw_shaderbuf_srv_start_index);
10417 else
10418 emit_uav_register(emit, resourceIndex,
10419 UAV_LOAD, inst->Src[0].Register.File, 0);
10420 } else {
10421 emit_uav_register(emit, resourceIndex,
10422 UAV_LOAD, inst->Src[0].Register.File, 0);
10423 }
10424
10425 end_emit_instruction(emit);
10426
10427 free_temp_indexes(emit);
10428 }
10429
10430
10431 /**
10432 * Emit uav / memory load instruction
10433 */
10434 static bool
emit_load(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10435 emit_load(struct svga_shader_emitter_v10 *emit,
10436 const struct tgsi_full_instruction *inst)
10437 {
10438 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10439 unsigned resourceIndex = inst->Src[0].Register.Index;
10440
10441 /* If the resource register has indirect index, we will need
10442 * to expand it since SM5 device does not support indirect indexing
10443 * for uav.
10444 */
10445 if (inst->Src[0].Register.Indirect &&
10446 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10447
10448 unsigned indirect_index = inst->Src[0].Indirect.Index;
10449 unsigned num_resources =
10450 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10451 emit->num_images;
10452
10453 /* indirect index tmp register */
10454 unsigned indirect_addr = emit->address_reg_index[indirect_index];
10455 struct tgsi_full_src_register indirect_addr_src =
10456 make_src_temp_reg(indirect_addr);
10457 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10458
10459 /* Add offset to the indirect index */
10460 if (inst->Src[0].Register.Index != 0) {
10461 struct tgsi_full_src_register offset =
10462 make_immediate_reg_int(emit, inst->Src[0].Register.Index);
10463 struct tgsi_full_dst_register indirect_addr_dst =
10464 make_dst_temp_reg(indirect_addr);
10465 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10466 &indirect_addr_src, &offset);
10467 }
10468
10469 /* Loop through the resource array to find which resource to use.
10470 */
10471 loop_instruction(0, num_resources, &indirect_addr_src,
10472 emit_load_instruction, emit, inst);
10473 }
10474 else {
10475 emit_load_instruction(emit, inst, resourceIndex);
10476 }
10477
10478 free_temp_indexes(emit);
10479
10480 return true;
10481 }
10482
10483
10484 /**
10485 * A helper function to emit a store instruction.
10486 */
10487 static void
emit_store_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10488 emit_store_instruction(struct svga_shader_emitter_v10 *emit,
10489 const struct tgsi_full_instruction *inst,
10490 unsigned resourceIndex)
10491 {
10492 VGPU10OpcodeToken0 token0;
10493 enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10494 unsigned writemask = inst->Dst[0].Register.WriteMask;
10495 struct tgsi_full_src_register addr_src;
10496
10497 unsigned tmp_index = get_temp_index(emit);
10498 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10499 struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
10500 struct tgsi_full_dst_register tmp_dst;
10501
10502 struct tgsi_full_src_register src = inst->Src[1];
10503 struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
10504
10505 bool needLoad = false;
10506 bool needPerComponentStore = false;
10507 unsigned swizzles = 0;
10508
10509 /* Resolve the resource address for this resource first */
10510 addr_src = emit_uav_addr_offset(emit, resourceType,
10511 inst->Dst[0].Register.Index,
10512 inst->Dst[0].Register.Indirect,
10513 inst->Dst[0].Indirect.Index,
10514 &inst->Src[0]);
10515
10516 /* First check the writemask to see if it can be supported
10517 * by the store instruction.
10518 * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory,
10519 * we can adjust the address offset, and do a per-component store.
10520 * store_uav_typed only allows .xyzw. In this case, we need to
10521 * do a load first, update the temporary and then issue the
10522 * store. This does have a small risk that if different threads
10523 * update different components of the same address, data might not be
10524 * in sync.
10525 */
10526 if (resourceType == TGSI_FILE_IMAGE) {
10527 needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? false : true;
10528 }
10529 else if (resourceType == TGSI_FILE_BUFFER ||
10530 resourceType == TGSI_FILE_MEMORY) {
10531 if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
10532 writemask == TGSI_WRITEMASK_XYZ ||
10533 writemask == TGSI_WRITEMASK_XYZW)) {
10534 needPerComponentStore = true;
10535 }
10536 }
10537
10538 if (needLoad) {
10539 assert(resourceType == TGSI_FILE_IMAGE);
10540
10541 /* LOAD resource, address, src */
10542 begin_emit_instruction(emit);
10543
10544 token0.value = 0;
10545 token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10546 token0.saturate = inst->Instruction.Saturate,
10547 emit_dword(emit, token0.value);
10548
10549 emit_dst_register(emit, &tmp_dst_xyzw);
10550 emit_src_register(emit, &addr_src);
10551 emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);
10552
10553 end_emit_instruction(emit);
10554
10555 /* MOV tmp(writemask) src */
10556 tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
10557 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);
10558
10559 /* Now set the writemask to xyzw for the store_uav_typed instruction */
10560 writemask = TGSI_WRITEMASK_XYZW;
10561 }
10562 else if (needPerComponentStore) {
10563 /* Save the src swizzles */
10564 swizzles = src.Register.SwizzleX |
10565 src.Register.SwizzleY << 2 |
10566 src.Register.SwizzleZ << 4 |
10567 src.Register.SwizzleW << 6;
10568 }
10569
10570 bool storeDone = false;
10571 unsigned perComponentWritemask = writemask;
10572 unsigned shift = 0;
10573 struct tgsi_full_src_register shift_src;
10574
10575 while (!storeDone) {
10576
10577 if (needPerComponentStore) {
10578 assert(perComponentWritemask);
10579 while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
10580 shift++;
10581 perComponentWritemask >>= 1;
10582 }
10583
10584 /* First adjust the addr_src to the next component */
10585 if (shift != 0) {
10586 struct tgsi_full_dst_register addr_dst =
10587 make_dst_temp_reg(addr_src.Register.Index);
10588 shift_src = make_immediate_reg_int(emit, shift);
10589 emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
10590 &shift_src, &addr_src);
10591
10592 /* Adjust the src swizzle as well */
10593 swizzles >>= (shift * 2);
10594 }
10595
10596 /* Now the address offset is set to the next component,
10597 * we can set the writemask to .x and make sure to set
10598 * the src swizzle as well.
10599 */
10600 src.Register.SwizzleX = swizzles & 0x3;
10601 writemask = TGSI_WRITEMASK_X;
10602
10603 /* Shift for the next component check */
10604 perComponentWritemask >>= 1;
10605 shift = 1;
10606 }
10607
10608 /* STORE resource, address, src */
10609 begin_emit_instruction(emit);
10610
10611 token0.value = 0;
10612 token0.saturate = inst->Instruction.Saturate;
10613
10614 if (resourceType == TGSI_FILE_MEMORY) {
10615 token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10616 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10617 emit_dword(emit, token0.value);
10618 emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
10619 }
10620 else if (resourceType == TGSI_FILE_BUFFER ||
10621 resourceType == TGSI_FILE_HW_ATOMIC) {
10622 token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10623 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10624 emit_dword(emit, token0.value);
10625 emit_uav_register(emit, resourceIndex, UAV_STORE,
10626 resourceType, writemask);
10627 }
10628 else {
10629 token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
10630 emit_dword(emit, token0.value);
10631 emit_uav_register(emit, resourceIndex, UAV_STORE,
10632 resourceType, writemask);
10633 }
10634
10635 emit_src_register(emit, &addr_src);
10636
10637 if (needLoad)
10638 emit_src_register(emit, &tmp_src);
10639 else
10640 emit_src_register(emit, &src);
10641
10642 end_emit_instruction(emit);
10643
10644 if (!needPerComponentStore || !perComponentWritemask)
10645 storeDone = true;
10646 }
10647
10648 free_temp_indexes(emit);
10649 }
10650
10651
10652 /**
10653 * Emit uav / memory store instruction
10654 */
10655 static bool
emit_store(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10656 emit_store(struct svga_shader_emitter_v10 *emit,
10657 const struct tgsi_full_instruction *inst)
10658 {
10659 enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10660 unsigned resourceIndex = inst->Dst[0].Register.Index;
10661
10662 /* If the resource register has indirect index, we will need
10663 * to expand it since SM5 device does not support indirect indexing
10664 * for uav.
10665 */
10666 if (inst->Dst[0].Register.Indirect &&
10667 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10668
10669 unsigned indirect_index = inst->Dst[0].Indirect.Index;
10670 unsigned num_resources =
10671 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10672 emit->num_images;
10673
10674 /* Indirect index tmp register */
10675 unsigned indirect_addr = emit->address_reg_index[indirect_index];
10676 struct tgsi_full_src_register indirect_addr_src =
10677 make_src_temp_reg(indirect_addr);
10678 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10679
10680 /* Add offset to the indirect index */
10681 if (inst->Dst[0].Register.Index != 0) {
10682 struct tgsi_full_src_register offset =
10683 make_immediate_reg_int(emit, inst->Dst[0].Register.Index);
10684 struct tgsi_full_dst_register indirect_addr_dst =
10685 make_dst_temp_reg(indirect_addr);
10686 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10687 &indirect_addr_src, &offset);
10688 }
10689
10690 /* Loop through the resource array to find which resource to use.
10691 */
10692 loop_instruction(0, num_resources, &indirect_addr_src,
10693 emit_store_instruction, emit, inst);
10694 }
10695 else {
10696 emit_store_instruction(emit, inst, resourceIndex);
10697 }
10698
10699 free_temp_indexes(emit);
10700
10701 return true;
10702 }
10703
10704
10705 /**
10706 * A helper function to emit an atomic instruction.
10707 */
10708
10709 static void
emit_atomic_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10710 emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
10711 const struct tgsi_full_instruction *inst,
10712 unsigned resourceIndex)
10713 {
10714 VGPU10OpcodeToken0 token0;
10715 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10716 struct tgsi_full_src_register addr_src;
10717 VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
10718 const struct tgsi_full_src_register *offset;
10719
10720 /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */
10721 offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1];
10722
10723 /* Resolve the resource address */
10724 addr_src = emit_uav_addr_offset(emit, resourceType,
10725 inst->Src[0].Register.Index,
10726 inst->Src[0].Register.Indirect,
10727 inst->Src[0].Indirect.Index,
10728 offset);
10729
10730 /* Emit the atomic operation */
10731 begin_emit_instruction(emit);
10732
10733 token0.value = 0;
10734 token0.opcodeType = opcode;
10735 token0.saturate = inst->Instruction.Saturate,
10736 emit_dword(emit, token0.value);
10737
10738 emit_dst_register(emit, &inst->Dst[0]);
10739
10740 if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
10741 emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
10742 } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) {
10743 assert(inst->Src[0].Register.Dimension == 1);
10744 emit_uav_register(emit, inst->Src[0].Dimension.Index,
10745 UAV_ATOMIC, inst->Src[0].Register.File, 0);
10746 } else {
10747 emit_uav_register(emit, resourceIndex,
10748 UAV_ATOMIC, inst->Src[0].Register.File, 0);
10749 }
10750
10751 /* resource address offset */
10752 emit_src_register(emit, &addr_src);
10753
10754 struct tgsi_full_src_register src0_x =
10755 swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10756 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10757 emit_src_register(emit, &src0_x);
10758
10759 if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
10760 struct tgsi_full_src_register src1_x =
10761 swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10762 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10763
10764 emit_src_register(emit, &src1_x);
10765 }
10766
10767 end_emit_instruction(emit);
10768
10769 free_temp_indexes(emit);
10770 }
10771
10772
10773 /**
10774 * Emit atomic instruction
10775 */
10776 static bool
emit_atomic(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_OPCODE_TYPE opcode)10777 emit_atomic(struct svga_shader_emitter_v10 *emit,
10778 const struct tgsi_full_instruction *inst,
10779 VGPU10_OPCODE_TYPE opcode)
10780 {
10781 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10782 unsigned resourceIndex = inst->Src[0].Register.Index;
10783
10784 emit->cur_atomic_opcode = opcode;
10785
10786 /* If the resource register has indirect index, we will need
10787 * to expand it since SM5 device does not support indirect indexing
10788 * for uav.
10789 */
10790 if (inst->Dst[0].Register.Indirect &&
10791 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10792
10793 unsigned indirect_index = inst->Dst[0].Indirect.Index;
10794 unsigned num_resources =
10795 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10796 emit->num_images;
10797
10798 /* indirect index tmp register */
10799 unsigned indirect_addr = emit->address_reg_index[indirect_index];
10800 struct tgsi_full_src_register indirect_addr_src =
10801 make_src_temp_reg(indirect_addr);
10802 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10803
10804 /* Loop through the resource array to find which resource to use.
10805 */
10806 loop_instruction(0, num_resources, &indirect_addr_src,
10807 emit_atomic_instruction, emit, inst);
10808 }
10809 else {
10810 emit_atomic_instruction(emit, inst, resourceIndex);
10811 }
10812
10813 free_temp_indexes(emit);
10814
10815 return true;
10816 }
10817
10818
10819 /**
10820 * Emit barrier instruction
10821 */
10822 static bool
emit_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10823 emit_barrier(struct svga_shader_emitter_v10 *emit,
10824 const struct tgsi_full_instruction *inst)
10825 {
10826 VGPU10OpcodeToken0 token0;
10827
10828 assert(emit->version >= 50);
10829
10830 token0.value = 0;
10831 token0.opcodeType = VGPU10_OPCODE_SYNC;
10832
10833 if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
10834 /* SM5 device doesn't support BARRIER in tcs . If barrier is used
10835 * in shader, don't do anything for this opcode and continue rest
10836 * of shader translation
10837 */
10838 util_debug_message(&emit->svga_debug_callback, INFO,
10839 "barrier instruction is not supported in tessellation control shader\n");
10840 return true;
10841 }
10842 else if (emit->unit == PIPE_SHADER_COMPUTE) {
10843 if (emit->cs.shared_memory_declared)
10844 token0.syncThreadGroupShared = 1;
10845
10846 if (emit->uav_declared)
10847 token0.syncUAVMemoryGroup = 1;
10848
10849 token0.syncThreadsInGroup = 1;
10850 } else {
10851 token0.syncUAVMemoryGlobal = 1;
10852 }
10853
10854 assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10855 token0.syncThreadGroupShared);
10856
10857 begin_emit_instruction(emit);
10858 emit_dword(emit, token0.value);
10859 end_emit_instruction(emit);
10860
10861 return true;
10862 }
10863
10864 /**
10865 * Emit memory barrier instruction
10866 */
10867 static bool
emit_memory_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10868 emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
10869 const struct tgsi_full_instruction *inst)
10870 {
10871 unsigned index = inst->Src[0].Register.Index;
10872 unsigned swizzle = inst->Src[0].Register.SwizzleX;
10873 unsigned bartype = emit->immediates[index][swizzle].Int;
10874 VGPU10OpcodeToken0 token0;
10875
10876 token0.value = 0;
10877 token0.opcodeType = VGPU10_OPCODE_SYNC;
10878
10879 if (emit->unit == PIPE_SHADER_COMPUTE) {
10880
10881 /* For compute shader, issue sync opcode with different options
10882 * depending on the memory barrier type.
10883 *
10884 * Bit 0: Shader storage buffers
10885 * Bit 1: Atomic buffers
10886 * Bit 2: Images
10887 * Bit 3: Shared memory
10888 * Bit 4: Thread group
10889 */
10890
10891 if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10892 TGSI_MEMBAR_SHADER_IMAGE))
10893 token0.syncUAVMemoryGlobal = 1;
10894 else if (bartype & TGSI_MEMBAR_THREAD_GROUP)
10895 token0.syncUAVMemoryGroup = 1;
10896
10897 if (bartype & TGSI_MEMBAR_SHARED)
10898 token0.syncThreadGroupShared = 1;
10899 }
10900 else {
10901 /**
10902 * For graphics stages, only sync_uglobal is available.
10903 */
10904 if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10905 TGSI_MEMBAR_SHADER_IMAGE))
10906 token0.syncUAVMemoryGlobal = 1;
10907 }
10908
10909 assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10910 token0.syncThreadGroupShared);
10911
10912 begin_emit_instruction(emit);
10913 emit_dword(emit, token0.value);
10914 end_emit_instruction(emit);
10915
10916 return true;
10917 }
10918
10919
10920 /**
10921 * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
10922 */
10923 static bool
emit_resq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10924 emit_resq(struct svga_shader_emitter_v10 *emit,
10925 const struct tgsi_full_instruction *inst)
10926 {
10927 struct tgsi_full_src_register zero =
10928 make_immediate_reg_int(emit, 0);
10929
10930 unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource;
10931
10932 if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) {
10933 struct tgsi_full_src_register image_src;
10934
10935 image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index);
10936
10937 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src);
10938 return true;
10939 }
10940
10941 begin_emit_instruction(emit);
10942 if (uav_resource == TGSI_TEXTURE_BUFFER) {
10943 emit_opcode(emit, VGPU10_OPCODE_BUFINFO, false);
10944 emit_dst_register(emit, &inst->Dst[0]);
10945 }
10946 else {
10947 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
10948 emit_dst_register(emit, &inst->Dst[0]);
10949 emit_src_register(emit, &zero);
10950 }
10951 emit_uav_register(emit, inst->Src[0].Register.Index,
10952 UAV_RESQ, inst->Src[0].Register.File, 0);
10953 end_emit_instruction(emit);
10954
10955 return true;
10956 }
10957
10958
/**
 * Emit VGPU10 code for one TGSI instruction by dispatching on its opcode.
 * Simple opcodes with a direct VGPU10 equivalent go through emit_simple();
 * the rest are routed to dedicated emit_* helpers.
 *
 * \param inst_number  index of the instruction (not referenced here;
 *                     passed through for interface consistency)
 * \param inst  the parsed TGSI instruction
 * \return true on success, false for unsupported/unexpected opcodes
 */
static bool
emit_instruction(struct svga_shader_emitter_v10 *emit,
                 unsigned inst_number,
                 const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;

   switch (opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_ROUND:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_XOR:
   /* Begin SM5 opcodes */
   case TGSI_OPCODE_F2D:
   case TGSI_OPCODE_D2F:
   case TGSI_OPCODE_DADD:
   case TGSI_OPCODE_DMUL:
   case TGSI_OPCODE_DMAX:
   case TGSI_OPCODE_DMIN:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSNE:
   case TGSI_OPCODE_BREV:
   case TGSI_OPCODE_POPC:
   case TGSI_OPCODE_LSB:
   case TGSI_OPCODE_INTERP_CENTROID:
   case TGSI_OPCODE_INTERP_SAMPLE:
      /* simple instructions with a one-to-one VGPU10 mapping */
      return emit_simple(emit, inst);
   case TGSI_OPCODE_RET:
      if (emit->unit == PIPE_SHADER_TESS_CTRL &&
          !emit->tcs.control_point_phase) {

         /* store the tessellation levels in the patch constant phase only */
         store_tesslevels(emit);
      }
      return emit_simple(emit, inst);

   case TGSI_OPCODE_IMSB:
   case TGSI_OPCODE_UMSB:
      return emit_msb(emit, inst);
   case TGSI_OPCODE_IBFE:
   case TGSI_OPCODE_UBFE:
      return emit_bfe(emit, inst);
   case TGSI_OPCODE_BFI:
      return emit_bfi(emit, inst);
   case TGSI_OPCODE_MOV:
      return emit_mov(emit, inst);
   case TGSI_OPCODE_EMIT:
      return emit_vertex(emit, inst);
   case TGSI_OPCODE_ENDPRIM:
      return emit_endprim(emit, inst);
   case TGSI_OPCODE_IABS:
      return emit_iabs(emit, inst);
   case TGSI_OPCODE_ARL:
      FALLTHROUGH;
   case TGSI_OPCODE_UARL:
      return emit_arl_uarl(emit, inst);
   case TGSI_OPCODE_BGNSUB:
      /* no-op */
      return true;
   case TGSI_OPCODE_CAL:
      return emit_cal(emit, inst);
   case TGSI_OPCODE_CMP:
      return emit_cmp(emit, inst);
   case TGSI_OPCODE_COS:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_DST:
      return emit_dst(emit, inst);
   case TGSI_OPCODE_EX2:
      return emit_ex2(emit, inst);
   case TGSI_OPCODE_EXP:
      return emit_exp(emit, inst);
   case TGSI_OPCODE_IF:
      return emit_if(emit, &inst->Src[0]);
   case TGSI_OPCODE_KILL:
      return emit_discard(emit, inst);
   case TGSI_OPCODE_KILL_IF:
      return emit_cond_discard(emit, inst);
   case TGSI_OPCODE_LG2:
      return emit_lg2(emit, inst);
   case TGSI_OPCODE_LIT:
      return emit_lit(emit, inst);
   case TGSI_OPCODE_LODQ:
      return emit_lodq(emit, inst);
   case TGSI_OPCODE_LOG:
      return emit_log(emit, inst);
   case TGSI_OPCODE_LRP:
      return emit_lrp(emit, inst);
   case TGSI_OPCODE_POW:
      return emit_pow(emit, inst);
   case TGSI_OPCODE_RCP:
      return emit_rcp(emit, inst);
   case TGSI_OPCODE_RSQ:
      return emit_rsq(emit, inst);
   case TGSI_OPCODE_SAMPLE:
      return emit_sample(emit, inst);
   case TGSI_OPCODE_SEQ:
      return emit_seq(emit, inst);
   case TGSI_OPCODE_SGE:
      return emit_sge(emit, inst);
   case TGSI_OPCODE_SGT:
      return emit_sgt(emit, inst);
   case TGSI_OPCODE_SIN:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_SLE:
      return emit_sle(emit, inst);
   case TGSI_OPCODE_SLT:
      return emit_slt(emit, inst);
   case TGSI_OPCODE_SNE:
      return emit_sne(emit, inst);
   case TGSI_OPCODE_SSG:
      return emit_ssg(emit, inst);
   case TGSI_OPCODE_ISSG:
      return emit_issg(emit, inst);
   case TGSI_OPCODE_TEX:
      return emit_tex(emit, inst);
   case TGSI_OPCODE_TG4:
      return emit_tg4(emit, inst);
   case TGSI_OPCODE_TEX2:
      return emit_tex2(emit, inst);
   case TGSI_OPCODE_TXP:
      return emit_txp(emit, inst);
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL:
      return emit_txl_txb(emit, inst);
   case TGSI_OPCODE_TXD:
      return emit_txd(emit, inst);
   case TGSI_OPCODE_TXF:
      return emit_txf(emit, inst);
   case TGSI_OPCODE_TXL2:
      return emit_txl2(emit, inst);
   case TGSI_OPCODE_TXQ:
      return emit_txq(emit, inst);
   case TGSI_OPCODE_UIF:
      return emit_if(emit, &inst->Src[0]);
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UDIV:
      /* These cases use only the FIRST of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 0);
   case TGSI_OPCODE_IDIV:
      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* These cases use only the SECOND of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 1);

   /* Begin SM5 opcodes */
   case TGSI_OPCODE_DABS:
      return emit_dabs(emit, inst);
   case TGSI_OPCODE_DNEG:
      return emit_dneg(emit, inst);
   case TGSI_OPCODE_DRCP:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_DSQRT:
      return emit_dsqrt(emit, inst);
   case TGSI_OPCODE_DMAD:
      return emit_dmad(emit, inst);
   case TGSI_OPCODE_DFRAC:
      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_D2U:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_U2D:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_DRSQ:
      return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
   case TGSI_OPCODE_DDIV:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_INTERP_OFFSET:
      return emit_interp_offset(emit, inst);
   case TGSI_OPCODE_FMA:
   case TGSI_OPCODE_DFMA:
      return emit_simple(emit, inst);

   case TGSI_OPCODE_DTRUNC:
      return emit_dtrunc(emit, inst);

   /* The following opcodes should never be seen here. We return zero
    * for PIPE_CAP_TGSI_DROUND_SUPPORTED.
    */
   case TGSI_OPCODE_LDEXP:
   case TGSI_OPCODE_DSSG:
   case TGSI_OPCODE_DLDEXP:
   case TGSI_OPCODE_DCEIL:
   case TGSI_OPCODE_DFLR:
      debug_printf("Unexpected TGSI opcode %s. "
                   "Should have been translated away by the GLSL compiler.\n",
                   tgsi_get_opcode_name(opcode));
      return false;

   case TGSI_OPCODE_LOAD:
      return emit_load(emit, inst);

   case TGSI_OPCODE_STORE:
      return emit_store(emit, inst);

   case TGSI_OPCODE_ATOMAND:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);

   case TGSI_OPCODE_ATOMCAS:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);

   case TGSI_OPCODE_ATOMIMAX:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);

   case TGSI_OPCODE_ATOMIMIN:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);

   case TGSI_OPCODE_ATOMOR:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);

   case TGSI_OPCODE_ATOMUADD:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);

   case TGSI_OPCODE_ATOMUMAX:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);

   case TGSI_OPCODE_ATOMUMIN:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);

   case TGSI_OPCODE_ATOMXCHG:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);

   case TGSI_OPCODE_ATOMXOR:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);

   case TGSI_OPCODE_BARRIER:
      return emit_barrier(emit, inst);

   case TGSI_OPCODE_MEMBAR:
      return emit_memory_barrier(emit, inst);

   case TGSI_OPCODE_RESQ:
      return emit_resq(emit, inst);

   case TGSI_OPCODE_END:
      /* emit buffered epilog code before the final RET */
      if (!emit_post_helpers(emit))
         return false;
      return emit_simple(emit, inst);

   default:
      debug_printf("Unimplemented tgsi instruction %s\n",
                   tgsi_get_opcode_name(opcode));
      return false;
   }

   /* not reached: every switch case returns */
   return true;
}
11263
11264
11265 /**
11266 * Translate a single TGSI instruction to VGPU10.
11267 */
11268 static bool
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)11269 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
11270 unsigned inst_number,
11271 const struct tgsi_full_instruction *inst)
11272 {
11273 if (emit->skip_instruction)
11274 return true;
11275
11276 bool ret = true;
11277 unsigned start_token = emit_get_num_tokens(emit);
11278
11279 emit->reemit_tgsi_instruction = false;
11280
11281 ret = emit_instruction(emit, inst_number, inst);
11282
11283 if (emit->reemit_tgsi_instruction) {
11284 /**
11285 * Reset emit->ptr to where the translation of this tgsi instruction
11286 * started.
11287 */
11288 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
11289 emit->ptr = (char *) (tokens + start_token);
11290
11291 emit->reemit_tgsi_instruction = false;
11292 }
11293 return ret;
11294 }
11295
11296
11297 /**
11298 * Emit the extra instructions to adjust the vertex position.
11299 * There are two possible adjustments:
11300 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
11301 * "prescale" and "pretranslate" values.
11302 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
11303 * \param vs_pos_tmp_index which temporary register contains the vertex pos.
11304 */
11305 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)11306 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
11307 {
11308 struct tgsi_full_src_register tmp_pos_src;
11309 struct tgsi_full_dst_register pos_dst;
11310 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
11311
11312 /* Don't bother to emit any extra vertex instructions if vertex position is
11313 * not written out
11314 */
11315 if (emit->vposition.out_index == INVALID_INDEX)
11316 return;
11317
11318 /**
11319 * Reset the temporary vertex position register index
11320 * so that emit_dst_register() will use the real vertex position output
11321 */
11322 emit->vposition.tmp_index = INVALID_INDEX;
11323
11324 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
11325 pos_dst = make_dst_output_reg(emit->vposition.out_index);
11326
11327 /* If non-adjusted vertex position register index
11328 * is valid, copy the vertex position from the temporary
11329 * vertex position register before it is modified by the
11330 * prescale computation.
11331 */
11332 if (emit->vposition.so_index != INVALID_INDEX) {
11333 struct tgsi_full_dst_register pos_so_dst =
11334 make_dst_output_reg(emit->vposition.so_index);
11335
11336 /* MOV pos_so, tmp_pos */
11337 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
11338 }
11339
11340 if (emit->vposition.need_prescale) {
11341 /* This code adjusts the vertex position to match the VGPU10 convention.
11342 * If p is the position computed by the shader (usually by applying the
11343 * modelview and projection matrices), the new position q is computed by:
11344 *
11345 * q.x = p.w * trans.x + p.x * scale.x
11346 * q.y = p.w * trans.y + p.y * scale.y
11347 * q.z = p.w * trans.z + p.z * scale.z;
11348 * q.w = p.w * trans.w + p.w;
11349 */
11350 struct tgsi_full_src_register tmp_pos_src_w =
11351 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11352 struct tgsi_full_dst_register tmp_pos_dst =
11353 make_dst_temp_reg(vs_pos_tmp_index);
11354 struct tgsi_full_dst_register tmp_pos_dst_xyz =
11355 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
11356
11357 struct tgsi_full_src_register prescale_scale =
11358 make_src_temp_reg(emit->vposition.prescale_scale_index);
11359 struct tgsi_full_src_register prescale_trans =
11360 make_src_temp_reg(emit->vposition.prescale_trans_index);
11361
11362 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
11363 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
11364 &tmp_pos_src, &prescale_scale);
11365
11366 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
11367 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
11368 &prescale_trans, &tmp_pos_src);
11369 }
11370 else if (emit->key.vs.undo_viewport) {
11371 /* This code computes the final vertex position from the temporary
11372 * vertex position by undoing the viewport transformation and the
11373 * divide-by-W operation (we convert window coords back to clip coords).
11374 * This is needed when we use the 'draw' module for fallbacks.
11375 * If p is the temp pos in window coords, then the NDC coord q is:
11376 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
11377 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
11378 * q.z = p.z * p.w
11379 * q.w = p.w
11380 * CONST[vs_viewport_index] contains:
11381 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
11382 */
11383 struct tgsi_full_dst_register tmp_pos_dst =
11384 make_dst_temp_reg(vs_pos_tmp_index);
11385 struct tgsi_full_dst_register tmp_pos_dst_xy =
11386 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
11387 struct tgsi_full_src_register tmp_pos_src_wwww =
11388 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11389
11390 struct tgsi_full_dst_register pos_dst_xyz =
11391 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
11392 struct tgsi_full_dst_register pos_dst_w =
11393 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
11394
11395 struct tgsi_full_src_register vp_xyzw =
11396 make_src_const_reg(emit->vs.viewport_index);
11397 struct tgsi_full_src_register vp_zwww =
11398 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
11399 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
11400
11401 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
11402 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
11403 &tmp_pos_src, &vp_zwww);
11404
11405 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
11406 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
11407 &tmp_pos_src, &vp_xyzw);
11408
11409 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
11410 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
11411 &tmp_pos_src, &tmp_pos_src_wwww);
11412
11413 /* MOV pos.w, tmp_pos.w */
11414 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
11415 }
11416 else if (vs_pos_tmp_index != INVALID_INDEX) {
11417 /* This code is to handle the case where the temporary vertex
11418 * position register is created when the vertex shader has stream
11419 * output and prescale is disabled because rasterization is to be
11420 * discarded.
11421 */
11422 struct tgsi_full_dst_register pos_dst =
11423 make_dst_output_reg(emit->vposition.out_index);
11424
11425 /* MOV pos, tmp_pos */
11426 begin_emit_instruction(emit);
11427 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11428 emit_dst_register(emit, &pos_dst);
11429 emit_src_register(emit, &tmp_pos_src);
11430 end_emit_instruction(emit);
11431 }
11432
11433 /* Restore original vposition.tmp_index value for the next GS vertex.
11434 * It doesn't matter for VS.
11435 */
11436 emit->vposition.tmp_index = vs_pos_tmp_index;
11437 }
11438
11439 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)11440 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
11441 {
11442 if (emit->clip_mode == CLIP_DISTANCE) {
11443 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
11444 emit_clip_distance_instructions(emit);
11445
11446 } else if (emit->clip_mode == CLIP_VERTEX &&
11447 emit->key.last_vertex_stage) {
11448 /* Convert TGSI CLIPVERTEX to CLIPDIST */
11449 emit_clip_vertex_instructions(emit);
11450 }
11451
11452 /**
11453 * Emit vertex position and take care of legacy user planes only if
11454 * there is a valid vertex position register index.
11455 * This is to take care of the case
11456 * where the shader doesn't output vertex position. Then in
11457 * this case, don't bother to emit more vertex instructions.
11458 */
11459 if (emit->vposition.out_index == INVALID_INDEX)
11460 return;
11461
11462 /**
11463 * Emit per-vertex clipping instructions for legacy user defined clip planes.
11464 * NOTE: we must emit the clip distance instructions before the
11465 * emit_vpos_instructions() call since the later function will change
11466 * the TEMP[vs_pos_tmp_index] value.
11467 */
11468 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
11469 /* Emit CLIPDIST for legacy user defined clip planes */
11470 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
11471 }
11472 }
11473
11474
11475 /**
11476 * Emit extra per-vertex instructions. This includes clip-coordinate
11477 * space conversion and computing clip distances. This is called for
11478 * each GS emit-vertex instruction and at the end of VS translation.
11479 */
11480 static void
emit_vertex_instructions(struct svga_shader_emitter_v10 * emit)11481 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
11482 {
11483 /* Emit clipping instructions based on clipping mode */
11484 emit_clipping_instructions(emit);
11485
11486 /* Emit vertex position instructions */
11487 emit_vpos_instructions(emit);
11488 }
11489
11490
11491 /**
11492 * Translate the TGSI_OPCODE_EMIT GS instruction.
11493 */
11494 static bool
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)11495 emit_vertex(struct svga_shader_emitter_v10 *emit,
11496 const struct tgsi_full_instruction *inst)
11497 {
11498 unsigned ret = true;
11499
11500 assert(emit->unit == PIPE_SHADER_GEOMETRY);
11501
11502 /**
11503 * Emit the viewport array index for the first vertex.
11504 */
11505 if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
11506 struct tgsi_full_dst_register viewport_index_out =
11507 make_dst_output_reg(emit->gs.viewport_index_out_index);
11508 struct tgsi_full_dst_register viewport_index_out_x =
11509 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
11510 struct tgsi_full_src_register viewport_index_tmp =
11511 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11512
11513 /* Set the out index to INVALID_INDEX, so it will not
11514 * be assigned to a temp again in emit_dst_register, and
11515 * the viewport index will not be assigned again in the
11516 * subsequent vertices.
11517 */
11518 emit->gs.viewport_index_out_index = INVALID_INDEX;
11519 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11520 &viewport_index_out_x, &viewport_index_tmp);
11521 }
11522
11523 /**
11524 * Find the stream index associated with this emit vertex instruction.
11525 */
11526 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
11527 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
11528
11529 /**
11530 * According to the ARB_gpu_shader5 spec, the built-in geometry shader
11531 * outputs are always associated with vertex stream zero.
11532 * So emit the extra vertex instructions for position or clip distance
11533 * for stream zero only.
11534 */
11535 if (streamIndex == 0) {
11536 /**
11537 * Before emitting vertex instructions, emit the temporaries for
11538 * the prescale constants based on the viewport index if needed.
11539 */
11540 if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
11541 emit_temp_prescale_instructions(emit);
11542
11543 emit_vertex_instructions(emit);
11544 }
11545
11546 begin_emit_instruction(emit);
11547 if (emit->version >= 50) {
11548 if (emit->info.num_stream_output_components[streamIndex] == 0) {
11549 /**
11550 * If there is no output for this stream, discard this instruction.
11551 */
11552 emit->discard_instruction = true;
11553 }
11554 else {
11555 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, false);
11556 emit_stream_register(emit, streamIndex);
11557 }
11558 }
11559 else {
11560 emit_opcode(emit, VGPU10_OPCODE_EMIT, false);
11561 }
11562 end_emit_instruction(emit);
11563
11564 return ret;
11565 }
11566
11567
11568 /**
11569 * Emit the extra code to convert from VGPU10's boolean front-face
11570 * register to TGSI's signed front-face register.
11571 *
11572 * TODO: Make temporary front-face register a scalar.
11573 */
11574 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)11575 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
11576 {
11577 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11578
11579 if (emit->fs.face_input_index != INVALID_INDEX) {
11580 /* convert vgpu10 boolean face register to gallium +/-1 value */
11581 struct tgsi_full_dst_register tmp_dst =
11582 make_dst_temp_reg(emit->fs.face_tmp_index);
11583 struct tgsi_full_src_register one =
11584 make_immediate_reg_float(emit, 1.0f);
11585 struct tgsi_full_src_register neg_one =
11586 make_immediate_reg_float(emit, -1.0f);
11587
11588 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
11589 begin_emit_instruction(emit);
11590 emit_opcode(emit, VGPU10_OPCODE_MOVC, false);
11591 emit_dst_register(emit, &tmp_dst);
11592 emit_face_register(emit);
11593 emit_src_register(emit, &one);
11594 emit_src_register(emit, &neg_one);
11595 end_emit_instruction(emit);
11596 }
11597 }
11598
11599
11600 /**
11601 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
11602 */
11603 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)11604 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
11605 {
11606 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11607
11608 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
11609 struct tgsi_full_dst_register tmp_dst =
11610 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
11611 struct tgsi_full_dst_register tmp_dst_xyz =
11612 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
11613 struct tgsi_full_dst_register tmp_dst_w =
11614 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11615 struct tgsi_full_src_register one =
11616 make_immediate_reg_float(emit, 1.0f);
11617 struct tgsi_full_src_register fragcoord =
11618 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
11619
11620 /* save the input index */
11621 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
11622 /* set to invalid to prevent substitution in emit_src_register() */
11623 emit->fs.fragcoord_input_index = INVALID_INDEX;
11624
11625 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
11626 begin_emit_instruction(emit);
11627 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11628 emit_dst_register(emit, &tmp_dst_xyz);
11629 emit_src_register(emit, &fragcoord);
11630 end_emit_instruction(emit);
11631
11632 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
11633 begin_emit_instruction(emit);
11634 emit_opcode(emit, VGPU10_OPCODE_DIV, false);
11635 emit_dst_register(emit, &tmp_dst_w);
11636 emit_src_register(emit, &one);
11637 emit_src_register(emit, &fragcoord);
11638 end_emit_instruction(emit);
11639
11640 /* restore saved value */
11641 emit->fs.fragcoord_input_index = fragcoord_input_index;
11642 }
11643 }
11644
11645
11646 /**
11647 * Emit the extra code to get the current sample position value and
11648 * put it into a temp register.
11649 */
11650 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)11651 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
11652 {
11653 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11654
11655 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
11656 assert(emit->version >= 41);
11657
11658 struct tgsi_full_dst_register tmp_dst =
11659 make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
11660 struct tgsi_full_src_register half =
11661 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
11662
11663 struct tgsi_full_src_register tmp_src =
11664 make_src_temp_reg(emit->fs.sample_pos_tmp_index);
11665 struct tgsi_full_src_register sample_index_reg =
11666 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
11667 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
11668
11669 /* The first src register is a shader resource (if we want a
11670 * multisampled resource sample position) or the rasterizer register
11671 * (if we want the current sample position in the color buffer). We
11672 * want the later.
11673 */
11674
11675 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
11676 begin_emit_instruction(emit);
11677 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, false);
11678 emit_dst_register(emit, &tmp_dst);
11679 emit_rasterizer_register(emit);
11680 emit_src_register(emit, &sample_index_reg);
11681 end_emit_instruction(emit);
11682
11683 /* Convert from D3D coords to GL coords by adding 0.5 bias */
11684 /* ADD dst, dst, half */
11685 begin_emit_instruction(emit);
11686 emit_opcode(emit, VGPU10_OPCODE_ADD, false);
11687 emit_dst_register(emit, &tmp_dst);
11688 emit_src_register(emit, &tmp_src);
11689 emit_src_register(emit, &half);
11690 end_emit_instruction(emit);
11691 }
11692 }
11693
11694
11695 /**
11696 * Emit extra instructions to adjust VS inputs/attributes. This can
11697 * mean casting a vertex attribute from int to float or setting the
11698 * W component to 1, or both.
11699 */
11700 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)11701 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
11702 {
11703 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
11704 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
11705 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
11706 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
11707 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
11708 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
11709 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
11710
11711 unsigned adjust_mask = (save_w_1_mask |
11712 save_itof_mask |
11713 save_utof_mask |
11714 save_is_bgra_mask |
11715 save_puint_to_snorm_mask |
11716 save_puint_to_uscaled_mask |
11717 save_puint_to_sscaled_mask);
11718
11719 assert(emit->unit == PIPE_SHADER_VERTEX);
11720
11721 if (adjust_mask) {
11722 struct tgsi_full_src_register one =
11723 make_immediate_reg_float(emit, 1.0f);
11724
11725 struct tgsi_full_src_register one_int =
11726 make_immediate_reg_int(emit, 1);
11727
11728 /* We need to turn off these bitmasks while emitting the
11729 * instructions below, then restore them afterward.
11730 */
11731 emit->key.vs.adjust_attrib_w_1 = 0;
11732 emit->key.vs.adjust_attrib_itof = 0;
11733 emit->key.vs.adjust_attrib_utof = 0;
11734 emit->key.vs.attrib_is_bgra = 0;
11735 emit->key.vs.attrib_puint_to_snorm = 0;
11736 emit->key.vs.attrib_puint_to_uscaled = 0;
11737 emit->key.vs.attrib_puint_to_sscaled = 0;
11738
11739 while (adjust_mask) {
11740 unsigned index = u_bit_scan(&adjust_mask);
11741
11742 /* skip the instruction if this vertex attribute is not being used */
11743 if (emit->info.input_usage_mask[index] == 0)
11744 continue;
11745
11746 unsigned tmp = emit->vs.adjusted_input[index];
11747 struct tgsi_full_src_register input_src =
11748 make_src_reg(TGSI_FILE_INPUT, index);
11749
11750 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11751 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11752 struct tgsi_full_dst_register tmp_dst_w =
11753 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11754
11755 /* ITOF/UTOF/MOV tmp, input[index] */
11756 if (save_itof_mask & (1 << index)) {
11757 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
11758 &tmp_dst, &input_src);
11759 }
11760 else if (save_utof_mask & (1 << index)) {
11761 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
11762 &tmp_dst, &input_src);
11763 }
11764 else if (save_puint_to_snorm_mask & (1 << index)) {
11765 emit_puint_to_snorm(emit, &tmp_dst, &input_src);
11766 }
11767 else if (save_puint_to_uscaled_mask & (1 << index)) {
11768 emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
11769 }
11770 else if (save_puint_to_sscaled_mask & (1 << index)) {
11771 emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
11772 }
11773 else {
11774 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
11775 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11776 &tmp_dst, &input_src);
11777 }
11778
11779 if (save_is_bgra_mask & (1 << index)) {
11780 emit_swap_r_b(emit, &tmp_dst, &tmp_src);
11781 }
11782
11783 if (save_w_1_mask & (1 << index)) {
11784 /* MOV tmp.w, 1.0 */
11785 if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
11786 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11787 &tmp_dst_w, &one_int);
11788 }
11789 else {
11790 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11791 &tmp_dst_w, &one);
11792 }
11793 }
11794 }
11795
11796 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
11797 emit->key.vs.adjust_attrib_itof = save_itof_mask;
11798 emit->key.vs.adjust_attrib_utof = save_utof_mask;
11799 emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
11800 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
11801 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
11802 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
11803 }
11804 }
11805
11806
11807 /* Find zero-value immedate for default layer index */
11808 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)11809 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
11810 {
11811 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11812
11813 /* immediate for default layer index 0 */
11814 if (emit->fs.layer_input_index != INVALID_INDEX) {
11815 union tgsi_immediate_data imm;
11816 imm.Int = 0;
11817 emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
11818 }
11819 }
11820
11821
11822 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)11823 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11824 unsigned cbuf_index,
11825 struct tgsi_full_dst_register *scale,
11826 struct tgsi_full_dst_register *translate)
11827 {
11828 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
11829 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
11830
11831 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
11832 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
11833 }
11834
11835
11836 /**
11837 * A recursive helper function to find the prescale from the constant buffer
11838 */
11839 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)11840 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11841 unsigned index, unsigned num_prescale,
11842 struct tgsi_full_src_register *vp_index,
11843 struct tgsi_full_dst_register *scale,
11844 struct tgsi_full_dst_register *translate,
11845 struct tgsi_full_src_register *tmp_src,
11846 struct tgsi_full_dst_register *tmp_dst)
11847 {
11848 if (num_prescale == 0)
11849 return;
11850
11851 if (index > 0) {
11852 /* ELSE */
11853 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
11854 }
11855
11856 struct tgsi_full_src_register index_src =
11857 make_immediate_reg_int(emit, index);
11858
11859 if (index == 0) {
11860 /* GE tmp, vp_index, index */
11861 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
11862 vp_index, &index_src);
11863 } else {
11864 /* EQ tmp, vp_index, index */
11865 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
11866 vp_index, &index_src);
11867 }
11868
11869 /* IF tmp */
11870 emit_if(emit, tmp_src);
11871 emit_temp_prescale_from_cbuf(emit,
11872 emit->vposition.prescale_cbuf_index + 2 * index,
11873 scale, translate);
11874
11875 find_prescale_from_cbuf(emit, index+1, num_prescale-1,
11876 vp_index, scale, translate,
11877 tmp_src, tmp_dst);
11878
11879 /* ENDIF */
11880 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
11881 }
11882
11883
11884 /**
11885 * This helper function emits instructions to set the prescale
11886 * and translate temporaries to the correct constants from the
11887 * constant buffer according to the designated viewport.
11888 */
11889 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)11890 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
11891 {
11892 struct tgsi_full_dst_register prescale_scale =
11893 make_dst_temp_reg(emit->vposition.prescale_scale_index);
11894 struct tgsi_full_dst_register prescale_translate =
11895 make_dst_temp_reg(emit->vposition.prescale_trans_index);
11896
11897 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
11898
11899 if (emit->vposition.num_prescale == 1) {
11900 emit_temp_prescale_from_cbuf(emit,
11901 prescale_cbuf_index,
11902 &prescale_scale, &prescale_translate);
11903 } else {
11904 /**
11905 * Since SM5 device does not support dynamic indexing, we need
11906 * to do the if-else to find the prescale constants for the
11907 * specified viewport.
11908 */
11909 struct tgsi_full_src_register vp_index_src =
11910 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11911
11912 struct tgsi_full_src_register vp_index_src_x =
11913 scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
11914
11915 unsigned tmp = get_temp_index(emit);
11916 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11917 struct tgsi_full_src_register tmp_src_x =
11918 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
11919 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11920
11921 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
11922 &vp_index_src_x,
11923 &prescale_scale, &prescale_translate,
11924 &tmp_src_x, &tmp_dst);
11925 }
11926
11927 /* Mark prescale temporaries are emitted */
11928 emit->vposition.have_prescale = 1;
11929 }
11930
11931
11932 /**
11933 * A helper function to emit an instruction in a vertex shader to add a bias
11934 * to the VertexID system value. This patches the VertexID in the SVGA vertex
11935 * shader to include the base vertex of an indexed primitive or the start index
11936 * of a non-indexed primitive.
11937 */
11938 static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 * emit)11939 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
11940 {
11941 struct tgsi_full_src_register vertex_id_bias_index =
11942 make_src_const_reg(emit->vs.vertex_id_bias_index);
11943 struct tgsi_full_src_register vertex_id_sys_src =
11944 make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
11945 struct tgsi_full_src_register vertex_id_sys_src_x =
11946 scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
11947 struct tgsi_full_dst_register vertex_id_tmp_dst =
11948 make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
11949
11950 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
11951 unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
11952 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11953 emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
11954 &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, false,
11955 false);
11956 emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
11957 }
11958
11959 /**
11960 * Hull Shader must have control point outputs. But tessellation
11961 * control shader can return without writing to control point output.
11962 * In this case, the control point output is assumed to be passthrough
11963 * from the control point input.
11964 * This helper function is to write out a control point output first in case
11965 * the tessellation control shader returns before writing a
11966 * control point output.
11967 */
11968 static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 * emit)11969 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
11970 {
11971 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
11972 assert(emit->tcs.control_point_phase);
11973 assert(emit->tcs.control_point_out_index != INVALID_INDEX);
11974 assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
11975
11976 struct tgsi_full_dst_register output_control_point;
11977 output_control_point =
11978 make_dst_output_reg(emit->tcs.control_point_out_index);
11979
11980 if (emit->tcs.control_point_input_index == INVALID_INDEX) {
11981 /* MOV OUTPUT 0.0f */
11982 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
11983 begin_emit_instruction(emit);
11984 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
11985 emit_dst_register(emit, &output_control_point);
11986 emit_src_register(emit, &zero);
11987 end_emit_instruction(emit);
11988 }
11989 else {
11990 /* UARL ADDR[INDEX].x INVOCATION.xxxx */
11991
11992 struct tgsi_full_src_register invocation_src;
11993 struct tgsi_full_dst_register addr_dst;
11994 struct tgsi_full_dst_register addr_dst_x;
11995 unsigned addr_tmp;
11996
11997 addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
11998 addr_dst = make_dst_temp_reg(addr_tmp);
11999 addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
12000
12001 invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
12002 emit->tcs.invocation_id_sys_index);
12003
12004 begin_emit_instruction(emit);
12005 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12006 emit_dst_register(emit, &addr_dst_x);
12007 emit_src_register(emit, &invocation_src);
12008 end_emit_instruction(emit);
12009
12010
12011 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
12012
12013 struct tgsi_full_src_register input_control_point;
12014 input_control_point = make_src_reg(TGSI_FILE_INPUT,
12015 emit->tcs.control_point_input_index);
12016 input_control_point.Register.Dimension = 1;
12017 input_control_point.Dimension.Indirect = 1;
12018 input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
12019 input_control_point.DimIndirect.Index =
12020 emit->tcs.control_point_addr_index;
12021
12022 begin_emit_instruction(emit);
12023 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12024 emit_dst_register(emit, &output_control_point);
12025 emit_src_register(emit, &input_control_point);
12026 end_emit_instruction(emit);
12027 }
12028 }
12029
12030 /**
12031 * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
12032 * values in domain shader. SM5 has tessfactors as floating point values where
12033 * as tgsi emit them as vector. This function allows to construct temp
12034 * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
12035 * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
12036 * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
12037 */
12038 static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 * emit)12039 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
12040 {
12041 struct tgsi_full_src_register src;
12042 struct tgsi_full_dst_register dst;
12043
12044 if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
12045 dst = make_dst_temp_reg(emit->tes.inner.temp_index);
12046
12047 switch (emit->tes.prim_mode) {
12048 case MESA_PRIM_QUADS:
12049 src = make_src_scalar_reg(TGSI_FILE_INPUT,
12050 emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
12051 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12052 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12053 FALLTHROUGH;
12054 case MESA_PRIM_TRIANGLES:
12055 src = make_src_scalar_reg(TGSI_FILE_INPUT,
12056 emit->tes.inner.in_index, TGSI_SWIZZLE_X);
12057 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12058 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12059 break;
12060 case MESA_PRIM_LINES:
12061 /**
12062 * As per SM5 spec, InsideTessFactor for isolines are unused.
12063 * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
12064 * any application try to read tessInnerLevel in TES when primitive type
12065 * is isolines, then instead of driver throwing segfault for accesing it,
12066 * return atleast vec(1.0f)
12067 */
12068 src = make_immediate_reg_float(emit, 1.0f);
12069 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12070 break;
12071 default:
12072 break;
12073 }
12074 }
12075
12076 if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
12077 dst = make_dst_temp_reg(emit->tes.outer.temp_index);
12078
12079 switch (emit->tes.prim_mode) {
12080 case MESA_PRIM_QUADS:
12081 src = make_src_scalar_reg(TGSI_FILE_INPUT,
12082 emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
12083 dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
12084 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12085 FALLTHROUGH;
12086 case MESA_PRIM_TRIANGLES:
12087 src = make_src_scalar_reg(TGSI_FILE_INPUT,
12088 emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
12089 dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
12090 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12091 FALLTHROUGH;
12092 case MESA_PRIM_LINES:
12093 src = make_src_scalar_reg(TGSI_FILE_INPUT,
12094 emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
12095 dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12096 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12097
12098 src = make_src_scalar_reg(TGSI_FILE_INPUT,
12099 emit->tes.outer.in_index , TGSI_SWIZZLE_X);
12100 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12101 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12102
12103 break;
12104 default:
12105 break;
12106 }
12107 }
12108 }
12109
12110
12111 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)12112 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
12113 {
12114 struct tgsi_full_src_register src;
12115 struct tgsi_full_dst_register dst;
12116 unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
12117 emit->initialize_temp_index);
12118 src = make_immediate_reg_float(emit, 0.0f);
12119 dst = make_dst_temp_reg(vgpu10_temp_index);
12120 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12121 emit->temp_map[emit->initialize_temp_index].initialized = true;
12122 emit->initialize_temp_index = INVALID_INDEX;
12123 }
12124
12125
12126 /**
12127 * Emit any extra/helper declarations/code that we might need between
12128 * the declaration section and code section.
12129 */
12130 static bool
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)12131 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
12132 {
12133 /* Properties */
12134 if (emit->unit == PIPE_SHADER_GEOMETRY)
12135 emit_property_instructions(emit);
12136 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12137 emit_hull_shader_declarations(emit);
12138
12139 /* Save the position of the first instruction token so that we can
12140 * do a second pass of the instructions for the patch constant phase.
12141 */
12142 emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
12143 emit->tcs.fork_phase_add_signature = false;
12144
12145 if (!emit_hull_shader_control_point_phase(emit)) {
12146 emit->skip_instruction = true;
12147 return true;
12148 }
12149
12150 /* Set the current tcs phase to control point phase */
12151 emit->tcs.control_point_phase = true;
12152 }
12153 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12154 emit_domain_shader_declarations(emit);
12155 }
12156 else if (emit->unit == PIPE_SHADER_COMPUTE) {
12157 emit_compute_shader_declarations(emit);
12158 }
12159
12160 /* Declare inputs */
12161 if (!emit_input_declarations(emit))
12162 return false;
12163
12164 /* Declare outputs */
12165 if (!emit_output_declarations(emit))
12166 return false;
12167
12168 /* Declare temporary registers */
12169 emit_temporaries_declaration(emit);
12170
12171 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
12172 * will already be declared in hs_decls (emit_hull_shader_declarations)
12173 */
12174 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12175
12176 alloc_common_immediates(emit);
12177
12178 /* Declare constant registers */
12179 emit_constant_declaration(emit);
12180
12181 /* Declare samplers and resources */
12182 emit_sampler_declarations(emit);
12183 emit_resource_declarations(emit);
12184
12185 /* Declare images */
12186 emit_image_declarations(emit);
12187
12188 /* Declare shader buffers */
12189 emit_shader_buf_declarations(emit);
12190
12191 /* Declare atomic buffers */
12192 emit_atomic_buf_declarations(emit);
12193 }
12194
12195 if (emit->unit != PIPE_SHADER_FRAGMENT &&
12196 emit->unit != PIPE_SHADER_COMPUTE) {
12197 /*
12198 * Declare clip distance output registers for ClipVertex or
12199 * user defined planes
12200 */
12201 emit_clip_distance_declarations(emit);
12202 }
12203
12204 if (emit->unit == PIPE_SHADER_COMPUTE) {
12205 emit_memory_declarations(emit);
12206
12207 if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
12208 emit->cs.grid_size.imm_index =
12209 alloc_immediate_int4(emit,
12210 emit->key.cs.grid_size[0],
12211 emit->key.cs.grid_size[1],
12212 emit->key.cs.grid_size[2], 0);
12213 }
12214 }
12215
12216 if (emit->unit == PIPE_SHADER_FRAGMENT &&
12217 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12218 float alpha = emit->key.fs.alpha_ref;
12219 emit->fs.alpha_ref_index =
12220 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
12221 }
12222
12223 if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12224 /**
12225 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
12226 * hs_decls
12227 */
12228 emit_vgpu10_immediates_block(emit);
12229 }
12230 else {
12231 emit_tcs_default_control_point_output(emit);
12232 }
12233
12234 if (emit->unit == PIPE_SHADER_FRAGMENT) {
12235 emit_frontface_instructions(emit);
12236 emit_fragcoord_instructions(emit);
12237 emit_sample_position_instructions(emit);
12238 emit_default_layer_instructions(emit);
12239 }
12240 else if (emit->unit == PIPE_SHADER_VERTEX) {
12241 emit_vertex_attrib_instructions(emit);
12242
12243 if (emit->info.uses_vertexid)
12244 emit_vertex_id_nobase_instruction(emit);
12245 }
12246 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12247 emit_temp_tessfactor_instructions(emit);
12248 }
12249
12250 /**
12251 * For geometry shader that writes to viewport index, the prescale
12252 * temporaries will be done at the first vertex emission.
12253 */
12254 if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
12255 emit_temp_prescale_instructions(emit);
12256
12257 return true;
12258 }
12259
12260
12261 /**
12262 * The device has no direct support for the pipe_blend_state::alpha_to_one
12263 * option so we implement it here with shader code.
12264 *
12265 * Note that this is kind of pointless, actually. Here we're clobbering
12266 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
12267 * up with 100% coverage. That's almost certainly not what the user wants.
12268 * The work-around is to add extra shader code to compute coverage from alpha
12269 * and write it to the coverage output register (if the user's shader doesn't
12270 * do so already). We'll probably do that in the future.
12271 */
12272 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12273 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
12274 unsigned fs_color_tmp_index)
12275 {
12276 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
12277 unsigned i;
12278
12279 /* Note: it's not 100% clear from the spec if we're supposed to clobber
12280 * the alpha for all render targets. But that's what NVIDIA does and
12281 * that's what Piglit tests.
12282 */
12283 for (i = 0; i < emit->fs.num_color_outputs; i++) {
12284 struct tgsi_full_dst_register color_dst;
12285
12286 if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
12287 /* write to the temp color register */
12288 color_dst = make_dst_temp_reg(fs_color_tmp_index);
12289 }
12290 else {
12291 /* write directly to the color[i] output */
12292 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
12293 }
12294
12295 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
12296
12297 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
12298 }
12299 }
12300
12301
12302 /**
12303 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
12304 * against the alpha reference value and discards the fragment if the
12305 * comparison fails.
12306 */
12307 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12308 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
12309 unsigned fs_color_tmp_index)
12310 {
12311 /* compare output color's alpha to alpha ref and discard if comparison
12312 * fails.
12313 */
12314 unsigned tmp = get_temp_index(emit);
12315 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
12316 struct tgsi_full_src_register tmp_src_x =
12317 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
12318 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
12319 struct tgsi_full_src_register color_src =
12320 make_src_temp_reg(fs_color_tmp_index);
12321 struct tgsi_full_src_register color_src_w =
12322 scalar_src(&color_src, TGSI_SWIZZLE_W);
12323 struct tgsi_full_src_register ref_src =
12324 make_src_immediate_reg(emit->fs.alpha_ref_index);
12325 struct tgsi_full_dst_register color_dst =
12326 make_dst_output_reg(emit->fs.color_out_index[0]);
12327
12328 assert(emit->unit == PIPE_SHADER_FRAGMENT);
12329
12330 /* dst = src0 'alpha_func' src1 */
12331 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
12332 &color_src_w, &ref_src);
12333
12334 /* DISCARD if dst.x == 0 */
12335 begin_emit_instruction(emit);
12336 emit_discard_opcode(emit, false); /* discard if src0.x is zero */
12337 emit_src_register(emit, &tmp_src_x);
12338 end_emit_instruction(emit);
12339
12340 /* If we don't need to broadcast the color below, emit the final color here.
12341 */
12342 if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
12343 /* MOV output.color, tempcolor */
12344 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12345 }
12346
12347 free_temp_indexes(emit);
12348 }
12349
12350
12351 /**
12352 * Emit instructions for writing a single color output to multiple
12353 * color buffers.
12354 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
12355 * when key.fs.white_fragments is true).
12356 * property is set and the number of render targets is greater than one.
12357 * \param fs_color_tmp_index index of the temp register that holds the
12358 * color to broadcast.
12359 */
12360 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12361 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
12362 unsigned fs_color_tmp_index)
12363 {
12364 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
12365 unsigned i;
12366 struct tgsi_full_src_register color_src;
12367
12368 if (emit->key.fs.white_fragments) {
12369 /* set all color outputs to white */
12370 color_src = make_immediate_reg_float(emit, 1.0f);
12371 }
12372 else {
12373 /* set all color outputs to TEMP[fs_color_tmp_index] */
12374 assert(fs_color_tmp_index != INVALID_INDEX);
12375 color_src = make_src_temp_reg(fs_color_tmp_index);
12376 }
12377
12378 assert(emit->unit == PIPE_SHADER_FRAGMENT);
12379
12380 for (i = 0; i < n; i++) {
12381 unsigned output_reg = emit->fs.color_out_index[i];
12382 struct tgsi_full_dst_register color_dst =
12383 make_dst_output_reg(output_reg);
12384
12385 /* Fill in this semantic here since we'll use it later in
12386 * emit_dst_register().
12387 */
12388 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
12389
12390 /* MOV output.color[i], tempcolor */
12391 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12392 }
12393 }
12394
12395
12396 /**
12397 * Emit extra helper code after the original shader code, but before the
12398 * last END/RET instruction.
12399 * For vertex shaders this means emitting the extra code to apply the
12400 * prescale scale/translation.
12401 */
12402 static bool
emit_post_helpers(struct svga_shader_emitter_v10 * emit)12403 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
12404 {
12405 if (emit->unit == PIPE_SHADER_VERTEX) {
12406 emit_vertex_instructions(emit);
12407 }
12408 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
12409 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
12410
12411 assert(!(emit->key.fs.white_fragments &&
12412 emit->key.fs.write_color0_to_n_cbufs == 0));
12413
12414 /* We no longer want emit_dst_register() to substitute the
12415 * temporary fragment color register for the real color output.
12416 */
12417 emit->fs.color_tmp_index = INVALID_INDEX;
12418
12419 if (emit->key.fs.alpha_to_one) {
12420 emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
12421 }
12422 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12423 emit_alpha_test_instructions(emit, fs_color_tmp_index);
12424 }
12425 if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
12426 emit->key.fs.white_fragments) {
12427 emit_broadcast_color_instructions(emit, fs_color_tmp_index);
12428 }
12429 }
12430 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12431 if (!emit->tcs.control_point_phase) {
12432 /* store the tessellation levels in the patch constant phase only */
12433 store_tesslevels(emit);
12434 }
12435 else {
12436 emit_clipping_instructions(emit);
12437 }
12438 }
12439 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12440 emit_vertex_instructions(emit);
12441 }
12442
12443 return true;
12444 }
12445
12446
12447 /**
12448 * Reemit rawbuf instruction
12449 */
12450 static bool
emit_rawbuf_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)12451 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
12452 unsigned inst_number,
12453 const struct tgsi_full_instruction *inst)
12454 {
12455 bool ret;
12456
12457 /* For all the rawbuf references in this instruction,
12458 * load the rawbuf reference and assign to the designated temporary.
12459 * Then reeemit the instruction.
12460 */
12461 emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;
12462
12463 unsigned offset_tmp = get_temp_index(emit);
12464 struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
12465 struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
12466 struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
12467
12468 for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
12469 struct tgsi_full_src_register element_src;
12470
12471 /* First get the element index register. */
12472
12473 if (emit->raw_buf_tmp[i].indirect) {
12474 unsigned tmp = get_temp_index(emit);
12475 struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
12476 struct tgsi_full_src_register element_index =
12477 make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12478 struct tgsi_full_src_register element_rel =
12479 make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);
12480
12481 element_src = make_src_temp_reg(tmp);
12482 element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
12483 element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);
12484
12485 /* element index from the indirect register */
12486 element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12487 element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);
12488
12489 /* IADD element_src element_index element_index_relative */
12490 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
12491 &element_index, &element_rel);
12492 }
12493 else {
12494 unsigned element_index = emit->raw_buf_tmp[i].element_index;
12495 union tgsi_immediate_data imm;
12496 imm.Int = element_index;
12497 int immpos = find_immediate(emit, imm, 0);
12498 if (immpos < 0) {
12499 UNUSED unsigned element_index_imm =
12500 add_immediate_int(emit, element_index);
12501 }
12502 element_src = make_immediate_reg_int(emit, element_index);
12503 }
12504
12505 /* byte offset = element index << 4 */
12506 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
12507 &element_src, &four);
12508
12509 struct tgsi_full_dst_register dst_tmp =
12510 make_dst_temp_reg(i + emit->raw_buf_tmp_index);
12511
12512 /* LD_RAW tmp, rawbuf byte offset, rawbuf */
12513
12514 begin_emit_instruction(emit);
12515 emit_opcode(emit, VGPU10_OPCODE_LD_RAW, false);
12516 emit_dst_register(emit, &dst_tmp);
12517
12518 struct tgsi_full_src_register offset_x =
12519 scalar_src(&offset_src, TGSI_SWIZZLE_X);
12520 emit_src_register(emit, &offset_x);
12521
12522 emit_resource_register(emit,
12523 emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
12524 end_emit_instruction(emit);
12525 }
12526
12527 emit->raw_buf_cur_tmp_index = 0;
12528
12529 ret = emit_vgpu10_instruction(emit, inst_number, inst);
12530
12531 /* reset raw buf state */
12532 emit->raw_buf_cur_tmp_index = 0;
12533 emit->reemit_rawbuf_instruction = REEMIT_FALSE;
12534
12535 free_temp_indexes(emit);
12536
12537 return ret;
12538 }
12539
12540
12541 /**
12542 * Translate the TGSI tokens into VGPU10 tokens.
12543 */
12544 static bool
emit_vgpu10_instructions(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12545 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
12546 const struct tgsi_token *tokens)
12547 {
12548 struct tgsi_parse_context parse;
12549 bool ret = true;
12550 bool pre_helpers_emitted = false;
12551 unsigned inst_number = 0;
12552
12553 tgsi_parse_init(&parse, tokens);
12554
12555 while (!tgsi_parse_end_of_tokens(&parse)) {
12556
12557 /* Save the current tgsi token starting position */
12558 emit->cur_tgsi_token = parse.Position;
12559
12560 tgsi_parse_token(&parse);
12561
12562 switch (parse.FullToken.Token.Type) {
12563 case TGSI_TOKEN_TYPE_IMMEDIATE:
12564 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
12565 if (!ret)
12566 goto done;
12567 break;
12568
12569 case TGSI_TOKEN_TYPE_DECLARATION:
12570 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
12571 if (!ret)
12572 goto done;
12573 break;
12574
12575 case TGSI_TOKEN_TYPE_INSTRUCTION:
12576 if (!pre_helpers_emitted) {
12577 ret = emit_pre_helpers(emit);
12578 if (!ret)
12579 goto done;
12580 pre_helpers_emitted = true;
12581 }
12582 ret = emit_vgpu10_instruction(emit, inst_number++,
12583 &parse.FullToken.FullInstruction);
12584
12585 /* Usually this applies to TCS only. If shader is reading control
12586 * point outputs in control point phase, we should reemit all
12587 * instructions which are writting into control point output in
12588 * control phase to store results into temporaries.
12589 */
12590 if (emit->reemit_instruction) {
12591 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
12592 ret = emit_vgpu10_instruction(emit, inst_number,
12593 &parse.FullToken.FullInstruction);
12594 }
12595 else if (emit->initialize_temp_index != INVALID_INDEX) {
12596 emit_initialize_temp_instruction(emit);
12597 emit->initialize_temp_index = INVALID_INDEX;
12598 ret = emit_vgpu10_instruction(emit, inst_number - 1,
12599 &parse.FullToken.FullInstruction);
12600 }
12601 else if (emit->reemit_rawbuf_instruction) {
12602 ret = emit_rawbuf_instruction(emit, inst_number - 1,
12603 &parse.FullToken.FullInstruction);
12604 }
12605
12606 if (!ret)
12607 goto done;
12608 break;
12609
12610 case TGSI_TOKEN_TYPE_PROPERTY:
12611 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
12612 if (!ret)
12613 goto done;
12614 break;
12615
12616 default:
12617 break;
12618 }
12619 }
12620
12621 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12622 ret = emit_hull_shader_patch_constant_phase(emit, &parse);
12623 }
12624
12625 done:
12626 tgsi_parse_free(&parse);
12627 return ret;
12628 }
12629
12630
12631 /**
12632 * Emit the first VGPU10 shader tokens.
12633 */
12634 static bool
emit_vgpu10_header(struct svga_shader_emitter_v10 * emit)12635 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
12636 {
12637 VGPU10ProgramToken ptoken;
12638
12639 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
12640
12641 /* Maximum supported shader version is 50 */
12642 unsigned version = MIN2(emit->version, 50);
12643
12644 ptoken.value = 0; /* init whole token to zero */
12645 ptoken.majorVersion = version / 10;
12646 ptoken.minorVersion = version % 10;
12647 ptoken.programType = translate_shader_type(emit->unit);
12648 if (!emit_dword(emit, ptoken.value))
12649 return false;
12650
12651 /* Second token: total length of shader, in tokens. We can't fill this
12652 * in until we're all done. Emit zero for now.
12653 */
12654 if (!emit_dword(emit, 0))
12655 return false;
12656
12657 if (emit->version >= 50) {
12658 VGPU10OpcodeToken0 token;
12659
12660 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12661 /* For hull shader, we need to start the declarations phase first before
12662 * emitting any declarations including the global flags.
12663 */
12664 token.value = 0;
12665 token.opcodeType = VGPU10_OPCODE_HS_DECLS;
12666 begin_emit_instruction(emit);
12667 emit_dword(emit, token.value);
12668 end_emit_instruction(emit);
12669 }
12670
12671 /* Emit global flags */
12672 token.value = 0; /* init whole token to zero */
12673 token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12674 token.enableDoublePrecisionFloatOps = 1; /* set bit */
12675 token.instructionLength = 1;
12676 if (!emit_dword(emit, token.value))
12677 return false;
12678 }
12679
12680 if (emit->version >= 40) {
12681 VGPU10OpcodeToken0 token;
12682
12683 /* Reserved for global flag such as refactoringAllowed.
12684 * If the shader does not use the precise qualifier, we will set the
12685 * refactoringAllowed global flag; otherwise, we will leave the reserved
12686 * token to NOP.
12687 */
12688 emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
12689 token.value = 0;
12690 token.opcodeType = VGPU10_OPCODE_NOP;
12691 token.instructionLength = 1;
12692 if (!emit_dword(emit, token.value))
12693 return false;
12694 }
12695
12696 return true;
12697 }
12698
12699
12700 static bool
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)12701 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
12702 {
12703 VGPU10ProgramToken *tokens;
12704
12705 /* Replace the second token with total shader length */
12706 tokens = (VGPU10ProgramToken *) emit->buf;
12707 tokens[1].value = emit_get_num_tokens(emit);
12708
12709 if (emit->version >= 40 && !emit->uses_precise_qualifier) {
12710 /* Replace the reserved token with the RefactoringAllowed global flag */
12711 VGPU10OpcodeToken0 *ptoken;
12712
12713 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12714 assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
12715 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12716 ptoken->refactoringAllowed = 1;
12717 }
12718
12719 if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
12720 /* Replace the reserved token with the forceEarlyDepthStencil global flag */
12721 VGPU10OpcodeToken0 *ptoken;
12722
12723 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12724 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12725 ptoken->forceEarlyDepthStencil = 1;
12726 }
12727
12728 return true;
12729 }
12730
12731
12732 /**
12733 * Modify the FS to read the BCOLORs and use the FACE register
12734 * to choose between the front/back colors.
12735 */
12736 static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token * tokens)12737 transform_fs_twoside(const struct tgsi_token *tokens)
12738 {
12739 if (0) {
12740 debug_printf("Before tgsi_add_two_side ------------------\n");
12741 tgsi_dump(tokens,0);
12742 }
12743 tokens = tgsi_add_two_side(tokens);
12744 if (0) {
12745 debug_printf("After tgsi_add_two_side ------------------\n");
12746 tgsi_dump(tokens, 0);
12747 }
12748 return tokens;
12749 }
12750
12751
12752 /**
12753 * Modify the FS to do polygon stipple.
12754 */
12755 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12756 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
12757 const struct tgsi_token *tokens)
12758 {
12759 const struct tgsi_token *new_tokens;
12760 unsigned unit;
12761
12762 if (0) {
12763 debug_printf("Before pstipple ------------------\n");
12764 tgsi_dump(tokens,0);
12765 }
12766
12767 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
12768 TGSI_FILE_INPUT);
12769
12770 emit->fs.pstipple_sampler_unit = unit;
12771
12772 /* The new sampler state is appended to the end of the samplers list */
12773 emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
12774
12775 /* Setup texture state for stipple */
12776 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
12777 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
12778 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
12779 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
12780 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
12781 emit->key.tex[unit].target = PIPE_TEXTURE_2D;
12782 emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
12783
12784 if (0) {
12785 debug_printf("After pstipple ------------------\n");
12786 tgsi_dump(new_tokens, 0);
12787 }
12788
12789 return new_tokens;
12790 }
12791
12792 /**
12793 * Modify the FS to support anti-aliasing point.
12794 */
12795 static const struct tgsi_token *
transform_fs_aapoint(struct svga_context * svga,const struct tgsi_token * tokens,int aa_coord_index)12796 transform_fs_aapoint(struct svga_context *svga,
12797 const struct tgsi_token *tokens,
12798 int aa_coord_index)
12799 {
12800 bool need_texcoord_semantic =
12801 svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD);
12802
12803 if (0) {
12804 debug_printf("Before tgsi_add_aa_point ------------------\n");
12805 tgsi_dump(tokens,0);
12806 }
12807 tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic);
12808 if (0) {
12809 debug_printf("After tgsi_add_aa_point ------------------\n");
12810 tgsi_dump(tokens, 0);
12811 }
12812 return tokens;
12813 }
12814
12815
12816 /**
12817 * A helper function to determine the shader in the previous stage and
12818 * then call the linker function to determine the input mapping for this
12819 * shader to match the output indices from the shader in the previous stage.
12820 */
12821 static void
compute_input_mapping(struct svga_context * svga,struct svga_shader_emitter_v10 * emit,enum pipe_shader_type unit)12822 compute_input_mapping(struct svga_context *svga,
12823 struct svga_shader_emitter_v10 *emit,
12824 enum pipe_shader_type unit)
12825 {
12826 struct svga_shader *prevShader = NULL; /* shader in the previous stage */
12827
12828 if (unit == PIPE_SHADER_FRAGMENT) {
12829 prevShader = svga->curr.gs ?
12830 &svga->curr.gs->base : (svga->curr.tes ?
12831 &svga->curr.tes->base : &svga->curr.vs->base);
12832 } else if (unit == PIPE_SHADER_GEOMETRY) {
12833 prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
12834 } else if (unit == PIPE_SHADER_TESS_EVAL) {
12835 assert(svga->curr.tcs);
12836 prevShader = &svga->curr.tcs->base;
12837 } else if (unit == PIPE_SHADER_TESS_CTRL) {
12838 assert(svga->curr.vs);
12839 prevShader = &svga->curr.vs->base;
12840 }
12841
12842 if (prevShader != NULL) {
12843 svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage);
12844 emit->prevShaderInfo = &prevShader->tgsi_info;
12845 }
12846 else {
12847 /**
12848 * Since vertex shader does not need to go through the linker to
12849 * establish the input map, we need to make sure the highest index
12850 * of input registers is set properly here.
12851 */
12852 emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
12853 emit->info.file_max[TGSI_FILE_INPUT]);
12854 }
12855 }
12856
12857
12858 /**
12859 * Copies the shader signature info to the shader variant
12860 */
12861 static void
copy_shader_signature(struct svga_shader_signature * sgn,struct svga_shader_variant * variant)12862 copy_shader_signature(struct svga_shader_signature *sgn,
12863 struct svga_shader_variant *variant)
12864 {
12865 SVGA3dDXShaderSignatureHeader *header = &sgn->header;
12866
12867 /* Calculate the signature length */
12868 variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
12869 (header->numInputSignatures +
12870 header->numOutputSignatures +
12871 header->numPatchConstantSignatures) *
12872 sizeof(SVGA3dDXShaderSignatureEntry);
12873
12874 /* Allocate buffer for the signature info */
12875 variant->signature =
12876 (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
12877
12878 char *sgnBuf = (char *)variant->signature;
12879 unsigned sgnLen;
12880
12881 /* Copy the signature info to the shader variant structure */
12882 memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
12883 sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
12884
12885 if (header->numInputSignatures) {
12886 sgnLen =
12887 header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12888 memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
12889 sgnBuf += sgnLen;
12890 }
12891
12892 if (header->numOutputSignatures) {
12893 sgnLen =
12894 header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12895 memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
12896 sgnBuf += sgnLen;
12897 }
12898
12899 if (header->numPatchConstantSignatures) {
12900 sgnLen =
12901 header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12902 memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
12903 }
12904 }
12905
12906
12907 /**
12908 * This is the main entrypoint for the TGSI -> VPGU10 translator.
12909 */
12910 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,enum pipe_shader_type unit)12911 svga_tgsi_vgpu10_translate(struct svga_context *svga,
12912 const struct svga_shader *shader,
12913 const struct svga_compile_key *key,
12914 enum pipe_shader_type unit)
12915 {
12916 struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
12917 struct svga_shader_variant *variant = NULL;
12918 struct svga_shader_emitter_v10 *emit;
12919 const struct tgsi_token *tokens = shader->tokens;
12920
12921 (void) make_immediate_reg_double; /* unused at this time */
12922
12923 assert(unit == PIPE_SHADER_VERTEX ||
12924 unit == PIPE_SHADER_GEOMETRY ||
12925 unit == PIPE_SHADER_FRAGMENT ||
12926 unit == PIPE_SHADER_TESS_CTRL ||
12927 unit == PIPE_SHADER_TESS_EVAL ||
12928 unit == PIPE_SHADER_COMPUTE);
12929
12930 /* These two flags cannot be used together */
12931 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
12932
12933 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
12934 /*
12935 * Setup the code emitter
12936 */
12937 emit = alloc_emitter();
12938 if (!emit)
12939 goto done;
12940
12941 emit->unit = unit;
12942 if (svga_have_gl43(svga)) {
12943 emit->version = 51;
12944 } else if (svga_have_sm5(svga)) {
12945 emit->version = 50;
12946 } else if (svga_have_sm4_1(svga)) {
12947 emit->version = 41;
12948 } else {
12949 emit->version = 40;
12950 }
12951
12952 emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
12953
12954 emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
12955
12956 emit->key = *key;
12957
12958 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
12959 emit->key.gs.need_prescale ||
12960 emit->key.tes.need_prescale);
12961
12962 /* Determine how many prescale factors in the constant buffer */
12963 emit->vposition.num_prescale = 1;
12964 if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
12965 assert(emit->unit == PIPE_SHADER_GEOMETRY);
12966 emit->vposition.num_prescale = emit->key.gs.num_prescale;
12967 }
12968
12969 emit->vposition.tmp_index = INVALID_INDEX;
12970 emit->vposition.so_index = INVALID_INDEX;
12971 emit->vposition.out_index = INVALID_INDEX;
12972
12973 emit->vs.vertex_id_sys_index = INVALID_INDEX;
12974 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
12975 emit->vs.vertex_id_bias_index = INVALID_INDEX;
12976
12977 emit->fs.color_tmp_index = INVALID_INDEX;
12978 emit->fs.face_input_index = INVALID_INDEX;
12979 emit->fs.fragcoord_input_index = INVALID_INDEX;
12980 emit->fs.sample_id_sys_index = INVALID_INDEX;
12981 emit->fs.sample_pos_sys_index = INVALID_INDEX;
12982 emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
12983 emit->fs.layer_input_index = INVALID_INDEX;
12984 emit->fs.layer_imm_index = INVALID_INDEX;
12985
12986 emit->gs.prim_id_index = INVALID_INDEX;
12987 emit->gs.invocation_id_sys_index = INVALID_INDEX;
12988 emit->gs.viewport_index_out_index = INVALID_INDEX;
12989 emit->gs.viewport_index_tmp_index = INVALID_INDEX;
12990
12991 emit->tcs.vertices_per_patch_index = INVALID_INDEX;
12992 emit->tcs.invocation_id_sys_index = INVALID_INDEX;
12993 emit->tcs.control_point_input_index = INVALID_INDEX;
12994 emit->tcs.control_point_addr_index = INVALID_INDEX;
12995 emit->tcs.control_point_out_index = INVALID_INDEX;
12996 emit->tcs.control_point_tmp_index = INVALID_INDEX;
12997 emit->tcs.control_point_out_count = 0;
12998 emit->tcs.inner.out_index = INVALID_INDEX;
12999 emit->tcs.inner.temp_index = INVALID_INDEX;
13000 emit->tcs.inner.tgsi_index = INVALID_INDEX;
13001 emit->tcs.outer.out_index = INVALID_INDEX;
13002 emit->tcs.outer.temp_index = INVALID_INDEX;
13003 emit->tcs.outer.tgsi_index = INVALID_INDEX;
13004 emit->tcs.patch_generic_out_count = 0;
13005 emit->tcs.patch_generic_out_index = INVALID_INDEX;
13006 emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
13007 emit->tcs.prim_id_index = INVALID_INDEX;
13008
13009 emit->tes.tesscoord_sys_index = INVALID_INDEX;
13010 emit->tes.inner.in_index = INVALID_INDEX;
13011 emit->tes.inner.temp_index = INVALID_INDEX;
13012 emit->tes.inner.tgsi_index = INVALID_INDEX;
13013 emit->tes.outer.in_index = INVALID_INDEX;
13014 emit->tes.outer.temp_index = INVALID_INDEX;
13015 emit->tes.outer.tgsi_index = INVALID_INDEX;
13016 emit->tes.prim_id_index = INVALID_INDEX;
13017
13018 emit->cs.thread_id_index = INVALID_INDEX;
13019 emit->cs.block_id_index = INVALID_INDEX;
13020 emit->cs.grid_size.tgsi_index = INVALID_INDEX;
13021 emit->cs.grid_size.imm_index = INVALID_INDEX;
13022 emit->cs.block_width = 1;
13023 emit->cs.block_height = 1;
13024 emit->cs.block_depth = 1;
13025
13026 emit->clip_dist_out_index = INVALID_INDEX;
13027 emit->clip_dist_tmp_index = INVALID_INDEX;
13028 emit->clip_dist_so_index = INVALID_INDEX;
13029 emit->clip_vertex_out_index = INVALID_INDEX;
13030 emit->clip_vertex_tmp_index = INVALID_INDEX;
13031 emit->svga_debug_callback = svga->debug.callback;
13032
13033 emit->index_range.start_index = INVALID_INDEX;
13034 emit->index_range.count = 0;
13035 emit->index_range.required = false;
13036 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
13037 emit->index_range.dim = 0;
13038 emit->index_range.size = 0;
13039
13040 emit->current_loop_depth = 0;
13041
13042 emit->initialize_temp_index = INVALID_INDEX;
13043 emit->image_size_index = INVALID_INDEX;
13044
13045 emit->max_vs_inputs = svgascreen->max_vs_inputs;
13046 emit->max_vs_outputs = svgascreen->max_vs_outputs;
13047 emit->max_gs_inputs = svgascreen->max_gs_inputs;
13048
13049 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
13050 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
13051 }
13052
13053 if (unit == PIPE_SHADER_FRAGMENT) {
13054 if (key->fs.light_twoside) {
13055 tokens = transform_fs_twoside(tokens);
13056 }
13057 if (key->fs.pstipple) {
13058 const struct tgsi_token *new_tokens =
13059 transform_fs_pstipple(emit, tokens);
13060 if (tokens != shader->tokens) {
13061 /* free the two-sided shader tokens */
13062 tgsi_free_tokens(tokens);
13063 }
13064 tokens = new_tokens;
13065 }
13066 if (key->fs.aa_point) {
13067 tokens = transform_fs_aapoint(svga, tokens,
13068 key->fs.aa_point_coord_index);
13069 }
13070 }
13071
13072 if (SVGA_DEBUG & DEBUG_TGSI) {
13073 debug_printf("#####################################\n");
13074 debug_printf("### TGSI Shader %u\n", shader->id);
13075 tgsi_dump(tokens, 0);
13076 }
13077
13078 /**
13079 * Rescan the header if the token string is different from the one
13080 * included in the shader; otherwise, the header info is already up-to-date
13081 */
13082 if (tokens != shader->tokens) {
13083 tgsi_scan_shader(tokens, &emit->info);
13084 } else {
13085 emit->info = shader->tgsi_info;
13086 }
13087
13088 emit->num_outputs = emit->info.num_outputs;
13089
13090 /**
13091 * Compute input mapping to match the outputs from shader
13092 * in the previous stage
13093 */
13094 compute_input_mapping(svga, emit, unit);
13095
13096 determine_clipping_mode(emit);
13097
13098 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
13099 unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
13100 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
13101 /* if there is stream output declarations associated
13102 * with this shader or the shader writes to ClipDistance
13103 * then reserve extra registers for the non-adjusted vertex position
13104 * and the ClipDistance shadow copy.
13105 */
13106 emit->vposition.so_index = emit->num_outputs++;
13107
13108 if (emit->clip_mode == CLIP_DISTANCE) {
13109 emit->clip_dist_so_index = emit->num_outputs++;
13110 if (emit->info.num_written_clipdistance > 4)
13111 emit->num_outputs++;
13112 }
13113 }
13114 }
13115
13116 /* Determine if constbuf to rawbuf translation is needed */
13117 emit->raw_buf_srv_start_index = emit->key.srv_raw_constbuf_index;
13118 if (emit->info.const_buffers_declared)
13119 emit->raw_bufs = emit->key.raw_constbufs;
13120
13121 emit->raw_shaderbuf_srv_start_index = emit->key.srv_raw_shaderbuf_index;
13122 if (emit->info.shader_buffers_declared)
13123 emit->raw_shaderbufs = emit->key.raw_shaderbufs;
13124
13125 /*
13126 * Do actual shader translation.
13127 */
13128 if (!emit_vgpu10_header(emit)) {
13129 debug_printf("svga: emit VGPU10 header failed\n");
13130 goto cleanup;
13131 }
13132
13133 if (!emit_vgpu10_instructions(emit, tokens)) {
13134 debug_printf("svga: emit VGPU10 instructions failed\n");
13135 goto cleanup;
13136 }
13137
13138 if (emit->num_new_immediates > 0) {
13139 reemit_immediates_block(emit);
13140 }
13141
13142 if (!emit_vgpu10_tail(emit)) {
13143 debug_printf("svga: emit VGPU10 tail failed\n");
13144 goto cleanup;
13145 }
13146
13147 if (emit->register_overflow) {
13148 goto cleanup;
13149 }
13150
13151 /*
13152 * Create, initialize the 'variant' object.
13153 */
13154 variant = svga_new_shader_variant(svga, unit);
13155 if (!variant)
13156 goto cleanup;
13157
13158 variant->shader = shader;
13159 variant->nr_tokens = emit_get_num_tokens(emit);
13160 variant->tokens = (const unsigned *)emit->buf;
13161
13162 /* Copy shader signature info to the shader variant */
13163 if (svga_have_sm5(svga)) {
13164 copy_shader_signature(&emit->signature, variant);
13165 }
13166
13167 emit->buf = NULL; /* buffer is no longer owed by emitter context */
13168 memcpy(&variant->key, key, sizeof(*key));
13169 variant->id = UTIL_BITMASK_INVALID_INDEX;
13170
13171 /* The extra constant starting offset starts with the number of
13172 * shader constants declared in the shader.
13173 */
13174 variant->extra_const_start = emit->num_shader_consts[0];
13175 if (key->gs.wide_point) {
13176 /**
13177 * The extra constant added in the transformed shader
13178 * for inverse viewport scale is to be supplied by the driver.
13179 * So the extra constant starting offset needs to be reduced by 1.
13180 */
13181 assert(variant->extra_const_start > 0);
13182 variant->extra_const_start--;
13183 }
13184
13185 if (unit == PIPE_SHADER_FRAGMENT) {
13186 struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
13187
13188 fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
13189 fs_variant->pstipple_sampler_state_index =
13190 emit->fs.pstipple_sampler_state_index;
13191
13192 /* If there was exactly one write to a fragment shader output register
13193 * and it came from a constant buffer, we know all fragments will have
13194 * the same color (except for blending).
13195 */
13196 fs_variant->constant_color_output =
13197 emit->constant_color_output && emit->num_output_writes == 1;
13198
13199 /** keep track in the variant if flat interpolation is used
13200 * for any of the varyings.
13201 */
13202 fs_variant->uses_flat_interp = emit->uses_flat_interp;
13203
13204 fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
13205 }
13206 else if (unit == PIPE_SHADER_TESS_EVAL) {
13207 struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
13208
13209 /* Keep track in the tes variant some of the layout parameters.
13210 * These parameters will be referenced by the tcs to emit
13211 * the necessary declarations for the hull shader.
13212 */
13213 tes_variant->prim_mode = emit->tes.prim_mode;
13214 tes_variant->spacing = emit->tes.spacing;
13215 tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
13216 tes_variant->point_mode = emit->tes.point_mode;
13217 }
13218
13219
13220 if (tokens != shader->tokens) {
13221 tgsi_free_tokens(tokens);
13222 }
13223
13224 cleanup:
13225 free_emitter(emit);
13226
13227 done:
13228 SVGA_STATS_TIME_POP(svga_sws(svga));
13229 return variant;
13230 }
13231