1 /*
2  * Copyright (c) 1998-2024 Broadcom. All Rights Reserved.
3  * The term “Broadcom” refers to Broadcom Inc.
4  * and/or its subsidiaries.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 /**
9  * @file svga_tgsi_vgpu10.c
10  *
11  * TGSI -> VGPU10 shader translation.
12  *
13  * \author Mingcheng Chen
14  * \author Brian Paul
15  */
16 
17 #include "util/compiler.h"
18 #include "pipe/p_shader_tokens.h"
19 #include "pipe/p_defines.h"
20 #include "tgsi/tgsi_dump.h"
21 #include "tgsi/tgsi_info.h"
22 #include "tgsi/tgsi_parse.h"
23 #include "tgsi/tgsi_scan.h"
24 #include "tgsi/tgsi_strings.h"
25 #include "tgsi/tgsi_two_side.h"
26 #include "tgsi/tgsi_aa_point.h"
27 #include "tgsi/tgsi_util.h"
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "util/u_bitmask.h"
31 #include "util/u_debug.h"
32 #include "util/u_pstipple.h"
33 
34 #include "svga_context.h"
35 #include "svga_debug.h"
36 #include "svga_link.h"
37 #include "svga_shader.h"
38 #include "svga_tgsi.h"
39 
40 #include "VGPU10ShaderTokens.h"
41 
42 
43 #define INVALID_INDEX 99999
44 #define MAX_INTERNAL_TEMPS 4
45 #define MAX_SYSTEM_VALUES 4
46 #define MAX_IMMEDIATE_COUNT \
47         (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
48 #define MAX_TEMP_ARRAYS 64  /* Enough? */
49 
50 /**
51  * Clipping is complicated.  There are four different cases which we
52  * handle during VS/GS shader translation:
53  */
54 enum clipping_mode
55 {
56    CLIP_NONE,     /**< No clipping enabled */
57    CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
58                    * one or more user-defined clip planes are enabled.  We
59                    * generate extra code to emit clip distances.
60                    */
61    CLIP_DISTANCE, /**< The shader already declares clip distance output
62                    * registers and has code to write to them.
63                    */
64    CLIP_VERTEX    /**< The shader declares a clip vertex output register and
65                   * has code that writes to the register.  We convert the
66                   * clipvertex position into one or more clip distances.
67                   */
68 };
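
/*
 * Illustrative note (added; not in the original source): how the modes above
 * typically arise, assuming a GL-style state tracker feeding this driver:
 *
 *   CLIP_NONE     - no user clip planes enabled and the shader writes neither
 *                   clip distances nor a clip vertex.
 *   CLIP_LEGACY   - user clip planes are enabled but the shader has no clip
 *                   declarations; extra code is generated to emit distances.
 *   CLIP_DISTANCE - the shader declares and writes clip distance outputs
 *                   (e.g. gl_ClipDistance[]) itself.
 *   CLIP_VERTEX   - the shader writes a clip vertex (e.g. gl_ClipVertex); the
 *                   translator converts it into clip distances.
 */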
69 
70 
71 /* Shader signature info */
72 struct svga_shader_signature
73 {
74    SVGA3dDXShaderSignatureHeader header;
75    SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
76    SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
77    SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
78 };
79 
80 static inline void
81 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
82                            unsigned index,
83                            SVGA3dDXSignatureSemanticName sgnName,
84                            unsigned mask,
85                            SVGA3dDXSignatureRegisterComponentType compType,
86                            SVGA3dDXSignatureMinPrecision minPrecision)
87 {
88    e->registerIndex = index;
89    e->semanticName = sgnName;
90    e->mask = mask;
91    e->componentType = compType;
92    e->minPrecision = minPrecision;
93 }
94 
95 static const SVGA3dDXSignatureSemanticName
96 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
97    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
98    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
99    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
100    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
101    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
102    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
103    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
104    SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
105    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
106    SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
107    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
108    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
109    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
110    SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
111    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
112    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
113    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
114    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
115    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
116    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
117    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
118    SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
119    SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
120    SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
121    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
124    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
125    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
127    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
128    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
129    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
131    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
139    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
140    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
141    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
142 };
143 
144 
145 /**
146  * Map tgsi semantic name to SVGA signature semantic name
147  */
148 static inline SVGA3dDXSignatureSemanticName
149 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
150 {
151    assert(name < TGSI_SEMANTIC_COUNT);
152 
153    /* Do a few asserts here to spot check the mapping */
154    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
155           SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
156    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
157           SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
158    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
159           SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
160 
161    return tgsi_semantic_to_sgn_name[name];
162 }
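
/*
 * Note (added): the tgsi_semantic_to_sgn_name[] table above is indexed
 * directly by the enum tgsi_semantic value, so its entries must stay in enum
 * order; the spot-check asserts in map_tgsi_semantic_to_sgn_name() exist to
 * catch accidental reordering when new TGSI semantics are added.
 */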
163 
164 enum reemit_mode {
165    REEMIT_FALSE = 0,
166    REEMIT_TRUE = 1,
167    REEMIT_IN_PROGRESS = 2
168 };
169 
170 struct svga_raw_buf_tmp {
171    bool indirect;
172    unsigned buffer_index:8;
173    unsigned element_index:8;
174    unsigned element_rel:8;
175 };
176 
177 struct svga_shader_emitter_v10
178 {
179    /* The token output buffer */
180    unsigned size;
181    char *buf;
182    char *ptr;
183 
184    /* Information about the shader and state (does not change) */
185    struct svga_compile_key key;
186    struct tgsi_shader_info info;
187    unsigned unit;
188    unsigned version; /**< Either 40, 41, 50 or 51 at this time */
189 
190    unsigned cur_tgsi_token;     /**< current tgsi token position */
191    unsigned inst_start_token;
192    bool discard_instruction; /**< throw away current instruction? */
193    bool reemit_instruction;  /**< reemit current instruction */
194    bool reemit_tgsi_instruction;  /**< reemit current tgsi instruction */
195    bool skip_instruction;    /**< skip current instruction */
196    bool use_sampler_state_mapping; /* use sampler state mapping */
197    enum reemit_mode reemit_rawbuf_instruction;
198 
199    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
200    double (*immediates_dbl)[2];
201    unsigned num_immediates;      /**< Number of immediates emitted */
202    unsigned common_immediate_pos[20];  /**< literals for common immediates */
203    unsigned num_common_immediates;
204    unsigned num_immediates_emitted;
205    unsigned num_new_immediates;        /**< pending immediates to be declared */
206    unsigned immediates_block_start_token;
207    unsigned immediates_block_next_token;
208 
209    unsigned num_outputs;      /**< includes any extra outputs */
210                               /**  The first extra output is reserved for
211                                *   the non-adjusted vertex position for
212                                *   stream output purposes
213                                */
214 
215    /* Temporary Registers */
216    unsigned num_shader_temps; /**< num of temps used by original shader */
217    unsigned internal_temp_count;  /**< currently allocated internal temps */
218    struct {
219       unsigned start, size;
220    } temp_arrays[MAX_TEMP_ARRAYS];
221    unsigned num_temp_arrays;
222 
223    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
224    struct {
225       unsigned arrayId, index;
226       bool initialized;
227    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
228 
229    unsigned initialize_temp_index;
230 
231    /** Number of constants used by original shader for each constant buffer.
232     * The size should probably always match with that of svga_state.constbufs.
233     */
234    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
235 
236    /* Raw constant buffers */
237    unsigned raw_buf_srv_start_index;  /* starting srv index for raw buffers */
238    unsigned raw_bufs;                 /* raw buffers bitmask */
239    unsigned raw_buf_tmp_index;        /* starting temp index for raw buffers */
240    unsigned raw_buf_cur_tmp_index;    /* current temp index for raw buffers */
241    struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */
242 
243    /* Samplers */
244    unsigned num_samplers;
245    bool sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
246    uint8_t sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
247    uint8_t sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
248 
249    /* Images */
250    unsigned num_images;
251    unsigned image_mask;
252    struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
253    unsigned image_size_index;  /* starting index to cbuf for image size */
254 
255    /* Shader buffers */
256    unsigned num_shader_bufs;
257    unsigned raw_shaderbuf_srv_start_index;  /* starting srv index for raw shaderbuf */
258    uint64_t raw_shaderbufs;                 /* raw shader buffers bitmask */
259 
260    /* HW atomic buffers */
261    unsigned num_atomic_bufs;
262    unsigned atomic_bufs_mask;
263    unsigned max_atomic_counter_index;
264    VGPU10_OPCODE_TYPE cur_atomic_opcode;    /* current atomic opcode */
265 
266    bool uav_declared;  /* True if uav is declared */
267 
268    /* Index Range declaration */
269    struct {
270       unsigned start_index;
271       unsigned count;
272       bool required;
273       unsigned operandType;
274       unsigned size;
275       unsigned dim;
276    } index_range;
277 
278    /* Address regs (really implemented with temps) */
279    unsigned num_address_regs;
280    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
281 
282    /* Output register usage masks */
283    uint8_t output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
284 
285    /* To map TGSI system value index to VGPU shader input indexes */
286    uint8_t system_value_indexes[MAX_SYSTEM_VALUES];
287 
288    struct {
289       /* vertex position scale/translation */
290       unsigned out_index;  /**< the real position output reg */
291       unsigned tmp_index;  /**< the fake/temp position output reg */
292       unsigned so_index;   /**< the non-adjusted position output reg */
293       unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
294       unsigned prescale_scale_index, prescale_trans_index;
295       unsigned num_prescale;      /* number of prescale factors in const buf */
296       unsigned viewport_index;
297       unsigned need_prescale:1;
298       unsigned have_prescale:1;
299    } vposition;
300 
301    /* Shader limits */
302    unsigned max_vs_inputs;
303    unsigned max_vs_outputs;
304    unsigned max_gs_inputs;
305 
306    /* For vertex shaders only */
307    struct {
308       /* viewport constant */
309       unsigned viewport_index;
310 
311       unsigned vertex_id_bias_index;
312       unsigned vertex_id_sys_index;
313       unsigned vertex_id_tmp_index;
314 
315       /* temp index of adjusted vertex attributes */
316       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
317    } vs;
318 
319    /* For fragment shaders only */
320    struct {
321       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
322       unsigned num_color_outputs;
323       unsigned color_tmp_index;  /**< fake/temp color output reg */
324       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
325 
326       /* front-face */
327       unsigned face_input_index; /**< real fragment shader face reg (bool) */
328       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
329 
330       unsigned pstipple_sampler_unit;
331       unsigned pstipple_sampler_state_index;
332 
333       unsigned fragcoord_input_index;  /**< real fragment position input reg */
334       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
335 
336       unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
337 
338       unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
339       unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
340 
341       /** TGSI index of sample mask input sys value */
342       unsigned sample_mask_in_sys_index;
343 
344       /* layer */
345       unsigned layer_input_index;    /**< TGSI index of layer */
346       unsigned layer_imm_index;      /**< immediate for default layer 0 */
347 
348       bool forceEarlyDepthStencil;  /**< true if Early Depth stencil test is enabled */
349    } fs;
350 
351    /* For geometry shaders only */
352    struct {
353       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
354       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
355       unsigned input_size;       /**< size of input arrays */
356       unsigned prim_id_index;    /**< primitive id register index */
357       unsigned max_out_vertices; /**< maximum number of output vertices */
358       unsigned invocations;
359       unsigned invocation_id_sys_index;
360 
361       unsigned viewport_index_out_index;
362       unsigned viewport_index_tmp_index;
363    } gs;
364 
365    /* For tessellation control shaders only */
366    struct {
367       unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
368       unsigned imm_index;                    /**< immediate for tcs */
369       unsigned invocation_id_sys_index;      /**< invocation id */
370       unsigned invocation_id_tmp_index;
371       unsigned instruction_token_pos;        /* token pos for the first instruction */
372       unsigned control_point_input_index;    /* control point input register index */
373       unsigned control_point_addr_index;     /* control point input address register */
374       unsigned control_point_out_index;      /* control point output register index */
375       unsigned control_point_tmp_index;      /* control point temporary register */
376       unsigned control_point_out_count;      /* control point output count */
377       bool  control_point_phase;          /* true if in control point phase */
378       bool  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
379       unsigned patch_generic_out_count;      /* per-patch generic output count */
380       unsigned patch_generic_out_index;      /* per-patch generic output register index*/
381       unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
382       unsigned prim_id_index;                /* primitive id */
383       struct {
384          unsigned out_index;      /* real tessinner output register */
385          unsigned temp_index;     /* tessinner temp register */
386          unsigned tgsi_index;     /* tgsi tessinner output register */
387       } inner;
388       struct {
389          unsigned out_index;      /* real tessouter output register */
390          unsigned temp_index;     /* tessouter temp register */
391          unsigned tgsi_index;     /* tgsi tessouter output register */
392       } outer;
393    } tcs;
394 
395    /* For tessellation evaluation shaders only */
396    struct {
397       enum mesa_prim prim_mode;
398       enum pipe_tess_spacing spacing;
399       bool vertices_order_cw;
400       bool point_mode;
401       unsigned tesscoord_sys_index;
402       unsigned swizzle_max;
403       unsigned prim_id_index;                /* primitive id */
404       struct {
405          unsigned in_index;       /* real tessinner input register */
406          unsigned temp_index;     /* tessinner temp register */
407          unsigned tgsi_index;     /* tgsi tessinner input register */
408       } inner;
409       struct {
410          unsigned in_index;       /* real tessouter input register */
411          unsigned temp_index;     /* tessouter temp register */
412          unsigned tgsi_index;     /* tgsi tessouter input register */
413       } outer;
414    } tes;
415 
416    struct {
417       unsigned block_width;       /* thread group size in x dimension */
418       unsigned block_height;      /* thread group size in y dimension */
419       unsigned block_depth;       /* thread group size in z dimension */
420       unsigned thread_id_index;   /* thread id tgsi index */
421       unsigned block_id_index;    /* block id tgsi index */
422       bool shared_memory_declared;    /* set if shared memory is declared */
423       struct {
424          unsigned tgsi_index;   /* grid size tgsi index */
425          unsigned imm_index;    /* grid size imm index */
426       } grid_size;
427    } cs;
428 
429    /* For vertex or geometry shaders */
430    enum clipping_mode clip_mode;
431    unsigned clip_dist_out_index; /**< clip distance output register index */
432    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
433    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
434 
435    /** Index of temporary holding the clipvertex coordinate */
436    unsigned clip_vertex_out_index; /**< clip vertex output register index */
437    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
438 
439    /* user clip plane constant slot indexes */
440    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
441 
442    unsigned num_output_writes;
443    bool constant_color_output;
444 
445    bool uses_flat_interp;
446 
447    unsigned reserved_token;        /* index to the reserved token */
448    bool uses_precise_qualifier;
449 
450    /* For all shaders: const reg index for RECT coord scaling */
451    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
452 
453    /* For all shaders: const reg index for texture buffer size */
454    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
455 
456    /** Which texture units are doing shadow comparison in the shader code */
457    unsigned shadow_compare_units;
458 
459    /* VS/TCS/TES/GS/FS Linkage info */
460    struct shader_linkage linkage;
461    struct tgsi_shader_info *prevShaderInfo;
462 
463    /* Shader signature */
464    struct svga_shader_signature signature;
465 
466    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
467 
468    /* For util_debug_message */
469    struct util_debug_callback svga_debug_callback;
470 
471    /* current loop depth in shader */
472    unsigned current_loop_depth;
473 };
474 
475 
476 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
477 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
478 static bool emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
479 static bool emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
480 static bool emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
481 static bool emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
482 static bool emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
483 static bool emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
484 static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
485 static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
486 static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
487 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
488 
489 static bool
490 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
491 
492 static bool
493 emit_vertex(struct svga_shader_emitter_v10 *emit,
494             const struct tgsi_full_instruction *inst);
495 
496 static bool
497 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
498                         unsigned inst_number,
499                         const struct tgsi_full_instruction *inst);
500 
501 static void
502 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
503                        VGPU10_OPCODE_TYPE opcodeType,
504                        VGPU10_OPERAND_TYPE operandType,
505                        VGPU10_OPERAND_INDEX_DIMENSION dim,
506                        unsigned index, unsigned size,
507                        VGPU10_SYSTEM_NAME name,
508                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
509                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
510                        unsigned usageMask,
511                        VGPU10_INTERPOLATION_MODE interpMode,
512                        bool addSignature,
513                        SVGA3dDXSignatureSemanticName sgnName);
514 
515 static bool
516 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
517                         unsigned inst_number,
518                         const struct tgsi_full_instruction *inst);
519 
520 static void
521 create_temp_array(struct svga_shader_emitter_v10 *emit,
522                   unsigned arrayID, unsigned first, unsigned count,
523                   unsigned startIndex);
524 
525 static char err_buf[128];
526 
527 static bool
528 expand(struct svga_shader_emitter_v10 *emit)
529 {
530    char *new_buf;
531    unsigned newsize = emit->size * 2;
532 
533    if (emit->buf != err_buf)
534       new_buf = REALLOC(emit->buf, emit->size, newsize);
535    else
536       new_buf = NULL;
537 
538    if (!new_buf) {
539       emit->ptr = err_buf;
540       emit->buf = err_buf;
541       emit->size = sizeof(err_buf);
542       return false;
543    }
544 
545    emit->size = newsize;
546    emit->ptr = new_buf + (emit->ptr - emit->buf);
547    emit->buf = new_buf;
548    return true;
549 }
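
/*
 * Behavior note (added): if reallocation fails, expand() redirects the
 * emitter to the static err_buf scratch buffer so that later
 * emit_dword()/emit_dwords() calls still have somewhere to write instead of
 * dereferencing a stale pointer; the false return value is what actually
 * reports the failure to the caller.
 */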
550 
551 /**
552  * Create and initialize a new svga_shader_emitter_v10 object.
553  */
554 static struct svga_shader_emitter_v10 *
555 alloc_emitter(void)
556 {
557    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
558 
559    if (!emit)
560       return NULL;
561 
562    /* to initialize the output buffer */
563    emit->size = 512;
564    if (!expand(emit)) {
565       FREE(emit);
566       return NULL;
567    }
568    return emit;
569 }
570 
571 /**
572  * Free an svga_shader_emitter_v10 object.
573  */
574 static void
575 free_emitter(struct svga_shader_emitter_v10 *emit)
576 {
577    assert(emit);
578    FREE(emit->buf);    /* will be NULL if translation succeeded */
579    FREE(emit);
580 }
581 
582 static inline bool
583 reserve(struct svga_shader_emitter_v10 *emit,
584         unsigned nr_dwords)
585 {
586    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
587       if (!expand(emit))
588          return false;
589    }
590 
591    return true;
592 }
593 
594 static bool
595 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
596 {
597    if (!reserve(emit, 1))
598       return false;
599 
600    *(uint32 *)emit->ptr = dword;
601    emit->ptr += sizeof dword;
602    return true;
603 }
604 
605 static bool
606 emit_dwords(struct svga_shader_emitter_v10 *emit,
607             const uint32 *dwords,
608             unsigned nr)
609 {
610    if (!reserve(emit, nr))
611       return false;
612 
613    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
614    emit->ptr += nr * sizeof *dwords;
615    return true;
616 }
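
/*
 * Usage sketch (added, illustrative only): the rest of this translator
 * builds VGPU10 token unions and emits their raw dword values through the
 * helpers above, e.g.:
 *
 *    VGPU10OpcodeToken0 token0;
 *    token0.value = 0;
 *    token0.opcodeType = VGPU10_OPCODE_MOV;   // hypothetical opcode choice
 *    emit_dword(emit, token0.value);          // reserve() grows the buffer
 */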
617 
618 /** Return the number of tokens in the emitter's buffer */
619 static unsigned
620 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
621 {
622    return (emit->ptr - emit->buf) / sizeof(unsigned);
623 }
624 
625 
626 /**
627  * Check for register overflow.  If we overflow we'll set an
628  * error flag.  This function can be called for register declarations
629  * or for registers used as src/dst instruction operands.
630  * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
631  *                     or VGPU10_OPCODE_DCL_x
632  * \param index  the register index
633  */
634 static void
635 check_register_index(struct svga_shader_emitter_v10 *emit,
636                      unsigned operandType, unsigned index)
637 {
638    bool overflow_before = emit->register_overflow;
639 
640    switch (operandType) {
641    case VGPU10_OPERAND_TYPE_TEMP:
642    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
643    case VGPU10_OPCODE_DCL_TEMPS:
644       if (index >= VGPU10_MAX_TEMPS) {
645          emit->register_overflow = true;
646       }
647       break;
648    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
649    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
650       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
651          emit->register_overflow = true;
652       }
653       break;
654    case VGPU10_OPERAND_TYPE_INPUT:
655    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
656    case VGPU10_OPCODE_DCL_INPUT:
657    case VGPU10_OPCODE_DCL_INPUT_SGV:
658    case VGPU10_OPCODE_DCL_INPUT_SIV:
659    case VGPU10_OPCODE_DCL_INPUT_PS:
660    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
661    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
662       if ((emit->unit == PIPE_SHADER_VERTEX &&
663            index >= emit->max_vs_inputs) ||
664           (emit->unit == PIPE_SHADER_GEOMETRY &&
665            index >= emit->max_gs_inputs) ||
666           (emit->unit == PIPE_SHADER_FRAGMENT &&
667            index >= VGPU10_MAX_FS_INPUTS) ||
668           (emit->unit == PIPE_SHADER_TESS_CTRL &&
669            index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
670           (emit->unit == PIPE_SHADER_TESS_EVAL &&
671            index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
672          emit->register_overflow = true;
673       }
674       break;
675    case VGPU10_OPERAND_TYPE_OUTPUT:
676    case VGPU10_OPCODE_DCL_OUTPUT:
677    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
678    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
679       /* Note: we are skipping two output indices in tcs for the
680        * tessinner/outer levels. The implementation will not exceed the
681        * number of outputs, but it allows the index to go beyond
682        * VGPU11_MAX_HS_OUTPUTS.
683        * The index will never be >= VGPU11_MAX_HS_OUTPUTS + 2.
684        */
685       if ((emit->unit == PIPE_SHADER_VERTEX &&
686            index >= emit->max_vs_outputs) ||
687           (emit->unit == PIPE_SHADER_GEOMETRY &&
688            index >= VGPU10_MAX_GS_OUTPUTS) ||
689           (emit->unit == PIPE_SHADER_FRAGMENT &&
690            index >= VGPU10_MAX_FS_OUTPUTS) ||
691           (emit->unit == PIPE_SHADER_TESS_CTRL &&
692            index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
693           (emit->unit == PIPE_SHADER_TESS_EVAL &&
694            index >= VGPU11_MAX_DS_OUTPUTS)) {
695          emit->register_overflow = true;
696       }
697       break;
698    case VGPU10_OPERAND_TYPE_SAMPLER:
699    case VGPU10_OPCODE_DCL_SAMPLER:
700       if (index >= VGPU10_MAX_SAMPLERS) {
701          emit->register_overflow = true;
702       }
703       break;
704    case VGPU10_OPERAND_TYPE_RESOURCE:
705    case VGPU10_OPCODE_DCL_RESOURCE:
706       if (index >= VGPU10_MAX_RESOURCES) {
707          emit->register_overflow = true;
708       }
709       break;
710    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
711       if (index >= MAX_IMMEDIATE_COUNT) {
712          emit->register_overflow = true;
713       }
714       break;
715    case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
716    case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
717    case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
718    case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
719    case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
720    case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
721    case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
722    case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
723       /* nothing */
724       break;
725    default:
726       assert(0);
727       ; /* nothing */
728    }
729 
730    if (emit->register_overflow && !overflow_before) {
731       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
732                    operandType, index);
733    }
734 }
735 
736 
737 /**
738  * Examine misc state to determine the clipping mode.
739  */
740 static void
741 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
742 {
743    /* num_written_clipdistance in the shader info for the tessellation
744     * control shader is always 0 because TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
745     * is not defined for that stage. So we go through all the output
746     * declarations to set num_written_clipdistance. This is just to
747     * determine the clipping mode.
748     */
749    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
750       unsigned i;
751       for (i = 0; i < emit->info.num_outputs; i++) {
752          if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
753             emit->info.num_written_clipdistance =
754                4 * (emit->info.output_semantic_index[i] + 1);
755          }
756       }
757    }
758 
759    if (emit->info.num_written_clipdistance > 0) {
760       emit->clip_mode = CLIP_DISTANCE;
761    }
762    else if (emit->info.writes_clipvertex) {
763       emit->clip_mode = CLIP_VERTEX;
764    }
765    else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
766       /*
767        * Only the last shader in the vertex processing stage needs to
768        * handle the legacy clip mode.
769        */
770       emit->clip_mode = CLIP_LEGACY;
771    }
772    else {
773       emit->clip_mode = CLIP_NONE;
774    }
775 }
776 
777 
778 /**
779  * For clip distance register declarations and clip distance register
780  * writes we need to mask the declaration usage or instruction writemask
781  * (respectively) against the set of the really-enabled clipping planes.
782  *
783  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
784  * has a VS that writes to all 8 clip distance registers, but the plane enable
785  * flags are a subset of that.
786  *
787  * This function is used to apply the plane enable flags to the register
788  * declaration or instruction writemask.
789  *
790  * \param writemask  the declaration usage mask or instruction writemask
791  * \param clip_reg_index  which clip plane register is being declared/written.
792  *                        The legal values are 0 and 1 (four clip planes per
793  *                        register, for a total of 8 clip planes)
794  */
795 static unsigned
796 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
797                       unsigned writemask, unsigned clip_reg_index)
798 {
799    unsigned shift;
800 
801    assert(clip_reg_index < 2);
802 
803    /* four clip planes per clip register: */
804    shift = clip_reg_index * 4;
805    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
806 
807    return writemask;
808 }
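
/*
 * Worked example (added): with key.clip_plane_enable = 0x0f (planes 0-3
 * enabled) and clip_reg_index = 1, shift is 4 and (0x0f >> 4) & 0xf == 0,
 * so every write to the second clip-distance register is masked off; with
 * clip_reg_index = 0 the writemask passes through unchanged.
 */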
809 
810 
811 /**
812  * Translate gallium shader type into VGPU10 type.
813  */
814 static VGPU10_PROGRAM_TYPE
815 translate_shader_type(unsigned type)
816 {
817    switch (type) {
818    case PIPE_SHADER_VERTEX:
819       return VGPU10_VERTEX_SHADER;
820    case PIPE_SHADER_GEOMETRY:
821       return VGPU10_GEOMETRY_SHADER;
822    case PIPE_SHADER_FRAGMENT:
823       return VGPU10_PIXEL_SHADER;
824    case PIPE_SHADER_TESS_CTRL:
825       return VGPU10_HULL_SHADER;
826    case PIPE_SHADER_TESS_EVAL:
827       return VGPU10_DOMAIN_SHADER;
828    case PIPE_SHADER_COMPUTE:
829       return VGPU10_COMPUTE_SHADER;
830    default:
831       assert(!"Unexpected shader type");
832       return VGPU10_VERTEX_SHADER;
833    }
834 }
835 
836 
837 /**
838  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
839  * Note: we only need to translate the opcodes for "simple" instructions,
840  * as seen below.  All other opcodes are handled/translated specially.
841  */
842 static VGPU10_OPCODE_TYPE
843 translate_opcode(enum tgsi_opcode opcode)
844 {
845    switch (opcode) {
846    case TGSI_OPCODE_MOV:
847       return VGPU10_OPCODE_MOV;
848    case TGSI_OPCODE_MUL:
849       return VGPU10_OPCODE_MUL;
850    case TGSI_OPCODE_ADD:
851       return VGPU10_OPCODE_ADD;
852    case TGSI_OPCODE_DP3:
853       return VGPU10_OPCODE_DP3;
854    case TGSI_OPCODE_DP4:
855       return VGPU10_OPCODE_DP4;
856    case TGSI_OPCODE_MIN:
857       return VGPU10_OPCODE_MIN;
858    case TGSI_OPCODE_MAX:
859       return VGPU10_OPCODE_MAX;
860    case TGSI_OPCODE_MAD:
861       return VGPU10_OPCODE_MAD;
862    case TGSI_OPCODE_SQRT:
863       return VGPU10_OPCODE_SQRT;
864    case TGSI_OPCODE_FRC:
865       return VGPU10_OPCODE_FRC;
866    case TGSI_OPCODE_FLR:
867       return VGPU10_OPCODE_ROUND_NI;
868    case TGSI_OPCODE_FSEQ:
869       return VGPU10_OPCODE_EQ;
870    case TGSI_OPCODE_FSGE:
871       return VGPU10_OPCODE_GE;
872    case TGSI_OPCODE_FSNE:
873       return VGPU10_OPCODE_NE;
874    case TGSI_OPCODE_DDX:
875       return VGPU10_OPCODE_DERIV_RTX;
876    case TGSI_OPCODE_DDY:
877       return VGPU10_OPCODE_DERIV_RTY;
878    case TGSI_OPCODE_RET:
879       return VGPU10_OPCODE_RET;
880    case TGSI_OPCODE_DIV:
881       return VGPU10_OPCODE_DIV;
882    case TGSI_OPCODE_IDIV:
883       return VGPU10_OPCODE_VMWARE;
884    case TGSI_OPCODE_DP2:
885       return VGPU10_OPCODE_DP2;
886    case TGSI_OPCODE_BRK:
887       return VGPU10_OPCODE_BREAK;
888    case TGSI_OPCODE_IF:
889       return VGPU10_OPCODE_IF;
890    case TGSI_OPCODE_ELSE:
891       return VGPU10_OPCODE_ELSE;
892    case TGSI_OPCODE_ENDIF:
893       return VGPU10_OPCODE_ENDIF;
894    case TGSI_OPCODE_CEIL:
895       return VGPU10_OPCODE_ROUND_PI;
896    case TGSI_OPCODE_I2F:
897       return VGPU10_OPCODE_ITOF;
898    case TGSI_OPCODE_NOT:
899       return VGPU10_OPCODE_NOT;
900    case TGSI_OPCODE_TRUNC:
901       return VGPU10_OPCODE_ROUND_Z;
902    case TGSI_OPCODE_SHL:
903       return VGPU10_OPCODE_ISHL;
904    case TGSI_OPCODE_AND:
905       return VGPU10_OPCODE_AND;
906    case TGSI_OPCODE_OR:
907       return VGPU10_OPCODE_OR;
908    case TGSI_OPCODE_XOR:
909       return VGPU10_OPCODE_XOR;
910    case TGSI_OPCODE_CONT:
911       return VGPU10_OPCODE_CONTINUE;
912    case TGSI_OPCODE_EMIT:
913       return VGPU10_OPCODE_EMIT;
914    case TGSI_OPCODE_ENDPRIM:
915       return VGPU10_OPCODE_CUT;
916    case TGSI_OPCODE_BGNLOOP:
917       return VGPU10_OPCODE_LOOP;
918    case TGSI_OPCODE_ENDLOOP:
919       return VGPU10_OPCODE_ENDLOOP;
920    case TGSI_OPCODE_ENDSUB:
921       return VGPU10_OPCODE_RET;
922    case TGSI_OPCODE_NOP:
923       return VGPU10_OPCODE_NOP;
924    case TGSI_OPCODE_END:
925       return VGPU10_OPCODE_RET;
926    case TGSI_OPCODE_F2I:
927       return VGPU10_OPCODE_FTOI;
928    case TGSI_OPCODE_IMAX:
929       return VGPU10_OPCODE_IMAX;
930    case TGSI_OPCODE_IMIN:
931       return VGPU10_OPCODE_IMIN;
932    case TGSI_OPCODE_UDIV:
933    case TGSI_OPCODE_UMOD:
934    case TGSI_OPCODE_MOD:
935       return VGPU10_OPCODE_UDIV;
936    case TGSI_OPCODE_IMUL_HI:
937       return VGPU10_OPCODE_IMUL;
938    case TGSI_OPCODE_INEG:
939       return VGPU10_OPCODE_INEG;
940    case TGSI_OPCODE_ISHR:
941       return VGPU10_OPCODE_ISHR;
942    case TGSI_OPCODE_ISGE:
943       return VGPU10_OPCODE_IGE;
944    case TGSI_OPCODE_ISLT:
945       return VGPU10_OPCODE_ILT;
946    case TGSI_OPCODE_F2U:
947       return VGPU10_OPCODE_FTOU;
948    case TGSI_OPCODE_UADD:
949       return VGPU10_OPCODE_IADD;
950    case TGSI_OPCODE_U2F:
951       return VGPU10_OPCODE_UTOF;
952    case TGSI_OPCODE_UCMP:
953       return VGPU10_OPCODE_MOVC;
954    case TGSI_OPCODE_UMAD:
955       return VGPU10_OPCODE_UMAD;
956    case TGSI_OPCODE_UMAX:
957       return VGPU10_OPCODE_UMAX;
958    case TGSI_OPCODE_UMIN:
959       return VGPU10_OPCODE_UMIN;
960    case TGSI_OPCODE_UMUL:
961    case TGSI_OPCODE_UMUL_HI:
962       return VGPU10_OPCODE_UMUL;
963    case TGSI_OPCODE_USEQ:
964       return VGPU10_OPCODE_IEQ;
965    case TGSI_OPCODE_USGE:
966       return VGPU10_OPCODE_UGE;
967    case TGSI_OPCODE_USHR:
968       return VGPU10_OPCODE_USHR;
969    case TGSI_OPCODE_USLT:
970       return VGPU10_OPCODE_ULT;
971    case TGSI_OPCODE_USNE:
972       return VGPU10_OPCODE_INE;
973    case TGSI_OPCODE_SWITCH:
974       return VGPU10_OPCODE_SWITCH;
975    case TGSI_OPCODE_CASE:
976       return VGPU10_OPCODE_CASE;
977    case TGSI_OPCODE_DEFAULT:
978       return VGPU10_OPCODE_DEFAULT;
979    case TGSI_OPCODE_ENDSWITCH:
980       return VGPU10_OPCODE_ENDSWITCH;
981    case TGSI_OPCODE_FSLT:
982       return VGPU10_OPCODE_LT;
983    case TGSI_OPCODE_ROUND:
984       return VGPU10_OPCODE_ROUND_NE;
985    /* Begin SM5 opcodes */
986    case TGSI_OPCODE_F2D:
987       return VGPU10_OPCODE_FTOD;
988    case TGSI_OPCODE_D2F:
989       return VGPU10_OPCODE_DTOF;
990    case TGSI_OPCODE_DMUL:
991       return VGPU10_OPCODE_DMUL;
992    case TGSI_OPCODE_DADD:
993       return VGPU10_OPCODE_DADD;
994    case TGSI_OPCODE_DMAX:
995       return VGPU10_OPCODE_DMAX;
996    case TGSI_OPCODE_DMIN:
997       return VGPU10_OPCODE_DMIN;
998    case TGSI_OPCODE_DSEQ:
999       return VGPU10_OPCODE_DEQ;
1000    case TGSI_OPCODE_DSGE:
1001       return VGPU10_OPCODE_DGE;
1002    case TGSI_OPCODE_DSLT:
1003       return VGPU10_OPCODE_DLT;
1004    case TGSI_OPCODE_DSNE:
1005       return VGPU10_OPCODE_DNE;
1006    case TGSI_OPCODE_IBFE:
1007       return VGPU10_OPCODE_IBFE;
1008    case TGSI_OPCODE_UBFE:
1009       return VGPU10_OPCODE_UBFE;
1010    case TGSI_OPCODE_BFI:
1011       return VGPU10_OPCODE_BFI;
1012    case TGSI_OPCODE_BREV:
1013       return VGPU10_OPCODE_BFREV;
1014    case TGSI_OPCODE_POPC:
1015       return VGPU10_OPCODE_COUNTBITS;
1016    case TGSI_OPCODE_LSB:
1017       return VGPU10_OPCODE_FIRSTBIT_LO;
1018    case TGSI_OPCODE_IMSB:
1019       return VGPU10_OPCODE_FIRSTBIT_SHI;
1020    case TGSI_OPCODE_UMSB:
1021       return VGPU10_OPCODE_FIRSTBIT_HI;
1022    case TGSI_OPCODE_INTERP_CENTROID:
1023       return VGPU10_OPCODE_EVAL_CENTROID;
1024    case TGSI_OPCODE_INTERP_SAMPLE:
1025       return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
1026    case TGSI_OPCODE_BARRIER:
1027       return VGPU10_OPCODE_SYNC;
1028    case TGSI_OPCODE_DFMA:
1029       return VGPU10_OPCODE_DFMA;
1030    case TGSI_OPCODE_FMA:
1031       return VGPU10_OPCODE_MAD;
1032 
1033    /* DX11.1 Opcodes */
1034    case TGSI_OPCODE_DDIV:
1035       return VGPU10_OPCODE_DDIV;
1036    case TGSI_OPCODE_DRCP:
1037       return VGPU10_OPCODE_DRCP;
1038    case TGSI_OPCODE_D2I:
1039       return VGPU10_OPCODE_DTOI;
1040    case TGSI_OPCODE_D2U:
1041       return VGPU10_OPCODE_DTOU;
1042    case TGSI_OPCODE_I2D:
1043       return VGPU10_OPCODE_ITOD;
1044    case TGSI_OPCODE_U2D:
1045       return VGPU10_OPCODE_UTOD;
1046 
1047    case TGSI_OPCODE_SAMPLE_POS:
1048       /* Note: we never actually get this opcode because there's no GLSL
1049        * function to query multisample resource sample positions.  There's
1050        * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
1051        * position of the current sample in the render target.
1052        */
1053       FALLTHROUGH;
1054    case TGSI_OPCODE_SAMPLE_INFO:
1055       /* NOTE: we never actually get this opcode because the GLSL compiler
1056        * implements the gl_NumSamples variable with a simple constant in the
1057        * constant buffer.
1058        */
1059       FALLTHROUGH;
1060    default:
1061       assert(!"Unexpected TGSI opcode in translate_opcode()");
1062       return VGPU10_OPCODE_NOP;
1063    }
1064 }
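
/*
 * Note (added): several TGSI opcodes deliberately share one VGPU10 opcode
 * above.  TGSI_OPCODE_UDIV/UMOD/MOD all map to VGPU10_OPCODE_UDIV, which,
 * like D3D10's udiv, writes both a quotient and a remainder destination, so
 * the emitter picks whichever destination the TGSI opcode needs.
 * TGSI_OPCODE_IDIV maps to the VMware-specific VGPU10_OPCODE_VMWARE
 * extension opcode rather than a core SM4 instruction.
 */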
1065 
1066 
1067 /**
1068  * Translate a TGSI register file type into a VGPU10 operand type.
1069  * \param array  is the TGSI_FILE_TEMPORARY register an array?
1070  */
1071 static VGPU10_OPERAND_TYPE
1072 translate_register_file(enum tgsi_file_type file, bool array)
1073 {
1074    switch (file) {
1075    case TGSI_FILE_CONSTANT:
1076       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1077    case TGSI_FILE_INPUT:
1078       return VGPU10_OPERAND_TYPE_INPUT;
1079    case TGSI_FILE_OUTPUT:
1080       return VGPU10_OPERAND_TYPE_OUTPUT;
1081    case TGSI_FILE_TEMPORARY:
1082       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1083                    : VGPU10_OPERAND_TYPE_TEMP;
1084    case TGSI_FILE_IMMEDIATE:
1085       /* all immediates are 32-bit values at this time, so
1086        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible.
1087        */
1088       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1089    case TGSI_FILE_SAMPLER:
1090       return VGPU10_OPERAND_TYPE_SAMPLER;
1091    case TGSI_FILE_SYSTEM_VALUE:
1092       return VGPU10_OPERAND_TYPE_INPUT;
1093 
1094    /* XXX TODO more cases to finish */
1095 
1096    default:
1097       assert(!"Bad tgsi register file!");
1098       return VGPU10_OPERAND_TYPE_NULL;
1099    }
1100 }
1101 
1102 
1103 /**
1104  * Emit a null dst register
1105  */
1106 static void
1107 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1108 {
1109    VGPU10OperandToken0 operand;
1110 
1111    operand.value = 0;
1112    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1113    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1114 
1115    emit_dword(emit, operand.value);
1116 }
1117 
1118 
1119 /**
1120  * If the given register is a temporary, return the array ID.
1121  * Else return zero.
1122  */
1123 static unsigned
1124 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1125                   enum tgsi_file_type file, unsigned index)
1126 {
1127    if (file == TGSI_FILE_TEMPORARY) {
1128       return emit->temp_map[index].arrayId;
1129    }
1130    else {
1131       return 0;
1132    }
1133 }
1134 
1135 
1136 /**
1137  * If the given register is a temporary, convert the index from a TGSI
1138  * TEMPORARY index to a VGPU10 temp index.
1139  */
1140 static unsigned
1141 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1142                  enum tgsi_file_type file, unsigned index)
1143 {
1144    if (file == TGSI_FILE_TEMPORARY) {
1145       return emit->temp_map[index].index;
1146    }
1147    else {
1148       return index;
1149    }
1150 }
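
/*
 * Example (added, hypothetical numbers): if TGSI TEMP[7] was declared inside
 * a temp array, temp_map[7] might hold arrayId = 2, index = 3; then
 * get_temp_array_id() returns 2 and remap_temp_index() returns 3, and the
 * register is emitted as an indexable temp (x2[3]) rather than a plain r7.
 */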
1151 
1152 
1153 /**
1154  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1155  * Note: the operandType field must already be initialized.
1156  * \param file  the register file being accessed
1157  * \param indirect  using indirect addressing of the register file?
1158  * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1159  * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1160  */
1161 static VGPU10OperandToken0
1162 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1163                         VGPU10OperandToken0 operand0,
1164                         enum tgsi_file_type file,
1165                         bool indirect,
1166                         bool index2D, bool indirect2D)
1167 {
1168    VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1169    VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1170 
1171    /*
1172     * Compute index dimensions
1173     */
1174    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1175        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1176        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1177        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1178        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1179        operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1180       /* there's no swizzle for in-line immediates */
1181       indexDim = VGPU10_OPERAND_INDEX_0D;
1182       assert(operand0.selectionMode == 0);
1183    }
1184    else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1185       indexDim = VGPU10_OPERAND_INDEX_0D;
1186    }
1187    else {
1188       indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1189    }
1190 
1191    /*
1192     * Compute index representation(s) (immediate vs relative).
1193     */
1194    if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1195       index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1196          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1197 
1198       index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1199          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1200    }
1201    else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1202       index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1203          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1204 
1205       index1Rep = 0;
1206    }
1207    else {
1208       index0Rep = 0;
1209       index1Rep = 0;
1210    }
1211 
1212    operand0.indexDimension = indexDim;
1213    operand0.index0Representation = index0Rep;
1214    operand0.index1Representation = index1Rep;
1215 
1216    return operand0;
1217 }
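
/*
 * Example (added): a constant-buffer operand such as CONST[1][10] is 2-D:
 * the first index selects the buffer (1) and the second the element (10),
 * both normally VGPU10_OPERAND_INDEX_IMMEDIATE32.  When the element is
 * addressed through an address register (indirect == true) the second index
 * becomes VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; indirect2D does
 * the same for the buffer index.
 */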
1218 
1219 
1220 /**
1221  * Emit the operand for expressing an address register for indirect indexing.
1222  * Note that the address register is really just a temp register.
1223  * \param addr_reg_index  which address register to use
1224  */
1225 static void
1226 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1227                        unsigned addr_reg_index)
1228 {
1229    unsigned tmp_reg_index;
1230    VGPU10OperandToken0 operand0;
1231 
1232    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1233 
1234    tmp_reg_index = emit->address_reg_index[addr_reg_index];
1235 
1236    /* operand0 is a simple temporary register, selecting one component */
1237    operand0.value = 0;
1238    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1239    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1240    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1241    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1242    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1243    operand0.swizzleX = 0;
1244    operand0.swizzleY = 1;
1245    operand0.swizzleZ = 2;
1246    operand0.swizzleW = 3;
1247 
1248    emit_dword(emit, operand0.value);
1249    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1250 }
1251 
1252 
1253 /**
1254  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1255  * \param emit  the emitter context
1256  * \param reg  the TGSI dst register to translate
1257  */
1258 static void
1259 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1260                   const struct tgsi_full_dst_register *reg)
1261 {
1262    enum tgsi_file_type file = reg->Register.File;
1263    unsigned index = reg->Register.Index;
1264    const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1265    const unsigned sem_index = emit->info.output_semantic_index[index];
1266    unsigned writemask = reg->Register.WriteMask;
1267    const bool indirect = reg->Register.Indirect;
1268    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1269    bool index2d = reg->Register.Dimension || tempArrayId > 0;
1270    VGPU10OperandToken0 operand0;
1271 
1272    if (file == TGSI_FILE_TEMPORARY) {
1273       emit->temp_map[index].initialized = true;
1274    }
1275 
1276    if (file == TGSI_FILE_OUTPUT) {
1277       if (emit->unit == PIPE_SHADER_VERTEX ||
1278           emit->unit == PIPE_SHADER_GEOMETRY ||
1279           emit->unit == PIPE_SHADER_TESS_EVAL) {
1280          if (index == emit->vposition.out_index &&
1281              emit->vposition.tmp_index != INVALID_INDEX) {
1282             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
1283              * vertex position result in a temporary so that we can modify
1284              * it in the post_helper() code.
1285              */
1286             file = TGSI_FILE_TEMPORARY;
1287             index = emit->vposition.tmp_index;
1288          }
1289          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1290                   emit->clip_dist_tmp_index != INVALID_INDEX) {
1291             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1292              * We store the clip distance in a temporary first, then
1293              * we'll copy it to the shadow copy and to CLIPDIST with the
1294              * enabled planes mask in emit_clip_distance_instructions().
1295              */
1296             file = TGSI_FILE_TEMPORARY;
1297             index = emit->clip_dist_tmp_index + sem_index;
1298          }
1299          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1300                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
1301             /* replace the CLIPVERTEX output register with a temporary */
1302             assert(emit->clip_mode == CLIP_VERTEX);
1303             assert(sem_index == 0);
1304             file = TGSI_FILE_TEMPORARY;
1305             index = emit->clip_vertex_tmp_index;
1306          }
1307          else if (sem_name == TGSI_SEMANTIC_COLOR &&
1308                   emit->key.clamp_vertex_color) {
1309 
1310             /* set the saturate modifier of the instruction
1311              * to clamp the vertex color.
1312              */
1313             VGPU10OpcodeToken0 *token =
1314                (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1315             token->saturate = true;
1316          }
1317          else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1318                   emit->gs.viewport_index_out_index != INVALID_INDEX) {
1319             file = TGSI_FILE_TEMPORARY;
1320             index = emit->gs.viewport_index_tmp_index;
1321          }
1322       }
1323       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1324          if (sem_name == TGSI_SEMANTIC_POSITION) {
1325             /* Fragment depth output register */
1326             operand0.value = 0;
1327             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1328             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1329             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1330             emit_dword(emit, operand0.value);
1331             return;
1332          }
1333          else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1334             /* Fragment sample mask output */
1335             operand0.value = 0;
1336             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1337             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1338             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1339             emit_dword(emit, operand0.value);
1340             return;
1341          }
1342          else if (index == emit->fs.color_out_index[0] &&
1343              emit->fs.color_tmp_index != INVALID_INDEX) {
1344             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
1345              * fragment color result in a temporary so that we can read
1346              * it in the post_helper() code.
1347              */
1348             file = TGSI_FILE_TEMPORARY;
1349             index = emit->fs.color_tmp_index;
1350          }
1351          else {
1352             /* Typically, for fragment shaders, the output register index
1353              * matches the color semantic index.  But not when we write to
1354              * the fragment depth register.  In that case, OUT[0] will be
1355              * fragdepth and OUT[1] will be the 0th color output.  We need
1356              * to use the semantic index for color outputs.
1357              */
1358             assert(sem_name == TGSI_SEMANTIC_COLOR);
1359             index = emit->info.output_semantic_index[index];
1360 
1361             emit->num_output_writes++;
1362          }
1363       }
1364       else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1365          if (index == emit->tcs.inner.tgsi_index) {
1366             /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1367              * temporary for now so that it will be stored into the appropriate
1368              * registers in post_helper() in the patch constant phase.
1369              */
1370             if (emit->tcs.control_point_phase) {
1371                /* Discard writing into tessfactor in control point phase */
1372                emit->discard_instruction =  true;
1373             }
1374             else {
1375                file = TGSI_FILE_TEMPORARY;
1376                index = emit->tcs.inner.temp_index;
1377             }
1378          }
1379          else if (index == emit->tcs.outer.tgsi_index) {
1380             /* replace OUTPUT[TESSLEVEL] with temp. We store it in a
1381              * temporary for now so that it will be stored into the appropriate
1382              * registers in post_helper().
1383              */
1384             if (emit->tcs.control_point_phase) {
1385                /* Discard writing into tessfactor in control point phase */
1386                emit->discard_instruction =  true;
1387             }
1388             else {
1389                file = TGSI_FILE_TEMPORARY;
1390                index = emit->tcs.outer.temp_index;
1391             }
1392          }
1393          else if (index >= emit->tcs.patch_generic_out_index &&
1394                   index < (emit->tcs.patch_generic_out_index +
1395                           emit->tcs.patch_generic_out_count)) {
1396             if (emit->tcs.control_point_phase) {
1397                /* Discard writing into generic patch constant outputs in
1398                   control point phase */
1399                emit->discard_instruction =  true;
1400             }
1401             else {
1402                if (emit->reemit_instruction) {
1403                   /* Store results of reemitted instruction in temporary register. */
1404                   file = TGSI_FILE_TEMPORARY;
1405                   index = emit->tcs.patch_generic_tmp_index +
1406                           (index - emit->tcs.patch_generic_out_index);
1407                   /**
1408                    * Temporaries for patch constant data can be done
1409                    * as indexable temporaries.
1410                    */
1411                   tempArrayId = get_temp_array_id(emit, file, index);
1412                   index2d = tempArrayId > 0;
1413 
1414                   emit->reemit_instruction = false;
1415                }
1416                else {
1417                   /* If per-patch outputs are read in the shader, we
1418                    * reemit the instruction and store the results in
1419                    * temporaries in the patch constant phase. */
1420                   if (emit->info.reads_perpatch_outputs) {
1421                      emit->reemit_instruction = true;
1422                   }
1423                }
1424             }
1425          }
1426          else if (reg->Register.Dimension) {
1427             /* Only control point outputs are declared 2D in tgsi */
1428             if (emit->tcs.control_point_phase) {
1429                if (emit->reemit_instruction) {
1430                   /* Store results of reemitted instruction in temporary register. */
1431                   index2d = false;
1432                   file = TGSI_FILE_TEMPORARY;
1433                   index = emit->tcs.control_point_tmp_index +
1434                           (index - emit->tcs.control_point_out_index);
1435                   emit->reemit_instruction = false;
1436                }
1437                else {
1438                   /* The mapped control point outputs are 1-D */
1439                   index2d = false;
1440                   if (emit->info.reads_pervertex_outputs) {
1441                      /* If per-vertex outputs are read in the shader, we
1442                       * reemit the instruction and store the results in
1443                       * temporaries in the control point phase. */
1444                      emit->reemit_instruction = true;
1445                   }
1446                }
1447 
1448                if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1449                    emit->clip_dist_tmp_index != INVALID_INDEX) {
1450                   /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1451                    * We store the clip distance in a temporary first, then
1452                    * we'll copy it to the shadow copy and to CLIPDIST with the
1453                    * enabled planes mask in emit_clip_distance_instructions().
1454                    */
1455                   file = TGSI_FILE_TEMPORARY;
1456                   index = emit->clip_dist_tmp_index + sem_index;
1457                }
1458                else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1459                         emit->clip_vertex_tmp_index != INVALID_INDEX) {
1460                   /* replace the CLIPVERTEX output register with a temporary */
1461                   assert(emit->clip_mode == CLIP_VERTEX);
1462                   assert(sem_index == 0);
1463                   file = TGSI_FILE_TEMPORARY;
1464                   index = emit->clip_vertex_tmp_index;
1465                }
1466             }
1467             else {
1468                /* Discard writing into control point outputs in
1469                   patch constant phase */
1470                emit->discard_instruction = true;
1471             }
1472          }
1473       }
1474    }
1475 
1476    /* init operand tokens to all zero */
1477    operand0.value = 0;
1478 
1479    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1480 
1481    /* the operand has a writemask */
1482    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1483 
1484    /* Which of the four dest components to write to. Note that we can use a
1485     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1486     */
1487    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1488    operand0.mask = writemask;
1489 
1490    /* translate TGSI register file type to VGPU10 operand type */
1491    operand0.operandType = translate_register_file(file, tempArrayId > 0);
1492 
1493    check_register_index(emit, operand0.operandType, index);
1494 
1495    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1496                                       index2d, false);
1497 
1498    /* Emit tokens */
1499    emit_dword(emit, operand0.value);
1500    if (tempArrayId > 0) {
1501       emit_dword(emit, tempArrayId);
1502    }
1503 
1504    emit_dword(emit, remap_temp_index(emit, file, index));
1505 
1506    if (indirect) {
1507       emit_indirect_register(emit, reg->Indirect.Index);
1508    }
1509 }
1510 
1511 
1512 /**
1513  * Check if a temporary register needs to be initialized.  We only check
1514  * when the shader does not use indirect addressing for temporaries and
1515  * the temporary is not used inside a loop; in those two cases we cannot
1516  * determine whether the temporary has already been initialized.
1517  */
1518 static bool
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1519 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1520                              unsigned index)
1521 {
1522    if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1523        && emit->current_loop_depth == 0) {
1524       if (!emit->temp_map[index].initialized &&
1525           emit->temp_map[index].index < emit->num_shader_temps) {
1526          return true;
1527       }
1528    }
1529 
1530    return false;
1531 }
1532 
1533 
1534 /**
1535  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1536  * In quite a few cases, we do register substitution.  For example, if
1537  * the TGSI register is the front/back-face register, we replace that with
1538  * a temp register containing a value we computed earlier.
1539  */
1540 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1541 emit_src_register(struct svga_shader_emitter_v10 *emit,
1542                   const struct tgsi_full_src_register *reg)
1543 {
1544    enum tgsi_file_type file = reg->Register.File;
1545    unsigned index = reg->Register.Index;
1546    bool indirect = reg->Register.Indirect;
1547    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1548    bool index2d = (reg->Register.Dimension ||
1549                             tempArrayId > 0 ||
1550                             file == TGSI_FILE_CONSTANT);
1551    unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1552    bool indirect2d = reg->Dimension.Indirect;
1553    unsigned swizzleX = reg->Register.SwizzleX;
1554    unsigned swizzleY = reg->Register.SwizzleY;
1555    unsigned swizzleZ = reg->Register.SwizzleZ;
1556    unsigned swizzleW = reg->Register.SwizzleW;
1557    const bool absolute = reg->Register.Absolute;
1558    const bool negate = reg->Register.Negate;
1559    VGPU10OperandToken0 operand0;
1560    VGPU10OperandToken1 operand1;
1561 
1562    operand0.value = operand1.value = 0;
1563 
1564    if (emit->unit == PIPE_SHADER_FRAGMENT) {
1565       if (file == TGSI_FILE_INPUT) {
1566          if (index == emit->fs.face_input_index) {
1567             /* Replace INPUT[FACE] with TEMP[FACE] */
1568             file = TGSI_FILE_TEMPORARY;
1569             index = emit->fs.face_tmp_index;
1570          }
1571          else if (index == emit->fs.fragcoord_input_index) {
1572             /* Replace INPUT[POSITION] with TEMP[POSITION] */
1573             file = TGSI_FILE_TEMPORARY;
1574             index = emit->fs.fragcoord_tmp_index;
1575          }
1576          else if (index == emit->fs.layer_input_index) {
1577             /* Replace INPUT[LAYER] with zero.x */
1578             file = TGSI_FILE_IMMEDIATE;
1579             index = emit->fs.layer_imm_index;
1580             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1581          }
1582          else {
1583             /* We remap fragment shader inputs so that FS input indexes
1584              * match up with VS/GS output indexes.
1585              */
1586             index = emit->linkage.input_map[index];
1587          }
1588       }
1589       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1590          if (index == emit->fs.sample_pos_sys_index) {
1591             assert(emit->version >= 41);
1592             /* Current sample position is in a temp register */
1593             file = TGSI_FILE_TEMPORARY;
1594             index = emit->fs.sample_pos_tmp_index;
1595          }
1596          else if (index == emit->fs.sample_mask_in_sys_index) {
1597             /* Emitted as vCoverage0.x */
1598             /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1599              * elements where s is the maximum number of color samples supported
1600              * by the implementation.
1601              */
1602             operand0.value = 0;
1603             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1604             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1605             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1606             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1607             emit_dword(emit, operand0.value);
1608             return;
1609          }
1610          else {
1611             /* Map the TGSI system value to a VGPU10 input register */
1612             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1613             file = TGSI_FILE_INPUT;
1614             index = emit->system_value_indexes[index];
1615          }
1616       }
1617    }
1618    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1619       if (file == TGSI_FILE_INPUT) {
1620          if (index == emit->gs.prim_id_index) {
1621             operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1622             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1623          }
1624          index = emit->linkage.input_map[index];
1625       }
1626       else if (file == TGSI_FILE_SYSTEM_VALUE &&
1627                index == emit->gs.invocation_id_sys_index) {
1628          /* Emitted as vGSInstanceID0.x */
1629          operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1630          operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1631          index = 0;
1632       }
1633    }
1634    else if (emit->unit == PIPE_SHADER_VERTEX) {
1635       if (file == TGSI_FILE_INPUT) {
1636          /* if input is adjusted... */
1637          if ((emit->key.vs.adjust_attrib_w_1 |
1638               emit->key.vs.adjust_attrib_itof |
1639               emit->key.vs.adjust_attrib_utof |
1640               emit->key.vs.attrib_is_bgra |
1641               emit->key.vs.attrib_puint_to_snorm |
1642               emit->key.vs.attrib_puint_to_uscaled |
1643               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1644             file = TGSI_FILE_TEMPORARY;
1645             index = emit->vs.adjusted_input[index];
1646          }
1647       }
1648       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1649          if (index == emit->vs.vertex_id_sys_index &&
1650              emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1651             file = TGSI_FILE_TEMPORARY;
1652             index = emit->vs.vertex_id_tmp_index;
1653             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1654          }
1655          else {
1656             /* Map the TGSI system value to a VGPU10 input register */
1657             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1658             file = TGSI_FILE_INPUT;
1659             index = emit->system_value_indexes[index];
1660          }
1661       }
1662    }
1663    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1664 
1665       if (file == TGSI_FILE_SYSTEM_VALUE) {
1666          if (index == emit->tcs.vertices_per_patch_index) {
1667             /**
1668              * If the source register is the system value for
1669              * vertices_per_patch, replace it with the immediate.
1670              */
1671             file = TGSI_FILE_IMMEDIATE;
1672             index = emit->tcs.imm_index;
1673             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1674          }
1675          else if (index == emit->tcs.invocation_id_sys_index) {
1676             if (emit->tcs.control_point_phase) {
1677                /**
1678                 * Emitted as vOutputControlPointID.x
1679                 */
1680                operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1681                operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1682                operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1683                operand0.mask = 0;
1684                emit_dword(emit, operand0.value);
1685                return;
1686             }
1687             else {
1688                /* There is no control point ID input declaration in
1689                 * the patch constant phase of the hull shader.
1690                 * Since for now we are emitting all instructions in
1691                 * the patch constant phase, we are replacing the
1692                 * control point ID reference with the immediate 0.
1693                 */
1694                file = TGSI_FILE_IMMEDIATE;
1695                index = emit->tcs.imm_index;
1696                swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1697             }
1698          }
1699          else if (index == emit->tcs.prim_id_index) {
1700             /**
1701              * Emitted as vPrim.x
1702              */
1703             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1704             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1705             index = 0;
1706          }
1707       }
1708       else if (file == TGSI_FILE_INPUT) {
1709          index = emit->linkage.input_map[index];
1710          if (!emit->tcs.control_point_phase) {
1711             /* Emitted as vicp */
1712             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1713             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1714             assert(reg->Register.Dimension);
1715          }
1716       }
1717       else if (file == TGSI_FILE_OUTPUT) {
1718          if ((index >= emit->tcs.patch_generic_out_index &&
1719              index < (emit->tcs.patch_generic_out_index +
1720                       emit->tcs.patch_generic_out_count)) ||
1721              index == emit->tcs.inner.tgsi_index ||
1722              index == emit->tcs.outer.tgsi_index) {
1723             if (emit->tcs.control_point_phase) {
1724                emit->discard_instruction = true;
1725             }
1726             else {
1727                /* The device doesn't allow reading from outputs, so
1728                 * use the corresponding temporary register as the source */
1729                file = TGSI_FILE_TEMPORARY;
1730                if (index == emit->tcs.inner.tgsi_index) {
1731                   index = emit->tcs.inner.temp_index;
1732                }
1733                else if (index == emit->tcs.outer.tgsi_index) {
1734                   index = emit->tcs.outer.temp_index;
1735                }
1736                else {
1737                   index = emit->tcs.patch_generic_tmp_index +
1738                           (index - emit->tcs.patch_generic_out_index);
1739                }
1740 
1741                /**
1742                 * Temporaries for patch constant data may be declared
1743                 * as indexable temporaries.
1744                 */
1745                tempArrayId = get_temp_array_id(emit, file, index);
1746                index2d = tempArrayId > 0;
1747                index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1748             }
1749          }
1750          else if (index2d) {
1751             if (emit->tcs.control_point_phase) {
1752                /* The device doesn't allow reading from outputs, so
1753                 * use the corresponding temporary register as the source */
1754                file = TGSI_FILE_TEMPORARY;
1755                index2d = false;
1756                index = emit->tcs.control_point_tmp_index +
1757                        (index - emit->tcs.control_point_out_index);
1758             }
1759             else {
1760                emit->discard_instruction = true;
1761             }
1762          }
1763       }
1764    }
1765    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1766       if (file == TGSI_FILE_SYSTEM_VALUE) {
1767          if (index == emit->tes.tesscoord_sys_index) {
1768             /**
1769              * Emitted as vDomain
1770              */
1771             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1772             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1773             index = 0;
1774 
1775             /* Make sure the swizzles only select components that are
1776              * valid for the tessellator domain.
1777              */
1778             swizzleX = MIN2(swizzleX, emit->tes.swizzle_max);
1779             swizzleY = MIN2(swizzleY, emit->tes.swizzle_max);
1780             swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max);
1781             swizzleW = MIN2(swizzleW, emit->tes.swizzle_max);
1782          }
1783          else if (index == emit->tes.inner.tgsi_index) {
1784             file = TGSI_FILE_TEMPORARY;
1785             index = emit->tes.inner.temp_index;
1786          }
1787          else if (index == emit->tes.outer.tgsi_index) {
1788             file = TGSI_FILE_TEMPORARY;
1789             index = emit->tes.outer.temp_index;
1790          }
1791          else if (index == emit->tes.prim_id_index) {
1792             /**
1793              * Emitted as vPrim.x
1794              */
1795             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1796             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1797             index = 0;
1798          }
1799 
1800       }
1801       else if (file == TGSI_FILE_INPUT) {
1802          if (index2d) {
1803             /* 2D input is emitted as vcp (input control point). */
1804             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1805             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1806 
1807             /* index specifies the element index and is remapped
1808              * to align with the tcs output index.
1809              */
1810             index = emit->linkage.input_map[index];
1811 
1812             assert(index2 < emit->key.tes.vertices_per_patch);
1813          }
1814          else {
1815             if (index < emit->key.tes.tessfactor_index)
1816                /* index specifies the generic patch index.
1817                 * Remapped to match up with the tcs output index.
1818                 */
1819                index = emit->linkage.input_map[index];
1820 
1821             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1822             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1823          }
1824       }
1825    }
1826    else if (emit->unit == PIPE_SHADER_COMPUTE) {
1827       if (file == TGSI_FILE_SYSTEM_VALUE) {
1828          if (index == emit->cs.thread_id_index) {
1829             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1830             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
1831             index = 0;
1832          } else if (index == emit->cs.block_id_index) {
1833             operand0.value = 0;
1834             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1835             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
1836             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1837             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1838             operand0.swizzleX = swizzleX;
1839             operand0.swizzleY = swizzleY;
1840             operand0.swizzleZ = swizzleZ;
1841             operand0.swizzleW = swizzleW;
1842             emit_dword(emit, operand0.value);
1843             return;
1844          } else if (index == emit->cs.grid_size.tgsi_index) {
1845             file = TGSI_FILE_IMMEDIATE;
1846             index = emit->cs.grid_size.imm_index;
1847          }
1848       }
1849    }
1850 
1851    if (file == TGSI_FILE_ADDRESS) {
1852       index = emit->address_reg_index[index];
1853       file = TGSI_FILE_TEMPORARY;
1854    }
1855 
1856    if (file == TGSI_FILE_CONSTANT) {
1857       /**
1858        * If this constant buffer is to be bound as an SRV raw buffer,
1859        * then we have to load the constant into a temporary before it
1860        * can be used as a source in the instruction.
1861        * This is accomplished in two passes. The first pass identifies
1862        * whether any constbuf-to-rawbuf translation is needed.
1863        * If not, the instruction is emitted as usual.
1864        * If it is, we save the constant buffer reference info and,
1865        * instead of emitting the instruction right away, trigger a
1866        * second pass over this instruction. Before the second pass
1867        * starts parsing, it loads the referenced raw buffer elements
1868        * into temporaries. It then emits the instruction with the
1869        * constant buffer references replaced by the corresponding
1870        * temporaries.
1871        */
1872       if (emit->raw_bufs & (1 << index2)) {
1873          if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
1874             unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
1875 
1876             emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
1877 
1878             /* Save whether the element index uses indirect indexing */
1879             emit->raw_buf_tmp[tmpIdx].indirect = indirect;
1880 
1881             /* If it is an indirect index, save the temporary
1882              * address index; otherwise, save the immediate index.
1883              */
1884             if (indirect) {
1885                emit->raw_buf_tmp[tmpIdx].element_index =
1886                   emit->address_reg_index[reg->Indirect.Index];
1887                emit->raw_buf_tmp[tmpIdx].element_rel =
1888                   reg->Register.Index;
1889             }
1890             else {
1891                emit->raw_buf_tmp[tmpIdx].element_index = index;
1892                emit->raw_buf_tmp[tmpIdx].element_rel = 0;
1893             }
1894 
1895             emit->raw_buf_cur_tmp_index++;
1896             emit->reemit_rawbuf_instruction = REEMIT_TRUE;
1897             emit->discard_instruction = true;
1898             emit->reemit_tgsi_instruction = true;
1899          }
1900          else {
1901             /* In the reemitting process, replace the constant buffer
1902              * reference with the temporary.
1903              */
1904             file = TGSI_FILE_TEMPORARY;
1905             index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
1906             index2d = false;
1907             indirect = false;
1908             emit->raw_buf_cur_tmp_index++;
1909          }
1910       }
1911    }
1912 
1913    if (file == TGSI_FILE_TEMPORARY) {
1914       if (need_temp_reg_initialization(emit, index)) {
1915          emit->initialize_temp_index = index;
1916          emit->discard_instruction = true;
1917       }
1918    }
1919 
1920    if (operand0.value == 0) {
1921       /* if operand0 was not set above for a special case, do the general
1922        * case now.
1923        */
1924       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1925       operand0.operandType = translate_register_file(file, tempArrayId > 0);
1926    }
1927    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1928                                       index2d, indirect2d);
1929 
1930    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1931        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1932       /* there's no swizzle for in-line immediates */
1933       if (swizzleX == swizzleY &&
1934           swizzleX == swizzleZ &&
1935           swizzleX == swizzleW) {
1936          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1937       }
1938       else {
1939          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1940       }
1941 
1942       operand0.swizzleX = swizzleX;
1943       operand0.swizzleY = swizzleY;
1944       operand0.swizzleZ = swizzleZ;
1945       operand0.swizzleW = swizzleW;
1946 
1947       if (absolute || negate) {
1948          operand0.extended = 1;
1949          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1950          if (absolute && !negate)
1951             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1952          if (!absolute && negate)
1953             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1954          if (absolute && negate)
1955             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1956       }
1957    }
1958 
1959    check_register_index(emit, operand0.operandType, index);
1960 
1961    /* Emit the operand tokens */
1962    emit_dword(emit, operand0.value);
1963    if (operand0.extended)
1964       emit_dword(emit, operand1.value);
1965 
1966    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1967       /* Emit the four float/int in-line immediate values */
1968       unsigned *c;
1969       assert(index < ARRAY_SIZE(emit->immediates));
1970       assert(file == TGSI_FILE_IMMEDIATE);
1971       assert(swizzleX < 4);
1972       assert(swizzleY < 4);
1973       assert(swizzleZ < 4);
1974       assert(swizzleW < 4);
1975       c = (unsigned *) emit->immediates[index];
1976       emit_dword(emit, c[swizzleX]);
1977       emit_dword(emit, c[swizzleY]);
1978       emit_dword(emit, c[swizzleZ]);
1979       emit_dword(emit, c[swizzleW]);
1980    }
1981    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1982       /* Emit the register index(es) */
1983       if (index2d) {
1984          emit_dword(emit, index2);
1985 
1986          if (indirect2d) {
1987             emit_indirect_register(emit, reg->DimIndirect.Index);
1988          }
1989       }
1990 
1991       emit_dword(emit, remap_temp_index(emit, file, index));
1992 
1993       if (indirect) {
1994          assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
1995          emit_indirect_register(emit, reg->Indirect.Index);
1996       }
1997    }
1998 }
1999 
2000 
2001 /**
2002  * Emit a resource operand (for use with a SAMPLE instruction).
2003  */
2004 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)2005 emit_resource_register(struct svga_shader_emitter_v10 *emit,
2006                        unsigned resource_number)
2007 {
2008    VGPU10OperandToken0 operand0;
2009 
2010    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
2011 
2012    /* init */
2013    operand0.value = 0;
2014 
2015    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
2016    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2017    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2018    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2019    operand0.swizzleX = VGPU10_COMPONENT_X;
2020    operand0.swizzleY = VGPU10_COMPONENT_Y;
2021    operand0.swizzleZ = VGPU10_COMPONENT_Z;
2022    operand0.swizzleW = VGPU10_COMPONENT_W;
2023 
2024    emit_dword(emit, operand0.value);
2025    emit_dword(emit, resource_number);
2026 }
2027 
2028 
2029 /**
2030  * Emit a sampler operand (for use with a SAMPLE instruction).
2031  */
2032 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned unit)2033 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
2034                       unsigned unit)
2035 {
2036    VGPU10OperandToken0 operand0;
2037    unsigned sampler_number;
2038 
2039    sampler_number = emit->key.tex[unit].sampler_index;
2040 
2041    if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
2042       sampler_number++;
2043 
2044    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
2045 
2046    /* init */
2047    operand0.value = 0;
2048 
2049    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2050    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2051 
2052    emit_dword(emit, operand0.value);
2053    emit_dword(emit, sampler_number);
2054 }
2055 
2056 
2057 /**
2058  * Emit an operand which reads the IS_FRONT_FACING register.
2059  */
2060 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)2061 emit_face_register(struct svga_shader_emitter_v10 *emit)
2062 {
2063    VGPU10OperandToken0 operand0;
2064    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
2065 
2066    /* init */
2067    operand0.value = 0;
2068 
2069    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
2070    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2071    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
2072    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2073 
2074    operand0.swizzleX = VGPU10_COMPONENT_X;
2075    operand0.swizzleY = VGPU10_COMPONENT_X;
2076    operand0.swizzleZ = VGPU10_COMPONENT_X;
2077    operand0.swizzleW = VGPU10_COMPONENT_X;
2078 
2079    emit_dword(emit, operand0.value);
2080    emit_dword(emit, index);
2081 }
2082 
2083 
2084 /**
2085  * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
2086  * instruction.
2087  */
2088 static void
emit_rasterizer_register(struct svga_shader_emitter_v10 * emit)2089 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
2090 {
2091    VGPU10OperandToken0 operand0;
2092 
2093    /* init */
2094    operand0.value = 0;
2095 
2096    /* No register index for the rasterizer register (there's only one) */
2097    operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
2098    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2099    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2100    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2101    operand0.swizzleX = VGPU10_COMPONENT_X;
2102    operand0.swizzleY = VGPU10_COMPONENT_Y;
2103    operand0.swizzleZ = VGPU10_COMPONENT_Z;
2104    operand0.swizzleW = VGPU10_COMPONENT_W;
2105 
2106    emit_dword(emit, operand0.value);
2107 }
2108 
2109 
2110 /**
2111  * Emit tokens for the "stream" register used by the
2112  * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
2113  */
2114 static void
emit_stream_register(struct svga_shader_emitter_v10 * emit,unsigned index)2115 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
2116 {
2117    VGPU10OperandToken0 operand0;
2118 
2119    /* init */
2120    operand0.value = 0;
2121 
2122    /* The stream operand takes a 1-D register index (the stream number) */
2123    operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
2124    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2125    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2126 
2127    emit_dword(emit, operand0.value);
2128    emit_dword(emit, index);
2129 }
2130 
2131 
2132 /**
2133  * Emit the token for a VGPU10 opcode, with precise parameter.
2134  * \param saturate   clamp result to [0,1]?
2135  */
2136 static void
emit_opcode_precise(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,bool precise)2137 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
2138                     unsigned vgpu10_opcode, bool saturate, bool precise)
2139 {
2140    VGPU10OpcodeToken0 token0;
2141 
2142    token0.value = 0;  /* init all fields to zero */
2143    token0.opcodeType = vgpu10_opcode;
2144    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2145    token0.saturate = saturate;
2146 
2147    /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
2148     * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
2149     */
2150    token0.preciseValues = precise && emit->version >= 50;
2151 
2152    emit_dword(emit, token0.value);
2153 
2154    emit->uses_precise_qualifier |= token0.preciseValues;
2155 }
2156 
2157 
2158 /**
2159  * Emit the token for a VGPU10 opcode.
2160  * \param saturate   clamp result to [0,1]?
2161  */
2162 static void
emit_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate)2163 emit_opcode(struct svga_shader_emitter_v10 *emit,
2164             unsigned vgpu10_opcode, bool saturate)
2165 {
2166    emit_opcode_precise(emit, vgpu10_opcode, saturate, false);
2167 }
2168 
2169 
2170 /**
2171  * Emit the token for a VGPU10 resinfo instruction.
2172  * \param modifier   return type modifier, _uint or _rcpFloat.
2173  *                   TODO: We may want to remove this parameter if it will
2174  *                   only ever be used as _uint.
2175  */
2176 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)2177 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2178                     VGPU10_RESINFO_RETURN_TYPE modifier)
2179 {
2180    VGPU10OpcodeToken0 token0;
2181 
2182    token0.value = 0;  /* init all fields to zero */
2183    token0.opcodeType = VGPU10_OPCODE_RESINFO;
2184    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2185    token0.resinfoReturnType = modifier;
2186 
2187    emit_dword(emit, token0.value);
2188 }
2189 
2190 
2191 /**
2192  * Emit opcode tokens for a texture sample instruction.  Texture instructions
2193  * can be rather complicated (texel offsets, etc) so we have this specialized
2194  * function.
2195  */
2196 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,const int offsets[3])2197 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2198                    unsigned vgpu10_opcode, bool saturate,
2199                    const int offsets[3])
2200 {
2201    VGPU10OpcodeToken0 token0;
2202    VGPU10OpcodeToken1 token1;
2203 
2204    token0.value = 0;  /* init all fields to zero */
2205    token0.opcodeType = vgpu10_opcode;
2206    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2207    token0.saturate = saturate;
2208 
2209    if (offsets[0] || offsets[1] || offsets[2]) {
2210       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2211       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2212       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2213       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2214       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2215       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2216 
2217       token0.extended = 1;
2218       token1.value = 0;
2219       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2220       token1.offsetU = offsets[0];
2221       token1.offsetV = offsets[1];
2222       token1.offsetW = offsets[2];
2223    }
2224 
2225    emit_dword(emit, token0.value);
2226    if (token0.extended) {
2227       emit_dword(emit, token1.value);
2228    }
2229 }
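
/*
 * For reference, a minimal sketch of how a SAMPLE instruction is typically
 * assembled with the helpers in this file (the actual texture translation
 * code appears later; 'dst', 'coord', 'offsets' and 'unit' are illustrative
 * placeholders, and emit_dst_register() is assumed to be the destination
 * operand emitter defined earlier):
 *
 *    begin_emit_instruction(emit);                  // defined just below
 *    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, false, offsets);
 *    emit_dst_register(emit, &dst);                 // result register
 *    emit_src_register(emit, &coord);               // texture coordinates
 *    emit_resource_register(emit, unit);            // texture resource
 *    emit_sampler_register(emit, unit);             // sampler state
 *    end_emit_instruction(emit);
 *
 * If any entry of offsets[] is non-zero, emit_sample_opcode() adds the
 * extended opcode token carrying the texel offsets.
 */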
2230 
2231 
2232 /**
2233  * Emit a DISCARD opcode token.
2234  * If nonzero is set, we'll discard the fragment if the X component is not 0.
2235  * Otherwise, we'll discard the fragment if the X component is 0.
2236  */
2237 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,bool nonzero)2238 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, bool nonzero)
2239 {
2240    VGPU10OpcodeToken0 opcode0;
2241 
2242    opcode0.value = 0;
2243    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2244    if (nonzero)
2245       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2246 
2247    emit_dword(emit, opcode0.value);
2248 }
2249 
2250 
2251 /**
2252  * We need to call this before we begin emitting a VGPU10 instruction.
2253  */
2254 static void
begin_emit_instruction(struct svga_shader_emitter_v10 * emit)2255 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2256 {
2257    assert(emit->inst_start_token == 0);
2258    /* Save location of the instruction's VGPU10OpcodeToken0 token.
2259     * Note, we can't save a pointer because it would become invalid if
2260     * we have to realloc the output buffer.
2261     */
2262    emit->inst_start_token = emit_get_num_tokens(emit);
2263 }
2264 
2265 
2266 /**
2267  * We need to call this after we emit the last token of a VGPU10 instruction.
2268  * This function patches in the opcode token's instructionLength field.
2269  */
2270 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)2271 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2272 {
2273    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2274    unsigned inst_length;
2275 
2276    assert(emit->inst_start_token > 0);
2277 
2278    if (emit->discard_instruction) {
2279       /* Back up the emit->ptr to where this instruction started so
2280        * that we discard the current instruction.
2281        */
2282       emit->ptr = (char *) (tokens + emit->inst_start_token);
2283    }
2284    else {
2285       /* Compute instruction length and patch that into the start of
2286        * the instruction.
2287        */
2288       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2289 
2290       assert(inst_length > 0);
2291 
2292       tokens[emit->inst_start_token].instructionLength = inst_length;
2293    }
2294 
2295    emit->inst_start_token = 0; /* reset to zero for error checking */
2296    emit->discard_instruction = false;
2297 }
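
/*
 * A minimal sketch of the usual emit pattern (illustrative, not code taken
 * verbatim from this file): every VGPU10 instruction is bracketed by
 * begin_emit_instruction()/end_emit_instruction() so that the opcode token's
 * instructionLength field can be patched afterwards.  'dst', 'src0' and
 * 'src1' stand for registers built with the make_*_reg() helpers below:
 *
 *    begin_emit_instruction(emit);
 *    emit_opcode(emit, VGPU10_OPCODE_ADD, false);   // opcode token
 *    emit_dst_register(emit, &dst);                 // destination operand
 *    emit_src_register(emit, &src0);                // first source operand
 *    emit_src_register(emit, &src1);                // second source operand
 *    end_emit_instruction(emit);                    // patch instructionLength
 *
 * If any operand emitter sets emit->discard_instruction (e.g. a write that
 * must be deferred to another phase), end_emit_instruction() rewinds
 * emit->ptr so the partially emitted instruction is dropped.
 */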
2298 
2299 
2300 /**
2301  * Return index for a free temporary register.
2302  */
2303 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)2304 get_temp_index(struct svga_shader_emitter_v10 *emit)
2305 {
2306    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2307    return emit->num_shader_temps + emit->internal_temp_count++;
2308 }
2309 
2310 
2311 /**
2312  * Release the temporaries which were generated by get_temp_index().
2313  */
2314 static void
free_temp_indexes(struct svga_shader_emitter_v10 * emit)2315 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2316 {
2317    emit->internal_temp_count = 0;
2318 }
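
/*
 * Sketch of typical internal-temp usage when one TGSI instruction expands
 * into several VGPU10 instructions (names are illustrative only; the
 * make_dst_temp_reg()/make_src_temp_reg() helpers are defined below):
 *
 *    unsigned tmp = get_temp_index(emit);
 *    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
 *    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
 *    // ...emit instructions that write tmp_dst, then read tmp_src...
 *    free_temp_indexes(emit);   // release the internal temps for this instr
 */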
2319 
2320 
2321 /**
2322  * Create a tgsi_full_src_register.
2323  */
2324 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)2325 make_src_reg(enum tgsi_file_type file, unsigned index)
2326 {
2327    struct tgsi_full_src_register reg;
2328 
2329    memset(&reg, 0, sizeof(reg));
2330    reg.Register.File = file;
2331    reg.Register.Index = index;
2332    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2333    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2334    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2335    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2336    return reg;
2337 }
2338 
2339 
2340 /**
2341  * Create a tgsi_full_src_register with a swizzle such that all four
2342  * vector components have the same scalar value.
2343  */
2344 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2345 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2346 {
2347    struct tgsi_full_src_register reg;
2348 
2349    assert(component >= TGSI_SWIZZLE_X);
2350    assert(component <= TGSI_SWIZZLE_W);
2351 
2352    memset(&reg, 0, sizeof(reg));
2353    reg.Register.File = file;
2354    reg.Register.Index = index;
2355    reg.Register.SwizzleX =
2356    reg.Register.SwizzleY =
2357    reg.Register.SwizzleZ =
2358    reg.Register.SwizzleW = component;
2359    return reg;
2360 }
2361 
2362 
2363 /**
2364  * Create a tgsi_full_src_register for a temporary.
2365  */
2366 static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)2367 make_src_temp_reg(unsigned index)
2368 {
2369    return make_src_reg(TGSI_FILE_TEMPORARY, index);
2370 }
2371 
2372 
2373 /**
2374  * Create a tgsi_full_src_register for a constant.
2375  */
2376 static struct tgsi_full_src_register
make_src_const_reg(unsigned index)2377 make_src_const_reg(unsigned index)
2378 {
2379    return make_src_reg(TGSI_FILE_CONSTANT, index);
2380 }
2381 
2382 
2383 /**
2384  * Create a tgsi_full_src_register for an immediate constant.
2385  */
2386 static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)2387 make_src_immediate_reg(unsigned index)
2388 {
2389    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2390 }
2391 
2392 
2393 /**
2394  * Create a tgsi_full_dst_register.
2395  */
2396 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)2397 make_dst_reg(enum tgsi_file_type file, unsigned index)
2398 {
2399    struct tgsi_full_dst_register reg;
2400 
2401    memset(&reg, 0, sizeof(reg));
2402    reg.Register.File = file;
2403    reg.Register.Index = index;
2404    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2405    return reg;
2406 }
2407 
2408 
2409 /**
2410  * Create a tgsi_full_dst_register for a temporary.
2411  */
2412 static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)2413 make_dst_temp_reg(unsigned index)
2414 {
2415    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2416 }
2417 
2418 
2419 /**
2420  * Create a tgsi_full_dst_register for an output.
2421  */
2422 static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)2423 make_dst_output_reg(unsigned index)
2424 {
2425    return make_dst_reg(TGSI_FILE_OUTPUT, index);
2426 }
2427 
2428 
2429 /**
2430  * Create negated tgsi_full_src_register.
2431  */
2432 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2433 negate_src(const struct tgsi_full_src_register *reg)
2434 {
2435    struct tgsi_full_src_register neg = *reg;
2436    neg.Register.Negate = !reg->Register.Negate;
2437    return neg;
2438 }
2439 
2440 /**
2441  * Create absolute value of a tgsi_full_src_register.
2442  */
2443 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2444 absolute_src(const struct tgsi_full_src_register *reg)
2445 {
2446    struct tgsi_full_src_register absolute = *reg;
2447    absolute.Register.Absolute = 1;
2448    return absolute;
2449 }
2450 
2451 
2452 /** Return the named swizzle term from the src register */
2453 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2454 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2455 {
2456    switch (term) {
2457    case TGSI_SWIZZLE_X:
2458       return reg->Register.SwizzleX;
2459    case TGSI_SWIZZLE_Y:
2460       return reg->Register.SwizzleY;
2461    case TGSI_SWIZZLE_Z:
2462       return reg->Register.SwizzleZ;
2463    case TGSI_SWIZZLE_W:
2464       return reg->Register.SwizzleW;
2465    default:
2466       assert(!"Bad swizzle");
2467       return TGSI_SWIZZLE_X;
2468    }
2469 }
2470 
2471 
2472 /**
2473  * Create swizzled tgsi_full_src_register.
2474  */
2475 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2476 swizzle_src(const struct tgsi_full_src_register *reg,
2477             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2478             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2479 {
2480    struct tgsi_full_src_register swizzled = *reg;
2481    /* Note: we swizzle the current swizzle */
2482    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2483    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2484    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2485    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2486    return swizzled;
2487 }
2488 
2489 
2490 /**
2491  * Create swizzled tgsi_full_src_register where all the swizzle
2492  * terms are the same.
2493  */
2494 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2495 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2496 {
2497    struct tgsi_full_src_register swizzled = *reg;
2498    /* Note: we swizzle the current swizzle */
2499    swizzled.Register.SwizzleX =
2500    swizzled.Register.SwizzleY =
2501    swizzled.Register.SwizzleZ =
2502    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2503    return swizzled;
2504 }
2505 
2506 
2507 /**
2508  * Create new tgsi_full_dst_register with writemask.
2509  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2510  */
2511 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2512 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2513 {
2514    struct tgsi_full_dst_register masked = *reg;
2515    masked.Register.WriteMask = mask;
2516    return masked;
2517 }
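
/*
 * The helpers above compose.  For example, to build operands that would
 * compute "dst.x = -|src|.w" (purely illustrative; 'src' and 'dst' are
 * placeholders):
 *
 *    struct tgsi_full_src_register abs_src = absolute_src(&src);
 *    struct tgsi_full_src_register abs_w   = scalar_src(&abs_src, TGSI_SWIZZLE_W);
 *    struct tgsi_full_src_register neg_w   = negate_src(&abs_w);
 *    struct tgsi_full_dst_register dst_x   = writemask_dst(&dst, TGSI_WRITEMASK_X);
 *
 * neg_w and dst_x can then be passed to the src/dst register emitters above.
 */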
2518 
2519 
2520 /**
2521  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2522  */
2523 static bool
same_swizzle_terms(const struct tgsi_full_src_register * reg)2524 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2525 {
2526    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2527            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2528            reg->Register.SwizzleZ == reg->Register.SwizzleW);
2529 }
2530 
2531 
2532 /**
2533  * Search the vector for the value 'x' and return its position.
2534  */
2535 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2536 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2537                  union tgsi_immediate_data x)
2538 {
2539    unsigned i;
2540    for (i = 0; i < 4; i++) {
2541       if (vec[i].Int == x.Int)
2542          return i;
2543    }
2544    return -1;
2545 }
2546 
2547 
2548 /**
2549  * Helper used by make_immediate_reg(), make_immediate_reg_4().
2550  */
2551 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2552 find_immediate(struct svga_shader_emitter_v10 *emit,
2553                union tgsi_immediate_data x, unsigned startIndex)
2554 {
2555    const unsigned endIndex = emit->num_immediates;
2556    unsigned i;
2557 
2558    assert(emit->num_immediates_emitted > 0);
2559 
2560    /* Search the components of each declared immediate for the value x */
2561    for (i = startIndex; i < endIndex; i++) {
2562       if (x.Int == emit->immediates[i][0].Int ||
2563           x.Int == emit->immediates[i][1].Int ||
2564           x.Int == emit->immediates[i][2].Int ||
2565           x.Int == emit->immediates[i][3].Int) {
2566          return i;
2567       }
2568    }
2569    /* immediate not declared yet */
2570    return -1;
2571 }
2572 
2573 
2574 /**
2575  * As above, but search for a double[2] pair.
2576  */
2577 static int
find_immediate_dbl(struct svga_shader_emitter_v10 * emit,double x,double y)2578 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2579                    double x, double y)
2580 {
2581    const unsigned endIndex = emit->num_immediates;
2582    unsigned i;
2583 
2584    assert(emit->num_immediates_emitted > 0);
2585 
2586    /* Search the double immediates for the pair (x, y) */
2587    for (i = 0; i < endIndex; i++) {
2588       if (x == emit->immediates_dbl[i][0] &&
2589           y == emit->immediates_dbl[i][1]) {
2590          return i;
2591       }
2592    }
2593    /* Should never try to use an immediate value that wasn't pre-declared */
2594    assert(!"find_immediate_dbl() failed!");
2595    return -1;
2596 }
2597 
2598 
2599 
2600 /**
2601  * Return a tgsi_full_src_register for an immediate/literal
2602  * union tgsi_immediate_data[4] value.
2603  * Note: the values must have been previously declared/allocated in
2604  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2605  * vec4 immediate.
2606  */
2607 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2608 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2609                      const union tgsi_immediate_data imm[4])
2610 {
2611    struct tgsi_full_src_register reg;
2612    unsigned i;
2613 
2614    for (i = 0; i < emit->num_common_immediates; i++) {
2615       /* search for first component value */
2616       int immpos = find_immediate(emit, imm[0], i);
2617       int x, y, z, w;
2618 
2619       assert(immpos >= 0);
2620 
2621       /* find remaining components within the immediate vector */
2622       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2623       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2624       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2625       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2626 
2627       if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
2628          /* found them all */
2629          memset(&reg, 0, sizeof(reg));
2630          reg.Register.File = TGSI_FILE_IMMEDIATE;
2631          reg.Register.Index = immpos;
2632          reg.Register.SwizzleX = x;
2633          reg.Register.SwizzleY = y;
2634          reg.Register.SwizzleZ = z;
2635          reg.Register.SwizzleW = w;
2636          return reg;
2637       }
2638       /* else, keep searching */
2639    }
2640 
2641    assert(!"Failed to find immediate register!");
2642 
2643    /* Just return IMM[0].xxxx */
2644    memset(&reg, 0, sizeof(reg));
2645    reg.Register.File = TGSI_FILE_IMMEDIATE;
2646    return reg;
2647 }
2648 
2649 
2650 /**
2651  * Return a tgsi_full_src_register for an immediate/literal
2652  * union tgsi_immediate_data value of the form {value, value, value, value}.
2653  * \sa make_immediate_reg_4() regarding allowed values.
2654  */
2655 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2656 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2657                    union tgsi_immediate_data value)
2658 {
2659    struct tgsi_full_src_register reg;
2660    int immpos = find_immediate(emit, value, 0);
2661 
2662    assert(immpos >= 0);
2663 
2664    memset(&reg, 0, sizeof(reg));
2665    reg.Register.File = TGSI_FILE_IMMEDIATE;
2666    reg.Register.Index = immpos;
2667    reg.Register.SwizzleX =
2668    reg.Register.SwizzleY =
2669    reg.Register.SwizzleZ =
2670    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2671 
2672    return reg;
2673 }
2674 
2675 
2676 /**
2677  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2678  * \sa make_immediate_reg_4() regarding allowed values.
2679  */
2680 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2681 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2682                           float x, float y, float z, float w)
2683 {
2684    union tgsi_immediate_data imm[4];
2685    imm[0].Float = x;
2686    imm[1].Float = y;
2687    imm[2].Float = z;
2688    imm[3].Float = w;
2689    return make_immediate_reg_4(emit, imm);
2690 }
2691 
2692 
2693 /**
2694  * Return a tgsi_full_src_register for an immediate/literal float value
2695  * of the form {value, value, value, value}.
2696  * \sa make_immediate_reg_4() regarding allowed values.
2697  */
2698 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)2699 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2700 {
2701    union tgsi_immediate_data imm;
2702    imm.Float = value;
2703    return make_immediate_reg(emit, imm);
2704 }
2705 
2706 
2707 /**
2708  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2709  */
2710 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2711 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2712                         int x, int y, int z, int w)
2713 {
2714    union tgsi_immediate_data imm[4];
2715    imm[0].Int = x;
2716    imm[1].Int = y;
2717    imm[2].Int = z;
2718    imm[3].Int = w;
2719    return make_immediate_reg_4(emit, imm);
2720 }
2721 
2722 
2723 /**
2724  * Return a tgsi_full_src_register for an immediate/literal int value
2725  * of the form {value, value, value, value}.
2726  * \sa make_immediate_reg_4() regarding allowed values.
2727  */
2728 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)2729 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2730 {
2731    union tgsi_immediate_data imm;
2732    imm.Int = value;
2733    return make_immediate_reg(emit, imm);
2734 }
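
/*
 * Usage note (illustrative): the make_immediate_reg_*() functions only look
 * up values that were allocated earlier, during the pre-helper pass, with
 * the alloc_immediate_*() functions defined below.  For example:
 *
 *    // while emitting pre-helpers / declarations:
 *    alloc_immediate_float4(emit, 0.0f, 0.5f, 1.0f, 2.0f);
 *
 *    // later, while translating an instruction:
 *    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
 *    // 'one' refers to that vec4 immediate with a replicate swizzle
 *    // selecting the component holding 1.0f.
 *
 * Requesting a value that was never allocated trips the assertions in
 * find_immediate()/make_immediate_reg_4().
 */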
2735 
2736 
2737 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2738 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2739 {
2740    struct tgsi_full_src_register reg;
2741    int immpos = find_immediate_dbl(emit, value, value);
2742 
2743    assert(immpos >= 0);
2744 
2745    memset(&reg, 0, sizeof(reg));
2746    reg.Register.File = TGSI_FILE_IMMEDIATE;
2747    reg.Register.Index = immpos;
2748    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2749    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2750    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2751    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2752 
2753    return reg;
2754 }
2755 
2756 
2757 /**
2758  * Allocate space for a union tgsi_immediate_data[4] immediate.
2759  * \return  the index/position of the immediate.
2760  */
2761 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2762 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2763                   const union tgsi_immediate_data imm[4])
2764 {
2765    unsigned n = emit->num_immediates++;
2766    assert(n < ARRAY_SIZE(emit->immediates));
2767    emit->immediates[n][0] = imm[0];
2768    emit->immediates[n][1] = imm[1];
2769    emit->immediates[n][2] = imm[2];
2770    emit->immediates[n][3] = imm[3];
2771    return n;
2772 }
2773 
2774 
2775 /**
2776  * Allocate space for a float[4] immediate.
2777  * \return  the index/position of the immediate.
2778  */
2779 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2780 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2781                        float x, float y, float z, float w)
2782 {
2783    union tgsi_immediate_data imm[4];
2784    imm[0].Float = x;
2785    imm[1].Float = y;
2786    imm[2].Float = z;
2787    imm[3].Float = w;
2788    return alloc_immediate_4(emit, imm);
2789 }
2790 
2791 
2792 /**
2793  * Allocate space for an int[4] immediate.
2794  * \return  the index/position of the immediate.
2795  */
2796 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2797 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2798                        int x, int y, int z, int w)
2799 {
2800    union tgsi_immediate_data imm[4];
2801    imm[0].Int = x;
2802    imm[1].Int = y;
2803    imm[2].Int = z;
2804    imm[3].Int = w;
2805    return alloc_immediate_4(emit, imm);
2806 }
2807 
2808 
2809 /**
2810  * Add a new immediate after the immediate block has been declared.
2811  * Any new immediates will be appended to the immediate block after the
2812  * shader has been parsed.
2813  * \return  the index/position of the immediate.
2814  */
2815 static unsigned
add_immediate_int(struct svga_shader_emitter_v10 * emit,int x)2816 add_immediate_int(struct svga_shader_emitter_v10 *emit, int x)
2817 {
2818    union tgsi_immediate_data imm[4];
2819    imm[0].Int = x;
2820    imm[1].Int = x+1;
2821    imm[2].Int = x+2;
2822    imm[3].Int = x+3;
2823 
2824    unsigned immpos = alloc_immediate_4(emit, imm);
2825    emit->num_new_immediates++;
2826 
2827    return immpos;
2828 }
2829 
2830 
2831 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2832 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2833                         double x, double y)
2834 {
2835    unsigned n = emit->num_immediates++;
2836    assert(!emit->num_immediates_emitted);
2837    assert(n < ARRAY_SIZE(emit->immediates));
2838    emit->immediates_dbl[n][0] = x;
2839    emit->immediates_dbl[n][1] = y;
2840    return n;
2841 
2842 }
2843 
2844 
2845 /**
2846  * Allocate a shader input to store a system value.
2847  */
2848 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2849 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2850 {
2851    const unsigned n = emit->linkage.input_map_max + 1 + index;
2852    assert(index < ARRAY_SIZE(emit->system_value_indexes));
2853    emit->system_value_indexes[index] = n;
2854    return n;
2855 }
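
/*
 * Worked example: if the highest linked input register index
 * (emit->linkage.input_map_max) is 7, then system value 0 is assigned
 * input register 7 + 1 + 0 = 8, system value 1 gets register 9, and so on.
 */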
2856 
2857 
2858 /**
2859  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2860  */
2861 static bool
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2862 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2863                       const struct tgsi_full_immediate *imm)
2864 {
2865    /* We don't actually emit any code here.  We just save the
2866     * immediate values and emit them later.
2867     */
2868    alloc_immediate_4(emit, imm->u);
2869    return true;
2870 }
2871 
2872 
2873 /**
2874  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2875  * containing all the immediate values previously allocated
2876  * with alloc_immediate_4().
2877  */
2878 static bool
2879 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2880 {
2881    VGPU10OpcodeToken0 token;
2882 
2883    assert(!emit->num_immediates_emitted);
2884 
2885    token.value = 0;
2886    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2887    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2888 
2889    emit->immediates_block_start_token =
2890       (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2891 
2892    /* Note: no begin/end_emit_instruction() calls */
2893    emit_dword(emit, token.value);
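   /* The next dword is the length of the customdata block in dwords:
    * two header dwords (opcode + length) plus four dwords per immediate.
    */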
2894    emit_dword(emit, 2 + 4 * emit->num_immediates);
2895    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2896 
2897    emit->num_immediates_emitted = emit->num_immediates;
2898 
2899    emit->immediates_block_next_token =
2900       (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2901 
2902    return true;
2903 }
2904 
2905 
2906 /**
2907  * Reemit the immediate constant buffer block to include the new
2908  * immediates that are allocated after the block is declared. Those
2909  * immediates are used as constant indices to constant buffers.
2910  */
2911 static bool
2912 reemit_immediates_block(struct svga_shader_emitter_v10 *emit)
2913 {
2914    unsigned num_tokens = emit_get_num_tokens(emit);
2915    unsigned num_new_immediates = emit->num_new_immediates;
2916 
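   /* Plan: grow the token buffer, slide everything after the immediates
    * block forward by 4 dwords per new immediate, patch the block's length
    * dword, then copy the new immediates into the gap.
    */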
2917    /* Reserve room for the new immediates */
2918    if (!reserve(emit, 4 * num_new_immediates))
2919       return false;
2920 
2921    /* Move the tokens after the immediates block to make room for the
2922     * new immediates.
2923     */
2924    VGPU10ProgramToken *tokens = (VGPU10ProgramToken *)emit->buf;
2925    char *next = (char *) (tokens + emit->immediates_block_next_token);
2926    char *new_next = (char *) (tokens + emit->immediates_block_next_token +
2927                                  num_new_immediates * 4);
2928 
2929    char *end = emit->ptr;
2930    unsigned len = end - next;
2931    memmove(new_next, next, len);
2932 
2933    /* Append the new immediates to the end of the immediates block */
2934    char *start = (char *) (tokens + emit->immediates_block_start_token+1);
2935    unsigned immediates_block_size = *(uint32 *)start;
2936 
2937    char *new_immediates = (char *)&emit->immediates[emit->num_immediates_emitted][0];
2938    *(uint32 *)start = immediates_block_size + 4 * num_new_immediates;
2939    memcpy(next, new_immediates, 4 * num_new_immediates * sizeof(uint32));
2940 
2941    emit->ptr = (char *) (tokens + num_tokens + 4 * num_new_immediates);
2942 
2943    return true;
2944 }
2945 
2946 
2947 
2948 /**
2949  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2950  * interpolation mode.
2951  * \return a VGPU10_INTERPOLATION_x value
2952  */
2953 static unsigned
2954 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2955                         enum tgsi_interpolate_mode interp,
2956                         enum tgsi_interpolate_loc interpolate_loc)
2957 {
2958    if (interp == TGSI_INTERPOLATE_COLOR) {
2959       interp = emit->key.fs.flatshade ?
2960          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2961    }
2962 
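   /* Note: VGPU10's LINEAR interpolation modes are perspective-correct, so
    * TGSI PERSPECTIVE maps to the LINEAR modes below while TGSI LINEAR maps
    * to the LINEAR_NOPERSPECTIVE variants.
    */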
2963    switch (interp) {
2964    case TGSI_INTERPOLATE_CONSTANT:
2965       return VGPU10_INTERPOLATION_CONSTANT;
2966    case TGSI_INTERPOLATE_LINEAR:
2967       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2968          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2969       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2970                  emit->version >= 41) {
2971          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2972       } else {
2973          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2974       }
2975       break;
2976    case TGSI_INTERPOLATE_PERSPECTIVE:
2977       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2978          return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2979       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2980                  emit->version >= 41) {
2981          return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2982       } else {
2983          return VGPU10_INTERPOLATION_LINEAR;
2984       }
2985       break;
2986    default:
2987       assert(!"Unexpected interpolation mode");
2988       return VGPU10_INTERPOLATION_CONSTANT;
2989    }
2990 }
2991 
2992 
2993 /**
2994  * Translate a TGSI property to VGPU10.
2995  * Don't emit any instructions yet, only need to gather the primitive property
2996  * Don't emit any instructions yet; we only need to gather the primitive property
2997  * final property instructions will be emitted as part of the pre-helper code.
2998  */
2999 static bool
3000 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
3001                      const struct tgsi_full_property *prop)
3002 {
3003    static const VGPU10_PRIMITIVE primType[] = {
3004       VGPU10_PRIMITIVE_POINT,           /* MESA_PRIM_POINTS */
3005       VGPU10_PRIMITIVE_LINE,            /* MESA_PRIM_LINES */
3006       VGPU10_PRIMITIVE_LINE,            /* MESA_PRIM_LINE_LOOP */
3007       VGPU10_PRIMITIVE_LINE,            /* MESA_PRIM_LINE_STRIP */
3008       VGPU10_PRIMITIVE_TRIANGLE,        /* MESA_PRIM_TRIANGLES */
3009       VGPU10_PRIMITIVE_TRIANGLE,        /* MESA_PRIM_TRIANGLE_STRIP */
3010       VGPU10_PRIMITIVE_TRIANGLE,        /* MESA_PRIM_TRIANGLE_FAN */
3011       VGPU10_PRIMITIVE_UNDEFINED,       /* MESA_PRIM_QUADS */
3012       VGPU10_PRIMITIVE_UNDEFINED,       /* MESA_PRIM_QUAD_STRIP */
3013       VGPU10_PRIMITIVE_UNDEFINED,       /* MESA_PRIM_POLYGON */
3014       VGPU10_PRIMITIVE_LINE_ADJ,        /* MESA_PRIM_LINES_ADJACENCY */
3015       VGPU10_PRIMITIVE_LINE_ADJ,        /* MESA_PRIM_LINE_STRIP_ADJACENCY */
3016       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* MESA_PRIM_TRIANGLES_ADJACENCY */
3017       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
3018    };
3019 
3020    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
3021       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* MESA_PRIM_POINTS */
3022       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* MESA_PRIM_LINES */
3023       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* MESA_PRIM_LINE_LOOP */
3024       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* MESA_PRIM_LINE_STRIP */
3025       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* MESA_PRIM_TRIANGLES */
3026       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_STRIP */
3027       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_FAN */
3028       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* MESA_PRIM_QUADS */
3029       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* MESA_PRIM_QUAD_STRIP */
3030       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* MESA_PRIM_POLYGON */
3031       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* MESA_PRIM_LINES_ADJACENCY */
3032       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* MESA_PRIM_LINE_STRIP_ADJACENCY */
3033       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */
3034       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
3035    };
3036 
3037    static const unsigned inputArraySize[] = {
3038       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
3039       1,       /* VGPU10_PRIMITIVE_POINT */
3040       2,       /* VGPU10_PRIMITIVE_LINE */
3041       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
3042       0,
3043       0,
3044       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
3045       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
3046    };
3047 
3048    switch (prop->Property.PropertyName) {
3049    case TGSI_PROPERTY_GS_INPUT_PRIM:
3050       assert(prop->u[0].Data < ARRAY_SIZE(primType));
3051       emit->gs.prim_type = primType[prop->u[0].Data];
3052       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
3053       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
3054       break;
3055 
3056    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
3057       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
3058       emit->gs.prim_topology = primTopology[prop->u[0].Data];
3059       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
3060       break;
3061 
3062    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
3063       emit->gs.max_out_vertices = prop->u[0].Data;
3064       break;
3065 
3066    case TGSI_PROPERTY_GS_INVOCATIONS:
3067       emit->gs.invocations = prop->u[0].Data;
3068       break;
3069 
3070    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
3071    case TGSI_PROPERTY_NEXT_SHADER:
3072    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
3073       /* no-op */
3074       break;
3075 
3076    case TGSI_PROPERTY_TCS_VERTICES_OUT:
3077       /* This info is already captured in the shader key */
3078       break;
3079 
3080    case TGSI_PROPERTY_TES_PRIM_MODE:
3081       emit->tes.prim_mode = prop->u[0].Data;
3082       break;
3083 
3084    case TGSI_PROPERTY_TES_SPACING:
3085       emit->tes.spacing = prop->u[0].Data;
3086       break;
3087 
3088    case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
3089       emit->tes.vertices_order_cw = prop->u[0].Data;
3090       break;
3091 
3092    case TGSI_PROPERTY_TES_POINT_MODE:
3093       emit->tes.point_mode = prop->u[0].Data;
3094       break;
3095 
3096    case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
3097       emit->cs.block_width = prop->u[0].Data;
3098       break;
3099 
3100    case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
3101       emit->cs.block_height = prop->u[0].Data;
3102       break;
3103 
3104    case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
3105       emit->cs.block_depth = prop->u[0].Data;
3106       break;
3107 
3108    case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
3109       emit->fs.forceEarlyDepthStencil = true;
3110       break;
3111 
3112    default:
3113       debug_printf("Unexpected TGSI property %s\n",
3114                    tgsi_property_names[prop->Property.PropertyName]);
3115    }
3116 
3117    return true;
3118 }
3119 
3120 
3121 static void
3122 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
3123                           VGPU10OpcodeToken0 opcode0, unsigned nData,
3124                           unsigned data)
3125 {
3126    begin_emit_instruction(emit);
3127    emit_dword(emit, opcode0.value);
3128    if (nData)
3129       emit_dword(emit, data);
3130    end_emit_instruction(emit);
3131 }
3132 
3133 
3134 /**
3135  * Emit property instructions
3136  */
3137 static void
3138 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
3139 {
3140    VGPU10OpcodeToken0 opcode0;
3141 
3142    assert(emit->unit == PIPE_SHADER_GEOMETRY);
3143 
3144    /* emit input primitive type declaration */
3145    opcode0.value = 0;
3146    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
3147    opcode0.primitive = emit->gs.prim_type;
3148    emit_property_instruction(emit, opcode0, 0, 0);
3149 
3150    /* emit max output vertices */
3151    opcode0.value = 0;
3152    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
3153    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
3154 
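   /* Geometry shader instancing needs a newer device version; the check
    * against 50 presumably corresponds to SM5-level support.
    */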
3155    if (emit->version >= 50 && emit->gs.invocations > 0) {
3156       opcode0.value = 0;
3157       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
3158       emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
3159    }
3160 }
3161 
3162 
3163 /**
3164  * A helper function to declare tessellator domain in a hull shader or
3165  * in the domain shader.
3166  */
3167 static void
3168 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
3169                         enum mesa_prim prim_mode)
3170 {
3171    VGPU10OpcodeToken0 opcode0;
3172 
3173    opcode0.value = 0;
3174    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
3175    switch (prim_mode) {
3176    case MESA_PRIM_QUADS:
3177    case MESA_PRIM_LINES:
3178       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
3179       break;
3180    case MESA_PRIM_TRIANGLES:
3181       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
3182       break;
3183    default:
3184       debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
3185       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
3186    }
3187    begin_emit_instruction(emit);
3188    emit_dword(emit, opcode0.value);
3189    end_emit_instruction(emit);
3190 }
3191 
3192 
3193 /**
3194  * Emit domain shader declarations.
3195  */
3196 static void
3197 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
3198 {
3199    VGPU10OpcodeToken0 opcode0;
3200 
3201    assert(emit->unit == PIPE_SHADER_TESS_EVAL);
3202 
3203    /* Emit the input control point count */
3204    assert(emit->key.tes.vertices_per_patch >= 0 &&
3205           emit->key.tes.vertices_per_patch <= 32);
3206 
3207    opcode0.value = 0;
3208    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3209    opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
3210    begin_emit_instruction(emit);
3211    emit_dword(emit, opcode0.value);
3212    end_emit_instruction(emit);
3213 
3214    emit_tessellator_domain(emit, emit->tes.prim_mode);
3215 
3216    /* Specify the maximum swizzle component of the domain point according to the
3217     * tessellator domain type.
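    * Triangle domains use a three-component (u,v,w) domain point; quad and
    * isoline domains use two components.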
3218     */
3219    emit->tes.swizzle_max = emit->tes.prim_mode == MESA_PRIM_TRIANGLES ?
3220                               TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y;
3221 }
3222 
3223 
3224 /**
3225  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
3226  * to implement some instructions.  We pre-allocate those values here
3227  * in the immediate constant buffer.
3228  */
3229 static void
3230 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
3231 {
3232    unsigned n = 0;
3233 
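   /* common_immediate_pos[] records where each pre-allocated constant lands
    * in the immediate buffer so instruction emission can refer back to it.
    */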
3234    emit->common_immediate_pos[n++] =
3235       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
3236 
3237    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
3238       emit->common_immediate_pos[n++] =
3239          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
3240    }
3241 
3242    emit->common_immediate_pos[n++] =
3243       alloc_immediate_int4(emit, 0, 1, 2, -1);
3244 
3245    emit->common_immediate_pos[n++] =
3246       alloc_immediate_int4(emit, 3, 4, 5, 6);
3247 
3248    if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
3249        emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
3250       emit->common_immediate_pos[n++] =
3251          alloc_immediate_int4(emit, 31, 0, 0, 0);
3252    }
3253 
3254    if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
3255        emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
3256        emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3257       emit->common_immediate_pos[n++] =
3258          alloc_immediate_int4(emit, 32, 0, 0, 0);
3259    }
3260 
3261    if (emit->key.vs.attrib_puint_to_snorm) {
3262       emit->common_immediate_pos[n++] =
3263          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3264    }
3265 
3266    if (emit->key.vs.attrib_puint_to_uscaled) {
3267       emit->common_immediate_pos[n++] =
3268          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3269    }
3270 
3271    if (emit->key.vs.attrib_puint_to_sscaled) {
3272       emit->common_immediate_pos[n++] =
3273          alloc_immediate_int4(emit, 22, 12, 2, 0);
3274 
3275       emit->common_immediate_pos[n++] =
3276          alloc_immediate_int4(emit, 22, 30, 0, 0);
3277    }
3278 
3279    if (emit->vposition.num_prescale > 1) {
3280       unsigned i;
3281       for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3282          emit->common_immediate_pos[n++] =
3283             alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3284       }
3285    }
3286 
3287    emit->immediates_dbl = (double (*)[2]) emit->immediates;
3288 
3289    if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3290       emit->common_immediate_pos[n++] =
3291          alloc_immediate_double2(emit, -1.0, -1.0);
3292    }
3293 
3294    if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
3295        emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
3296       emit->common_immediate_pos[n++] =
3297          alloc_immediate_double2(emit, 0.0, 0.0);
3298       emit->common_immediate_pos[n++] =
3299          alloc_immediate_double2(emit, 1.0, 1.0);
3300    }
3301 
3302    if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3303       emit->common_immediate_pos[n++] =
3304          alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3305    }
3306 
3307    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3308 
3309    unsigned i;
3310 
3311    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3312       if (emit->key.tex[i].texel_bias) {
3313          /* The 0.0f components can be replaced if more immediate float values are needed */
3314          emit->common_immediate_pos[n++] =
3315             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3316          break;
3317       }
3318    }
3319 
3320    /** TODO: allocate immediates for all possible element byte offsets?
3321     */
3322    if (emit->raw_bufs) {
3323       unsigned i;
3324       for (i = 7; i < 12; i+=4) {
3325          emit->common_immediate_pos[n++] =
3326             alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3327       }
3328    }
3329 
3330    if (emit->info.indirect_files &
3331        (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
3332       unsigned i;
3333       for (i = 7; i < 8; i+=4) {
3334          emit->common_immediate_pos[n++] =
3335             alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3336       }
3337    }
3338 
3339    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3340    emit->num_common_immediates = n;
3341 }
3342 
3343 
3344 /**
3345  * Emit hull shader declarations.
3346 */
3347 static void
3348 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3349 {
3350    VGPU10OpcodeToken0 opcode0;
3351 
3352    /* Emit the input control point count */
3353    assert(emit->key.tcs.vertices_per_patch > 0 &&
3354           emit->key.tcs.vertices_per_patch <= 32);
3355 
3356    opcode0.value = 0;
3357    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3358    opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3359    begin_emit_instruction(emit);
3360    emit_dword(emit, opcode0.value);
3361    end_emit_instruction(emit);
3362 
3363    /* Emit the output control point count */
3364    assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3365 
3366    opcode0.value = 0;
3367    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3368    opcode0.controlPointCount = emit->key.tcs.vertices_out;
3369    begin_emit_instruction(emit);
3370    emit_dword(emit, opcode0.value);
3371    end_emit_instruction(emit);
3372 
3373    /* Emit tessellator domain */
3374    emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3375 
3376    /* Emit tessellator output primitive */
3377    opcode0.value = 0;
3378    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3379    if (emit->key.tcs.point_mode) {
3380       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3381    }
3382    else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) {
3383       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3384    }
3385    else {
3386       assert(emit->key.tcs.prim_mode == MESA_PRIM_QUADS ||
3387              emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES);
3388 
3389       if (emit->key.tcs.vertices_order_cw)
3390          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3391       else
3392          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3393    }
3394    begin_emit_instruction(emit);
3395    emit_dword(emit, opcode0.value);
3396    end_emit_instruction(emit);
3397 
3398    /* Emit tessellator partitioning */
3399    opcode0.value = 0;
3400    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3401    switch (emit->key.tcs.spacing) {
3402    case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3403       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3404       break;
3405    case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3406       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3407       break;
3408    case PIPE_TESS_SPACING_EQUAL:
3409       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3410       break;
3411    default:
3412       debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3413       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3414    }
3415    begin_emit_instruction(emit);
3416    emit_dword(emit, opcode0.value);
3417    end_emit_instruction(emit);
3418 
3419    alloc_common_immediates(emit);
3420 
3421    /* Declare constant registers */
3422    emit_constant_declaration(emit);
3423 
3424    /* Declare samplers and resources */
3425    emit_sampler_declarations(emit);
3426    emit_resource_declarations(emit);
3427 
3428    /* Declare images */
3429    emit_image_declarations(emit);
3430 
3431    /* Declare shader buffers */
3432    emit_shader_buf_declarations(emit);
3433 
3434    /* Declare atomic buffers */
3435    emit_atomic_buf_declarations(emit);
3436 
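   /* Pre-allocate an immediate holding the patch's input vertex count in
    * x/y/z; presumably used later when iterating over the input control
    * points.
    */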
3437    int nVertices = emit->key.tcs.vertices_per_patch;
3438    emit->tcs.imm_index =
3439       alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3440 
3441    /* Now, emit the constant block containing all the immediates
3442     * declared by shader, as well as the extra ones seen above.
3443     */
3444    emit_vgpu10_immediates_block(emit);
3445 
3446 }
3447 
3448 
3449 /**
3450  * A helper function to determine whether a control point phase is needed.
3451  * Returns true if the shader writes per-vertex (control point) outputs.
3452  */
3453 static bool
3454 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3455 {
3456    unsigned i;
3457 
3458    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3459 
3460    /* If output control point count does not match the input count,
3461     * we need a control point phase to explicitly set the output control
3462     * points.
3463     */
3464    if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3465        emit->key.tcs.vertices_out)
3466       return true;
3467 
3468    for (i = 0; i < emit->info.num_outputs; i++) {
3469       switch (emit->info.output_semantic_name[i]) {
3470       case TGSI_SEMANTIC_PATCH:
3471       case TGSI_SEMANTIC_TESSOUTER:
3472       case TGSI_SEMANTIC_TESSINNER:
3473          break;
3474       default:
3475          return true;
3476       }
3477    }
3478    return false;
3479 }
3480 
3481 
3482 /**
3483  * A helper function to add the shader signature for the passthrough control
3484  * point phase. The HLSL compiler generates the same signature for a
3485  * passthrough control point phase, and the Metal renderer requires it.
3486  */
3487 static void
3488 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3489 {
3490    struct svga_shader_signature *sgn = &emit->signature;
3491    SVGA3dDXShaderSignatureEntry *sgnEntry;
3492    unsigned i;
3493 
3494    for (i = 0; i < emit->info.num_inputs; i++) {
3495       unsigned index = emit->linkage.input_map[i];
3496       enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3497 
3498       sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3499 
3500       set_shader_signature_entry(sgnEntry, index,
3501                                  tgsi_semantic_to_sgn_name[sem_name],
3502                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3503                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3504                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3505 
3506       sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3507 
3508       set_shader_signature_entry(sgnEntry, i,
3509                                  tgsi_semantic_to_sgn_name[sem_name],
3510                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3511                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3512                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3513    }
3514 }
3515 
3516 
3517 /**
3518  * A helper function to emit an instruction to start the control point phase
3519  * in the hull shader.
3520  */
3521 static void
3522 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3523 {
3524    VGPU10OpcodeToken0 opcode0;
3525 
3526    opcode0.value = 0;
3527    opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3528    begin_emit_instruction(emit);
3529    emit_dword(emit, opcode0.value);
3530    end_emit_instruction(emit);
3531 }
3532 
3533 
3534 /**
3535  * Start the hull shader control point phase
3536  */
3537 static bool
3538 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3539 {
3540    /* If there is no control point output, skip the control point phase. */
3541    if (!needs_control_point_phase(emit)) {
3542       if (!emit->key.tcs.vertices_out) {
3543          /**
3544           * If the tcs does not explicitly generate any control point output
3545           * and the tes does not use any input control point, then
3546           * emit an empty control point phase with zero output control
3547           * point count.
3548           */
3549          emit_control_point_phase_instruction(emit);
3550 
3551          /**
3552           * Since this is an empty control point phase, we will need to
3553           * add input signatures when we parse the tcs again in the
3554           * patch constant phase.
3555           */
3556          emit->tcs.fork_phase_add_signature = true;
3557       }
3558       else {
3559          /**
3560           * Before skipping the control point phase, add the signature for
3561           * the passthrough control point.
3562           */
3563          emit_passthrough_control_point_signature(emit);
3564       }
3565       return false;
3566    }
3567 
3568    /* Start the control point phase in the hull shader */
3569    emit_control_point_phase_instruction(emit);
3570 
3571    /* Declare the output control point ID */
3572    if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3573       /* Add invocation id declaration if it does not exist */
3574       emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3575    }
3576 
3577    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3578                           VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3579                           VGPU10_OPERAND_INDEX_0D,
3580                           0, 1,
3581                           VGPU10_NAME_UNDEFINED,
3582                           VGPU10_OPERAND_0_COMPONENT, 0,
3583                           0,
3584                           VGPU10_INTERPOLATION_CONSTANT, true,
3585                           SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3586 
3587    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3588       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3589                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3590                              VGPU10_OPERAND_INDEX_0D,
3591                              0, 1,
3592                              VGPU10_NAME_UNDEFINED,
3593                              VGPU10_OPERAND_0_COMPONENT,
3594                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3595                              0,
3596                              VGPU10_INTERPOLATION_UNDEFINED, true,
3597                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3598    }
3599 
3600    return true;
3601 }
3602 
3603 
3604 /**
3605  * Start the hull shader patch constant phase, do the second pass of the
3606  * TCS translation, and emit the relevant declarations and instructions
3607  * for this phase.
3608  */
3609 static bool
3610 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3611                                       struct tgsi_parse_context *parse)
3612 {
3613    unsigned inst_number = 0;
3614    bool ret = true;
3615    VGPU10OpcodeToken0 opcode0;
3616 
3617    emit->skip_instruction = false;
3618 
3619    /* Start the patch constant phase */
3620    opcode0.value = 0;
3621    opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3622    begin_emit_instruction(emit);
3623    emit_dword(emit, opcode0.value);
3624    end_emit_instruction(emit);
3625 
3626    /* Set the current phase to patch constant phase */
3627    emit->tcs.control_point_phase = false;
3628 
3629    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3630       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3631                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3632                              VGPU10_OPERAND_INDEX_0D,
3633                              0, 1,
3634                              VGPU10_NAME_UNDEFINED,
3635                              VGPU10_OPERAND_0_COMPONENT,
3636                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3637                              0,
3638                              VGPU10_INTERPOLATION_UNDEFINED, true,
3639                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3640    }
3641 
3642    /* Emit declarations for this phase */
3643    emit->index_range.required =
3644       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
3645    emit_tcs_input_declarations(emit);
3646 
3647    if (emit->index_range.start_index != INVALID_INDEX) {
3648       emit_index_range_declaration(emit);
3649    }
3650 
3651    emit->index_range.required =
3652       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
3653    emit_tcs_output_declarations(emit);
3654 
3655    if (emit->index_range.start_index != INVALID_INDEX) {
3656       emit_index_range_declaration(emit);
3657    }
3658    emit->index_range.required = false;
3659 
3660    emit_temporaries_declaration(emit);
3661 
3662    /* Reset the token position to the first instruction token
3663     * in preparation for the second pass of the shader
3664     */
3665    parse->Position = emit->tcs.instruction_token_pos;
3666 
3667    while (!tgsi_parse_end_of_tokens(parse)) {
3668       tgsi_parse_token(parse);
3669 
3670       assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3671       ret = emit_vgpu10_instruction(emit, inst_number++,
3672                                     &parse->FullToken.FullInstruction);
3673 
3674       /* Usually this applies to TCS only. If the shader reads a patch
3675        * constant output in the fork phase, we reemit all instructions that
3676        * write to that output so that the results are also stored in
3677        * temporaries.
3678        */
3679       assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
3680       if (emit->reemit_instruction) {
3681          assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3682          ret = emit_vgpu10_instruction(emit, inst_number,
3683                                        &parse->FullToken.FullInstruction);
3684       } else if (emit->reemit_rawbuf_instruction) {
3685          ret = emit_rawbuf_instruction(emit, inst_number,
3686                                        &parse->FullToken.FullInstruction);
3687       }
3688 
3689       if (!ret)
3690          return false;
3691    }
3692 
3693    return true;
3694 }
3695 
3696 
3697 /**
3698  * Emit the thread group declaration for compute shader.
3699  */
3700 static void
3701 emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
3702 {
3703    VGPU10OpcodeToken0 opcode0;
3704 
3705    opcode0.value = 0;
3706    opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
3707    begin_emit_instruction(emit);
3708    emit_dword(emit, opcode0.value);
3709    emit_dword(emit, emit->cs.block_width);
3710    emit_dword(emit, emit->cs.block_height);
3711    emit_dword(emit, emit->cs.block_depth);
3712    end_emit_instruction(emit);
3713 }
3714 
3715 
3716 /**
3717  * Emit index range declaration.
3718  */
3719 static bool
3720 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3721 {
3722    if (emit->version < 50)
3723       return true;
3724 
3725    assert(emit->index_range.start_index != INVALID_INDEX);
3726    assert(emit->index_range.count != 0);
3727    assert(emit->index_range.required);
3728    assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3729    assert(emit->index_range.dim != 0);
3730    assert(emit->index_range.size != 0);
3731 
3732    VGPU10OpcodeToken0 opcode0;
3733    VGPU10OperandToken0 operand0;
3734 
3735    opcode0.value = 0;
3736    opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3737 
3738    operand0.value = 0;
3739    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3740    operand0.indexDimension = emit->index_range.dim;
3741    operand0.operandType = emit->index_range.operandType;
3742    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3743    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3744 
3745    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3746       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3747 
3748    begin_emit_instruction(emit);
3749    emit_dword(emit, opcode0.value);
3750    emit_dword(emit, operand0.value);
3751 
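   /* For a 2D-indexed operand the register array size is emitted first,
    * followed by the start index and count of the range; a 1D operand emits
    * only the start index and count.
    */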
3752    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3753       emit_dword(emit, emit->index_range.size);
3754       emit_dword(emit, emit->index_range.start_index);
3755       emit_dword(emit, emit->index_range.count);
3756    }
3757    else {
3758       emit_dword(emit, emit->index_range.start_index);
3759       emit_dword(emit, emit->index_range.count);
3760    }
3761 
3762    end_emit_instruction(emit);
3763 
3764    /* Reset fields in emit->index_range struct except
3765     * emit->index_range.required which will be reset afterwards
3766     */
3767    emit->index_range.count = 0;
3768    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3769    emit->index_range.start_index = INVALID_INDEX;
3770    emit->index_range.size = 0;
3771    emit->index_range.dim = 0;
3772 
3773    return true;
3774 }
3775 
3776 
3777 /**
3778  * Emit a vgpu10 declaration "instruction".
3779  * \param index  the register index
3780  * \param size   array size of the operand. In most cases, it is 1,
3781  *               but for inputs to geometry shader, the array size varies
3782  *               depending on the primitive type.
3783  */
3784 static void
3785 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3786                       VGPU10OpcodeToken0 opcode0,
3787                       VGPU10OperandToken0 operand0,
3788                       VGPU10NameToken name_token,
3789                       unsigned index, unsigned size)
3790 {
3791    assert(opcode0.opcodeType);
3792    assert(operand0.mask ||
3793           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3794           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3795           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3796           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3797           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3798           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3799           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3800           (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3801 
3802    begin_emit_instruction(emit);
3803    emit_dword(emit, opcode0.value);
3804 
3805    emit_dword(emit, operand0.value);
3806 
3807    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3808       /* Next token is the index of the register to declare */
3809       emit_dword(emit, index);
3810    }
3811    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3812       /* Next token is the size of the register */
3813       emit_dword(emit, size);
3814 
3815       /* Followed by the index of the register */
3816       emit_dword(emit, index);
3817    }
3818 
3819    if (name_token.value) {
3820       emit_dword(emit, name_token.value);
3821    }
3822 
3823    end_emit_instruction(emit);
3824 }
3825 
3826 
3827 /**
3828  * Emit the declaration for a shader input.
3829  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3830  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3831  * \param dim         index dimension
3832  * \param index       the input register index
3833  * \param size        array size of the operand. In most cases, it is 1,
3834  *                    but for inputs to geometry shader, the array size varies
3835  *                    depending on the primitive type. For tessellation control
3836  *                    shader, the array size is the vertex count per patch.
3837  * \param name        one of VGPU10_NAME_x
3838  * \param numComp     number of components
3839  * \param selMode     component selection mode
3840  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3841  * \param interpMode  interpolation mode
3842  */
3843 static void
3844 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3845                        VGPU10_OPCODE_TYPE opcodeType,
3846                        VGPU10_OPERAND_TYPE operandType,
3847                        VGPU10_OPERAND_INDEX_DIMENSION dim,
3848                        unsigned index, unsigned size,
3849                        VGPU10_SYSTEM_NAME name,
3850                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
3851                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3852                        unsigned usageMask,
3853                        VGPU10_INTERPOLATION_MODE interpMode,
3854                        bool addSignature,
3855                        SVGA3dDXSignatureSemanticName sgnName)
3856 {
3857    VGPU10OpcodeToken0 opcode0;
3858    VGPU10OperandToken0 operand0;
3859    VGPU10NameToken name_token;
3860 
3861    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3862    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3863           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3864           opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3865           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3866           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3867           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3868    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3869           operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3870           operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3871           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3872           operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3873           operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3874           operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3875           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3876           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3877           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3878           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3879 
3880    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3881    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3882    assert(dim <= VGPU10_OPERAND_INDEX_3D);
3883    assert(name == VGPU10_NAME_UNDEFINED ||
3884           name == VGPU10_NAME_POSITION ||
3885           name == VGPU10_NAME_INSTANCE_ID ||
3886           name == VGPU10_NAME_VERTEX_ID ||
3887           name == VGPU10_NAME_PRIMITIVE_ID ||
3888           name == VGPU10_NAME_IS_FRONT_FACE ||
3889           name == VGPU10_NAME_SAMPLE_INDEX ||
3890           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3891           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3892 
3893    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3894           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3895           interpMode == VGPU10_INTERPOLATION_LINEAR ||
3896           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3897           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3898           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3899           interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3900           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3901 
3902    check_register_index(emit, opcodeType, index);
3903 
3904    opcode0.value = operand0.value = name_token.value = 0;
3905 
3906    opcode0.opcodeType = opcodeType;
3907    opcode0.interpolationMode = interpMode;
3908 
3909    operand0.operandType = operandType;
3910    operand0.numComponents = numComp;
3911    operand0.selectionMode = selMode;
3912    operand0.mask = usageMask;
3913    operand0.indexDimension = dim;
3914    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3915    if (dim == VGPU10_OPERAND_INDEX_2D)
3916       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3917 
3918    name_token.name = name;
3919 
3920    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3921 
3922    if (addSignature) {
3923       struct svga_shader_signature *sgn = &emit->signature;
3924       if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3925          /* Set patch constant signature */
3926          SVGA3dDXShaderSignatureEntry *sgnEntry =
3927             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3928          set_shader_signature_entry(sgnEntry, index,
3929                                     sgnName, usageMask,
3930                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3931                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3932 
3933       } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3934                  operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3935          /* Set input signature */
3936          SVGA3dDXShaderSignatureEntry *sgnEntry =
3937             &sgn->inputs[sgn->header.numInputSignatures++];
3938          set_shader_signature_entry(sgnEntry, index,
3939                                     sgnName, usageMask,
3940                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3941                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3942       }
3943    }
3944 
3945    if (emit->index_range.required) {
3946       /* Here, index_range declaration is only applicable for opcodeType
3947        * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3948        * for operandType VGPU10_OPERAND_TYPE_INPUT,
3949        * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3950        * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3951        */
3952       if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3953            opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3954           (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3955            operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3956            operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3957          if (emit->index_range.start_index != INVALID_INDEX) {
3958             emit_index_range_declaration(emit);
3959          }
3960          return;
3961       }
3962 
3963       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3964          /* Need to record a new index_range */
3965          emit->index_range.count = 1;
3966          emit->index_range.operandType = operandType;
3967          emit->index_range.start_index = index;
3968          emit->index_range.size = size;
3969          emit->index_range.dim = dim;
3970       }
3971       else if (index !=
3972                (emit->index_range.start_index + emit->index_range.count) ||
3973                emit->index_range.operandType != operandType) {
3974          /* The input index is not contiguous with the index range, or the
3975           * operandType differs from the index range's operandType. We need
3976           * to emit the current index_range first, then start recording the next one.
3977           */
3978          emit_index_range_declaration(emit);
3979 
3980          emit->index_range.count = 1;
3981          emit->index_range.operandType = operandType;
3982          emit->index_range.start_index = index;
3983          emit->index_range.size = size;
3984          emit->index_range.dim = dim;
3985       }
3986       else if (emit->index_range.operandType == operandType) {
3987          /* Since input index is contiguous with index range and operandType
3988           * is same as index range's operandType, increment index range count.
3989           */
3990          emit->index_range.count++;
3991       }
3992    }
3993 }
3994 
3995 
3996 /**
3997  * Emit the declaration for a shader output.
3998  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
3999  * \param index  the output register index
4000  * \param name  one of VGPU10_NAME_x
4001  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
4002  */
4003 static void
4004 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
4005                         VGPU10_OPCODE_TYPE type, unsigned index,
4006                         VGPU10_SYSTEM_NAME name,
4007                         unsigned writemask,
4008                         bool addSignature,
4009                         SVGA3dDXSignatureSemanticName sgnName)
4010 {
4011    VGPU10OpcodeToken0 opcode0;
4012    VGPU10OperandToken0 operand0;
4013    VGPU10NameToken name_token;
4014 
4015    assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
4016    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
4017           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
4018           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
4019    assert(name == VGPU10_NAME_UNDEFINED ||
4020           name == VGPU10_NAME_POSITION ||
4021           name == VGPU10_NAME_PRIMITIVE_ID ||
4022           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
4023           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
4024           name == VGPU10_NAME_CLIP_DISTANCE);
4025 
4026    check_register_index(emit, type, index);
4027 
4028    opcode0.value = operand0.value = name_token.value = 0;
4029 
4030    opcode0.opcodeType = type;
4031    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4032    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4033    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4034    operand0.mask = writemask;
4035    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4036    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4037 
4038    name_token.name = name;
4039 
4040    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4041 
4042    /* Capture output signature */
4043    if (addSignature) {
4044       struct svga_shader_signature *sgn = &emit->signature;
4045       SVGA3dDXShaderSignatureEntry *sgnEntry =
4046          &sgn->outputs[sgn->header.numOutputSignatures++];
4047       set_shader_signature_entry(sgnEntry, index,
4048                                  sgnName, writemask,
4049                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4050                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4051    }
4052 
4053    if (emit->index_range.required) {
4054       /* Here, index_range declaration is only applicable for opcodeType
4055        * VGPU10_OPCODE_DCL_OUTPUT and for operandType
4056        * VGPU10_OPERAND_TYPE_OUTPUT.
4057        */
4058       if (type != VGPU10_OPCODE_DCL_OUTPUT) {
4059          if (emit->index_range.start_index != INVALID_INDEX) {
4060             emit_index_range_declaration(emit);
4061          }
4062          return;
4063       }
4064 
4065       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
4066          /* Need to record a new index_range */
4067          emit->index_range.count = 1;
4068          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4069          emit->index_range.start_index = index;
4070          emit->index_range.size = 1;
4071          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4072       }
4073       else if (index !=
4074                (emit->index_range.start_index + emit->index_range.count)) {
4075          /* The output index is not contiguous with the index range. We need
4076           * to emit the current index_range first and then start recording
4077           * the next one.
4078           */
4079          emit_index_range_declaration(emit);
4080 
4081          emit->index_range.count = 1;
4082          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4083          emit->index_range.start_index = index;
4084          emit->index_range.size = 1;
4085          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4086       }
4087       else {
4088          /* Since output index is contiguous with index range, increment
4089           * index range count.
4090           */
4091          emit->index_range.count++;
4092       }
4093    }
4094 }
4095 
4096 
4097 /**
4098  * Emit the declaration for the fragment depth output.
4099  */
4100 static void
4101 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
4102 {
4103    VGPU10OpcodeToken0 opcode0;
4104    VGPU10OperandToken0 operand0;
4105    VGPU10NameToken name_token;
4106 
4107    assert(emit->unit == PIPE_SHADER_FRAGMENT);
4108 
4109    opcode0.value = operand0.value = name_token.value = 0;
4110 
4111    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4112    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
4113    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
4114    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4115    operand0.mask = 0;
4116 
4117    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4118 }
4119 
4120 
4121 /**
4122  * Emit the declaration for the fragment sample mask/coverage output.
4123  */
4124 static void
4125 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
4126 {
4127    VGPU10OpcodeToken0 opcode0;
4128    VGPU10OperandToken0 operand0;
4129    VGPU10NameToken name_token;
4130 
4131    assert(emit->unit == PIPE_SHADER_FRAGMENT);
4132    assert(emit->version >= 41);
4133 
4134    opcode0.value = operand0.value = name_token.value = 0;
4135 
4136    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4137    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
4138    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
4139    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4140    operand0.mask = 0;
4141 
4142    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4143 }
4144 
4145 
4146 /**
4147  * Emit output declarations for fragment shader.
4148  */
4149 static void
4150 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
4151 {
4152    unsigned int i;
4153 
4154    for (i = 0; i < emit->info.num_outputs; i++) {
4155       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
4156       const enum tgsi_semantic semantic_name =
4157          emit->info.output_semantic_name[i];
4158       const unsigned semantic_index = emit->info.output_semantic_index[i];
4159       unsigned index = i;
4160 
4161       if (semantic_name == TGSI_SEMANTIC_COLOR) {
4162          assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
4163 
4164          emit->fs.color_out_index[semantic_index] = index;
4165 
4166          emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
4167                                               index + 1);
4168 
4169          /* The semantic index is the shader's color output/buffer index */
4170          emit_output_declaration(emit,
4171                                  VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
4172                                  VGPU10_NAME_UNDEFINED,
4173                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4174                                  true,
4175                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4176 
4177          if (semantic_index == 0) {
4178             if (emit->key.fs.write_color0_to_n_cbufs > 1) {
4179                /* Emit declarations for the additional color outputs
4180                 * for broadcasting.
4181                 */
4182                unsigned j;
4183                for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
4184                   /* Allocate a new output index */
4185                   unsigned idx = emit->info.num_outputs + j - 1;
4186                   emit->fs.color_out_index[j] = idx;
4187                   emit_output_declaration(emit,
4188                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
4189                                         VGPU10_NAME_UNDEFINED,
4190                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4191                                         true,
4192                                         map_tgsi_semantic_to_sgn_name(semantic_name));
4193                   emit->info.output_semantic_index[idx] = j;
4194                }
4195 
4196                emit->fs.num_color_outputs =
4197                      emit->key.fs.write_color0_to_n_cbufs;
4198             }
4199          }
4200       }
4201       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4202          /* Fragment depth output */
4203          emit_fragdepth_output_declaration(emit);
4204       }
4205       else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
4206          /* Sample mask output */
4207          emit_samplemask_output_declaration(emit);
4208       }
4209       else {
4210          assert(!"Bad output semantic name");
4211       }
4212    }
4213 }
4214 
4215 
4216 /**
4217  * Emit common output declaration for vertex processing.
4218  */
4219 static void
4220 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
4221                                unsigned index, unsigned writemask,
4222                                bool addSignature)
4223 {
4224    const enum tgsi_semantic semantic_name =
4225          emit->info.output_semantic_name[index];
4226    const unsigned semantic_index = emit->info.output_semantic_index[index];
4227    unsigned name, type;
4228    unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4229 
4230    assert(emit->unit != PIPE_SHADER_FRAGMENT &&
4231           emit->unit != PIPE_SHADER_COMPUTE);
4232 
4233    switch (semantic_name) {
4234    case TGSI_SEMANTIC_POSITION:
4235       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4236          /* position will be declared in the control point phase only */
4237          assert(emit->tcs.control_point_phase);
4238          type = VGPU10_OPCODE_DCL_OUTPUT;
4239          name = VGPU10_NAME_UNDEFINED;
4240          emit_output_declaration(emit, type, index, name, final_mask, true,
4241                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4242          return;
4243       }
4244       else {
4245          type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4246          name = VGPU10_NAME_POSITION;
4247       }
4248       /* Save the index of the vertex position output register */
4249       emit->vposition.out_index = index;
4250       break;
4251    case TGSI_SEMANTIC_CLIPDIST:
4252       type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4253       name = VGPU10_NAME_CLIP_DISTANCE;
4254       /* save the starting index of the clip distance output register */
4255       if (semantic_index == 0)
4256          emit->clip_dist_out_index = index;
4257       final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
4258       if (final_mask == 0x0)
4259          return; /* discard this do-nothing declaration */
4260       break;
4261    case TGSI_SEMANTIC_CLIPVERTEX:
4262       type = VGPU10_OPCODE_DCL_OUTPUT;
4263       name = VGPU10_NAME_UNDEFINED;
4264       emit->clip_vertex_out_index = index;
4265       break;
4266    default:
4267       /* generic output */
4268       type = VGPU10_OPCODE_DCL_OUTPUT;
4269       name = VGPU10_NAME_UNDEFINED;
4270    }
4271 
4272    emit_output_declaration(emit, type, index, name, final_mask, addSignature,
4273                            map_tgsi_semantic_to_sgn_name(semantic_name));
4274 }
4275 
4276 
4277 /**
4278  * Emit declaration for outputs in vertex shader.
4279  */
4280 static void
4281 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
4282 {
4283    unsigned i;
4284    for (i = 0; i < emit->info.num_outputs; i++) {
4285       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4286    }
4287 }
4288 
4289 
4290 /**
4291  * A helper function to determine the writemask for an output
4292  * for the specified stream.
4293  */
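/* Each output component has a 2-bit stream ID packed into output_streams:
 * bits 1:0 for X, 3:2 for Y, 5:4 for Z and 7:6 for W.  For example, with
 * output_streams = 0x04 (Y on stream 1, X/Z/W on stream 0) and stream = 0,
 * this returns (X|Z|W) masked by output_usagemask.
 */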
4294 static unsigned
4295 output_writemask_for_stream(unsigned stream, uint8_t output_streams,
4296                             uint8_t output_usagemask)
4297 {
4298    unsigned i;
4299    unsigned writemask = 0;
4300 
4301    for (i = 0; i < 4; i++) {
4302       if ((output_streams & 0x3) == stream)
4303          writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
4304       output_streams >>= 2;
4305    }
4306    return writemask & output_usagemask;
4307 }
4308 
4309 
4310 /**
4311  * Emit declaration for outputs in geometry shader.
4312  */
4313 static void
4314 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4315 {
4316    unsigned i;
4317    VGPU10OpcodeToken0 opcode0;
4318    unsigned numStreamsSupported = 1;
4319    int s;
4320 
4321    if (emit->version >= 50) {
4322       numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4323    }
4324 
4325    /**
4326     * Start emitting from the last stream and work down, so that we end
4327     * with stream 0; that way any auxiliary output declarations will
4328     * go to stream 0.
4329     */
4330    for (s = numStreamsSupported-1; s >= 0; s--) {
4331 
4332       if (emit->info.num_stream_output_components[s] == 0)
4333          continue;
4334 
4335       if (emit->version >= 50) {
4336          /* DCL_STREAM stream */
4337          begin_emit_instruction(emit);
4338          emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, false);
4339          emit_stream_register(emit, s);
4340          end_emit_instruction(emit);
4341       }
4342 
4343       /* emit output primitive topology declaration */
4344       opcode0.value = 0;
4345       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4346       opcode0.primitiveTopology = emit->gs.prim_topology;
4347       emit_property_instruction(emit, opcode0, 0, 0);
4348 
4349       for (i = 0; i < emit->info.num_outputs; i++) {
4350          unsigned writemask;
4351 
4352          /* find out the writemask for this stream */
4353          writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4354                                                  emit->output_usage_mask[i]);
4355 
4356          if (writemask) {
4357             enum tgsi_semantic semantic_name =
4358                emit->info.output_semantic_name[i];
4359 
4360             /* TODO: Still need to take care of a special case where a
4361              *       single varying spans across multiple output registers.
4362              */
4363             switch(semantic_name) {
4364             case TGSI_SEMANTIC_PRIMID:
4365                emit_output_declaration(emit,
4366                                        VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4367                                        VGPU10_NAME_PRIMITIVE_ID,
4368                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4369                                        false,
4370                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4371                break;
4372             case TGSI_SEMANTIC_LAYER:
4373                emit_output_declaration(emit,
4374                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4375                                        VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4376                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4377                                        false,
4378                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4379                break;
4380             case TGSI_SEMANTIC_VIEWPORT_INDEX:
4381                emit_output_declaration(emit,
4382                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4383                                        VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4384                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4385                                        false,
4386                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4387                emit->gs.viewport_index_out_index = i;
4388                break;
4389             default:
4390                emit_vertex_output_declaration(emit, i, writemask, false);
4391             }
4392          }
4393       }
4394    }
4395 
4396    /* For geometry shader outputs, it is possible the same register is
4397     * declared multiple times for different streams. So to avoid
4398     * redundant signature entries, the geometry shader output signature
4399     * is emitted outside of the declarations.
4400     */
4401    struct svga_shader_signature *sgn = &emit->signature;
4402    SVGA3dDXShaderSignatureEntry *sgnEntry;
4403 
4404    for (i = 0; i < emit->info.num_outputs; i++) {
4405       if (emit->output_usage_mask[i]) {
4406          enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4407 
4408          sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4409          set_shader_signature_entry(sgnEntry, i,
4410                                     map_tgsi_semantic_to_sgn_name(sem_name),
4411                                     emit->output_usage_mask[i],
4412                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4413                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4414       }
4415    }
4416 }
4417 
4418 
4419 /**
4420  * Emit the declaration for the tess inner/outer output.
4421  * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4422  * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT_PATCH_CONSTANT
4423  * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4424  */
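/* Typical usage (see emit_tcs_output_declarations() and
 * emit_tessfactor_input_declarations() below); e.g. a quad-domain inner
 * tess factor output is declared with:
 *
 *    emit_tesslevel_declaration(emit, index,
 *       VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
 *       VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
 *       SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
 */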
4425 static void
4426 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4427                            unsigned index, unsigned opcodeType,
4428                            unsigned operandType, VGPU10_SYSTEM_NAME name,
4429                            SVGA3dDXSignatureSemanticName sgnName)
4430 {
4431    VGPU10OpcodeToken0 opcode0;
4432    VGPU10OperandToken0 operand0;
4433    VGPU10NameToken name_token;
4434 
4435    assert(emit->version >= 50);
4436    assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4437           (emit->key.tcs.prim_mode == MESA_PRIM_LINES &&
4438            name == VGPU10_NAME_UNDEFINED));
4439    assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4440 
4441    assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4442           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4443 
4444    opcode0.value = operand0.value = name_token.value = 0;
4445 
4446    opcode0.opcodeType = opcodeType;
4447    operand0.operandType = operandType;
4448    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4449    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4450    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4451    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4452    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4453 
4454    name_token.name = name;
4455    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4456 
4457    /* Capture patch constant signature */
4458    struct svga_shader_signature *sgn = &emit->signature;
4459    SVGA3dDXShaderSignatureEntry *sgnEntry =
4460       &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4461    set_shader_signature_entry(sgnEntry, index,
4462                               sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4463                               SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4464                               SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4465 }
4466 
4467 
4468 /**
4469  * Emit output declarations for tessellation control shader.
4470  */
4471 static void
4472 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4473 {
4474    unsigned int i;
4475    unsigned outputIndex = emit->num_outputs;
4476    struct svga_shader_signature *sgn = &emit->signature;
4477 
4478    /**
4479     * Initialize patch_generic_out_count so it won't be counted twice
4480     * since this function is called twice: once for the control point phase
4481     * and once for the patch constant phase.
4482     */
4483    emit->tcs.patch_generic_out_count = 0;
4484 
4485    for (i = 0; i < emit->info.num_outputs; i++) {
4486       unsigned index = i;
4487       const enum tgsi_semantic semantic_name =
4488          emit->info.output_semantic_name[i];
4489 
4490       switch (semantic_name) {
4491       case TGSI_SEMANTIC_TESSINNER:
4492          emit->tcs.inner.tgsi_index = i;
4493 
4494          /* skip per-patch output declarations in control point phase */
4495          if (emit->tcs.control_point_phase)
4496             break;
4497 
4498          emit->tcs.inner.out_index = outputIndex;
4499          switch (emit->key.tcs.prim_mode) {
4500          case MESA_PRIM_QUADS:
4501             emit_tesslevel_declaration(emit, outputIndex++,
4502                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4503                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4504                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4505 
4506             emit_tesslevel_declaration(emit, outputIndex++,
4507                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4508                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4509                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4510             break;
4511          case MESA_PRIM_TRIANGLES:
4512             emit_tesslevel_declaration(emit, outputIndex++,
4513                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4514                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4515                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4516             break;
4517          case MESA_PRIM_LINES:
4518             break;
4519          default:
4520             debug_printf("Unsupported primitive type\n");
4521          }
4522          break;
4523 
4524       case TGSI_SEMANTIC_TESSOUTER:
4525          emit->tcs.outer.tgsi_index = i;
4526 
4527          /* skip per-patch output declarations in control point phase */
4528          if (emit->tcs.control_point_phase)
4529             break;
4530 
4531          emit->tcs.outer.out_index = outputIndex;
4532          switch (emit->key.tcs.prim_mode) {
4533          case MESA_PRIM_QUADS:
4534             for (int j = 0; j < 4; j++) {
4535                emit_tesslevel_declaration(emit, outputIndex++,
4536                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4537                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4538                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4539             }
4540             break;
4541          case MESA_PRIM_TRIANGLES:
4542             for (int j = 0; j < 3; j++) {
4543                emit_tesslevel_declaration(emit, outputIndex++,
4544                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4545                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4546                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4547             }
4548             break;
4549          case MESA_PRIM_LINES:
4550             for (int j = 0; j < 2; j++) {
4551                emit_tesslevel_declaration(emit, outputIndex++,
4552                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4553                   VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4554                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4555             }
4556             break;
4557          default:
4558             debug_printf("Unsupported primitive type\n");
4559          }
4560          break;
4561 
4562       case TGSI_SEMANTIC_PATCH:
4563          if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4564             emit->tcs.patch_generic_out_index = i;
4565          emit->tcs.patch_generic_out_count++;
4566 
4567          /* skip per-patch output declarations in control point phase */
4568          if (emit->tcs.control_point_phase)
4569             break;
4570 
4571          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4572                                  VGPU10_NAME_UNDEFINED,
4573                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4574                                  false,
4575                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4576 
4577          SVGA3dDXShaderSignatureEntry *sgnEntry =
4578             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4579          set_shader_signature_entry(sgnEntry, index,
4580                                     map_tgsi_semantic_to_sgn_name(semantic_name),
4581                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4582                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4583                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4584 
4585          break;
4586 
4587       default:
4588          /* save the starting index of control point outputs */
4589          if (emit->tcs.control_point_out_index == INVALID_INDEX)
4590             emit->tcs.control_point_out_index = i;
4591          emit->tcs.control_point_out_count++;
4592 
4593          /* skip control point output declarations in patch constant phase */
4594          if (!emit->tcs.control_point_phase)
4595             break;
4596 
4597          emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4598                                         true);
4599 
4600       }
4601    }
4602 
4603    if (emit->tcs.control_point_phase) {
4604       /**
4605        * Add a missing control point output in the control point phase.
4606        */
4607       if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4608          /* use register index after tessellation factors */
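         /* Quad domains emit 6 tess factors (4 outer + 2 inner), triangle
          * domains 4 (3 outer + 1 inner) and line domains 2, hence the
          * per-domain offsets below.
          */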
4609          switch (emit->key.tcs.prim_mode) {
4610          case MESA_PRIM_QUADS:
4611             emit->tcs.control_point_out_index = outputIndex + 6;
4612             break;
4613          case MESA_PRIM_TRIANGLES:
4614             emit->tcs.control_point_out_index = outputIndex + 4;
4615             break;
4616          default:
4617             emit->tcs.control_point_out_index = outputIndex + 2;
4618             break;
4619          }
4620          emit->tcs.control_point_out_count++;
4621          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4622                                  emit->tcs.control_point_out_index,
4623                                  VGPU10_NAME_POSITION,
4624                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4625                                  true,
4626                                  SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4627 
4628          /* If tcs does not output any control point output,
4629           * we can end the hull shader control point phase here
4630           * after emitting the default control point output.
4631           */
4632          emit->skip_instruction = true;
4633       }
4634    }
4635    else {
4636       if (emit->tcs.outer.out_index == INVALID_INDEX) {
4637          /* since the TCS did not declare the outer tess level output register,
4638           * we declare it here for patch constant phase only.
4639           */
4640          emit->tcs.outer.out_index = outputIndex;
4641          if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
4642             for (int i = 0; i < 4; i++) {
4643                emit_tesslevel_declaration(emit, outputIndex++,
4644                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4645                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4646                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4647             }
4648          }
4649          else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
4650             for (int i = 0; i < 3; i++) {
4651                emit_tesslevel_declaration(emit, outputIndex++,
4652                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4653                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4654                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4655             }
4656          }
4657       }
4658 
4659       if (emit->tcs.inner.out_index == INVALID_INDEX) {
4660          /* since the TCS did not declare the inner tess level output register,
4661           * we declare it here
4662           */
4663          emit->tcs.inner.out_index = outputIndex;
4664          if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
4665             emit_tesslevel_declaration(emit, outputIndex++,
4666                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4667                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4668                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4669             emit_tesslevel_declaration(emit, outputIndex++,
4670                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4671                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4672                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4673          }
4674          else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
4675             emit_tesslevel_declaration(emit, outputIndex++,
4676                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4677                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4678                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4679          }
4680       }
4681    }
4682    emit->num_outputs = outputIndex;
4683 }
4684 
4685 
4686 /**
4687  * Emit output declarations for tessellation evaluation shader.
4688  */
4689 static void
4690 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4691 {
4692    unsigned int i;
4693 
4694    for (i = 0; i < emit->info.num_outputs; i++) {
4695       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4696    }
4697 }
4698 
4699 
4700 /**
4701  * Emit the declaration for a system value input/output.
4702  */
4703 static void
4704 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4705                               enum tgsi_semantic semantic_name, unsigned index)
4706 {
4707    switch (semantic_name) {
4708    case TGSI_SEMANTIC_INSTANCEID:
4709       index = alloc_system_value_index(emit, index);
4710       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4711                              VGPU10_OPERAND_TYPE_INPUT,
4712                              VGPU10_OPERAND_INDEX_1D,
4713                              index, 1,
4714                              VGPU10_NAME_INSTANCE_ID,
4715                              VGPU10_OPERAND_4_COMPONENT,
4716                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4717                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4718                              VGPU10_INTERPOLATION_UNDEFINED, true,
4719                              map_tgsi_semantic_to_sgn_name(semantic_name));
4720       break;
4721    case TGSI_SEMANTIC_VERTEXID:
4722       emit->vs.vertex_id_sys_index = index;
4723       index = alloc_system_value_index(emit, index);
4724       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4725                              VGPU10_OPERAND_TYPE_INPUT,
4726                              VGPU10_OPERAND_INDEX_1D,
4727                              index, 1,
4728                              VGPU10_NAME_VERTEX_ID,
4729                              VGPU10_OPERAND_4_COMPONENT,
4730                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4731                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4732                              VGPU10_INTERPOLATION_UNDEFINED, true,
4733                              map_tgsi_semantic_to_sgn_name(semantic_name));
4734       break;
4735    case TGSI_SEMANTIC_SAMPLEID:
4736       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4737       emit->fs.sample_id_sys_index = index;
4738       index = alloc_system_value_index(emit, index);
4739       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4740                              VGPU10_OPERAND_TYPE_INPUT,
4741                              VGPU10_OPERAND_INDEX_1D,
4742                              index, 1,
4743                              VGPU10_NAME_SAMPLE_INDEX,
4744                              VGPU10_OPERAND_4_COMPONENT,
4745                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4746                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4747                              VGPU10_INTERPOLATION_CONSTANT, true,
4748                              map_tgsi_semantic_to_sgn_name(semantic_name));
4749       break;
4750    case TGSI_SEMANTIC_SAMPLEPOS:
4751       /* This system value contains the position of the current sample
4752        * when using per-sample shading.  We implement this by calling
4753        * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4754        * index as the argument.  See emit_sample_position_instructions().
4755        */
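      /* No input register is declared here; we only reserve a system value
       * slot via alloc_system_value_index() below.
       */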
4756       assert(emit->version >= 41);
4757       emit->fs.sample_pos_sys_index = index;
4758       index = alloc_system_value_index(emit, index);
4759       break;
4760    case TGSI_SEMANTIC_INVOCATIONID:
4761       /* Note: the invocation id input is mapped to a different register
4762        * depending on the shader type. In GS, it is mapped to
4763        * vGSInstanceID#.  In TCS, it is mapped to vOutputControlPointID#.
4764        * In both cases the mapped name is unique rather than just a
4765        * generic input name ("v#"), so there is no need to remap
4766        * the index value.
4767        */
4768       assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4769              emit->unit == PIPE_SHADER_TESS_CTRL);
4770       assert(emit->version >= 50);
4771 
4772       if (emit->unit == PIPE_SHADER_GEOMETRY) {
4773          emit->gs.invocation_id_sys_index = index;
4774          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4775                                 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4776                                 VGPU10_OPERAND_INDEX_0D,
4777                                 index, 1,
4778                                 VGPU10_NAME_UNDEFINED,
4779                                 VGPU10_OPERAND_0_COMPONENT,
4780                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4781                                 0,
4782                                 VGPU10_INTERPOLATION_UNDEFINED, true,
4783                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4784       } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4785          /* The emission of the control point id will be done
4786           * in the control point phase in emit_hull_shader_control_point_phase().
4787           */
4788          emit->tcs.invocation_id_sys_index = index;
4789       }
4790       break;
4791    case TGSI_SEMANTIC_SAMPLEMASK:
4792       /* Note: the PS sample mask input has a unique name ("vCoverage#")
4793        * rather than just a generic input name ("v#") so no need to remap the
4794        * index value.
4795        */
4796       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4797       assert(emit->version >= 50);
4798       emit->fs.sample_mask_in_sys_index = index;
4799       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4800                              VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4801                              VGPU10_OPERAND_INDEX_0D,
4802                              index, 1,
4803                              VGPU10_NAME_UNDEFINED,
4804                              VGPU10_OPERAND_1_COMPONENT,
4805                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4806                              0,
4807                              VGPU10_INTERPOLATION_CONSTANT, true,
4808                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4809       break;
4810    case TGSI_SEMANTIC_TESSCOORD:
4811       assert(emit->version >= 50);
4812 
4813       unsigned usageMask = 0;
4814 
4815       if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
4816          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4817       }
4818       else if (emit->tes.prim_mode == MESA_PRIM_LINES ||
4819                emit->tes.prim_mode == MESA_PRIM_QUADS) {
4820          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4821       }
4822 
4823       emit->tes.tesscoord_sys_index = index;
4824       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4825                              VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4826                              VGPU10_OPERAND_INDEX_0D,
4827                              index, 1,
4828                              VGPU10_NAME_UNDEFINED,
4829                              VGPU10_OPERAND_4_COMPONENT,
4830                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4831                              usageMask,
4832                              VGPU10_INTERPOLATION_UNDEFINED, true,
4833                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4834       break;
4835    case TGSI_SEMANTIC_TESSINNER:
4836       assert(emit->version >= 50);
4837       emit->tes.inner.tgsi_index = index;
4838       break;
4839    case TGSI_SEMANTIC_TESSOUTER:
4840       assert(emit->version >= 50);
4841       emit->tes.outer.tgsi_index = index;
4842       break;
4843    case TGSI_SEMANTIC_VERTICESIN:
4844       assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4845       assert(emit->version >= 50);
4846 
4847       /* save the system value index */
4848       emit->tcs.vertices_per_patch_index = index;
4849       break;
4850    case TGSI_SEMANTIC_PRIMID:
4851       assert(emit->version >= 50);
4852       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4853          emit->tcs.prim_id_index = index;
4854       }
4855       else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4856          emit->tes.prim_id_index = index;
4857          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4858                                 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4859                                 VGPU10_OPERAND_INDEX_0D,
4860                                 index, 1,
4861                                 VGPU10_NAME_UNDEFINED,
4862                                 VGPU10_OPERAND_0_COMPONENT,
4863                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4864                                 0,
4865                                 VGPU10_INTERPOLATION_UNDEFINED, true,
4866                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4867       }
4868       break;
4869    case TGSI_SEMANTIC_THREAD_ID:
4870       assert(emit->unit >= PIPE_SHADER_COMPUTE);
4871       assert(emit->version >= 50);
4872       emit->cs.thread_id_index = index;
4873       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4874                              VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
4875                              VGPU10_OPERAND_INDEX_0D,
4876                              index, 1,
4877                              VGPU10_NAME_UNDEFINED,
4878                              VGPU10_OPERAND_4_COMPONENT,
4879                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4880                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4881                              VGPU10_INTERPOLATION_UNDEFINED, true,
4882                              map_tgsi_semantic_to_sgn_name(semantic_name));
4883       break;
4884    case TGSI_SEMANTIC_BLOCK_ID:
4885       assert(emit->unit >= PIPE_SHADER_COMPUTE);
4886       assert(emit->version >= 50);
4887       emit->cs.block_id_index = index;
4888       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4889                              VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
4890                              VGPU10_OPERAND_INDEX_0D,
4891                              index, 1,
4892                              VGPU10_NAME_UNDEFINED,
4893                              VGPU10_OPERAND_4_COMPONENT,
4894                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4895                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4896                              VGPU10_INTERPOLATION_UNDEFINED, true,
4897                              map_tgsi_semantic_to_sgn_name(semantic_name));
4898       break;
4899    case TGSI_SEMANTIC_GRID_SIZE:
4900       assert(emit->unit == PIPE_SHADER_COMPUTE);
4901       assert(emit->version >= 50);
4902       emit->cs.grid_size.tgsi_index = index;
4903       break;
4904    default:
4905       debug_printf("unexpected system value semantic %u / %s\n",
4906                    semantic_name, tgsi_semantic_names[semantic_name]);
4907    }
4908 }
4909 
4910 /**
4911  * Translate a TGSI declaration to VGPU10.
4912  */
4913 static bool
4914 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4915                         const struct tgsi_full_declaration *decl)
4916 {
4917    switch (decl->Declaration.File) {
4918    case TGSI_FILE_INPUT:
4919       /* do nothing - see emit_input_declarations() */
4920       return true;
4921 
4922    case TGSI_FILE_OUTPUT:
4923       assert(decl->Range.First == decl->Range.Last);
4924       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4925       return true;
4926 
4927    case TGSI_FILE_TEMPORARY:
4928       /* Don't declare the temps here.  Just keep track of how many
4929        * and emit the declaration later.
4930        */
4931       if (decl->Declaration.Array) {
4932          /* Indexed temporary array.  Save the start index of the array
4933           * and the size of the array.
4934           */
4935          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4936          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4937 
4938          /* Save this array so we can emit the declaration for it later */
4939          create_temp_array(emit, arrayID, decl->Range.First,
4940                            decl->Range.Last - decl->Range.First + 1,
4941                            decl->Range.First);
4942       }
4943 
4944       /* for all temps, indexed or not, keep track of highest index */
4945       emit->num_shader_temps = MAX2(emit->num_shader_temps,
4946                                     decl->Range.Last + 1);
4947       return true;
4948 
4949    case TGSI_FILE_CONSTANT:
4950       /* Don't declare constants here.  Just keep track and emit later. */
4951       {
4952          unsigned constbuf = 0, num_consts;
4953          if (decl->Declaration.Dimension) {
4954             constbuf = decl->Dim.Index2D;
4955          }
4956          /* Assert here: the shader should never have linked with a
4957           * constbuf index out of bounds, so we should not have reached
4958           * this point.
4959           */
4960          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4961 
4962          num_consts = MAX2(emit->num_shader_consts[constbuf],
4963                            decl->Range.Last + 1);
4964 
4965          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4966             debug_printf("Warning: constant buffer is declared to size [%u]"
4967                          " but [%u] is the limit.\n",
4968                          num_consts,
4969                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4970             emit->register_overflow = true;
4971          }
4972          /* The linker doesn't enforce the max UBO size so we clamp here */
4973          emit->num_shader_consts[constbuf] =
4974             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4975       }
4976       return true;
4977 
4978    case TGSI_FILE_IMMEDIATE:
4979       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4980       return false;
4981 
4982    case TGSI_FILE_SYSTEM_VALUE:
4983       emit_system_value_declaration(emit, decl->Semantic.Name,
4984                                     decl->Range.First);
4985       return true;
4986 
4987    case TGSI_FILE_SAMPLER:
4988       /* Don't declare samplers here.  Just keep track and emit later. */
4989       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4990       return true;
4991 
4992 #if 0
4993    case TGSI_FILE_RESOURCE:
4994       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4995       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4996       assert(!"TGSI_FILE_RESOURCE not handled yet");
4997       return false;
4998 #endif
4999 
5000    case TGSI_FILE_ADDRESS:
5001       emit->num_address_regs = MAX2(emit->num_address_regs,
5002                                     decl->Range.Last + 1);
5003       return true;
5004 
5005    case TGSI_FILE_SAMPLER_VIEW:
5006       {
5007          unsigned unit = decl->Range.First;
5008          assert(decl->Range.First == decl->Range.Last);
5009          emit->sampler_target[unit] = decl->SamplerView.Resource;
5010 
5011          /* Note: we can ignore YZW return types for now */
5012          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
5013          emit->sampler_view[unit] = true;
5014       }
5015       return true;
5016 
5017    case TGSI_FILE_IMAGE:
5018       {
5019          unsigned unit = decl->Range.First;
5020          assert(decl->Range.First == decl->Range.Last);
5021          assert(unit < PIPE_MAX_SHADER_IMAGES);
5022          emit->image[unit] = decl->Image;
5023          emit->image_mask |= 1 << unit;
5024          emit->num_images++;
5025       }
5026       return true;
5027 
5028    case TGSI_FILE_HW_ATOMIC:
5029       /* Declare the atomic buffer if it is not already declared. */
5030       if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
5031          emit->num_atomic_bufs++;
5032          emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
5033       }
5034 
5035       /* Remember the maximum atomic counter index encountered */
5036       emit->max_atomic_counter_index =
5037          MAX2(emit->max_atomic_counter_index, decl->Range.Last);
5038       return true;
5039 
5040    case TGSI_FILE_MEMORY:
5041       /* Record that shared memory has been used. */
5042       if (emit->unit == PIPE_SHADER_COMPUTE &&
5043           decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
5044          emit->cs.shared_memory_declared = true;
5045       }
5046 
5047       return true;
5048 
5049    case TGSI_FILE_BUFFER:
5050       assert(emit->version >= 50);
5051       emit->num_shader_bufs++;
5052       return true;
5053 
5054    default:
5055       assert(!"Unexpected type of declaration");
5056       return false;
5057    }
5058 }
5059 
5060 
5061 /**
5062  * Emit input declarations for fragment shader.
5063  */
5064 static void
5065 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
5066 {
5067    unsigned i;
5068 
5069    for (i = 0; i < emit->linkage.num_inputs; i++) {
5070       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5071       unsigned usage_mask = emit->info.input_usage_mask[i];
5072       unsigned index = emit->linkage.input_map[i];
5073       unsigned type, interpolationMode, name;
5074       unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
5075 
5076       if (usage_mask == 0)
5077          continue;  /* register is not actually used */
5078 
5079       if (semantic_name == TGSI_SEMANTIC_POSITION) {
5080          /* fragment position input */
5081          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5082          interpolationMode = VGPU10_INTERPOLATION_LINEAR;
5083          name = VGPU10_NAME_POSITION;
5084          if (usage_mask & TGSI_WRITEMASK_W) {
5085             /* we need to replace use of 'w' with '1/w' */
5086             emit->fs.fragcoord_input_index = i;
5087          }
5088       }
5089       else if (semantic_name == TGSI_SEMANTIC_FACE) {
5090          /* fragment front-facing input */
5091          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5092          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5093          name = VGPU10_NAME_IS_FRONT_FACE;
5094          emit->fs.face_input_index = i;
5095       }
5096       else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5097          /* primitive ID */
5098          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5099          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5100          name = VGPU10_NAME_PRIMITIVE_ID;
5101       }
5102       else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
5103          /* sample index / ID */
5104          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5105          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5106          name = VGPU10_NAME_SAMPLE_INDEX;
5107       }
5108       else if (semantic_name == TGSI_SEMANTIC_LAYER) {
5109          /* render target array index */
5110          if (emit->key.fs.layer_to_zero) {
5111             /**
5112              * The shader from the previous stage does not write to layer,
5113              * so reading the layer index in fragment shader should return 0.
5114              */
5115             emit->fs.layer_input_index = i;
5116             continue;
5117          } else {
5118             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5119             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5120             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
5121             mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5122          }
5123       }
5124       else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
5125          /* viewport index */
5126          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5127          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5128          name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
5129          mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5130       }
5131       else {
5132          /* general fragment input */
5133          type = VGPU10_OPCODE_DCL_INPUT_PS;
5134          interpolationMode =
5135                translate_interpolation(emit,
5136                                        emit->info.input_interpolate[i],
5137                                        emit->info.input_interpolate_loc[i]);
5138 
5139          /* keep track of whether flat interpolation mode is being used */
5140          emit->uses_flat_interp = emit->uses_flat_interp ||
5141                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
5142 
5143          name = VGPU10_NAME_UNDEFINED;
5144       }
5145 
5146       emit_input_declaration(emit, type,
5147                              VGPU10_OPERAND_TYPE_INPUT,
5148                              VGPU10_OPERAND_INDEX_1D, index, 1,
5149                              name,
5150                              VGPU10_OPERAND_4_COMPONENT,
5151                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5152                              mask,
5153                              interpolationMode, true,
5154                              map_tgsi_semantic_to_sgn_name(semantic_name));
5155    }
5156 }
5157 
5158 
5159 /**
5160  * Emit input declarations for vertex shader.
5161  */
5162 static void
5163 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
5164 {
5165    unsigned i;
5166 
5167    for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
5168       unsigned usage_mask = emit->info.input_usage_mask[i];
5169       unsigned index = i;
5170 
5171       if (usage_mask == 0)
5172          continue;  /* register is not actually used */
5173 
5174       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5175                              VGPU10_OPERAND_TYPE_INPUT,
5176                              VGPU10_OPERAND_INDEX_1D, index, 1,
5177                              VGPU10_NAME_UNDEFINED,
5178                              VGPU10_OPERAND_4_COMPONENT,
5179                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5180                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5181                              VGPU10_INTERPOLATION_UNDEFINED, true,
5182                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5183    }
5184 }
5185 
5186 
5187 /**
5188  * Emit input declarations for geometry shader.
5189  */
5190 static void
5191 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
5192 {
5193    unsigned i;
5194 
5195    for (i = 0; i < emit->info.num_inputs; i++) {
5196       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5197       unsigned usage_mask = emit->info.input_usage_mask[i];
5198       unsigned index = emit->linkage.input_map[i];
5199       unsigned opcodeType, operandType;
5200       unsigned numComp, selMode;
5201       unsigned name;
5202       unsigned dim;
5203 
5204       if (usage_mask == 0)
5205          continue;  /* register is not actually used */
5206 
5207       opcodeType = VGPU10_OPCODE_DCL_INPUT;
5208       operandType = VGPU10_OPERAND_TYPE_INPUT;
5209       numComp = VGPU10_OPERAND_4_COMPONENT;
5210       selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
5211       name = VGPU10_NAME_UNDEFINED;
5212 
5213       /* all geometry shader inputs are two dimensional except
5214        * gl_PrimitiveID
5215        */
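      /* i.e. GS inputs are indexed by both vertex and attribute, hence the
       * VGPU10_OPERAND_INDEX_2D declaration with emit->gs.input_size
       * elements below.
       */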
5216       dim = VGPU10_OPERAND_INDEX_2D;
5217 
5218       if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5219          /* Primitive ID */
5220          operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
5221          dim = VGPU10_OPERAND_INDEX_0D;
5222          numComp = VGPU10_OPERAND_0_COMPONENT;
5223          selMode = 0;
5224 
5225          /* Also save the register index so we can check for the
5226           * primitive id when emitting a src register.  We need to modify
5227           * the operand type and index dimension when emitting the
5228           * primitive id src reg.
5229          emit->gs.prim_id_index = i;
5230       }
5231       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
5232          /* vertex position input */
5233          opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
5234          name = VGPU10_NAME_POSITION;
5235       }
5236 
5237       emit_input_declaration(emit, opcodeType, operandType,
5238                              dim, index,
5239                              emit->gs.input_size,
5240                              name,
5241                              numComp, selMode,
5242                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5243                              VGPU10_INTERPOLATION_UNDEFINED, true,
5244                              map_tgsi_semantic_to_sgn_name(semantic_name));
5245    }
5246 }
5247 
5248 
5249 /**
5250  * Emit input declarations for tessellation control shader.
5251  */
5252 static void
5253 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
5254 {
5255    unsigned i;
5256    unsigned size = emit->key.tcs.vertices_per_patch;
5257    bool addSignature = true;
5258 
5259    if (!emit->tcs.control_point_phase)
5260       addSignature = emit->tcs.fork_phase_add_signature;
5261 
5262    for (i = 0; i < emit->info.num_inputs; i++) {
5263       unsigned usage_mask = emit->info.input_usage_mask[i];
5264       unsigned index = emit->linkage.input_map[i];
5265       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5266       VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
5267       VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
5268       SVGA3dDXSignatureSemanticName sgn_name =
5269          map_tgsi_semantic_to_sgn_name(semantic_name);
5270 
5271       if (semantic_name == TGSI_SEMANTIC_POSITION ||
5272           index == emit->linkage.position_index) {
5273          /* save the input control point index for later use */
5274          emit->tcs.control_point_input_index = i;
5275       }
5276       else if (usage_mask == 0) {
5277          continue;  /* register is not actually used */
5278       }
5279       else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
5280          /* The shadow copy is being used here, so set the signature name
5281           * to UNDEFINED.
5282           */
5283          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5284       }
5285 
5286       /* input control points in the patch constant phase are emitted in the
5287        * vicp register rather than the v register.
5288        */
5289       if (!emit->tcs.control_point_phase) {
5290          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5291       }
5292 
5293       /* Tessellation control shader inputs are two dimensional.
5294        * The array size is determined by the patch vertex count.
5295        */
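      /* E.g. with vertices_per_patch == 3, each input is declared as a
       * two-dimensional array spanning 3 control points.
       */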
5296       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5297                              operandType,
5298                              VGPU10_OPERAND_INDEX_2D,
5299                              index, size, name,
5300                              VGPU10_OPERAND_4_COMPONENT,
5301                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5302                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5303                              VGPU10_INTERPOLATION_UNDEFINED,
5304                              addSignature, sgn_name);
5305    }
5306 
5307    if (emit->tcs.control_point_phase) {
5308 
5309       /* Also add an address register for the indirection to the
5310        * input control points
5311        */
5312       emit->tcs.control_point_addr_index = emit->num_address_regs++;
5313    }
5314 }
5315 
5316 
5317 static void
5318 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
5319 {
5320 
5321    /* In tcs, tess factors are emitted as extra outputs.
5322     * The starting register index for the tess factors is captured
5323     * in the compile key.
5324     */
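   /* Here the evaluation shader re-declares those tess factors as patch
    * constant inputs starting at that index (see the DCL_INPUT_SIV
    * declarations below).
    */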
5325    unsigned inputIndex = emit->key.tes.tessfactor_index;
5326 
5327    if (emit->tes.prim_mode == MESA_PRIM_QUADS) {
5328       if (emit->key.tes.need_tessouter) {
5329          emit->tes.outer.in_index = inputIndex;
5330          for (int i = 0; i < 4; i++) {
5331             emit_tesslevel_declaration(emit, inputIndex++,
5332                VGPU10_OPCODE_DCL_INPUT_SIV,
5333                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5334                VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
5335                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
5336          }
5337       }
5338 
5339       if (emit->key.tes.need_tessinner) {
5340          emit->tes.inner.in_index = inputIndex;
5341          emit_tesslevel_declaration(emit, inputIndex++,
5342             VGPU10_OPCODE_DCL_INPUT_SIV,
5343             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5344             VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
5345             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
5346 
5347          emit_tesslevel_declaration(emit, inputIndex++,
5348             VGPU10_OPCODE_DCL_INPUT_SIV,
5349             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5350             VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
5351             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
5352       }
5353    }
5354    else if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
5355       if (emit->key.tes.need_tessouter) {
5356          emit->tes.outer.in_index = inputIndex;
5357          for (int i = 0; i < 3; i++) {
5358             emit_tesslevel_declaration(emit, inputIndex++,
5359                VGPU10_OPCODE_DCL_INPUT_SIV,
5360                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5361                VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5362                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5363          }
5364       }
5365 
5366       if (emit->key.tes.need_tessinner) {
5367          emit->tes.inner.in_index = inputIndex;
5368          emit_tesslevel_declaration(emit, inputIndex++,
5369             VGPU10_OPCODE_DCL_INPUT_SIV,
5370             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5371             VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5372             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5373       }
5374    }
5375    else if (emit->tes.prim_mode == MESA_PRIM_LINES) {
5376       if (emit->key.tes.need_tessouter) {
5377          emit->tes.outer.in_index = inputIndex;
5378          emit_tesslevel_declaration(emit, inputIndex++,
5379             VGPU10_OPCODE_DCL_INPUT_SIV,
5380             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5381             VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5382             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5383 
5384          emit_tesslevel_declaration(emit, inputIndex++,
5385             VGPU10_OPCODE_DCL_INPUT_SIV,
5386             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5387             VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5388             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5389       }
5390    }
5391 }
5392 
5393 
5394 /**
5395  * Emit input declarations for tessellation evaluation shader.
5396  */
5397 static void
5398 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5399 {
5400    unsigned i;
5401 
5402    for (i = 0; i < emit->info.num_inputs; i++) {
5403       unsigned usage_mask = emit->info.input_usage_mask[i];
5404       unsigned index = emit->linkage.input_map[i];
5405       unsigned size;
5406       const enum tgsi_semantic semantic_name =
5407          emit->info.input_semantic_name[i];
5408       SVGA3dDXSignatureSemanticName sgn_name;
5409       VGPU10_OPERAND_TYPE operandType;
5410       VGPU10_OPERAND_INDEX_DIMENSION dim;
5411 
5412       if (usage_mask == 0)
5413          usage_mask = 1;  /* at least set usage mask to one */
5414 
5415       if (semantic_name == TGSI_SEMANTIC_PATCH) {
5416          operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5417          dim = VGPU10_OPERAND_INDEX_1D;
5418          size = 1;
5419          sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5420       }
5421       else {
5422          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5423          dim = VGPU10_OPERAND_INDEX_2D;
5424          size = emit->key.tes.vertices_per_patch;
5425          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5426       }
5427 
5428       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5429                              dim, index, size, VGPU10_NAME_UNDEFINED,
5430                              VGPU10_OPERAND_4_COMPONENT,
5431                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5432                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5433                              VGPU10_INTERPOLATION_UNDEFINED,
5434                              true, sgn_name);
5435    }
5436 
5437    emit_tessfactor_input_declarations(emit);
5438 
5439    /* DX spec requires DS input controlpoint/patch-constant signatures to match
5440     * the HS output controlpoint/patch-constant signatures exactly.
5441     * Add missing input declarations even if they are not used in the shader.
5442     */
5443    if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5444       struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5445       for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5446 
5447           /* If a tcs output does not have a corresponding input register in
5448            * tes, add one.
5449            */
5450           if (emit->linkage.prevShader.output_map[i] >
5451               emit->linkage.input_map_max) {
5452              const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5453 
5454              if (sem_name == TGSI_SEMANTIC_PATCH) {
5455                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5456                                        VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5457                                        VGPU10_OPERAND_INDEX_1D,
5458                                        i, 1, VGPU10_NAME_UNDEFINED,
5459                                        VGPU10_OPERAND_4_COMPONENT,
5460                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5461                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5462                                        VGPU10_INTERPOLATION_UNDEFINED,
5463                                        true,
5464                                        map_tgsi_semantic_to_sgn_name(sem_name));
5465 
5466              } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5467                         sem_name != TGSI_SEMANTIC_TESSOUTER) {
5468                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5469                                        VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5470                                        VGPU10_OPERAND_INDEX_2D,
5471                                        i, emit->key.tes.vertices_per_patch,
5472                                        VGPU10_NAME_UNDEFINED,
5473                                        VGPU10_OPERAND_4_COMPONENT,
5474                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5475                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5476                                        VGPU10_INTERPOLATION_UNDEFINED,
5477                                        true,
5478                                        map_tgsi_semantic_to_sgn_name(sem_name));
5479              }
5480              /* tessellation factors are taken care of in
5481               * emit_tessfactor_input_declarations().
5482               */
5483          }
5484       }
5485    }
5486 }
5487 
5488 
5489 /**
5490  * Emit all input declarations.
5491  */
5492 static bool
5493 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5494 {
5495    emit->index_range.required =
5496       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
5497 
5498    switch (emit->unit) {
5499    case PIPE_SHADER_FRAGMENT:
5500       emit_fs_input_declarations(emit);
5501       break;
5502    case PIPE_SHADER_GEOMETRY:
5503       emit_gs_input_declarations(emit);
5504       break;
5505    case PIPE_SHADER_VERTEX:
5506       emit_vs_input_declarations(emit);
5507       break;
5508    case PIPE_SHADER_TESS_CTRL:
5509       emit_tcs_input_declarations(emit);
5510       break;
5511    case PIPE_SHADER_TESS_EVAL:
5512       emit_tes_input_declarations(emit);
5513       break;
5514    case PIPE_SHADER_COMPUTE:
5515       //XXX emit_cs_input_declarations(emit);
5516       break;
5517    default:
5518       assert(0);
5519    }
5520 
5521    if (emit->index_range.start_index != INVALID_INDEX) {
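   /* If any of the input declarations above used indirect addressing over
    * a register range, emit the corresponding index range declaration now.
    */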
5522       emit_index_range_declaration(emit);
5523    }
5524    emit->index_range.required = false;
5525    return true;
5526 }
5527 
5528 
5529 /**
5530  * Emit all output declarations.
5531  */
5532 static bool
5533 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5534 {
5535    emit->index_range.required =
5536       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
5537 
5538    switch (emit->unit) {
5539    case PIPE_SHADER_FRAGMENT:
5540       emit_fs_output_declarations(emit);
5541       break;
5542    case PIPE_SHADER_GEOMETRY:
5543       emit_gs_output_declarations(emit);
5544       break;
5545    case PIPE_SHADER_VERTEX:
5546       emit_vs_output_declarations(emit);
5547       break;
5548    case PIPE_SHADER_TESS_CTRL:
5549       emit_tcs_output_declarations(emit);
5550       break;
5551    case PIPE_SHADER_TESS_EVAL:
5552       emit_tes_output_declarations(emit);
5553       break;
5554    case PIPE_SHADER_COMPUTE:
5555       //XXX emit_cs_output_declarations(emit);
5556       break;
5557    default:
5558       assert(0);
5559    }
5560 
5561    if (emit->vposition.so_index != INVALID_INDEX &&
5562        emit->vposition.out_index != INVALID_INDEX) {
5563 
5564       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5565 
5566       /* Emit the declaration for the non-adjusted vertex position
5567        * for stream output purposes.
5568        */
5569       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5570                               emit->vposition.so_index,
5571                               VGPU10_NAME_UNDEFINED,
5572                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5573                               true,
5574                               SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5575    }
5576 
5577    if (emit->clip_dist_so_index != INVALID_INDEX &&
5578        emit->clip_dist_out_index != INVALID_INDEX) {
5579 
5580       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5581 
5582       /* Emit the declaration for the clip distance shadow copy which
5583        * will be used for stream output and for the clip distance
5584        * varying variable. Note that all clip distances
5585        * will be written regardless of the enabled clipping planes.
5586        */
5587       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5588                               emit->clip_dist_so_index,
5589                               VGPU10_NAME_UNDEFINED,
5590                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5591                               true,
5592                               SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5593 
5594       if (emit->info.num_written_clipdistance > 4) {
5595          /* for the second clip distance register; each register handles 4 planes */
5596          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5597                                  emit->clip_dist_so_index + 1,
5598                                  VGPU10_NAME_UNDEFINED,
5599                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5600                                  true,
5601                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5602       }
5603    }
5604 
5605    if (emit->index_range.start_index != INVALID_INDEX) {
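   /* As with the inputs, emit an index range declaration if any of the
    * output declarations used indirect addressing.
    */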
5606       emit_index_range_declaration(emit);
5607    }
5608    emit->index_range.required = false;
5609    return true;
5610 }
5611 
5612 
5613 /**
5614  * A helper function to create a temporary indexable array
5615  * and initialize the corresponding entries in the temp_map array.
5616  */
5617 static void
5618 create_temp_array(struct svga_shader_emitter_v10 *emit,
5619                   unsigned arrayID, unsigned first, unsigned count,
5620                   unsigned startIndex)
5621 {
5622    unsigned i, tempIndex = startIndex;
5623 
5624    emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5625    assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5626    emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5627 
5628    emit->temp_arrays[arrayID].start = first;
5629    emit->temp_arrays[arrayID].size = count;
5630 
5631    /* Fill in the temp_map entries for this temp array */
5632    for (i = 0; i < count; i++, tempIndex++) {
5633       emit->temp_map[tempIndex].arrayId = arrayID;
5634       emit->temp_map[tempIndex].index = i;
5635    }
5636 }
5637 
5638 
5639 /**
5640  * Emit the declaration for the temporary registers.
5641  */
5642 static bool
5643 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5644 {
5645    unsigned total_temps, reg, i;
5646 
5647    total_temps = emit->num_shader_temps;
5648 
5649    /* If there is indirect access to non-indexable temps in the shader,
5650     * convert those temps to indexable temps. This works around a bug
5651     * in the GLSL->TGSI translator exposed in piglit test
5652     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5653     * Internal temps added by the driver remain as non-indexable temps.
5654     */
5655    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5656        emit->num_temp_arrays == 0) {
5657       create_temp_array(emit, 1, 0, total_temps, 0);
5658    }
5659 
5660    /* Allocate extra temps for specially-implemented instructions,
5661     * such as LIT.
5662     */
5663    total_temps += MAX_INTERNAL_TEMPS;
5664 
5665    /* Allocate extra temps for clip distance or clip vertex.
5666     */
5667    if (emit->clip_mode == CLIP_DISTANCE) {
5668       /* We need to write the clip distance to a temporary register
5669        * first. Then it will be copied to the shadow copy used for
5670        * the clip distance varying variable and for stream output.
5671        * It will also be copied to the actual CLIPDIST register
5672        * according to the enabled clip planes.
5673        */
5674       emit->clip_dist_tmp_index = total_temps++;
5675       if (emit->info.num_written_clipdistance > 4)
5676          total_temps++; /* second clip register */
5677    }
5678    else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5679       /* If the current shader is in the last vertex processing stage,
5680        * we need to convert the TGSI CLIPVERTEX output to one or more
5681        * clip distances.  Allocate a temp reg for the clipvertex here.
5682        */
5683       assert(emit->info.writes_clipvertex > 0);
5684       emit->clip_vertex_tmp_index = total_temps;
5685       total_temps++;
5686    }
5687 
5688    if (emit->info.uses_vertexid) {
5689       assert(emit->unit == PIPE_SHADER_VERTEX);
5690       emit->vs.vertex_id_tmp_index = total_temps++;
5691    }
5692 
5693    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5694       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5695           emit->key.clip_plane_enable ||
5696           emit->vposition.so_index != INVALID_INDEX) {
5697          emit->vposition.tmp_index = total_temps;
5698          total_temps += 1;
5699       }
5700 
5701       if (emit->vposition.need_prescale) {
5702          emit->vposition.prescale_scale_index = total_temps++;
5703          emit->vposition.prescale_trans_index = total_temps++;
5704       }
5705 
5706       if (emit->unit == PIPE_SHADER_VERTEX) {
5707          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5708                                  emit->key.vs.adjust_attrib_itof |
5709                                  emit->key.vs.adjust_attrib_utof |
5710                                  emit->key.vs.attrib_is_bgra |
5711                                  emit->key.vs.attrib_puint_to_snorm |
5712                                  emit->key.vs.attrib_puint_to_uscaled |
5713                                  emit->key.vs.attrib_puint_to_sscaled);
5714          while (attrib_mask) {
5715             unsigned index = u_bit_scan(&attrib_mask);
5716             emit->vs.adjusted_input[index] = total_temps++;
5717          }
5718       }
5719       else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5720          if (emit->key.gs.writes_viewport_index)
5721             emit->gs.viewport_index_tmp_index = total_temps++;
5722       }
5723    }
5724    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5725       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5726           emit->key.fs.write_color0_to_n_cbufs > 1) {
5727          /* Allocate a temp to hold the output color */
5728          emit->fs.color_tmp_index = total_temps;
5729          total_temps += 1;
5730       }
5731 
5732       if (emit->fs.face_input_index != INVALID_INDEX) {
5733          /* Allocate a temp for the +/-1 face register */
5734          emit->fs.face_tmp_index = total_temps;
5735          total_temps += 1;
5736       }
5737 
5738       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5739          /* Allocate a temp for modified fragment position register */
5740          emit->fs.fragcoord_tmp_index = total_temps;
5741          total_temps += 1;
5742       }
5743 
5744       if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5745          /* Allocate a temp for the sample position */
5746          emit->fs.sample_pos_tmp_index = total_temps++;
5747       }
5748    }
5749    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5750       if (emit->vposition.need_prescale) {
5751          emit->vposition.tmp_index = total_temps++;
5752          emit->vposition.prescale_scale_index = total_temps++;
5753          emit->vposition.prescale_trans_index = total_temps++;
5754       }
5755 
5756       if (emit->tes.inner.tgsi_index) {
5757          emit->tes.inner.temp_index = total_temps;
5758          total_temps += 1;
5759       }
5760 
5761       if (emit->tes.outer.tgsi_index) {
5762          emit->tes.outer.temp_index = total_temps;
5763          total_temps += 1;
5764       }
5765    }
5766    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5767       if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5768          if (!emit->tcs.control_point_phase) {
5769             emit->tcs.inner.temp_index = total_temps;
5770             total_temps += 1;
5771          }
5772       }
5773       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5774          if (!emit->tcs.control_point_phase) {
5775             emit->tcs.outer.temp_index = total_temps;
5776             total_temps += 1;
5777          }
5778       }
5779 
5780       if (emit->tcs.control_point_phase &&
5781           emit->info.reads_pervertex_outputs) {
5782          emit->tcs.control_point_tmp_index = total_temps;
5783          total_temps += emit->tcs.control_point_out_count;
5784       }
5785       else if (!emit->tcs.control_point_phase &&
5786                emit->info.reads_perpatch_outputs) {
5787 
5788          /* If there is indirect access to the patch constant outputs
5789           * in the control point phase, then an indexable temporary array
5790           * will be created for these patch constant outputs.
5791           * Note, indirect access can only be applicable to
5792           * patch constant outputs in the control point phase.
5793           */
5794          if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5795             unsigned arrayID =
5796                emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5797             create_temp_array(emit, arrayID, 0,
5798                               emit->tcs.patch_generic_out_count, total_temps);
5799          }
5800          emit->tcs.patch_generic_tmp_index = total_temps;
5801          total_temps += emit->tcs.patch_generic_out_count;
5802       }
5803 
5804       emit->tcs.invocation_id_tmp_index = total_temps++;
5805    }
5806 
5807    if (emit->raw_bufs) {
5808       /**
5809        * Add 3 more temporaries if we need to translate constant buffers
5810        * to srv raw buffers, since each value must be loaded into a
5811        * temporary before it can be used as a source and an instruction
5812        * can have up to three source registers.
5813        */
5814       emit->raw_buf_tmp_index = total_temps;
5815       total_temps+=3;
5816    }
5817 
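   /* Allocate a temp register for each TGSI address register; indirect
    * addressing is done through these temps in the emitted VGPU10 code.
    */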
5818    for (i = 0; i < emit->num_address_regs; i++) {
5819       emit->address_reg_index[i] = total_temps++;
5820    }
5821 
5822    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5823     * temp indexes.  Basically, we compact all the non-array temp register
5824     * indexes into a consecutive series.
5825     *
5826     * Before, we may have some TGSI declarations like:
5827     *   DCL TEMP[0..1], LOCAL
5828     *   DCL TEMP[2..4], ARRAY(1), LOCAL
5829     *   DCL TEMP[5..7], ARRAY(2), LOCAL
5830     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5831     *
5832     * After, we'll have a map like this:
5833     *   temp_map[0] = { array 0, index 0 }
5834     *   temp_map[1] = { array 0, index 1 }
5835     *   temp_map[2] = { array 1, index 0 }
5836     *   temp_map[3] = { array 1, index 1 }
5837     *   temp_map[4] = { array 1, index 2 }
5838     *   temp_map[5] = { array 2, index 0 }
5839     *   temp_map[6] = { array 2, index 1 }
5840     *   temp_map[7] = { array 2, index 2 }
5841     *   temp_map[8] = { array 0, index 2 }
5842     *   temp_map[9] = { array 0, index 3 }
5843     *
5844     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5845     * temps numbered 0..3
5846     *
5847     * Any time we emit a temporary register index, we'll have to use the
5848     * temp_map[] table to convert the TGSI index to the VGPU10 index.
5849     *
5850     * Finally, we recompute the total_temps value here.
5851     */
5852    reg = 0;
5853    for (i = 0; i < total_temps; i++) {
5854       if (emit->temp_map[i].arrayId == 0) {
5855          emit->temp_map[i].index = reg++;
5856       }
5857    }
5858 
5859    if (0) {
5860       debug_printf("total_temps %u\n", total_temps);
5861       for (i = 0; i < total_temps; i++) {
5862          debug_printf("temp %u ->  array %u  index %u\n",
5863                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5864       }
5865    }
5866 
5867    total_temps = reg;
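   /* After compacting, "reg" holds the number of non-array temps; this is
    * the count declared by dcl_temps below.
    */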
5868 
5869    /* Emit declaration of ordinary temp registers */
5870    if (total_temps > 0) {
5871       VGPU10OpcodeToken0 opcode0;
5872 
5873       opcode0.value = 0;
5874       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5875 
5876       begin_emit_instruction(emit);
5877       emit_dword(emit, opcode0.value);
5878       emit_dword(emit, total_temps);
5879       end_emit_instruction(emit);
5880    }
5881 
5882    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
5883     * it's unused.
5884     */
5885    for (i = 1; i < emit->num_temp_arrays; i++) {
5886       unsigned num_temps = emit->temp_arrays[i].size;
5887 
5888       if (num_temps > 0) {
5889          VGPU10OpcodeToken0 opcode0;
5890 
5891          opcode0.value = 0;
5892          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5893 
5894          begin_emit_instruction(emit);
5895          emit_dword(emit, opcode0.value);
5896          emit_dword(emit, i); /* which array */
5897          emit_dword(emit, num_temps);
5898          emit_dword(emit, 4); /* num components */
5899          end_emit_instruction(emit);
5900 
5901          total_temps += num_temps;
5902       }
5903    }
5904 
5905    /* Check that the grand total of all regular and indexed temps is
5906     * under the limit.
5907     */
5908    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5909 
5910    return true;
5911 }
5912 
5913 
5914 static bool
5915 emit_rawbuf_declaration(struct svga_shader_emitter_v10 *emit,
5916                         unsigned index)
5917 {
5918    VGPU10OpcodeToken0 opcode1;
5919    VGPU10OperandToken0 operand1;
5920 
5921    opcode1.value = 0;
5922    opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
5923    opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
5924 
5925    operand1.value = 0;
5926    operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
5927    operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5928    operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
5929    operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5930 
5931    begin_emit_instruction(emit);
5932    emit_dword(emit, opcode1.value);
5933    emit_dword(emit, operand1.value);
5934    emit_dword(emit, index);
5935    end_emit_instruction(emit);
5936 
5937    return true;
5938 }
5939 
5940 
5941 static bool
5942 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5943 {
5944    VGPU10OpcodeToken0 opcode0;
5945    VGPU10OperandToken0 operand0;
5946    unsigned total_consts, i;
5947 
5948    opcode0.value = 0;
5949    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5950    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5951    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5952 
5953    operand0.value = 0;
5954    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5955    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5956    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5957    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5958    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5959    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5960    operand0.swizzleX = 0;
5961    operand0.swizzleY = 1;
5962    operand0.swizzleZ = 2;
5963    operand0.swizzleW = 3;
5964 
5965    /**
5966     * Emit declaration for constant buffer [0].  We also allocate
5967     * room for the extra constants here.
5968     */
5969    total_consts = emit->num_shader_consts[0];
5970 
5971    /* Now, allocate constant slots for the "extra" constants.
5972     * Note: it's critical that these extra constant locations
5973     * exactly match what's emitted by the "extra" constants code
5974     * in svga_state_constants.c
5975     */
5976 
5977    /* Vertex position scale/translation */
5978    if (emit->vposition.need_prescale) {
5979       emit->vposition.prescale_cbuf_index = total_consts;
5980       total_consts += (2 * emit->vposition.num_prescale);
5981    }
5982 
5983    if (emit->unit == PIPE_SHADER_VERTEX) {
5984       if (emit->key.vs.undo_viewport) {
5985          emit->vs.viewport_index = total_consts++;
5986       }
5987       if (emit->key.vs.need_vertex_id_bias) {
5988          emit->vs.vertex_id_bias_index = total_consts++;
5989       }
5990    }
5991 
5992    /* user-defined clip planes */
5993    if (emit->key.clip_plane_enable) {
5994       unsigned n = util_bitcount(emit->key.clip_plane_enable);
5995       assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5996              emit->unit != PIPE_SHADER_COMPUTE);
5997       for (i = 0; i < n; i++) {
5998          emit->clip_plane_const[i] = total_consts++;
5999       }
6000    }
6001 
6002    for (i = 0; i < emit->num_samplers; i++) {
6003 
6004       if (emit->key.tex[i].sampler_view) {
6005          /* Texcoord scale factors for RECT textures */
6006          if (emit->key.tex[i].unnormalized) {
6007             emit->texcoord_scale_index[i] = total_consts++;
6008          }
6009 
6010          /* Texture buffer sizes */
6011          if (emit->key.tex[i].target == PIPE_BUFFER) {
6012             emit->texture_buffer_size_index[i] = total_consts++;
6013          }
6014       }
6015    }
6016    if (emit->key.image_size_used) {
6017       emit->image_size_index = total_consts;
6018       total_consts += emit->num_images;
6019    }
6020 
6021    if (total_consts > 0) {
6022       if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
6023          debug_printf("Warning: Too many constants [%u] declared in constant"
6024                       " buffer 0. %u is the limit.\n",
6025                       total_consts,
6026                       VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
6027          total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
6028          emit->register_overflow = true;
6029       }
6030       begin_emit_instruction(emit);
6031       emit_dword(emit, opcode0.value);
6032       emit_dword(emit, operand0.value);
6033       emit_dword(emit, 0);  /* which const buffer slot */
6034       emit_dword(emit, total_consts);
6035       end_emit_instruction(emit);
6036    }
6037 
6038    /* Declare remaining constant buffers (UBOs) */
6039 
6040    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
6041       if (emit->num_shader_consts[i] > 0) {
6042          if (emit->raw_bufs & (1 << i)) {
6043             /* UBO declared as srv raw buffer */
6044             emit_rawbuf_declaration(emit, i + emit->raw_buf_srv_start_index);
6045          }
6046          else {
6047 
6048             /* UBO declared as const buffer */
6049             begin_emit_instruction(emit);
6050             emit_dword(emit, opcode0.value);
6051             emit_dword(emit, operand0.value);
6052             emit_dword(emit, i);  /* which const buffer slot */
6053             emit_dword(emit, emit->num_shader_consts[i]);
6054             end_emit_instruction(emit);
6055          }
6056       }
6057    }
6058 
6059    return true;
6060 }
6061 
6062 
6063 /**
6064  * Emit declarations for samplers.
6065  */
6066 static bool
6067 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
6068 {
6069    unsigned i;
6070 
6071    for (i = 0; i < emit->key.num_samplers; i++) {
6072 
6073       VGPU10OpcodeToken0 opcode0;
6074       VGPU10OperandToken0 operand0;
6075 
6076       opcode0.value = 0;
6077       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
6078       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
6079 
6080       operand0.value = 0;
6081       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6082       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
6083       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6084       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6085 
6086       begin_emit_instruction(emit);
6087       emit_dword(emit, opcode0.value);
6088       emit_dword(emit, operand0.value);
6089       emit_dword(emit, i);
6090       end_emit_instruction(emit);
6091    }
6092 
6093    return true;
6094 }
6095 
6096 
6097 /**
6098  * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6099  */
6100 static unsigned
6101 pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
6102                                    unsigned num_samples,
6103                                    bool is_array,
6104                                    bool is_uav)
6105 {
6106    switch (target) {
6107    case PIPE_BUFFER:
6108       return VGPU10_RESOURCE_DIMENSION_BUFFER;
6109    case PIPE_TEXTURE_1D:
6110       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6111    case PIPE_TEXTURE_2D:
6112       return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
6113          VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6114    case PIPE_TEXTURE_RECT:
6115       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6116    case PIPE_TEXTURE_3D:
6117       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6118    case PIPE_TEXTURE_CUBE:
6119       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6120    case PIPE_TEXTURE_1D_ARRAY:
6121       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6122          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6123    case PIPE_TEXTURE_2D_ARRAY:
6124       if (num_samples > 2 && is_array)
6125          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
6126       else if (is_array)
6127          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
6128       else
6129          return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6130    case PIPE_TEXTURE_CUBE_ARRAY:
6131       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6132              (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6133                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6134    default:
6135       assert(!"Unexpected resource type");
6136       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6137    }
6138 }
6139 
6140 
6141 /**
6142  * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6143  */
6144 static unsigned
6145 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
6146                                    unsigned num_samples,
6147                                    bool is_array,
6148                                    bool is_uav)
6149 {
6150    if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
6151       target = TGSI_TEXTURE_2D;
6152    }
6153    else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
6154       target = TGSI_TEXTURE_2D_ARRAY;
6155    }
6156 
6157    switch (target) {
6158    case TGSI_TEXTURE_BUFFER:
6159       return VGPU10_RESOURCE_DIMENSION_BUFFER;
6160    case TGSI_TEXTURE_1D:
6161       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6162    case TGSI_TEXTURE_2D:
6163    case TGSI_TEXTURE_RECT:
6164       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6165    case TGSI_TEXTURE_3D:
6166       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6167    case TGSI_TEXTURE_CUBE:
6168    case TGSI_TEXTURE_SHADOWCUBE:
6169       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6170                       VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6171    case TGSI_TEXTURE_SHADOW1D:
6172       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6173    case TGSI_TEXTURE_SHADOW2D:
6174    case TGSI_TEXTURE_SHADOWRECT:
6175       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6176    case TGSI_TEXTURE_1D_ARRAY:
6177    case TGSI_TEXTURE_SHADOW1D_ARRAY:
6178       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6179          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6180    case TGSI_TEXTURE_2D_ARRAY:
6181    case TGSI_TEXTURE_SHADOW2D_ARRAY:
6182       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
6183          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6184    case TGSI_TEXTURE_2D_MSAA:
6185       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6186    case TGSI_TEXTURE_2D_ARRAY_MSAA:
6187       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
6188          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6189    case TGSI_TEXTURE_CUBE_ARRAY:
6190       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6191              (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6192                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6193    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
6194       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
6195          : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6196    default:
6197       assert(!"Unexpected resource type");
6198       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6199    }
6200 }
6201 
6202 
6203 /**
6204  * Given a tgsi_return_type, return true iff it is an integer type.
6205  */
6206 static bool
6207 is_integer_type(enum tgsi_return_type type)
6208 {
6209    switch (type) {
6210       case TGSI_RETURN_TYPE_SINT:
6211       case TGSI_RETURN_TYPE_UINT:
6212          return true;
6213       case TGSI_RETURN_TYPE_FLOAT:
6214       case TGSI_RETURN_TYPE_UNORM:
6215       case TGSI_RETURN_TYPE_SNORM:
6216          return false;
6217       case TGSI_RETURN_TYPE_COUNT:
6218       default:
6219          assert(!"is_integer_type: Unknown tgsi_return_type");
6220          return false;
6221    }
6222 }
6223 
6224 
6225 /**
6226  * Emit declarations for resources.
6227  * XXX When we're sure that all TGSI shaders will be generated with
6228  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
6229  * rework this code.
6230  */
6231 static bool
6232 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
6233 {
6234    unsigned i;
6235 
6236    /* Emit resource decl for each sampler */
6237    for (i = 0; i < emit->num_samplers; i++) {
6238       if (!(emit->info.samplers_declared & (1 << i)))
6239          continue;
6240 
6241       VGPU10OpcodeToken0 opcode0;
6242       VGPU10OperandToken0 operand0;
6243       VGPU10ResourceReturnTypeToken return_type;
6244       VGPU10_RESOURCE_RETURN_TYPE rt;
6245 
6246       opcode0.value = 0;
6247       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
6248       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6249          opcode0.resourceDimension =
6250             tgsi_texture_to_resource_dimension(emit->sampler_target[i],
6251                                                emit->key.tex[i].num_samples,
6252                                                emit->key.tex[i].is_array,
6253                                                false);
6254       }
6255       else {
6256          opcode0.resourceDimension =
6257             pipe_texture_to_resource_dimension(emit->key.tex[i].target,
6258                                                emit->key.tex[i].num_samples,
6259                                                emit->key.tex[i].is_array,
6260                                                false);
6261       }
6262       opcode0.sampleCount = emit->key.tex[i].num_samples;
6263       operand0.value = 0;
6264       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6265       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
6266       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6267       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6268 
6269 #if 1
6270       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
6271       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
6272       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
6273       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
6274       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
6275       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
6276       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
6277       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6278          rt = emit->sampler_return_type[i] + 1;
6279       }
6280       else {
6281          rt = emit->key.tex[i].sampler_return_type;
6282       }
6283 #else
6284       switch (emit->sampler_return_type[i]) {
6285          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
6286          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
6287          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
6288          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
6289          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
6290          case TGSI_RETURN_TYPE_COUNT:
6291          default:
6292             rt = VGPU10_RETURN_TYPE_FLOAT;
6293             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
6294       }
6295 #endif
6296 
6297       return_type.value = 0;
6298       return_type.component0 = rt;
6299       return_type.component1 = rt;
6300       return_type.component2 = rt;
6301       return_type.component3 = rt;
6302 
6303       begin_emit_instruction(emit);
6304       emit_dword(emit, opcode0.value);
6305       emit_dword(emit, operand0.value);
6306       emit_dword(emit, i);
6307       emit_dword(emit, return_type.value);
6308       end_emit_instruction(emit);
6309    }
6310 
6311    return true;
6312 }
6313 
6314 
6315 /**
6316  * Emit instructions to declare UAVs for the shader images.
6317  */
6318 static void
6319 emit_image_declarations(struct svga_shader_emitter_v10 *emit)
6320 {
6321    unsigned i = 0;
6322    unsigned unit = 0;
6323    unsigned uav_mask = 0;
6324 
6325    /* Emit uav decl for each image */
6326    for (i = 0; i < emit->num_images; i++, unit++) {
6327 
6328       /* Find the unit index of the next declared image.
6329        */
6330       while (!(emit->image_mask & (1 << unit))) {
6331          unit++;
6332       }
6333 
6334       VGPU10OpcodeToken0 opcode0;
6335       VGPU10OperandToken0 operand0;
6336       VGPU10ResourceReturnTypeToken return_type;
6337 
6338       /* If the corresponding uav for the image is already declared,
6339        * skip this image declaration.
6340        */
6341       if (uav_mask & (1 << emit->key.images[unit].uav_index))
6342          continue;
6343 
6344       opcode0.value = 0;
6345       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
6346       opcode0.uavResourceDimension =
6347          tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
6348                                             0, emit->key.images[unit].is_array,
6349                                             true);
6350 
6351       if (emit->key.images[unit].is_single_layer &&
6352           emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
6353          opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6354       }
6355 
6356       /* Declare the uav as globally coherent if the shader includes memory
6357        * barrier instructions.
6358        */
6359       opcode0.globallyCoherent =
6360          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6361 
6362       operand0.value = 0;
6363       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6364       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6365       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6366       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6367 
6368       return_type.value = 0;
6369       return_type.component0 =
6370          return_type.component1 =
6371          return_type.component2 =
6372          return_type.component3 = emit->key.images[unit].return_type + 1;
6373 
6374       assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
6375       begin_emit_instruction(emit);
6376       emit_dword(emit, opcode0.value);
6377       emit_dword(emit, operand0.value);
6378       emit_dword(emit, emit->key.images[unit].uav_index);
6379       emit_dword(emit, return_type.value);
6380       end_emit_instruction(emit);
6381 
6382       /* Mark the uav as already declared */
6383       uav_mask |= 1 << emit->key.images[unit].uav_index;
6384    }
6385 
6386    emit->uav_declared |= uav_mask;
6387 }
6388 
6389 
6390 /**
6391  * Emit instructions to declare UAVs for the shader buffers.
6392  */
6393 static void
6394 emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
6395 {
6396    unsigned i;
6397    unsigned uav_mask = 0;
6398 
6399    /* Emit uav decl for each shader buffer */
6400    for (i = 0; i < emit->num_shader_bufs; i++) {
6401       VGPU10OpcodeToken0 opcode0;
6402       VGPU10OperandToken0 operand0;
6403 
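      /* Shader buffers flagged in raw_shaderbufs are declared as raw SRV
       * buffers instead of UAVs.
       */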
6404       if (emit->raw_shaderbufs & (1 << i)) {
6405          emit_rawbuf_declaration(emit, i + emit->raw_shaderbuf_srv_start_index);
6406          continue;
6407       }
6408 
6409       /* If the corresponding uav for the shader buf is already declared,
6410        * skip this shader buffer declaration.
6411        */
6412       if (uav_mask & (1 << emit->key.shader_buf_uav_index[i]))
6413          continue;
6414 
6415       opcode0.value = 0;
6416       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6417 
6418       /* Declare the uav as globally coherent if the shader includes memory
6419        * barrier instructions.
6420        */
6421       opcode0.globallyCoherent =
6422          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6423 
6424       operand0.value = 0;
6425       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6426       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6427       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6428       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6429 
6430       assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
6431       begin_emit_instruction(emit);
6432       emit_dword(emit, opcode0.value);
6433       emit_dword(emit, operand0.value);
6434       emit_dword(emit, emit->key.shader_buf_uav_index[i]);
6435       end_emit_instruction(emit);
6436 
6437       /* Mark the uav as already declared */
6438       uav_mask |= 1 << emit->key.shader_buf_uav_index[i];
6439    }
6440 
6441    emit->uav_declared |= uav_mask;
6442 }
6443 
6444 
6445 /**
6446  * Emit the instruction to declare thread group shared memory (TGSM).
6447  */
6448 static void
6449 emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
6450 {
6451    if (emit->cs.shared_memory_declared) {
6452       VGPU10OpcodeToken0 opcode0;
6453       VGPU10OperandToken0 operand0;
6454 
6455       opcode0.value = 0;
6456       opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
6457 
6458       /* Declare the shared memory as globally coherent if the shader
6459        * includes memory barrier instructions.
6460        */
6461       opcode0.globallyCoherent =
6462          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6463 
6464       operand0.value = 0;
6465       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6466       operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
6467       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6468       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6469 
6470       begin_emit_instruction(emit);
6471       emit_dword(emit, opcode0.value);
6472       emit_dword(emit, operand0.value);
6473 
6474       /* Current state tracker only declares one shared memory for GLSL.
6475        * Use index 0 for this shared memory.
6476        */
6477       emit_dword(emit, 0);
6478       emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
6479       end_emit_instruction(emit);
6480    }
6481 }
6482 
6483 
6484 /**
6485  * Emit instructions to declare UAVs for the atomic buffers.
6486  */
6487 static void
6488 emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
6489 {
6490    unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
6491    unsigned uav_mask = 0;
6492 
6493    /* Emit uav decl for each atomic buffer */
6494    while (atomic_bufs_mask) {
6495       unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
6496       unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
6497 
6498       /* If the corresponding uav for the atomic buffer is already declared,
6499        * skip this atomic buffer declaration.
6500        */
6501       if (uav_mask & (1 << uav_index))
6502          continue;
6503 
6504       VGPU10OpcodeToken0 opcode0;
6505       VGPU10OperandToken0 operand0;
6506 
6507       assert(uav_index != SVGA3D_INVALID_ID);
6508 
6509       opcode0.value = 0;
6510       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6511       opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
6512 
6513       /* Declare the uav as globally coherent if the shader includes memory
6514        * barrier instructions.
6515        */
6516       opcode0.globallyCoherent =
6517          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6518       opcode0.uavHasCounter = 1;
6519 
6520       operand0.value = 0;
6521       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6522       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6523       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6524       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6525 
6526       begin_emit_instruction(emit);
6527       emit_dword(emit, opcode0.value);
6528       emit_dword(emit, operand0.value);
6529       emit_dword(emit, uav_index);
6530       end_emit_instruction(emit);
6531 
6532       /* Mark the uav as already declared */
6533       uav_mask |= 1 << uav_index;
6534    }
6535 
6536    emit->uav_declared |= uav_mask;
6537 
6538    /* Allocate immediates to be used for index to the atomic buffers */
6539    unsigned j = 0;
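   /* Each alloc_immediate_int4() call packs four consecutive integer
    * indices into one vec4 immediate slot.
    */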
6540    for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
6541       alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6542    }
6543 
6544    /* Allocate immediates for the atomic counter index */
6545    for (; j <= emit->max_atomic_counter_index; j+=4) {
6546       alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6547    }
6548 }
6549 
6550 
6551 /**
6552  * Emit instruction with n=1, 2 or 3 source registers.
6553  */
6554 static void
6555 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
6556                      unsigned opcode,
6557                      const struct tgsi_full_dst_register *dst,
6558                      const struct tgsi_full_src_register *src1,
6559                      const struct tgsi_full_src_register *src2,
6560                      const struct tgsi_full_src_register *src3,
6561                      bool saturate, bool precise)
6562 {
6563    begin_emit_instruction(emit);
6564    emit_opcode_precise(emit, opcode, saturate, precise);
6565    emit_dst_register(emit, dst);
6566    emit_src_register(emit, src1);
6567    if (src2) {
6568       emit_src_register(emit, src2);
6569    }
6570    if (src3) {
6571       emit_src_register(emit, src3);
6572    }
6573    end_emit_instruction(emit);
6574 }
6575 
6576 static void
6577 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
6578                      unsigned opcode,
6579                      const struct tgsi_full_dst_register *dst,
6580                      const struct tgsi_full_src_register *src)
6581 {
6582    emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, false, false);
6583 }
6584 
6585 static void
6586 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
6587                      VGPU10_OPCODE_TYPE opcode,
6588                      const struct tgsi_full_dst_register *dst,
6589                      const struct tgsi_full_src_register *src1,
6590                      const struct tgsi_full_src_register *src2)
6591 {
6592    emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, false, false);
6593 }
6594 
6595 static void
6596 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
6597                      VGPU10_OPCODE_TYPE opcode,
6598                      const struct tgsi_full_dst_register *dst,
6599                      const struct tgsi_full_src_register *src1,
6600                      const struct tgsi_full_src_register *src2,
6601                      const struct tgsi_full_src_register *src3)
6602 {
6603    emit_instruction_opn(emit, opcode, dst, src1, src2, src3, false, false);
6604 }
6605 
6606 static void
6607 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
6608                      VGPU10_OPCODE_TYPE opcode)
6609 {
6610    begin_emit_instruction(emit);
6611    emit_opcode(emit, opcode, false);
6612    end_emit_instruction(emit);
6613 }
6614 
6615 /**
6616  * Tessellation inner/outer levels need to be stored into their
6617  * appropriate registers depending on prim_mode.
6618  */
6619 static void
6620 store_tesslevels(struct svga_shader_emitter_v10 *emit)
6621 {
6622    int i;
6623 
6624    /* Tessellation levels are required inputs/outputs of the hull shader.
6625     * Emit the inner/outer tessellation levels, either from the values
6626     * provided by the tcs or from a fallback default value of 1.0.
6627     */
6628    if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
6629       struct tgsi_full_src_register temp_src;
6630 
6631       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6632          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6633       else
6634          temp_src = make_immediate_reg_float(emit, 1.0f);
6635 
6636       for (i = 0; i < 2; i++) {
6637          struct tgsi_full_src_register src =
6638             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6639          struct tgsi_full_dst_register dst =
6640             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
6641          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6642          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6643       }
6644 
6645       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6646          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6647       else
6648          temp_src = make_immediate_reg_float(emit, 1.0f);
6649 
6650       for (i = 0; i < 4; i++) {
6651          struct tgsi_full_src_register src =
6652             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6653          struct tgsi_full_dst_register dst =
6654             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6655          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6656          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6657       }
6658    }
6659    else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
6660       struct tgsi_full_src_register temp_src;
6661 
6662       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6663          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6664       else
6665          temp_src = make_immediate_reg_float(emit, 1.0f);
6666 
6667       struct tgsi_full_src_register src =
6668          scalar_src(&temp_src, TGSI_SWIZZLE_X);
6669       struct tgsi_full_dst_register dst =
6670          make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6671       dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6672       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6673 
6674       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6675          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6676       else
6677          temp_src = make_immediate_reg_float(emit, 1.0f);
6678 
6679       for (i = 0; i < 3; i++) {
6680          struct tgsi_full_src_register src =
6681             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6682          struct tgsi_full_dst_register dst =
6683             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6684          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6685          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6686       }
6687    }
6688    else if (emit->key.tcs.prim_mode ==  MESA_PRIM_LINES) {
6689       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6690          struct tgsi_full_src_register temp_src =
6691             make_src_temp_reg(emit->tcs.outer.temp_index);
6692          for (i = 0; i < 2; i++) {
6693             struct tgsi_full_src_register src =
6694                scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6695             struct tgsi_full_dst_register dst =
6696                make_dst_reg(TGSI_FILE_OUTPUT,
6697                             emit->tcs.outer.out_index + i);
6698             dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6699             emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6700          }
6701       }
6702    }
6703    else {
6704       debug_printf("Unsupported primitive type");
6705    }
6706 }
6707 
6708 
6709 /**
6710  * Emit the actual clip distance instructions to be used for clipping
6711  * by copying the clip distance from the temporary registers to the
6712  * CLIPDIST registers written with the enabled planes mask.
6713  * Also copy the clip distance from the temporary to the clip distance
6714  * shadow copy register which will be referenced by the next shader stage.
6715  */
6716 static void
6717 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6718 {
6719    struct tgsi_full_src_register tmp_clip_dist_src;
6720    struct tgsi_full_dst_register clip_dist_dst;
6721 
6722    unsigned i;
6723    unsigned clip_plane_enable = emit->key.clip_plane_enable;
6724    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6725    int num_written_clipdist = emit->info.num_written_clipdistance;
6726 
6727    assert(emit->clip_dist_out_index != INVALID_INDEX);
6728    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6729 
6730    /**
6731     * Temporarily reset the temporary clip dist register index so
6732     * that the copy to the real clip dist register will not
6733     * attempt to copy to the temporary register again.
6734     */
6735    emit->clip_dist_tmp_index = INVALID_INDEX;
6736 
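   /* Each clip distance register holds up to four distances, so at most
    * two registers need to be copied.
    */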
6737    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6738 
6739       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6740 
6741       /**
6742        * Copy to the shadow copy for use by the varying variable and
6743        * stream output. All clip distances
6744        * will be written regardless of the enabled clipping planes.
6745        */
6746       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6747                                    emit->clip_dist_so_index + i);
6748 
6749       /* MOV clip_dist_so, tmp_clip_dist */
6750       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6751                            &tmp_clip_dist_src);
6752 
6753       /**
6754        * Copy the clip distances for the enabled clipping planes
6755        * to the CLIPDIST registers for clipping.
6756        */
6757       if (clip_plane_enable & 0xf) {
6758          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6759                                       emit->clip_dist_out_index + i);
6760          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6761 
6762          /* MOV CLIPDIST, tmp_clip_dist */
6763          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6764                               &tmp_clip_dist_src);
6765       }
6766       /* four clip planes per clip register */
6767       clip_plane_enable >>= 4;
6768    }
6769    /**
6770     * Set the temporary clip dist register index back to the
6771     * temporary index for the next vertex.
6772     */
6773    emit->clip_dist_tmp_index = clip_dist_tmp_index;
6774 }
6775 
6776 /* Declare clip distance output registers for user-defined clip planes
6777  * or the TGSI_CLIPVERTEX output.
6778  */
6779 static void
6780 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6781 {
6782    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6783    unsigned index = emit->num_outputs;
6784    unsigned plane_mask;
6785 
6786    assert(emit->unit != PIPE_SHADER_FRAGMENT);
6787    assert(num_clip_planes <= 8);
6788 
6789    if (emit->clip_mode != CLIP_LEGACY &&
6790        emit->clip_mode != CLIP_VERTEX) {
6791       return;
6792    }
6793 
6794    if (num_clip_planes == 0)
6795       return;
6796 
6797    /* Convert clip vertex to clip distances only in the last vertex stage */
6798    if (!emit->key.last_vertex_stage)
6799       return;
6800 
6801    /* Declare one or two clip output registers.  The number of components
6802     * in the mask reflects the number of clip planes.  For example, if 5
6803     * clip planes are needed, we'll declare outputs similar to:
6804     * dcl_output_siv o2.xyzw, clip_distance
6805     * dcl_output_siv o3.x, clip_distance
6806     */
6807    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6808 
6809    plane_mask = (1 << num_clip_planes) - 1;
6810    if (plane_mask & 0xf) {
6811       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6812       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6813                               VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6814                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6815       emit->num_outputs++;
6816    }
6817    if (plane_mask & 0xf0) {
6818       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6819       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6820                               VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6821                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6822       emit->num_outputs++;
6823    }
6824 }
6825 
6826 
6827 /**
6828  * Emit the instructions for writing to the clip distance registers
6829  * to handle legacy/automatic clip planes.
6830  * For each clip plane, the distance is the dot product of the vertex
6831  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6832  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6833  * output registers already declared.
6834  */
6835 static void
6836 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6837                              unsigned vpos_tmp_index)
6838 {
6839    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6840 
6841    assert(emit->clip_mode == CLIP_LEGACY);
6842    assert(num_clip_planes <= 8);
6843 
6844    assert(emit->unit == PIPE_SHADER_VERTEX ||
6845           emit->unit == PIPE_SHADER_GEOMETRY ||
6846           emit->unit == PIPE_SHADER_TESS_EVAL);
6847 
6848    for (i = 0; i < num_clip_planes; i++) {
6849       struct tgsi_full_dst_register dst;
6850       struct tgsi_full_src_register plane_src, vpos_src;
6851       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6852       unsigned comp = i % 4;
6853       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6854 
6855       /* create dst, src regs */
6856       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6857       dst = writemask_dst(&dst, writemask);
6858 
6859       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6860       vpos_src = make_src_temp_reg(vpos_tmp_index);
6861 
6862       /* DP4 clip_dist, plane, vpos */
6863       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6864                            &plane_src, &vpos_src);
6865    }
6866 }
6867 
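/* Illustrative worked example (not part of the original source): for clip
 * plane i with coefficients (a, b, c, d) and vertex position (x, y, z, w),
 * the DP4 above computes clip_dist[i] = a*x + b*y + c*z + d*w; the vertex
 * lies outside the plane (and is clipped) when that value is negative.
 */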
6868 
6869 /**
6870  * Emit the instructions for computing the clip distance results from
6871  * the clip vertex temporary.
6872  * For each clip plane, the distance is the dot product of the clip vertex
6873  * position (found in a temp reg) and the clip plane coefficients.
6874  */
6875 static void
6876 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6877 {
6878    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6879    unsigned i;
6880    struct tgsi_full_dst_register dst;
6881    struct tgsi_full_src_register clipvert_src;
6882    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6883 
6884    assert(emit->unit == PIPE_SHADER_VERTEX ||
6885           emit->unit == PIPE_SHADER_GEOMETRY ||
6886           emit->unit == PIPE_SHADER_TESS_EVAL);
6887 
6888    assert(emit->clip_mode == CLIP_VERTEX);
6889 
6890    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6891 
6892    for (i = 0; i < num_clip; i++) {
6893       struct tgsi_full_src_register plane_src;
6894       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6895       unsigned comp = i % 4;
6896       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6897 
6898       /* create dst, src regs */
6899       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6900       dst = writemask_dst(&dst, writemask);
6901 
6902       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6903 
6904       /* DP4 clip_dist, plane, vpos */
6905       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6906                            &plane_src, &clipvert_src);
6907    }
6908 
6909    /* copy temporary clip vertex register to the clip vertex register */
6910 
6911    assert(emit->clip_vertex_out_index != INVALID_INDEX);
6912 
6913    /**
6914     * Temporarily reset the temporary clip vertex register index so
6915     * that the copy to the clip vertex register will not attempt
6916     * to copy to the temporary register again.
6917     */
6918    emit->clip_vertex_tmp_index = INVALID_INDEX;
6919 
6920    /* MOV clip_vertex, clip_vertex_tmp */
6921    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6922    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6923                         &dst, &clipvert_src);
6924 
6925    /**
6926     * set the temporary clip vertex register index back to the
6927     * temporary index for the next vertex
6928     */
6929    emit->clip_vertex_tmp_index = clip_vertex_tmp;
6930 }
6931 
6932 /**
6933  * Emit code to convert RGBA to BGRA
6934  */
6935 static void
6936 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6937                      const struct tgsi_full_dst_register *dst,
6938                      const struct tgsi_full_src_register *src)
6939 {
6940    struct tgsi_full_src_register bgra_src =
6941       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6942 
6943    begin_emit_instruction(emit);
6944    emit_opcode(emit, VGPU10_OPCODE_MOV, false);
6945    emit_dst_register(emit, dst);
6946    emit_src_register(emit, &bgra_src);
6947    end_emit_instruction(emit);
6948 }
6949 
6950 
6951 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6952 static void
6953 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6954                     const struct tgsi_full_dst_register *dst,
6955                     const struct tgsi_full_src_register *src)
6956 {
6957    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6958    struct tgsi_full_src_register two =
6959       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6960    struct tgsi_full_src_register neg_two =
6961       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6962 
6963    unsigned val_tmp = get_temp_index(emit);
6964    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6965    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6966 
6967    unsigned bias_tmp = get_temp_index(emit);
6968    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6969    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6970 
6971    /* val = src * 2.0 */
6972    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6973 
6974    /* bias = src > 0.5 */
6975    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6976 
6977    /* bias = bias & -2.0 */
6978    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6979                         &bias_src, &neg_two);
6980 
6981    /* dst = val + bias */
6982    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6983                         &val_src, &bias_src);
6984 
6985    free_temp_indexes(emit);
6986 }
6987 
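/* Minimal scalar sketch (illustrative only, not part of the driver) of the
 * per-component math that the MUL/GE/AND/ADD sequence above performs on the
 * RGB channels; the distinct W constants (3.0f / -1.66666f) presumably
 * handle the 2-bit alpha channel separately.
 */
#if 0   /* reference sketch only, never compiled */
static inline float
puint_to_snorm_ref(float u)      /* u = component fetched as unorm, in [0,1] */
{
   float val  = u * 2.0f;                      /* MUL val, src, 2.0       */
   float bias = (u >= 0.5f) ? -2.0f : 0.0f;    /* GE + AND with -2.0 bits */
   return val + bias;       /* e.g. u = 0.75 -> 1.5 + (-2.0) = -0.5       */
}
#endif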
6988 
6989 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6990 static void
6991 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6992                       const struct tgsi_full_dst_register *dst,
6993                       const struct tgsi_full_src_register *src)
6994 {
6995    struct tgsi_full_src_register scale =
6996       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6997 
6998    /* dst = src * scale */
6999    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
7000 }
7001 
7002 
7003 /** Convert from R32_UINT to 10_10_10_2_sscaled */
7004 static void
7005 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
7006                       const struct tgsi_full_dst_register *dst,
7007                       const struct tgsi_full_src_register *src)
7008 {
7009    struct tgsi_full_src_register lshift =
7010       make_immediate_reg_int4(emit, 22, 12, 2, 0);
7011    struct tgsi_full_src_register rshift =
7012       make_immediate_reg_int4(emit, 22, 22, 22, 30);
7013 
7014    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
7015 
7016    unsigned tmp = get_temp_index(emit);
7017    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7018    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7019 
7020    /*
7021     * r = (pixel << 22) >> 22;   # signed int in [-512, 511]
7022     * g = (pixel << 12) >> 22;   # signed int in [-512, 511]
7023     * b = (pixel <<  2) >> 22;   # signed int in [-512, 511]
7024     * a = (pixel <<  0) >> 30;   # signed int in [-2, 1]
7025     * dst = i_to_f(r,g,b,a);     # convert to float
7026     */
7027    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
7028                         &src_xxxx, &lshift);
7029    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
7030                         &tmp_src, &rshift);
7031    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
7032 
7033    free_temp_indexes(emit);
7034 }
7035 
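/* Worked example (illustrative only): for the red channel, bits 9..0 of the
 * packed pixel are moved to bits 31..22 by ISHL, and the arithmetic ISHR by
 * 22 brings them back with sign extension.  A 10-bit field of 0x3ff (-1 as
 * a signed 10-bit value) becomes 0xffc00000 after the shift left and
 * 0xffffffff (-1) after the shift right, so ITOF yields -1.0f.
 */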
7036 
7037 /**
7038  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
7039  */
7040 static bool
7041 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
7042               const struct tgsi_full_instruction *inst)
7043 {
7044    unsigned index = inst->Dst[0].Register.Index;
7045    struct tgsi_full_dst_register dst;
7046    VGPU10_OPCODE_TYPE opcode;
7047 
7048    assert(index < MAX_VGPU10_ADDR_REGS);
7049    dst = make_dst_temp_reg(emit->address_reg_index[index]);
7050    dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
7051 
7052    /* ARL dst, s0
7053     * Translates into:
7054     * FTOI address_tmp, s0
7055     *
7056     * UARL dst, s0
7057     * Translates into:
7058     * MOV address_tmp, s0
7059     */
7060    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
7061       opcode = VGPU10_OPCODE_FTOI;
7062    else
7063       opcode = VGPU10_OPCODE_MOV;
7064 
7065    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
7066 
7067    return true;
7068 }
7069 
7070 
7071 /**
7072  * Emit code for TGSI_OPCODE_CAL instruction.
7073  */
7074 static bool
7075 emit_cal(struct svga_shader_emitter_v10 *emit,
7076          const struct tgsi_full_instruction *inst)
7077 {
7078    unsigned label = inst->Label.Label;
7079    VGPU10OperandToken0 operand;
7080    operand.value = 0;
7081    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
7082 
7083    begin_emit_instruction(emit);
7084    emit_dword(emit, operand.value);
7085    emit_dword(emit, label);
7086    end_emit_instruction(emit);
7087 
7088    return true;
7089 }
7090 
7091 
7092 /**
7093  * Emit code for TGSI_OPCODE_IABS instruction.
7094  */
7095 static bool
7096 emit_iabs(struct svga_shader_emitter_v10 *emit,
7097           const struct tgsi_full_instruction *inst)
7098 {
7099    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
7100     * dst.y = (src0.y < 0) ? -src0.y : src0.y
7101     * dst.z = (src0.z < 0) ? -src0.z : src0.z
7102     * dst.w = (src0.w < 0) ? -src0.w : src0.w
7103     *
7104     * Translates into
7105     *   IMAX dst, src, neg(src)
7106     */
7107    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
7108    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
7109                         &inst->Src[0], &neg_src);
7110 
7111    return true;
7112 }
7113 
7114 
7115 /**
7116  * Emit code for TGSI_OPCODE_CMP instruction.
7117  */
7118 static bool
7119 emit_cmp(struct svga_shader_emitter_v10 *emit,
7120          const struct tgsi_full_instruction *inst)
7121 {
7122    /* dst.x = (src0.x < 0) ? src1.x : src2.x
7123     * dst.y = (src0.y < 0) ? src1.y : src2.y
7124     * dst.z = (src0.z < 0) ? src1.z : src2.z
7125     * dst.w = (src0.w < 0) ? src1.w : src2.w
7126     *
7127     * Translates into
7128     *   LT tmp, src0, 0.0
7129     *   MOVC dst, tmp, src1, src2
7130     */
7131    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7132    unsigned tmp = get_temp_index(emit);
7133    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7134    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7135 
7136    emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
7137                         &inst->Src[0], &zero, NULL, false,
7138                         inst->Instruction.Precise);
7139    emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
7140                         &tmp_src, &inst->Src[1], &inst->Src[2],
7141                         inst->Instruction.Saturate, false);
7142 
7143    free_temp_indexes(emit);
7144 
7145    return true;
7146 }
7147 
7148 
7149 /**
7150  * Emit code for TGSI_OPCODE_DST instruction.
7151  */
7152 static bool
7153 emit_dst(struct svga_shader_emitter_v10 *emit,
7154          const struct tgsi_full_instruction *inst)
7155 {
7156    /*
7157     * dst.x = 1
7158     * dst.y = src0.y * src1.y
7159     * dst.z = src0.z
7160     * dst.w = src1.w
7161     */
7162 
7163    struct tgsi_full_src_register s0_yyyy =
7164       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7165    struct tgsi_full_src_register s0_zzzz =
7166       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
7167    struct tgsi_full_src_register s1_yyyy =
7168       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
7169    struct tgsi_full_src_register s1_wwww =
7170       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
7171 
7172    /*
7173     * If dst is the same register as either src0 or src1, we need
7174     * to create a temporary for it and insert an extra move.
7175     */
7176    unsigned tmp_move = get_temp_index(emit);
7177    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7178    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7179 
7180    /* MOV dst.x, 1.0 */
7181    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7182       struct tgsi_full_dst_register dst_x =
7183          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7184       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7185 
7186       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7187    }
7188 
7189    /* MUL dst.y, s0.y, s1.y */
7190    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7191       struct tgsi_full_dst_register dst_y =
7192          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7193 
7194       emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
7195                            &s1_yyyy, NULL, inst->Instruction.Saturate,
7196                            inst->Instruction.Precise);
7197    }
7198 
7199    /* MOV dst.z, s0.z */
7200    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7201       struct tgsi_full_dst_register dst_z =
7202          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7203 
7204       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7205                            &dst_z, &s0_zzzz, NULL, NULL,
7206                            inst->Instruction.Saturate,
7207                            inst->Instruction.Precise);
7208    }
7209 
7210    /* MOV dst.w, s1.w */
7211    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7212       struct tgsi_full_dst_register dst_w =
7213          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7214 
7215       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7216                            &dst_w, &s1_wwww, NULL, NULL,
7217                            inst->Instruction.Saturate,
7218                            inst->Instruction.Precise);
7219    }
7220 
7221    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7222    free_temp_indexes(emit);
7223 
7224    return true;
7225 }
7226 
7227 
7228 /**
7229  * A helper function to return the stream index as specified in
7230  * the immediate register
7231  */
7232 static inline unsigned
7233 find_stream_index(struct svga_shader_emitter_v10 *emit,
7234                   const struct tgsi_full_src_register *src)
7235 {
7236    return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
7237 }
7238 
7239 
7240 /**
7241  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
7242  */
7243 static bool
7244 emit_endprim(struct svga_shader_emitter_v10 *emit,
7245              const struct tgsi_full_instruction *inst)
7246 {
7247    assert(emit->unit == PIPE_SHADER_GEOMETRY);
7248 
7249    begin_emit_instruction(emit);
7250    if (emit->version >= 50) {
7251       unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
7252 
7253       if (emit->info.num_stream_output_components[streamIndex] == 0) {
7254          /**
7255           * If there is no output for this stream, discard this instruction.
7256           */
7257          emit->discard_instruction = true;
7258       }
7259       else {
7260          emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, false);
7261          assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
7262          emit_stream_register(emit, streamIndex);
7263       }
7264    }
7265    else {
7266       emit_opcode(emit, VGPU10_OPCODE_CUT, false);
7267    }
7268    end_emit_instruction(emit);
7269    return true;
7270 }
7271 
7272 
7273 /**
7274  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
7275  */
7276 static bool
7277 emit_ex2(struct svga_shader_emitter_v10 *emit,
7278          const struct tgsi_full_instruction *inst)
7279 {
7280    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
7281     * while VGPU10 computes four values.
7282     *
7283     * dst = EX2(src):
7284     *   dst.xyzw = 2.0 ^ src.x
7285     */
7286 
7287    struct tgsi_full_src_register src_xxxx =
7288       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7289                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7290 
7291    /* EXP dst, s0.xxxx */
7292    emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
7293                         NULL, NULL,
7294                         inst->Instruction.Saturate,
7295                         inst->Instruction.Precise);
7296 
7297    return true;
7298 }
7299 
7300 
7301 /**
7302  * Emit code for TGSI_OPCODE_EXP instruction.
7303  */
7304 static bool
7305 emit_exp(struct svga_shader_emitter_v10 *emit,
7306          const struct tgsi_full_instruction *inst)
7307 {
7308    /*
7309     * dst.x = 2 ^ floor(s0.x)
7310     * dst.y = s0.x - floor(s0.x)
7311     * dst.z = 2 ^ s0.x
7312     * dst.w = 1.0
7313     */
7314 
7315    struct tgsi_full_src_register src_xxxx =
7316       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7317    unsigned tmp = get_temp_index(emit);
7318    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7319    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7320 
7321    /*
7322     * If dst and src are the same, we need to create
7323     * a temporary for it and insert an extra move.
7324     */
7325    unsigned tmp_move = get_temp_index(emit);
7326    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7327    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7328 
7329    /* only use X component of temp reg */
7330    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7331    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7332 
7333    /* ROUND_NI tmp.x, s0.x */
7334    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
7335                         &src_xxxx); /* round to -infinity */
7336 
7337    /* EXP dst.x, tmp.x */
7338    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7339       struct tgsi_full_dst_register dst_x =
7340          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7341 
7342       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
7343                            NULL, NULL,
7344                            inst->Instruction.Saturate,
7345                            inst->Instruction.Precise);
7346    }
7347 
7348    /* ADD dst.y, s0.x, -tmp */
7349    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7350       struct tgsi_full_dst_register dst_y =
7351          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7352       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
7353 
7354       emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
7355                            &neg_tmp_src, NULL,
7356                            inst->Instruction.Saturate,
7357                            inst->Instruction.Precise);
7358    }
7359 
7360    /* EXP dst.z, s0.x */
7361    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7362       struct tgsi_full_dst_register dst_z =
7363          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7364 
7365       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
7366                            NULL, NULL,
7367                            inst->Instruction.Saturate,
7368                            inst->Instruction.Precise);
7369    }
7370 
7371    /* MOV dst.w, 1.0 */
7372    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7373       struct tgsi_full_dst_register dst_w =
7374          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7375       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7376 
7377       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7378    }
7379 
7380    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7381 
7382    free_temp_indexes(emit);
7383 
7384    return true;
7385 }
7386 
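/* Worked example (illustrative only): for s0.x = 2.5 the sequence above
 * produces dst.x = 2^floor(2.5) = 4.0, dst.y = 2.5 - 2.0 = 0.5,
 * dst.z = 2^2.5 ~= 5.657 and dst.w = 1.0.
 */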
7387 
7388 /**
7389  * Emit code for TGSI_OPCODE_IF instruction.
7390  */
7391 static bool
7392 emit_if(struct svga_shader_emitter_v10 *emit,
7393         const struct tgsi_full_src_register *src)
7394 {
7395    VGPU10OpcodeToken0 opcode0;
7396 
7397    /* The src register should be a scalar */
7398    assert(src->Register.SwizzleX == src->Register.SwizzleY &&
7399           src->Register.SwizzleX == src->Register.SwizzleZ &&
7400           src->Register.SwizzleX == src->Register.SwizzleW);
7401 
7402    /* The only special thing here is that we need to set the
7403     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
7404     * src.x is non-zero.
7405     */
7406    opcode0.value = 0;
7407    opcode0.opcodeType = VGPU10_OPCODE_IF;
7408    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
7409 
7410    begin_emit_instruction(emit);
7411    emit_dword(emit, opcode0.value);
7412    emit_src_register(emit, src);
7413    end_emit_instruction(emit);
7414 
7415    return true;
7416 }
7417 
7418 
7419 /**
7420  * Emit code for conditional discard instruction (discard fragment if any of
7421  * the register components are negative).
7422  */
7423 static bool
7424 emit_cond_discard(struct svga_shader_emitter_v10 *emit,
7425                   const struct tgsi_full_instruction *inst)
7426 {
7427    unsigned tmp = get_temp_index(emit);
7428    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7429    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7430 
7431    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7432 
7433    struct tgsi_full_dst_register tmp_dst_x =
7434       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7435    struct tgsi_full_src_register tmp_src_xxxx =
7436       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7437 
7438    /* tmp = src[0] < 0.0 */
7439    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
7440 
7441    if (!same_swizzle_terms(&inst->Src[0])) {
7442       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
7443        * logically OR the swizzle terms.  Most uses of this conditional
7444        * discard instruction only test one channel so it's good to
7445        * avoid these extra steps.
7446        */
7447       struct tgsi_full_src_register tmp_src_yyyy =
7448          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
7449       struct tgsi_full_src_register tmp_src_zzzz =
7450          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
7451       struct tgsi_full_src_register tmp_src_wwww =
7452          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
7453 
7454       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7455                            &tmp_src_yyyy);
7456       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7457                            &tmp_src_zzzz);
7458       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7459                            &tmp_src_wwww);
7460    }
7461 
7462    begin_emit_instruction(emit);
7463    emit_discard_opcode(emit, true); /* discard if src0.x is non-zero */
7464    emit_src_register(emit, &tmp_src_xxxx);
7465    end_emit_instruction(emit);
7466 
7467    free_temp_indexes(emit);
7468 
7469    return true;
7470 }
7471 
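/* Note (illustrative, not from the original source): LT writes 0 or
 * 0xffffffff per component, so the OR chain above leaves tmp.x nonzero
 * exactly when at least one referenced component of src[0] is negative,
 * which is the condition the non-zero discard then tests.
 */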
7472 
7473 /**
7474  * Emit code for the unconditional discard instruction.
7475  */
7476 static bool
7477 emit_discard(struct svga_shader_emitter_v10 *emit,
7478              const struct tgsi_full_instruction *inst)
7479 {
7480    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7481 
7482    /* DISCARD if 0.0 is zero */
7483    begin_emit_instruction(emit);
7484    emit_discard_opcode(emit, false);
7485    emit_src_register(emit, &zero);
7486    end_emit_instruction(emit);
7487 
7488    return true;
7489 }
7490 
7491 
7492 /**
7493  * Emit code for TGSI_OPCODE_LG2 instruction.
7494  */
7495 static bool
7496 emit_lg2(struct svga_shader_emitter_v10 *emit,
7497          const struct tgsi_full_instruction *inst)
7498 {
7499    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
7500     * while VGPU10 computes four values.
7501     *
7502     * dst = LG2(src):
7503     *   dst.xyzw = log2(src.x)
7504     */
7505 
7506    struct tgsi_full_src_register src_xxxx =
7507       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7508                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7509 
7510    /* LOG dst, s0.xxxx */
7511    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7512                         &inst->Dst[0], &src_xxxx, NULL, NULL,
7513                         inst->Instruction.Saturate,
7514                         inst->Instruction.Precise);
7515 
7516    return true;
7517 }
7518 
7519 
7520 /**
7521  * Emit code for TGSI_OPCODE_LIT instruction.
7522  */
7523 static bool
7524 emit_lit(struct svga_shader_emitter_v10 *emit,
7525          const struct tgsi_full_instruction *inst)
7526 {
7527    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7528 
7529    /*
7530     * If dst and src are the same, we need to create
7531     * a temporary for it and insert an extra move.
7532     */
7533    unsigned tmp_move = get_temp_index(emit);
7534    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7535    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7536 
7537    /*
7538     * dst.x = 1
7539     * dst.y = max(src.x, 0)
7540     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
7541     * dst.w = 1
7542     */
7543 
7544    /* MOV dst.x, 1.0 */
7545    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7546       struct tgsi_full_dst_register dst_x =
7547          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7548       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7549    }
7550 
7551    /* MOV dst.w, 1.0 */
7552    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7553       struct tgsi_full_dst_register dst_w =
7554          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7555       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7556    }
7557 
7558    /* MAX dst.y, src.x, 0.0 */
7559    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7560       struct tgsi_full_dst_register dst_y =
7561          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7562       struct tgsi_full_src_register zero =
7563          make_immediate_reg_float(emit, 0.0f);
7564       struct tgsi_full_src_register src_xxxx =
7565          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7566                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7567 
7568       emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
7569                            &zero, NULL, inst->Instruction.Saturate, false);
7570    }
7571 
7572    /*
7573     * tmp1 = clamp(src.w, -128, 128);
7574     *   MAX tmp1, src.w, -128
7575     *   MIN tmp1, tmp1, 128
7576     *
7577     * tmp2 = max(src.y, 0);
7578     *   MAX tmp2, src.y, 0
7579     *
7580     * tmp1 = pow(tmp2, tmp1);
7581     *   LOG tmp2, tmp2
7582     *   MUL tmp1, tmp2, tmp1
7583     *   EXP tmp1, tmp1
7584     *
7585     * tmp1 = (src.w == 0) ? 1 : tmp1;
7586     *   EQ tmp2, 0, src.w
7587     *   MOVC tmp1, tmp2, 1.0, tmp1
7588     *
7589     * dst.z = (0 < src.x) ? tmp1 : 0;
7590     *   LT tmp2, 0, src.x
7591     *   MOVC dst.z, tmp2, tmp1, 0.0
7592     */
7593    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7594       struct tgsi_full_dst_register dst_z =
7595          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7596 
7597       unsigned tmp1 = get_temp_index(emit);
7598       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7599       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7600       unsigned tmp2 = get_temp_index(emit);
7601       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7602       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7603 
7604       struct tgsi_full_src_register src_xxxx =
7605          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7606       struct tgsi_full_src_register src_yyyy =
7607          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7608       struct tgsi_full_src_register src_wwww =
7609          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
7610 
7611       struct tgsi_full_src_register zero =
7612          make_immediate_reg_float(emit, 0.0f);
7613       struct tgsi_full_src_register lowerbound =
7614          make_immediate_reg_float(emit, -128.0f);
7615       struct tgsi_full_src_register upperbound =
7616          make_immediate_reg_float(emit, 128.0f);
7617 
7618       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
7619                            &lowerbound);
7620       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
7621                            &upperbound);
7622       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
7623                            &zero);
7624 
7625       /* POW tmp1, tmp2, tmp1 */
7626       /* LOG tmp2, tmp2 */
7627       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
7628 
7629       /* MUL tmp1, tmp2, tmp1 */
7630       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
7631                            &tmp1_src);
7632 
7633       /* EXP tmp1, tmp1 */
7634       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
7635 
7636       /* EQ tmp2, 0, src.w */
7637       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
7638       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
7639       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
7640                            &tmp2_src, &one, &tmp1_src);
7641 
7642       /* LT tmp2, 0, src.x */
7643       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
7644       /* MOVC dst.z, tmp2, tmp1, 0.0 */
7645       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
7646                            &tmp2_src, &tmp1_src, &zero);
7647    }
7648 
7649    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7650    free_temp_indexes(emit);
7651 
7652    return true;
7653 }
7654 
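/* Worked example (illustrative only): for src = (0.5, 0.25, -, 2.0) the
 * expansion above gives dst = (1.0, 0.5, 0.25^2 = 0.0625, 1.0).  If src.x
 * were <= 0, the final MOVC would force dst.z to 0.0 instead.
 */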
7655 
7656 /**
7657  * Emit Level Of Detail Query (LODQ) instruction.
7658  */
7659 static bool
7660 emit_lodq(struct svga_shader_emitter_v10 *emit,
7661           const struct tgsi_full_instruction *inst)
7662 {
7663    const uint unit = inst->Src[1].Register.Index;
7664 
7665    assert(emit->version >= 41);
7666 
7667    /* LOD dst, coord, resource, sampler */
7668    begin_emit_instruction(emit);
7669    emit_opcode(emit, VGPU10_OPCODE_LOD, false);
7670    emit_dst_register(emit, &inst->Dst[0]);
7671    emit_src_register(emit, &inst->Src[0]); /* coord */
7672    emit_resource_register(emit, unit);
7673    emit_sampler_register(emit, unit);
7674    end_emit_instruction(emit);
7675 
7676    return true;
7677 }
7678 
7679 
7680 /**
7681  * Emit code for TGSI_OPCODE_LOG instruction.
7682  */
7683 static bool
7684 emit_log(struct svga_shader_emitter_v10 *emit,
7685          const struct tgsi_full_instruction *inst)
7686 {
7687    /*
7688     * dst.x = floor(lg2(abs(s0.x)))
7689     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7690     * dst.z = lg2(abs(s0.x))
7691     * dst.w = 1.0
7692     */
7693 
7694    struct tgsi_full_src_register src_xxxx =
7695       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7696    unsigned tmp = get_temp_index(emit);
7697    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7698    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7699    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7700 
7701    /* only use X component of temp reg */
7702    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7703    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7704 
7705    /* LOG tmp.x, abs(s0.x) */
7706    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7707       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7708    }
7709 
7710    /* MOV dst.z, tmp.x */
7711    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7712       struct tgsi_full_dst_register dst_z =
7713          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7714 
7715       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7716                            &dst_z, &tmp_src, NULL, NULL,
7717                            inst->Instruction.Saturate, false);
7718    }
7719 
7720    /* FLR tmp.x, tmp.x */
7721    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7722       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7723    }
7724 
7725    /* MOV dst.x, tmp.x */
7726    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7727       struct tgsi_full_dst_register dst_x =
7728          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7729 
7730       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7731                            &dst_x, &tmp_src, NULL, NULL,
7732                            inst->Instruction.Saturate, false);
7733    }
7734 
7735    /* EXP tmp.x, tmp.x */
7736    /* DIV dst.y, abs(s0.x), tmp.x */
7737    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7738       struct tgsi_full_dst_register dst_y =
7739          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7740 
7741       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7742       emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7743                            &tmp_src, NULL, inst->Instruction.Saturate, false);
7744    }
7745 
7746    /* MOV dst.w, 1.0 */
7747    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7748       struct tgsi_full_dst_register dst_w =
7749          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7750       struct tgsi_full_src_register one =
7751          make_immediate_reg_float(emit, 1.0f);
7752 
7753       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7754    }
7755 
7756    free_temp_indexes(emit);
7757 
7758    return true;
7759 }
7760 
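/* Worked example (illustrative only): for s0.x = 10.0, lg2(10) ~= 3.3219,
 * so the sequence above yields dst.x = 3.0, dst.y = 10.0 / 2^3 = 1.25,
 * dst.z ~= 3.3219 and dst.w = 1.0.
 */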
7761 
7762 /**
7763  * Emit code for TGSI_OPCODE_LRP instruction.
7764  */
7765 static bool
7766 emit_lrp(struct svga_shader_emitter_v10 *emit,
7767          const struct tgsi_full_instruction *inst)
7768 {
7769    /* dst = LRP(s0, s1, s2):
7770     *   dst = s0 * (s1 - s2) + s2
7771     * Translates into:
7772     *   SUB tmp, s1, s2;        tmp = s1 - s2
7773     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7774     */
7775    unsigned tmp = get_temp_index(emit);
7776    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7777    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7778    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7779 
7780    /* ADD tmp, s1, -s2 */
7781    emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7782                         &inst->Src[1], &neg_src2, NULL, false,
7783                         inst->Instruction.Precise);
7784 
7785    /* MAD dst, s0, tmp, s2 */
7786    emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7787                         &inst->Src[0], &src_tmp, &inst->Src[2],
7788                         inst->Instruction.Saturate,
7789                         inst->Instruction.Precise);
7790 
7791    free_temp_indexes(emit);
7792 
7793    return true;
7794 }
7795 
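/* Worked example (illustrative only): LRP(0.25, 8.0, 4.0) expands to
 * tmp = 8.0 - 4.0 = 4.0 and dst = 0.25 * 4.0 + 4.0 = 5.0, i.e. the result
 * one quarter of the way from s2 toward s1.
 */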
7796 
7797 /**
7798  * Emit code for TGSI_OPCODE_POW instruction.
7799  */
7800 static bool
7801 emit_pow(struct svga_shader_emitter_v10 *emit,
7802          const struct tgsi_full_instruction *inst)
7803 {
7804    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7805     * src1.x while VGPU10 computes four values.
7806     *
7807     * dst = POW(src0, src1):
7808     *   dst.xyzw = src0.x ^ src1.x
7809     */
7810    unsigned tmp = get_temp_index(emit);
7811    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7812    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7813    struct tgsi_full_src_register src0_xxxx =
7814       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7815                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7816    struct tgsi_full_src_register src1_xxxx =
7817       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7818                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7819 
7820    /* LOG tmp, s0.xxxx */
7821    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7822                         &tmp_dst, &src0_xxxx, NULL, NULL,
7823                         false, inst->Instruction.Precise);
7824 
7825    /* MUL tmp, tmp, s1.xxxx */
7826    emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7827                         &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7828                         false, inst->Instruction.Precise);
7829 
7830    /* EXP dst, tmp */
7831    emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7832                         &inst->Dst[0], &tmp_src, NULL, NULL,
7833                         inst->Instruction.Saturate,
7834                         inst->Instruction.Precise);
7835 
7836    /* free tmp */
7837    free_temp_indexes(emit);
7838 
7839    return true;
7840 }
7841 
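/* Worked example (illustrative only): POW is expanded as
 * pow(x, y) = 2^(y * log2(x)).  For src0.x = 3.0 and src1.x = 2.0,
 * log2(3) ~= 1.585, 2.0 * 1.585 = 3.17 and 2^3.17 ~= 9.0, as expected.
 */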
7842 
7843 /**
7844  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7845  */
7846 static bool
7847 emit_rcp(struct svga_shader_emitter_v10 *emit,
7848          const struct tgsi_full_instruction *inst)
7849 {
7850    if (emit->version >= 50) {
7851       /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7852        * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7853        * to manipulate the src register's swizzle.
7854        */
7855       struct tgsi_full_src_register src = inst->Src[0];
7856       src.Register.SwizzleY =
7857       src.Register.SwizzleZ =
7858       src.Register.SwizzleW = src.Register.SwizzleX;
7859 
7860       begin_emit_instruction(emit);
7861       emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7862                           inst->Instruction.Saturate,
7863                           inst->Instruction.Precise);
7864       emit_dst_register(emit, &inst->Dst[0]);
7865       emit_src_register(emit, &src);
7866       end_emit_instruction(emit);
7867    }
7868    else {
7869       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7870 
7871       unsigned tmp = get_temp_index(emit);
7872       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7873       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7874 
7875       struct tgsi_full_dst_register tmp_dst_x =
7876          writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7877       struct tgsi_full_src_register tmp_src_xxxx =
7878          scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7879 
7880       /* DIV tmp.x, 1.0, s0 */
7881       emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7882                            &tmp_dst_x, &one, &inst->Src[0], NULL,
7883                            false, inst->Instruction.Precise);
7884 
7885       /* MOV dst, tmp.xxxx */
7886       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7887                            &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7888                            inst->Instruction.Saturate,
7889                            inst->Instruction.Precise);
7890 
7891       free_temp_indexes(emit);
7892    }
7893 
7894    return true;
7895 }
7896 
7897 
7898 /**
7899  * Emit code for TGSI_OPCODE_RSQ instruction.
7900  */
7901 static bool
7902 emit_rsq(struct svga_shader_emitter_v10 *emit,
7903          const struct tgsi_full_instruction *inst)
7904 {
7905    /* dst = RSQ(src):
7906     *   dst.xyzw = 1 / sqrt(src.x)
7907     * Translates into:
7908     *   RSQ tmp, src.x
7909     *   MOV dst, tmp.xxxx
7910     */
7911 
7912    unsigned tmp = get_temp_index(emit);
7913    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7914    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7915 
7916    struct tgsi_full_dst_register tmp_dst_x =
7917       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7918    struct tgsi_full_src_register tmp_src_xxxx =
7919       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7920 
7921    /* RSQ tmp, src.x */
7922    emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7923                         &tmp_dst_x, &inst->Src[0], NULL, NULL,
7924                         false, inst->Instruction.Precise);
7925 
7926    /* MOV dst, tmp.xxxx */
7927    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7928                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7929                         inst->Instruction.Saturate,
7930                         inst->Instruction.Precise);
7931 
7932    /* free tmp */
7933    free_temp_indexes(emit);
7934 
7935    return true;
7936 }
7937 
7938 
7939 /**
7940  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7941  */
7942 static bool
7943 emit_seq(struct svga_shader_emitter_v10 *emit,
7944          const struct tgsi_full_instruction *inst)
7945 {
7946    /* dst = SEQ(s0, s1):
7947     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7948     * Translates into:
7949     *   EQ tmp, s0, s1;           tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
7950     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7951     */
7952    unsigned tmp = get_temp_index(emit);
7953    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7954    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7955    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7956    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7957 
7958    /* EQ tmp, s0, s1 */
7959    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7960                         &inst->Src[1]);
7961 
7962    /* MOVC dst, tmp, one, zero */
7963    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7964                         &one, &zero);
7965 
7966    free_temp_indexes(emit);
7967 
7968    return true;
7969 }
7970 
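/* Note (illustrative, not from the original source): the same two-step
 * pattern is reused for the rest of the SET family below (SGE, SGT, SLE,
 * SLT, SNE): a comparison produces a 0 / 0xffffffff mask per component and
 * MOVC then selects between the 1.0 and 0.0 immediates.
 */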
7971 
7972 /**
7973  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7974  */
7975 static bool
7976 emit_sge(struct svga_shader_emitter_v10 *emit,
7977          const struct tgsi_full_instruction *inst)
7978 {
7979    /* dst = SGE(s0, s1):
7980     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7981     * Translates into:
7982     *   GE tmp, s0, s1;           tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
7983     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7984     */
7985    unsigned tmp = get_temp_index(emit);
7986    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7987    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7988    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7989    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7990 
7991    /* GE tmp, s0, s1 */
7992    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7993                         &inst->Src[1]);
7994 
7995    /* MOVC dst, tmp, one, zero */
7996    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7997                         &one, &zero);
7998 
7999    free_temp_indexes(emit);
8000 
8001    return true;
8002 }
8003 
8004 
8005 /**
8006  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
8007  */
8008 static bool
8009 emit_sgt(struct svga_shader_emitter_v10 *emit,
8010          const struct tgsi_full_instruction *inst)
8011 {
8012    /* dst = SGT(s0, s1):
8013     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
8014     * Translates into:
8015     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
8016     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8017     */
8018    unsigned tmp = get_temp_index(emit);
8019    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8020    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8021    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8022    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8023 
8024    /* LT tmp, s1, s0 */
8025    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
8026                         &inst->Src[0]);
8027 
8028    /* MOVC dst, tmp, one, zero */
8029    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8030                         &one, &zero);
8031 
8032    free_temp_indexes(emit);
8033 
8034    return true;
8035 }
8036 
8037 
8038 /**
8039  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
8040  */
8041 static bool
8042 emit_sincos(struct svga_shader_emitter_v10 *emit,
8043          const struct tgsi_full_instruction *inst)
8044 {
8045    unsigned tmp = get_temp_index(emit);
8046    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8047    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8048 
8049    struct tgsi_full_src_register tmp_src_xxxx =
8050       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
8051    struct tgsi_full_dst_register tmp_dst_x =
8052       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8053 
8054    begin_emit_instruction(emit);
8055    emit_opcode(emit, VGPU10_OPCODE_SINCOS, false);
8056 
8057    if (inst->Instruction.Opcode == TGSI_OPCODE_SIN)
8058    {
8059       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
8060       emit_null_dst_register(emit);  /* second destination register */
8061    }
8062    else {
8063       emit_null_dst_register(emit);
8064       emit_dst_register(emit, &tmp_dst_x);
8065    }
8066 
8067    emit_src_register(emit, &inst->Src[0]);
8068    end_emit_instruction(emit);
8069 
8070    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
8071                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
8072                         inst->Instruction.Saturate,
8073                         inst->Instruction.Precise);
8074 
8075    free_temp_indexes(emit);
8076 
8077    return true;
8078 }
8079 
8080 
8081 /**
8082  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
8083  */
8084 static bool
8085 emit_sle(struct svga_shader_emitter_v10 *emit,
8086          const struct tgsi_full_instruction *inst)
8087 {
8088    /* dst = SLE(s0, s1):
8089     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
8090     * Translates into:
8091     *   GE tmp, s1, s0;           tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
8092     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8093     */
8094    unsigned tmp = get_temp_index(emit);
8095    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8096    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8097    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8098    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8099 
8100    /* GE tmp, s1, s0 */
8101    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
8102                         &inst->Src[0]);
8103 
8104    /* MOVC dst, tmp, one, zero */
8105    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8106                         &one, &zero);
8107 
8108    free_temp_indexes(emit);
8109 
8110    return true;
8111 }
8112 
8113 
8114 /**
8115  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
8116  */
8117 static bool
8118 emit_slt(struct svga_shader_emitter_v10 *emit,
8119          const struct tgsi_full_instruction *inst)
8120 {
8121    /* dst = SLT(s0, s1):
8122     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
8123     * Translates into:
8124     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
8125     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8126     */
8127    unsigned tmp = get_temp_index(emit);
8128    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8129    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8130    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8131    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8132 
8133    /* LT tmp, s0, s1 */
8134    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
8135                         &inst->Src[1]);
8136 
8137    /* MOVC dst, tmp, one, zero */
8138    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8139                         &one, &zero);
8140 
8141    free_temp_indexes(emit);
8142 
8143    return true;
8144 }
8145 
8146 
8147 /**
8148  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
8149  */
8150 static bool
8151 emit_sne(struct svga_shader_emitter_v10 *emit,
8152          const struct tgsi_full_instruction *inst)
8153 {
8154    /* dst = SNE(s0, s1):
8155     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
8156     * Translates into:
8157     *   NE tmp, s0, s1;           tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
8158     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8159     */
8160    unsigned tmp = get_temp_index(emit);
8161    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8162    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8163    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8164    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8165 
8166    /* NE tmp, s0, s1 */
8167    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
8168                         &inst->Src[1]);
8169 
8170    /* MOVC dst, tmp, one, zero */
8171    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8172                         &one, &zero);
8173 
8174    free_temp_indexes(emit);
8175 
8176    return true;
8177 }
8178 
8179 
8180 /**
8181  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
8182  */
8183 static bool
8184 emit_ssg(struct svga_shader_emitter_v10 *emit,
8185          const struct tgsi_full_instruction *inst)
8186 {
8187    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
8188     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
8189     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
8190     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
8191     * Translates into:
8192     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
8193     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
8194     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
8195     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
8196     */
8197    struct tgsi_full_src_register zero =
8198       make_immediate_reg_float(emit, 0.0f);
8199    struct tgsi_full_src_register one =
8200       make_immediate_reg_float(emit, 1.0f);
8201    struct tgsi_full_src_register neg_one =
8202       make_immediate_reg_float(emit, -1.0f);
8203 
8204    unsigned tmp1 = get_temp_index(emit);
8205    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8206    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8207 
8208    unsigned tmp2 = get_temp_index(emit);
8209    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8210    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8211 
8212    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
8213                         &zero);
8214    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
8215                         &neg_one, &zero);
8216    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
8217                         &inst->Src[0]);
8218    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
8219                         &one, &tmp2_src);
8220 
8221    free_temp_indexes(emit);
8222 
8223    return true;
8224 }
8225 
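/* Worked example (illustrative only): for src.x = -3.0 the first LT/MOVC
 * pair sets tmp2.x = -1.0, the second LT (0 < -3) fails, so the final MOVC
 * selects tmp2 and dst.x = -1.0.  Likewise src.x = 0.0 gives 0.0 and
 * src.x = 5.0 gives 1.0.
 */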
8226 
8227 /**
8228  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
8229  */
8230 static bool
8231 emit_issg(struct svga_shader_emitter_v10 *emit,
8232           const struct tgsi_full_instruction *inst)
8233 {
8234    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
8235     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
8236     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
8237     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
8238     * Translates into:
8239     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
8240     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
8241     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
8242     */
8243    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8244 
8245    unsigned tmp1 = get_temp_index(emit);
8246    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8247    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8248 
8249    unsigned tmp2 = get_temp_index(emit);
8250    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8251    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8252 
8253    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
8254 
8255    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
8256                         &inst->Src[0], &zero);
8257    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
8258                         &zero, &inst->Src[0]);
8259    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
8260                         &tmp1_src, &neg_tmp2);
8261 
8262    free_temp_indexes(emit);
8263 
8264    return true;
8265 }
8266 
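/* Worked example (illustrative only): ILT writes -1 (all bits set) when the
 * comparison holds and 0 otherwise.  For src.x = 7: tmp1.x = 0, tmp2.x = -1
 * and dst.x = 0 - (-1) = 1; for src.x = -7 the roles swap and dst.x = -1.
 */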
8267 
8268 /**
8269  * Emit a comparison instruction.  The dest register will get
8270  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
8271  */
8272 static void
8273 emit_comparison(struct svga_shader_emitter_v10 *emit,
8274                 SVGA3dCmpFunc func,
8275                 const struct tgsi_full_dst_register *dst,
8276                 const struct tgsi_full_src_register *src0,
8277                 const struct tgsi_full_src_register *src1)
8278 {
8279    struct tgsi_full_src_register immediate;
8280    VGPU10OpcodeToken0 opcode0;
8281    bool swapSrc = false;
8282 
8283    /* Sanity checks for svga vs. gallium enums */
8284    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
8285    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
8286 
8287    opcode0.value = 0;
8288 
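   /* Note: VGPU10 has no LE or GT comparison opcodes, so LESSEQUAL and
    * GREATER are emitted below as GE and LT with the source operands swapped.
    */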
8289    switch (func) {
8290    case SVGA3D_CMP_NEVER:
8291       immediate = make_immediate_reg_int(emit, 0);
8292       /* MOV dst, {0} */
8293       begin_emit_instruction(emit);
8294       emit_dword(emit, VGPU10_OPCODE_MOV);
8295       emit_dst_register(emit, dst);
8296       emit_src_register(emit, &immediate);
8297       end_emit_instruction(emit);
8298       return;
8299    case SVGA3D_CMP_ALWAYS:
8300       immediate = make_immediate_reg_int(emit, -1);
8301       /* MOV dst, {-1} */
8302       begin_emit_instruction(emit);
8303       emit_dword(emit, VGPU10_OPCODE_MOV);
8304       emit_dst_register(emit, dst);
8305       emit_src_register(emit, &immediate);
8306       end_emit_instruction(emit);
8307       return;
8308    case SVGA3D_CMP_LESS:
8309       opcode0.opcodeType = VGPU10_OPCODE_LT;
8310       break;
8311    case SVGA3D_CMP_EQUAL:
8312       opcode0.opcodeType = VGPU10_OPCODE_EQ;
8313       break;
8314    case SVGA3D_CMP_LESSEQUAL:
8315       opcode0.opcodeType = VGPU10_OPCODE_GE;
8316       swapSrc = true;
8317       break;
8318    case SVGA3D_CMP_GREATER:
8319       opcode0.opcodeType = VGPU10_OPCODE_LT;
8320       swapSrc = true;
8321       break;
8322    case SVGA3D_CMP_NOTEQUAL:
8323       opcode0.opcodeType = VGPU10_OPCODE_NE;
8324       break;
8325    case SVGA3D_CMP_GREATEREQUAL:
8326       opcode0.opcodeType = VGPU10_OPCODE_GE;
8327       break;
8328    default:
8329       assert(!"Unexpected comparison mode");
8330       opcode0.opcodeType = VGPU10_OPCODE_EQ;
8331    }
8332 
8333    begin_emit_instruction(emit);
8334    emit_dword(emit, opcode0.value);
8335    emit_dst_register(emit, dst);
8336    if (swapSrc) {
8337       emit_src_register(emit, src1);
8338       emit_src_register(emit, src0);
8339    }
8340    else {
8341       emit_src_register(emit, src0);
8342       emit_src_register(emit, src1);
8343    }
8344    end_emit_instruction(emit);
8345 }
8346 
8347 
8348 /**
8349  * Get texel/address offsets for a texture instruction.
8350  */
8351 static void
8352 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
8353                   const struct tgsi_full_instruction *inst, int offsets[3])
8354 {
8355    if (inst->Texture.NumOffsets == 1) {
8356       /* According to the OpenGL Shading Language spec, the offsets are only
8357        * fetched from a previously-declared immediate/literal.
8358        */
8359       const struct tgsi_texture_offset *off = inst->TexOffsets;
8360       const unsigned index = off[0].Index;
8361       const unsigned swizzleX = off[0].SwizzleX;
8362       const unsigned swizzleY = off[0].SwizzleY;
8363       const unsigned swizzleZ = off[0].SwizzleZ;
8364       const union tgsi_immediate_data *imm = emit->immediates[index];
8365 
8366       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
8367 
8368       offsets[0] = imm[swizzleX].Int;
8369       offsets[1] = imm[swizzleY].Int;
8370       offsets[2] = imm[swizzleZ].Int;
8371    }
8372    else {
8373       offsets[0] = offsets[1] = offsets[2] = 0;
8374    }
8375 }
8376 
8377 
8378 /**
8379  * Set up the coordinate register for texture sampling.
8380  * When we're sampling from a RECT texture we have to scale the
8381  * unnormalized coordinate to a normalized coordinate.
8382  * We do that by multiplying the coordinate by an "extra" constant.
8383  * An alternative would be to use the RESINFO instruction to query the
8384  * texture's size.
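 * The scale constant is expected to hold the reciprocal of the texture
 * dimensions (e.g. {1/width, 1/height, 1, 1}), so the MUL below yields a
 * normalized coordinate.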
8385  */
8386 static struct tgsi_full_src_register
8387 setup_texcoord(struct svga_shader_emitter_v10 *emit,
8388                unsigned unit,
8389                const struct tgsi_full_src_register *coord)
8390 {
8391    if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
8392       unsigned scale_index = emit->texcoord_scale_index[unit];
8393       unsigned tmp = get_temp_index(emit);
8394       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8395       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8396       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
8397 
8398       if (emit->key.tex[unit].texel_bias) {
8399          /* To fix a texture coordinate rounding issue, a 0.0001 offset is
8400           * added.  This fixes the piglit test fbo-blit-scaled-linear. */
8401          struct tgsi_full_src_register offset =
8402             make_immediate_reg_float(emit, 0.0001f);
8403 
8404          /* ADD tmp, coord, offset */
8405          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
8406                               coord, &offset);
8407          /* MUL tmp, tmp, scale */
8408          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8409                               &tmp_src, &scale_src);
8410       }
8411       else {
8412          /* MUL tmp, coord, const[] */
8413          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8414                               coord, &scale_src);
8415       }
8416       return tmp_src;
8417    }
8418    else {
8419       /* use texcoord as-is */
8420       return *coord;
8421    }
8422 }
8423 
8424 
8425 /**
8426  * For SAMPLE_C instructions, emit the extra src register which indicates
8427  * the reference/comparison value.
8428  */
8429 static void
8430 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
8431                           enum tgsi_texture_type target,
8432                           const struct tgsi_full_src_register *coord)
8433 {
8434    struct tgsi_full_src_register coord_src_ref;
8435    int component;
8436 
8437    assert(tgsi_is_shadow_target(target));
8438 
8439    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
8440    assert(component >= 0);
8441 
8442    coord_src_ref = scalar_src(coord, component);
8443 
8444    emit_src_register(emit, &coord_src_ref);
8445 }
8446 
8447 
8448 /**
8449  * Info for implementing texture swizzles.
8450  * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
8451  * functions use this to encapsulate the extra steps needed to perform
8452  * a texture swizzle, or shadow/depth comparisons.
8453  * The shadow/depth comparison is only done here for the cases where
8454  * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
8455  */
8456 struct tex_swizzle_info
8457 {
8458    bool swizzled;
8459    bool shadow_compare;
8460    unsigned unit;
8461    enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
8462    struct tgsi_full_src_register tmp_src;
8463    struct tgsi_full_dst_register tmp_dst;
8464    const struct tgsi_full_dst_register *inst_dst;
8465    const struct tgsi_full_src_register *coord_src;
8466 };
8467 
8468 
8469 /**
8470  * Do setup for handling texture swizzles or shadow compares.
8471  * \param unit  the texture unit
8472  * \param inst  the TGSI texture instruction
8473  * \param shadow_compare  do shadow/depth comparison?
8474  * \param swz  returns the swizzle info
8475  */
8476 static void
8477 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8478                   unsigned unit,
8479                   const struct tgsi_full_instruction *inst,
8480                   bool shadow_compare,
8481                   struct tex_swizzle_info *swz)
8482 {
8483    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
8484                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
8485                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
8486                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
8487 
8488    swz->shadow_compare = shadow_compare;
8489    swz->texture_target = inst->Texture.Texture;
8490 
8491    if (swz->swizzled || shadow_compare) {
8492       /* Allocate temp register for the result of the SAMPLE instruction
8493        * and the source of the MOV/compare/swizzle instructions.
8494        */
8495       unsigned tmp = get_temp_index(emit);
8496       swz->tmp_src = make_src_temp_reg(tmp);
8497       swz->tmp_dst = make_dst_temp_reg(tmp);
8498 
8499       swz->unit = unit;
8500    }
8501    swz->inst_dst = &inst->Dst[0];
8502    swz->coord_src = &inst->Src[0];
8503 
8504    emit->shadow_compare_units |= shadow_compare << unit;
8505 }
8506 
8507 
8508 /**
8509  * Returns the register to put the SAMPLE instruction results into.
8510  * This will either be the original instruction dst reg (if no swizzle
8511  * and no shadow comparison) or a temporary reg if there is a swizzle.
8512  */
8513 static const struct tgsi_full_dst_register *
8514 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
8515 {
8516    return (swz->swizzled || swz->shadow_compare)
8517       ? &swz->tmp_dst : swz->inst_dst;
8518 }
8519 
8520 
8521 /**
8522  * This emits the MOV instruction that actually implements a texture swizzle
8523  * and/or shadow comparison.
8524  */
8525 static void
8526 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8527                 const struct tex_swizzle_info *swz)
8528 {
8529    if (swz->shadow_compare) {
8530       /* Emit extra instructions to compare the fetched texel value against
8531        * a texture coordinate component.  The result of the comparison
8532        * is 0.0 or 1.0.
8533        */
8534       struct tgsi_full_src_register coord_src;
8535       struct tgsi_full_src_register texel_src =
8536          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
8537       struct tgsi_full_src_register one =
8538          make_immediate_reg_float(emit, 1.0f);
8539       /* convert gallium comparison func to SVGA comparison func */
8540       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
8541 
8542       int component =
8543          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
8544       assert(component >= 0);
8545       coord_src = scalar_src(swz->coord_src, component);
8546 
8547       /* COMPARE tmp, coord, texel */
8548       emit_comparison(emit, compare_func,
8549                       &swz->tmp_dst, &coord_src, &texel_src);
8550 
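      /* The comparison result is 0 or ~0 (all bits set); ANDing it with the
       * bit pattern of 1.0f turns that mask into 0.0 or 1.0.
       */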
8551       /* AND dest, tmp, {1.0} */
8552       begin_emit_instruction(emit);
8553       emit_opcode(emit, VGPU10_OPCODE_AND, false);
8554       if (swz->swizzled) {
8555          emit_dst_register(emit, &swz->tmp_dst);
8556       }
8557       else {
8558          emit_dst_register(emit, swz->inst_dst);
8559       }
8560       emit_src_register(emit, &swz->tmp_src);
8561       emit_src_register(emit, &one);
8562       end_emit_instruction(emit);
8563    }
8564 
8565    if (swz->swizzled) {
8566       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
8567       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
8568       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
8569       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
8570       unsigned writemask_0 = 0, writemask_1 = 0;
8571       bool int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
8572 
8573       /* Swizzle w/out zero/one terms */
8574       struct tgsi_full_src_register src_swizzled =
8575          swizzle_src(&swz->tmp_src,
8576                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
8577                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
8578                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
8579                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
8580 
8581       /* MOV dst, color(tmp).<swizzle> */
8582       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
8583                            swz->inst_dst, &src_swizzled);
8584 
8585       /* handle swizzle zero terms */
8586       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
8587                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
8588                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
8589                      ((swz_a == PIPE_SWIZZLE_0) << 3));
8590       writemask_0 &= swz->inst_dst->Register.WriteMask;
8591 
8592       if (writemask_0) {
8593          struct tgsi_full_src_register zero = int_tex ?
8594             make_immediate_reg_int(emit, 0) :
8595             make_immediate_reg_float(emit, 0.0f);
8596          struct tgsi_full_dst_register dst =
8597             writemask_dst(swz->inst_dst, writemask_0);
8598 
8599          /* MOV dst.writemask_0, {0,0,0,0} */
8600          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
8601       }
8602 
8603       /* handle swizzle one terms */
8604       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
8605                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
8606                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
8607                      ((swz_a == PIPE_SWIZZLE_1) << 3));
8608       writemask_1 &= swz->inst_dst->Register.WriteMask;
8609 
8610       if (writemask_1) {
8611          struct tgsi_full_src_register one = int_tex ?
8612             make_immediate_reg_int(emit, 1) :
8613             make_immediate_reg_float(emit, 1.0f);
8614          struct tgsi_full_dst_register dst =
8615             writemask_dst(swz->inst_dst, writemask_1);
8616 
8617          /* MOV dst.writemask_1, {1,1,1,1} */
8618          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
8619       }
8620    }
8621 }
8622 
8623 
8624 /**
8625  * Emit code for TGSI_OPCODE_SAMPLE instruction.
8626  */
8627 static bool
8628 emit_sample(struct svga_shader_emitter_v10 *emit,
8629             const struct tgsi_full_instruction *inst)
8630 {
8631    const unsigned resource_unit = inst->Src[1].Register.Index;
8632    const unsigned sampler_unit = inst->Src[2].Register.Index;
8633    struct tgsi_full_src_register coord;
8634    int offsets[3];
8635    struct tex_swizzle_info swz_info;
8636 
8637    begin_tex_swizzle(emit, sampler_unit, inst, false, &swz_info);
8638 
8639    get_texel_offsets(emit, inst, offsets);
8640 
8641    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
8642 
8643    /* SAMPLE dst, coord(s0), resource, sampler */
8644    begin_emit_instruction(emit);
8645 
8646    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
8647     * with LOD=0.  But our virtual GPU accepts this as-is.
8648     */
8649    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
8650                       inst->Instruction.Saturate, offsets);
8651    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8652    emit_src_register(emit, &coord);
8653    emit_resource_register(emit, resource_unit);
8654    emit_sampler_register(emit, sampler_unit);
8655    end_emit_instruction(emit);
8656 
8657    end_tex_swizzle(emit, &swz_info);
8658 
8659    free_temp_indexes(emit);
8660 
8661    return true;
8662 }
8663 
8664 
8665 /**
8666  * Check if a texture instruction is valid.
8667  * An example of an invalid texture instruction is doing shadow comparison
8668  * with an integer-valued texture.
8669  * If we detect an invalid texture instruction, we replace it with:
8670  *   MOV dst, {1,1,1,1};
8671  * \return TRUE if valid, FALSE if invalid.
8672  */
8673 static bool
8674 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8675                          const struct tgsi_full_instruction *inst)
8676 {
8677    const unsigned unit = inst->Src[1].Register.Index;
8678    const enum tgsi_texture_type target = inst->Texture.Texture;
8679    bool valid = true;
8680 
8681    if (tgsi_is_shadow_target(target) &&
8682        is_integer_type(emit->sampler_return_type[unit])) {
8683       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8684       valid = false;
8685    }
8686    /* XXX might check for other conditions in the future here */
8687 
8688    if (!valid) {
8689       /* emit a MOV dst, {1,1,1,1} instruction. */
8690       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8691       begin_emit_instruction(emit);
8692       emit_opcode(emit, VGPU10_OPCODE_MOV, false);
8693       emit_dst_register(emit, &inst->Dst[0]);
8694       emit_src_register(emit, &one);
8695       end_emit_instruction(emit);
8696    }
8697 
8698    return valid;
8699 }
8700 
8701 
8702 /**
8703  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8704  */
8705 static bool
8706 emit_tex(struct svga_shader_emitter_v10 *emit,
8707          const struct tgsi_full_instruction *inst)
8708 {
8709    const uint unit = inst->Src[1].Register.Index;
8710    const enum tgsi_texture_type target = inst->Texture.Texture;
8711    VGPU10_OPCODE_TYPE opcode;
8712    struct tgsi_full_src_register coord;
8713    int offsets[3];
8714    struct tex_swizzle_info swz_info;
8715    bool compare_in_shader;
8716 
8717    /* check that the sampler returns a float */
8718    if (!is_valid_tex_instruction(emit, inst))
8719       return true;
8720 
8721    compare_in_shader = tgsi_is_shadow_target(target) &&
8722                        emit->key.tex[unit].compare_in_shader;
8723 
8724    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8725 
8726    get_texel_offsets(emit, inst, offsets);
8727 
8728    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8729 
8730    /* SAMPLE dst, coord(s0), resource, sampler */
8731    begin_emit_instruction(emit);
8732 
8733    if (tgsi_is_shadow_target(target) && !compare_in_shader)
8734       opcode = VGPU10_OPCODE_SAMPLE_C;
8735    else
8736       opcode = VGPU10_OPCODE_SAMPLE;
8737 
8738    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8739    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8740    emit_src_register(emit, &coord);
8741    emit_resource_register(emit, unit);
8742    emit_sampler_register(emit, unit);
8743    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8744       emit_tex_compare_refcoord(emit, target, &coord);
8745    }
8746    end_emit_instruction(emit);
8747 
8748    end_tex_swizzle(emit, &swz_info);
8749 
8750    free_temp_indexes(emit);
8751 
8752    return true;
8753 }
8754 
8755 /**
8756  * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8757  */
8758 static bool
8759 emit_tg4(struct svga_shader_emitter_v10 *emit,
8760          const struct tgsi_full_instruction *inst)
8761 {
8762    const uint unit = inst->Src[2].Register.Index;
8763    struct tgsi_full_src_register src;
8764    struct tgsi_full_src_register offset_src, sampler, ref;
8765    int offsets[3];
8766 
8767    /* check that the sampler returns a float */
8768    if (!is_valid_tex_instruction(emit, inst))
8769       return true;
8770 
8771    if (emit->version >= 50) {
8772       unsigned target = inst->Texture.Texture;
8773       int index = inst->Src[1].Register.Index;
8774       const union tgsi_immediate_data *imm = emit->immediates[index];
8775       int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8776       unsigned select_swizzle = PIPE_SWIZZLE_X;
8777 
8778       if (!tgsi_is_shadow_target(target)) {
8779          switch (select_comp) {
8780          case 0:
8781             select_swizzle = emit->key.tex[unit].swizzle_r;
8782             break;
8783          case 1:
8784             select_swizzle = emit->key.tex[unit].swizzle_g;
8785             break;
8786          case 2:
8787             select_swizzle = emit->key.tex[unit].swizzle_b;
8788             break;
8789          case 3:
8790             select_swizzle = emit->key.tex[unit].swizzle_a;
8791             break;
8792          default:
8793             assert(!"Unexpected component in texture gather swizzle");
8794          }
8795       }
8796       else {
8797          select_swizzle = emit->key.tex[unit].swizzle_r;
8798       }
8799 
8800       if (select_swizzle == PIPE_SWIZZLE_1) {
8801          src = make_immediate_reg_float(emit, 1.0);
8802          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8803          return true;
8804       }
8805       else if (select_swizzle == PIPE_SWIZZLE_0) {
8806          src = make_immediate_reg_float(emit, 0.0);
8807          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8808          return true;
8809       }
8810 
8811       src = setup_texcoord(emit, unit, &inst->Src[0]);
8812 
8813       /* GATHER4 dst, coord, resource, sampler */
8814       /* GATHER4_C dst, coord, resource, sampler ref */
8815       /* GATHER4_PO dst, coord, offset resource, sampler */
8816       /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8817       begin_emit_instruction(emit);
8818       if (inst->Texture.NumOffsets == 1) {
8819          if (tgsi_is_shadow_target(target)) {
8820             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8821                         inst->Instruction.Saturate);
8822          }
8823          else {
8824             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8825                         inst->Instruction.Saturate);
8826          }
8827       }
8828       else {
8829          if (tgsi_is_shadow_target(target)) {
8830             emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8831                         inst->Instruction.Saturate);
8832          }
8833          else {
8834             emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8835                         inst->Instruction.Saturate);
8836          }
8837       }
8838 
8839       emit_dst_register(emit, &inst->Dst[0]);
8840       emit_src_register(emit, &src);
8841       if (inst->Texture.NumOffsets == 1) {
8842          /* offset */
8843          offset_src = make_src_reg(inst->TexOffsets[0].File,
8844                                    inst->TexOffsets[0].Index);
8845          offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8846                                   inst->TexOffsets[0].SwizzleY,
8847                                   inst->TexOffsets[0].SwizzleZ,
8848                                   TGSI_SWIZZLE_W);
8849          emit_src_register(emit, &offset_src);
8850       }
8851 
8852       /* resource */
8853       emit_resource_register(emit, unit);
8854 
8855       /* sampler */
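      /* The component to gather is conveyed through the sampler register's
       * swizzle: all four swizzle fields are set to the selected component.
       */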
8856       sampler = make_src_reg(TGSI_FILE_SAMPLER,
8857                              emit->key.tex[unit].sampler_index);
8858       sampler.Register.SwizzleX =
8859       sampler.Register.SwizzleY =
8860       sampler.Register.SwizzleZ =
8861       sampler.Register.SwizzleW = select_swizzle;
8862       emit_src_register(emit, &sampler);
8863 
8864       if (tgsi_is_shadow_target(target)) {
8865          /* ref */
8866          if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8867             ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8868             emit_tex_compare_refcoord(emit, target, &ref);
8869          }
8870          else {
8871             emit_tex_compare_refcoord(emit, target, &src);
8872          }
8873       }
8874 
8875       end_emit_instruction(emit);
8876       free_temp_indexes(emit);
8877    }
8878    else {
8879       /* Only a single channel is supported in SM4_1 and we report
8880        * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8881        * Only the 0th component will be gathered.
8882        */
8883       switch (emit->key.tex[unit].swizzle_r) {
8884       case PIPE_SWIZZLE_X:
8885          get_texel_offsets(emit, inst, offsets);
8886          src = setup_texcoord(emit, unit, &inst->Src[0]);
8887 
8888          /* Gather dst, coord, resource, sampler */
8889          begin_emit_instruction(emit);
8890          emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8891                             inst->Instruction.Saturate, offsets);
8892          emit_dst_register(emit, &inst->Dst[0]);
8893          emit_src_register(emit, &src);
8894          emit_resource_register(emit, unit);
8895 
8896          /* sampler */
8897          sampler = make_src_reg(TGSI_FILE_SAMPLER,
8898                                 emit->key.tex[unit].sampler_index);
8899          sampler.Register.SwizzleX =
8900          sampler.Register.SwizzleY =
8901          sampler.Register.SwizzleZ =
8902          sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8903          emit_src_register(emit, &sampler);
8904 
8905          end_emit_instruction(emit);
8906          break;
8907       case PIPE_SWIZZLE_W:
8908       case PIPE_SWIZZLE_1:
8909          src = make_immediate_reg_float(emit, 1.0);
8910          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8911          break;
8912       case PIPE_SWIZZLE_Y:
8913       case PIPE_SWIZZLE_Z:
8914       case PIPE_SWIZZLE_0:
8915       default:
8916          src = make_immediate_reg_float(emit, 0.0);
8917          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8918          break;
8919       }
8920    }
8921 
8922    return true;
8923 }
8924 
8925 
8926 
8927 /**
8928  * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
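 * The shadow compare reference value arrives in Src[1].x since the cube
 * array coordinate already occupies all four components of Src[0].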
8929  */
8930 static bool
8931 emit_tex2(struct svga_shader_emitter_v10 *emit,
8932          const struct tgsi_full_instruction *inst)
8933 {
8934    const uint unit = inst->Src[2].Register.Index;
8935    unsigned target = inst->Texture.Texture;
8936    struct tgsi_full_src_register coord, ref;
8937    int offsets[3];
8938    struct tex_swizzle_info swz_info;
8939    VGPU10_OPCODE_TYPE opcode;
8940    bool compare_in_shader;
8941 
8942    /* check that the sampler returns a float */
8943    if (!is_valid_tex_instruction(emit, inst))
8944       return true;
8945 
8946    compare_in_shader = emit->key.tex[unit].compare_in_shader;
8947    if (compare_in_shader)
8948       opcode = VGPU10_OPCODE_SAMPLE;
8949    else
8950       opcode = VGPU10_OPCODE_SAMPLE_C;
8951 
8952    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8953 
8954    get_texel_offsets(emit, inst, offsets);
8955 
8956    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8957    ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8958 
8959    /* SAMPLE_C dst, coord, resource, sampler, ref */
8960    begin_emit_instruction(emit);
8961    emit_sample_opcode(emit, opcode,
8962                       inst->Instruction.Saturate, offsets);
8963    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8964    emit_src_register(emit, &coord);
8965    emit_resource_register(emit, unit);
8966    emit_sampler_register(emit, unit);
8967    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8968       emit_tex_compare_refcoord(emit, target, &ref);
8969    }
8970    end_emit_instruction(emit);
8971 
8972    end_tex_swizzle(emit, &swz_info);
8973 
8974    free_temp_indexes(emit);
8975 
8976    return true;
8977 }
8978 
8979 
8980 /**
8981  * Emit code for TGSI_OPCODE_TXP (projective texture)
8982  */
8983 static bool
8984 emit_txp(struct svga_shader_emitter_v10 *emit,
8985          const struct tgsi_full_instruction *inst)
8986 {
8987    const uint unit = inst->Src[1].Register.Index;
8988    const enum tgsi_texture_type target = inst->Texture.Texture;
8989    VGPU10_OPCODE_TYPE opcode;
8990    int offsets[3];
8991    unsigned tmp = get_temp_index(emit);
8992    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8993    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8994    struct tgsi_full_src_register src0_wwww =
8995       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8996    struct tgsi_full_src_register coord;
8997    struct tex_swizzle_info swz_info;
8998    bool compare_in_shader;
8999 
9000    /* check that the sampler returns a float */
9001    if (!is_valid_tex_instruction(emit, inst))
9002       return true;
9003 
9004    compare_in_shader = tgsi_is_shadow_target(target) &&
9005                        emit->key.tex[unit].compare_in_shader;
9006 
9007    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
9008 
9009    get_texel_offsets(emit, inst, offsets);
9010 
9011    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9012 
9013    /* DIV tmp, coord, coord.wwww */
9014    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
9015                         &coord, &src0_wwww);
9016 
9017    /* SAMPLE dst, coord(tmp), resource, sampler */
9018    begin_emit_instruction(emit);
9019 
9020    if (tgsi_is_shadow_target(target) && !compare_in_shader)
9021       /* NOTE: for non-fragment shaders, we should use
9022        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
9023        */
9024       opcode = VGPU10_OPCODE_SAMPLE_C;
9025    else
9026       opcode = VGPU10_OPCODE_SAMPLE;
9027 
9028    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9029    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9030    emit_src_register(emit, &tmp_src);  /* projected coord */
9031    emit_resource_register(emit, unit);
9032    emit_sampler_register(emit, unit);
9033    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
9034       emit_tex_compare_refcoord(emit, target, &tmp_src);
9035    }
9036    end_emit_instruction(emit);
9037 
9038    end_tex_swizzle(emit, &swz_info);
9039 
9040    free_temp_indexes(emit);
9041 
9042    return true;
9043 }
9044 
9045 
9046 /**
9047  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
9048  */
9049 static bool
9050 emit_txd(struct svga_shader_emitter_v10 *emit,
9051          const struct tgsi_full_instruction *inst)
9052 {
9053    const uint unit = inst->Src[3].Register.Index;
9054    const enum tgsi_texture_type target = inst->Texture.Texture;
9055    int offsets[3];
9056    struct tgsi_full_src_register coord;
9057    struct tex_swizzle_info swz_info;
9058 
9059    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9060                      &swz_info);
9061 
9062    get_texel_offsets(emit, inst, offsets);
9063 
9064    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9065 
9066    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
9067    begin_emit_instruction(emit);
9068    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
9069                       inst->Instruction.Saturate, offsets);
9070    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9071    emit_src_register(emit, &coord);
9072    emit_resource_register(emit, unit);
9073    emit_sampler_register(emit, unit);
9074    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
9075    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
9076    end_emit_instruction(emit);
9077 
9078    end_tex_swizzle(emit, &swz_info);
9079 
9080    free_temp_indexes(emit);
9081 
9082    return true;
9083 }
9084 
9085 
9086 /**
9087  * Emit code for TGSI_OPCODE_TXF (texel fetch)
9088  */
9089 static bool
9090 emit_txf(struct svga_shader_emitter_v10 *emit,
9091          const struct tgsi_full_instruction *inst)
9092 {
9093    const uint unit = inst->Src[1].Register.Index;
9094    const bool msaa = tgsi_is_msaa_target(inst->Texture.Texture)
9095       && emit->key.tex[unit].num_samples > 1;
9096    int offsets[3];
9097    struct tex_swizzle_info swz_info;
9098 
9099    begin_tex_swizzle(emit, unit, inst, false, &swz_info);
9100 
9101    get_texel_offsets(emit, inst, offsets);
9102 
9103    if (msaa) {
9104       assert(emit->key.tex[unit].num_samples > 1);
9105 
9106       /* Fetch one sample from an MSAA texture */
9107       struct tgsi_full_src_register sampleIndex =
9108          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9109       /* LD_MS dst, coord(s0), resource, sampleIndex */
9110       begin_emit_instruction(emit);
9111       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
9112                          inst->Instruction.Saturate, offsets);
9113       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9114       emit_src_register(emit, &inst->Src[0]);
9115       emit_resource_register(emit, unit);
9116       emit_src_register(emit, &sampleIndex);
9117       end_emit_instruction(emit);
9118    }
9119    else {
9120       /* Fetch one texel specified by integer coordinate */
9121       /* LD dst, coord(s0), resource */
9122       begin_emit_instruction(emit);
9123       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
9124                          inst->Instruction.Saturate, offsets);
9125       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9126       emit_src_register(emit, &inst->Src[0]);
9127       emit_resource_register(emit, unit);
9128       end_emit_instruction(emit);
9129    }
9130 
9131    end_tex_swizzle(emit, &swz_info);
9132 
9133    free_temp_indexes(emit);
9134 
9135    return true;
9136 }
9137 
9138 
9139 /**
9140  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
9141  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
9142  */
9143 static bool
9144 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
9145              const struct tgsi_full_instruction *inst)
9146 {
9147    const enum tgsi_texture_type target = inst->Texture.Texture;
9148    VGPU10_OPCODE_TYPE opcode;
9149    unsigned unit;
9150    int offsets[3];
9151    struct tgsi_full_src_register coord, lod_bias;
9152    struct tex_swizzle_info swz_info;
9153 
9154    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
9155           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
9156           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
9157 
9158    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
9159       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9160       unit = inst->Src[2].Register.Index;
9161    }
9162    else {
9163       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9164       unit = inst->Src[1].Register.Index;
9165    }
9166 
9167    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9168                      &swz_info);
9169 
9170    get_texel_offsets(emit, inst, offsets);
9171 
9172    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9173 
9174    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
9175    begin_emit_instruction(emit);
9176    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
9177       opcode = VGPU10_OPCODE_SAMPLE_L;
9178    }
9179    else {
9180       opcode = VGPU10_OPCODE_SAMPLE_B;
9181    }
9182    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9183    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9184    emit_src_register(emit, &coord);
9185    emit_resource_register(emit, unit);
9186    emit_sampler_register(emit, unit);
9187    emit_src_register(emit, &lod_bias);
9188    end_emit_instruction(emit);
9189 
9190    end_tex_swizzle(emit, &swz_info);
9191 
9192    free_temp_indexes(emit);
9193 
9194    return true;
9195 }
9196 
9197 
9198 /**
9199  * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
9200  */
9201 static bool
9202 emit_txl2(struct svga_shader_emitter_v10 *emit,
9203           const struct tgsi_full_instruction *inst)
9204 {
9205    unsigned target = inst->Texture.Texture;
9206    unsigned opcode, unit;
9207    int offsets[3];
9208    struct tgsi_full_src_register coord, lod;
9209    struct tex_swizzle_info swz_info;
9210 
9211    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
9212 
9213    lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9214    unit = inst->Src[2].Register.Index;
9215 
9216    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9217                      &swz_info);
9218 
9219    get_texel_offsets(emit, inst, offsets);
9220 
9221    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9222 
9223    /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
9224    begin_emit_instruction(emit);
9225    opcode = VGPU10_OPCODE_SAMPLE_L;
9226    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9227    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9228    emit_src_register(emit, &coord);
9229    emit_resource_register(emit, unit);
9230    emit_sampler_register(emit, unit);
9231    emit_src_register(emit, &lod);
9232    end_emit_instruction(emit);
9233 
9234    end_tex_swizzle(emit, &swz_info);
9235 
9236    free_temp_indexes(emit);
9237 
9238    return true;
9239 }
9240 
9241 
9242 /**
9243  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
9244  */
9245 static bool
9246 emit_txq(struct svga_shader_emitter_v10 *emit,
9247          const struct tgsi_full_instruction *inst)
9248 {
9249    const uint unit = inst->Src[1].Register.Index;
9250 
9251    if (emit->key.tex[unit].target == PIPE_BUFFER) {
9252       /* RESINFO does not support querying texture buffers, so we instead
9253        * store texture buffer sizes in shader constants, then copy them to
9254        * implement TXQ instead of emitting RESINFO.
9255        * MOV dst, const[texture_buffer_size_index[unit]]
9256        */
9257       struct tgsi_full_src_register size_src =
9258          make_src_const_reg(emit->texture_buffer_size_index[unit]);
9259       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
9260    } else {
9261       /* RESINFO dst, srcMipLevel, resource */
9262       begin_emit_instruction(emit);
9263       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
9264       emit_dst_register(emit, &inst->Dst[0]);
9265       emit_src_register(emit, &inst->Src[0]);
9266       emit_resource_register(emit, unit);
9267       end_emit_instruction(emit);
9268    }
9269 
9270    free_temp_indexes(emit);
9271 
9272    return true;
9273 }
9274 
9275 
9276 /**
9277  * Does this opcode produce a double-precision result?
9278  * XXX perhaps move this to a TGSI utility.
9279  */
9280 static bool
9281 opcode_has_dbl_dst(unsigned opcode)
9282 {
9283    switch (opcode) {
9284    case TGSI_OPCODE_F2D:
9285    case TGSI_OPCODE_DABS:
9286    case TGSI_OPCODE_DADD:
9287    case TGSI_OPCODE_DFRAC:
9288    case TGSI_OPCODE_DMAX:
9289    case TGSI_OPCODE_DMIN:
9290    case TGSI_OPCODE_DMUL:
9291    case TGSI_OPCODE_DNEG:
9292    case TGSI_OPCODE_I2D:
9293    case TGSI_OPCODE_U2D:
9294    case TGSI_OPCODE_DFMA:
9295       // XXX more TBD
9296       return true;
9297    default:
9298       return false;
9299    }
9300 }
9301 
9302 
9303 /**
9304  * Does this opcode use double-precision source registers?
9305  */
9306 static bool
9307 opcode_has_dbl_src(unsigned opcode)
9308 {
9309    switch (opcode) {
9310    case TGSI_OPCODE_D2F:
9311    case TGSI_OPCODE_DABS:
9312    case TGSI_OPCODE_DADD:
9313    case TGSI_OPCODE_DFRAC:
9314    case TGSI_OPCODE_DMAX:
9315    case TGSI_OPCODE_DMIN:
9316    case TGSI_OPCODE_DMUL:
9317    case TGSI_OPCODE_DNEG:
9318    case TGSI_OPCODE_D2I:
9319    case TGSI_OPCODE_D2U:
9320    case TGSI_OPCODE_DFMA:
9321    case TGSI_OPCODE_DSLT:
9322    case TGSI_OPCODE_DSGE:
9323    case TGSI_OPCODE_DSEQ:
9324    case TGSI_OPCODE_DSNE:
9325    case TGSI_OPCODE_DRCP:
9326    case TGSI_OPCODE_DSQRT:
9327    case TGSI_OPCODE_DMAD:
9328    case TGSI_OPCODE_DLDEXP:
9329    case TGSI_OPCODE_DRSQ:
9330    case TGSI_OPCODE_DTRUNC:
9331    case TGSI_OPCODE_DCEIL:
9332    case TGSI_OPCODE_DFLR:
9333    case TGSI_OPCODE_DROUND:
9334    case TGSI_OPCODE_DSSG:
9335       return true;
9336    default:
9337       return false;
9338    }
9339 }
9340 
9341 
9342 /**
9343  * Check that the swizzle for reading from a double-precision register
9344  * is valid. If not valid, move the source to a temporary register first.
9345  */
9346 static struct tgsi_full_src_register
9347 check_double_src(struct svga_shader_emitter_v10 *emit,
9348                  const struct tgsi_full_src_register *reg)
9349 {
9350    struct tgsi_full_src_register src;
9351 
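   /* A double occupies a pair of 32-bit channels (xy or zw), so a valid
    * swizzle must select whole aligned pairs; anything else requires the
    * fix-up MOV below.
    */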
9352    if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
9353          reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
9354         (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
9355          reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
9356        ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
9357          reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
9358         (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
9359          reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
9360       src = *reg;
9361    } else {
9362       /* move the src to a temporary to fix the swizzle */
9363       unsigned tmp = get_temp_index(emit);
9364       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9365       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9366       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
9367       src = tmp_src;
9368 
9369       /* The temporary index will be released in the caller */
9370    }
9371    return src;
9372 }
9373 
9374 /**
9375  * Check that the writemask for a double-precision instruction is valid.
9376  */
9377 static void
9378 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
9379 {
9380    ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
9381 
9382    switch (inst->Instruction.Opcode) {
9383    case TGSI_OPCODE_DABS:
9384    case TGSI_OPCODE_DADD:
9385    case TGSI_OPCODE_DFRAC:
9386    case TGSI_OPCODE_DNEG:
9387    case TGSI_OPCODE_DMAD:
9388    case TGSI_OPCODE_DMAX:
9389    case TGSI_OPCODE_DMIN:
9390    case TGSI_OPCODE_DMUL:
9391    case TGSI_OPCODE_DRCP:
9392    case TGSI_OPCODE_DSQRT:
9393    case TGSI_OPCODE_F2D:
9394    case TGSI_OPCODE_DFMA:
9395       assert(writemask == TGSI_WRITEMASK_XYZW ||
9396              writemask == TGSI_WRITEMASK_XY ||
9397              writemask == TGSI_WRITEMASK_ZW);
9398       break;
9399    case TGSI_OPCODE_DSEQ:
9400    case TGSI_OPCODE_DSGE:
9401    case TGSI_OPCODE_DSNE:
9402    case TGSI_OPCODE_DSLT:
9403    case TGSI_OPCODE_D2I:
9404    case TGSI_OPCODE_D2U:
9405       /* Write to 1 or 2 components only */
9406       assert(util_bitcount(writemask) <= 2);
9407       break;
9408    default:
9409       /* XXX this list may be incomplete */
9410       ;
9411    }
9412 }
9413 
9414 
9415 /**
9416  * Double-precision absolute value.
9417  */
9418 static bool
9419 emit_dabs(struct svga_shader_emitter_v10 *emit,
9420           const struct tgsi_full_instruction *inst)
9421 {
9422    assert(emit->version >= 50);
9423 
9424    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9425    check_double_dst_writemask(inst);
9426 
9427    struct tgsi_full_src_register abs_src = absolute_src(&src);
9428 
9429    /* DMOV dst, |src| */
9430    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
9431 
9432    free_temp_indexes(emit);
9433    return true;
9434 }
9435 
9436 
9437 /**
9438  * Double-precision negation
9439  */
9440 static bool
9441 emit_dneg(struct svga_shader_emitter_v10 *emit,
9442           const struct tgsi_full_instruction *inst)
9443 {
9444    assert(emit->version >= 50);
9445    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9446    check_double_dst_writemask(inst);
9447 
9448    struct tgsi_full_src_register neg_src = negate_src(&src);
9449 
9450    /* DMOV dst, -src */
9451    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
9452 
9453    free_temp_indexes(emit);
9454    return true;
9455 }
9456 
9457 
9458 /**
9459  * SM5 has no DMAD opcode.  Implement the multiply-add with DMUL/DADD.
9460  */
9461 static bool
9462 emit_dmad(struct svga_shader_emitter_v10 *emit,
9463           const struct tgsi_full_instruction *inst)
9464 {
9465    assert(emit->version >= 50);
9466    struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
9467    struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
9468    struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
9469    check_double_dst_writemask(inst);
9470 
9471    unsigned tmp = get_temp_index(emit);
9472    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9473    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9474 
9475    /* DMUL tmp, src[0], src[1] */
9476    emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
9477                         &tmp_dst, &src0, &src1, NULL,
9478                         false, inst->Instruction.Precise);
9479 
9480    /* DADD dst, tmp, src[2] */
9481    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9482                         &inst->Dst[0], &tmp_src, &src2, NULL,
9483                         inst->Instruction.Saturate, inst->Instruction.Precise);
9484    free_temp_indexes(emit);
9485 
9486    return true;
9487 }
9488 
9489 
9490 /**
9491  * Double precision reciprocal square root
9492  */
9493 static bool
9494 emit_drsq(struct svga_shader_emitter_v10 *emit,
9495           const struct tgsi_full_dst_register *dst,
9496           const struct tgsi_full_src_register *src)
9497 {
9498    assert(emit->version >= 50);
9499 
9500    VGPU10OpcodeToken0 token0;
9501    struct tgsi_full_src_register dsrc = check_double_src(emit, src);
9502 
9503    begin_emit_instruction(emit);
9504 
9505    token0.value = 0;
9506    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9507    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
9508    emit_dword(emit, token0.value);
9509    emit_dst_register(emit, dst);
9510    emit_src_register(emit, &dsrc);
9511    end_emit_instruction(emit);
9512 
9513    free_temp_indexes(emit);
9514 
9515    return true;
9516 }
9517 
9518 
9519 /**
9520  * There is no SM5 opcode for double precision square root.
9521  * It is implemented with DRSQ:
9522  * dst = src * DRSQ(src)
9523  */
9524 static bool
9525 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
9526           const struct tgsi_full_instruction *inst)
9527 {
9528    assert(emit->version >= 50);
9529 
9530    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9531 
9532    /* temporary register to hold the source */
9533    unsigned tmp = get_temp_index(emit);
9534    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9535    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9536 
9537    /* temporary register to hold the DEQ result */
9538    unsigned tmp_cond = get_temp_index(emit);
9539    struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
9540    struct tgsi_full_dst_register tmp_cond_dst_xy =
9541       writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9542    struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
9543    struct tgsi_full_src_register tmp_cond_src_xy =
9544          swizzle_src(&tmp_cond_src,
9545                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9546                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9547 
9548    /* The reciprocal square root of zero yields INF.
9549     * So if the source is 0, we replace it with 1 in the tmp register.
9550     * The subsequent multiplication by the original source (which is zero)
9551     * will still yield 0 in the result.
9552     */
9553 
9554    /* tmp1 = (src == 0) ? 1 : src;
9555     *   EQ tmp1, 0, src
9556     *   MOVC tmp, tmp1, 1.0, src
9557     */
9558    struct tgsi_full_src_register zero =
9559                make_immediate_reg_double(emit, 0);
9560 
9561    struct tgsi_full_src_register one =
9562                make_immediate_reg_double(emit, 1.0);
9563 
9564    emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
9565                         &zero, &src);
9566    emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
9567                         &tmp_cond_src_xy, &one, &src);
9568 
9569    struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
9570    struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
9571 
9572    /* DRSQ tmp_rsq, tmp */
9573    emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
9574 
9575    /* DMUL dst, tmp_rsq, src[0] */
9576    emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
9577                         &tmp_rsq_src, &src);
9578 
9579    free_temp_indexes(emit);
9580 
9581    return true;
9582 }
9583 
9584 
9585 /**
9586  * The glsl-nir path does not lower DTRUNC, so we need to
9587  * add the translation here.
9588  *
9589  * frac = DFRAC(src)
9590  * tmp = src - frac
9591  * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
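 * e.g. src = -1.25: frac = 0.75, tmp = -2.0, dst = -2.0 + 1 = -1.0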
9592  */
9593 static bool
9594 emit_dtrunc(struct svga_shader_emitter_v10 *emit,
9595             const struct tgsi_full_instruction *inst)
9596 {
9597    assert(emit->version >= 50);
9598 
9599    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9600 
9601    /* frac = DFRAC(src) */
9602    unsigned frac_index = get_temp_index(emit);
9603    struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
9604    struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);
9605 
9606    VGPU10OpcodeToken0 token0;
9607    begin_emit_instruction(emit);
9608    token0.value = 0;
9609    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9610    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
9611    emit_dword(emit, token0.value);
9612    emit_dst_register(emit, &frac_dst);
9613    emit_src_register(emit, &src);
9614    end_emit_instruction(emit);
9615 
9616    /* tmp = src - frac */
9617    unsigned tmp_index = get_temp_index(emit);
9618    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
9619    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
9620    struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
9621    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9622                         &tmp_dst, &src, &negate_frac_src, NULL,
9623                         inst->Instruction.Saturate, inst->Instruction.Precise);
9624 
9625    /* cond = frac==0 */
9626    unsigned cond_index = get_temp_index(emit);
9627    struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
9628    struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
9629    struct tgsi_full_src_register zero =
9630                make_immediate_reg_double(emit, 0);
9631 
9632    /* Only use one or two components for double opcode */
9633    cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9634 
9635    emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
9636                         &cond_dst, &frac_src, &zero, NULL,
9637                         inst->Instruction.Saturate, inst->Instruction.Precise);
9638 
9639    /* tmp2 = cond ? 0 : 1 */
9640    unsigned tmp2_index = get_temp_index(emit);
9641    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
9642    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
9643    struct tgsi_full_src_register cond_src_xy =
9644       swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9645 		             PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9646    struct tgsi_full_src_register one =
9647                make_immediate_reg_double(emit, 1.0);
9648 
9649    emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9650                         &tmp2_dst, &cond_src_xy, &zero, &one,
9651                         inst->Instruction.Saturate, inst->Instruction.Precise);
9652 
9653    /* tmp2 = tmp + tmp2 */
9654    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9655                         &tmp2_dst, &tmp_src, &tmp2_src, NULL,
9656                         inst->Instruction.Saturate, inst->Instruction.Precise);
9657 
9658    /* cond = src>=0 */
9659    emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
9660                         &cond_dst, &src, &zero, NULL,
9661                         inst->Instruction.Saturate, inst->Instruction.Precise);
9662 
9663    /* dst = cond ? tmp : tmp2 */
9664    emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9665                         &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
9666                         inst->Instruction.Saturate, inst->Instruction.Precise);
9667 
9668    free_temp_indexes(emit);
9669    return true;
9670 }
9671 
9672 
9673 static bool
9674 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
9675                    const struct tgsi_full_instruction *inst)
9676 {
9677    assert(emit->version >= 50);
9678 
9679    /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
9680     * where (0,0) is the center of the pixel.  We need to translate that
9681     * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
9682     * Also need to flip the Y axis (I think).
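    * For example, an offset of (0.25, -0.25) maps to the snapped integer
    * offset (4, 4) on the 16x16 grid.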
9683     */
9684    unsigned tmp = get_temp_index(emit);
9685    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9686    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9687    struct tgsi_full_dst_register tmp_dst_xy =
9688       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9689    struct tgsi_full_src_register const16 =
9690       make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
9691 
9692    /* MUL tmp.xy, src1, {16, -16, 0, 0} */
9693    emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
9694                         &tmp_dst_xy, &inst->Src[1], &const16);
9695 
9696    /* FTOI tmp.xy, tmp */
9697    emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
9698 
9699    /* EVAL_SNAPPED dst, src0, tmp */
9700    emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
9701                         &inst->Dst[0], &inst->Src[0], &tmp_src);
9702 
9703    free_temp_indexes(emit);
9704 
9705    return true;
9706 }
9707 
9708 
9709 /**
9710  * Emit a simple instruction (like ADD, MUL, MIN, etc).
9711  */
9712 static bool
9713 emit_simple(struct svga_shader_emitter_v10 *emit,
9714             const struct tgsi_full_instruction *inst)
9715 {
9716    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9717    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9718    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9719    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9720    unsigned i;
9721 
9722    struct tgsi_full_src_register src[3];
9723 
9724    if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
9725       emit->current_loop_depth++;
9726    }
9727    else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
9728       emit->current_loop_depth--;
9729    }
9730 
9731    for (i = 0; i < op->num_src; i++) {
9732       if (dbl_src)
9733          src[i] = check_double_src(emit, &inst->Src[i]);
9734       else
9735          src[i] = inst->Src[i];
9736    }
9737 
9738    begin_emit_instruction(emit);
9739    emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
9740                        inst->Instruction.Saturate,
9741                        inst->Instruction.Precise);
9742    for (i = 0; i < op->num_dst; i++) {
9743       if (dbl_dst) {
9744          check_double_dst_writemask(inst);
9745       }
9746       emit_dst_register(emit, &inst->Dst[i]);
9747    }
9748    for (i = 0; i < op->num_src; i++) {
9749       emit_src_register(emit, &src[i]);
9750    }
9751    end_emit_instruction(emit);
9752 
9753    free_temp_indexes(emit);
9754    return true;
9755 }
9756 
9757 
9758 /**
9759  * Emit MSB instruction (like IMSB, UMSB).
9760  *
9761  * GLSL returns the index starting from the LSB;
9762  * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
9763  * To get the bit position GLSL expects from an SM5 device, we
9764  * return (31 - index) if the returned index is not -1.
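 * For example, if the most significant set bit of the source is bit 4
 * (value 0x10), firstbit_hi returns 27 per the SM5 convention above, and
 * GLSL's findMSB() expects 31 - 27 = 4.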
9765  */
9766 static bool
9767 emit_msb(struct svga_shader_emitter_v10 *emit,
9768          const struct tgsi_full_instruction *inst)
9769 {
9770    const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
9771 
9772    assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
9773 
9774    struct tgsi_full_src_register index_src =
9775       make_src_reg(index_dst->Register.File, index_dst->Register.Index);
9776    struct tgsi_full_src_register imm31 =
9777       make_immediate_reg_int(emit, 31);
9778    imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
9779    struct tgsi_full_src_register neg_one =
9780       make_immediate_reg_int(emit, -1);
9781    neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
9782    unsigned tmp = get_temp_index(emit);
9783    const struct tgsi_full_dst_register tmp_dst =
9784       make_dst_temp_reg(tmp);
9785    const struct tgsi_full_dst_register tmp_dst_x =
9786       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
9787    const struct tgsi_full_src_register tmp_src_x =
9788        make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
9789    int writemask = TGSI_WRITEMASK_X;
9790    int src_swizzle = TGSI_SWIZZLE_X;
9791    int dst_writemask = index_dst->Register.WriteMask;
9792 
9793    emit_simple(emit, inst);
9794 
9795    /* index conversion from SM5 to GLSL */
9796    while (writemask & dst_writemask) {
9797       struct tgsi_full_src_register index_src_comp =
9798          scalar_src(&index_src, src_swizzle);
9799       struct tgsi_full_dst_register index_dst_comp =
9800          writemask_dst(index_dst, writemask);
9801 
9802       /* check if index_src_comp != -1 */
9803       emit_instruction_op2(emit, VGPU10_OPCODE_INE,
9804                            &tmp_dst_x, &index_src_comp, &neg_one);
9805 
9806       /* if */
9807       emit_if(emit, &tmp_src_x);
9808 
9809       index_src_comp = negate_src(&index_src_comp);
9810       /* SUB DST, IMM{31}, DST */
9811       emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
9812                            &index_dst_comp, &imm31, &index_src_comp);
9813 
9814       /* endif */
9815       emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9816 
9817       writemask = writemask << 1;
9818       src_swizzle = src_swizzle + 1;
9819    }
9820    free_temp_indexes(emit);
9821    return true;
9822 }
9823 
9824 
9825 /**
9826  * Emit a BFE instruction (like UBFE, IBFE).
9827  * tgsi representation:
9828  * U/IBFE dst, value, offset, width
9829  * SM5 representation:
9830  * U/IBFE dst, width, offset, value
9831  * Note: SM5 has width & offset range (0-31);
9832  *      whereas GLSL has width & offset range (0-32)
9833  */
9834 static bool
9835 emit_bfe(struct svga_shader_emitter_v10 *emit,
9836          const struct tgsi_full_instruction *inst)
9837 {
9838    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9839    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9840    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9841    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9842    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9843 
9844    unsigned tmp1 = get_temp_index(emit);
9845    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9846    const struct tgsi_full_dst_register cond1_dst_x =
9847       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9848    const struct tgsi_full_src_register cond1_src_x =
9849       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9850 
9851    unsigned tmp2 = get_temp_index(emit);
9852    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9853    const struct tgsi_full_dst_register cond2_dst_x =
9854       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9855    const struct tgsi_full_src_register cond2_src_x =
9856       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9857 
9858    /**
9859     * In SM5, when width = 32 and offset = 0, the instruction returns 0.
9860     * GLSL, on the other hand, expects the value to be copied unchanged to dst.
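    * (For example, GLSL bitfieldExtract(value, 0, 32) must return value
    * unchanged.)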
9861     */
9862 
9863    /* cond1 = (width == 32) */
9864    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9865                         &cond1_dst_x, &inst->Src[2], &imm32);
9866 
9867    /* cond2 = (offset == 0) */
9868    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9869                         &cond2_dst_x, &inst->Src[1], &zero);
9870 
9871    /* cond2 = cond1 & cond2 */
9872    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9873                         &cond2_src_x,
9874                         &cond1_src_x);
9875    /* IF */
9876    emit_if(emit, &cond2_src_x);
9877 
9878    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9879                         &inst->Src[0]);
9880 
9881    /* ELSE */
9882    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9883 
9884    /* U/IBFE dst, width, offset, value */
9885    emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9886                         &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9887 
9888    /* ENDIF */
9889    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9890 
9891    free_temp_indexes(emit);
9892    return true;
9893 }
9894 
9895 
9896 /**
9897  * Emit a BFI instruction.
9898  * tgsi representation:
9899  * BFI dst, base, insert, offset, width
9900  * SM5 representation:
9901  * BFI dst, width, offset, insert, base
9902  * Note: SM5 has width & offset range (0-31);
9903  *      whereas GLSL has width & offset range (0-32)
9904  */
9905 static bool
9906 emit_bfi(struct svga_shader_emitter_v10 *emit,
9907          const struct tgsi_full_instruction *inst)
9908 {
9909    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9910    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9911    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9912 
9913    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9914    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9915 
9916    unsigned tmp1 = get_temp_index(emit);
9917    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9918    const struct tgsi_full_dst_register cond1_dst_x =
9919       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9920    const struct tgsi_full_src_register cond1_src_x =
9921       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9922 
9923    unsigned tmp2 = get_temp_index(emit);
9924    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9925    const struct tgsi_full_dst_register cond2_dst_x =
9926       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9927    const struct tgsi_full_src_register cond2_src_x =
9928       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9929 
9930    /**
9931     * In SM5, when width = 32 and offset = 0, the instruction returns 0.
9932     * GLSL, on the other hand, expects insert to be copied unchanged to dst.
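    * (For example, GLSL bitfieldInsert(base, insert, 0, 32) must return
    * insert.)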
9933     */
9934 
9935    /* cond1 = width == 32 */
9936    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9937                         &cond1_dst_x, &inst->Src[3], &imm32);
9938 
9939    /* cond2 = (offset == 0) */
9940    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9941                         &cond2_dst_x, &inst->Src[2], &zero);
9942 
9943    /* cond2 = cond1 & cond2 */
9944    emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9945                         &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9946 
9947    /* if */
9948    emit_if(emit, &cond2_src_x);
9949 
9950    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9951                         &inst->Src[1]);
9952 
9953    /* else */
9954    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9955 
9956    /* BFI dst, width, offset, insert, base */
9957    begin_emit_instruction(emit);
9958    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9959    emit_dst_register(emit, &inst->Dst[0]);
9960    emit_src_register(emit, &inst->Src[3]);
9961    emit_src_register(emit, &inst->Src[2]);
9962    emit_src_register(emit, &inst->Src[1]);
9963    emit_src_register(emit, &inst->Src[0]);
9964    end_emit_instruction(emit);
9965 
9966    /* endif */
9967    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9968 
9969    free_temp_indexes(emit);
9970    return true;
9971 }
9972 
9973 
9974 /**
9975  * We only special case the MOV instruction to try to detect constant
9976  * color writes in the fragment shader.
9977  */
9978 static bool
9979 emit_mov(struct svga_shader_emitter_v10 *emit,
9980          const struct tgsi_full_instruction *inst)
9981 {
9982    const struct tgsi_full_src_register *src = &inst->Src[0];
9983    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9984 
9985    if (emit->unit == PIPE_SHADER_FRAGMENT &&
9986        dst->Register.File == TGSI_FILE_OUTPUT &&
9987        dst->Register.Index == 0 &&
9988        src->Register.File == TGSI_FILE_CONSTANT &&
9989        !src->Register.Indirect) {
9990       emit->constant_color_output = true;
9991    }
9992 
9993    return emit_simple(emit, inst);
9994 }
9995 
9996 
9997 /**
9998  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9999  * where TGSI only uses one dest register.
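 * (For example, TGSI_OPCODE_UDIV has a single destination while the
 * corresponding VGPU10 instruction has two; the destination that TGSI does
 * not use is emitted as a null register.)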
10000  */
10001 static bool
10002 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
10003                  const struct tgsi_full_instruction *inst,
10004                  unsigned dst_count,
10005                  unsigned dst_index)
10006 {
10007    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10008    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
10009    unsigned i;
10010 
10011    begin_emit_instruction(emit);
10012    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
10013 
10014    for (i = 0; i < dst_count; i++) {
10015       if (i == dst_index) {
10016          emit_dst_register(emit, &inst->Dst[0]);
10017       } else {
10018          emit_null_dst_register(emit);
10019       }
10020    }
10021 
10022    for (i = 0; i < op->num_src; i++) {
10023       emit_src_register(emit, &inst->Src[i]);
10024    }
10025    end_emit_instruction(emit);
10026 
10027    return true;
10028 }
10029 
10030 
10031 /**
10032  * Emit a VMware-specific VGPU10 instruction.
10033  */
10034 static bool
10035 emit_vmware(struct svga_shader_emitter_v10 *emit,
10036             const struct tgsi_full_instruction *inst,
10037             VGPU10_VMWARE_OPCODE_TYPE subopcode)
10038 {
10039    VGPU10OpcodeToken0 token0;
10040    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10041    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
10042    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
10043    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
10044    unsigned i;
10045    struct tgsi_full_src_register src[3];
10046 
10047    for (i = 0; i < op->num_src; i++) {
10048       if (dbl_src)
10049          src[i] = check_double_src(emit, &inst->Src[i]);
10050       else
10051          src[i] = inst->Src[i];
10052    }
10053 
10054    begin_emit_instruction(emit);
10055 
10056    assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
10057 
10058    token0.value = 0;
10059    token0.opcodeType = VGPU10_OPCODE_VMWARE;
10060    token0.vmwareOpcodeType = subopcode;
10061    emit_dword(emit, token0.value);
10062 
10063    if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
10064       /* IDIV only uses the first dest register. */
10065       emit_dst_register(emit, &inst->Dst[0]);
10066       emit_null_dst_register(emit);
10067    } else {
10068       for (i = 0; i < op->num_dst; i++) {
10069          if (dbl_dst) {
10070             check_double_dst_writemask(inst);
10071          }
10072          emit_dst_register(emit, &inst->Dst[i]);
10073       }
10074    }
10075 
10076    for (i = 0; i < op->num_src; i++) {
10077       emit_src_register(emit, &src[i]);
10078    }
10079    end_emit_instruction(emit);
10080 
10081    free_temp_indexes(emit);
10082    return true;
10083 }
10084 
10085 /**
10086  * Emit a memory register (a thread-group shared memory operand).
10087  */
10088 
10089 typedef enum {
10090    MEM_STORE = 0,
10091    MEM_LOAD = 1,
10092    MEM_ATOMIC_COUNTER
10093 } memory_op;
10094 
10095 static void
10096 emit_memory_register(struct svga_shader_emitter_v10 *emit,
10097                      memory_op mem_op,
10098                      const struct tgsi_full_instruction *inst,
10099                      unsigned regIndex, unsigned writemask)
10100 {
10101    VGPU10OperandToken0 operand0;
10102    unsigned resIndex = 0;
10103 
10104    operand0.value = 0;
10105    operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
10106    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10107    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10108 
10109    switch (mem_op) {
10110    case MEM_ATOMIC_COUNTER:
10111    {
10112       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10113       resIndex = inst->Src[regIndex].Register.Index;
10114       break;
10115    }
10116    case MEM_STORE:
10117    {
10118       const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex];
10119 
10120       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10121       operand0.mask = writemask;
10122       resIndex = reg->Register.Index;
10123       break;
10124    }
10125    case MEM_LOAD:
10126    {
10127       const struct tgsi_full_src_register *reg = &inst->Src[regIndex];
10128 
10129       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10130       operand0.swizzleX = reg->Register.SwizzleX;
10131       operand0.swizzleY = reg->Register.SwizzleY;
10132       operand0.swizzleZ = reg->Register.SwizzleZ;
10133       operand0.swizzleW = reg->Register.SwizzleW;
10134       resIndex = reg->Register.Index;
10135       break;
10136    }
10137    default:
10138       assert(!"Unexpected memory opcode");
10139       break;
10140    }
10141 
10142    emit_dword(emit, operand0.value);
10143    emit_dword(emit, resIndex);
10144 }
10145 
10146 
10147 typedef enum {
10148    UAV_STORE = 0,
10149    UAV_LOAD = 1,
10150    UAV_ATOMIC = 2,
10151    UAV_RESQ = 3,
10152 } UAV_OP;
10153 
10154 
10155 /**
10156  * Emit a UAV register.
10157  * \param res_index     index of the resource register
10158  * \param uav_op        UAV_STORE / UAV_LOAD / UAV_ATOMIC / UAV_RESQ, depending on the opcode
10159  * \param resourceType  resource file type
10160  * \param writemask     resource writemask
10161  */
10162 
10163 static void
10164 emit_uav_register(struct svga_shader_emitter_v10 *emit,
10165                   unsigned res_index, UAV_OP uav_op,
10166                   enum tgsi_file_type resourceType, unsigned writemask)
10167 {
10168    VGPU10OperandToken0 operand0;
10169    unsigned uav_index = INVALID_INDEX;
10170 
10171    operand0.value = 0;
10172    operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
10173    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10174    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10175 
10176    switch (resourceType) {
10177    case TGSI_FILE_IMAGE:
10178       uav_index = emit->key.images[res_index].uav_index;
10179       break;
10180    case TGSI_FILE_BUFFER:
10181       uav_index = emit->key.shader_buf_uav_index[res_index];
10182       break;
10183    case TGSI_FILE_HW_ATOMIC:
10184       uav_index = emit->key.atomic_buf_uav_index[res_index];
10185       break;
10186    default:
10187       assert(0);
10188    }
10189 
10190    switch (uav_op) {
10191    case UAV_ATOMIC:
10192       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10193       break;
10194 
10195    case UAV_STORE:
10196       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10197       operand0.mask = writemask;
10198       break;
10199 
10200    case UAV_LOAD:
10201    case UAV_RESQ:
10202       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10203       operand0.swizzleX = VGPU10_COMPONENT_X;
10204       operand0.swizzleY = VGPU10_COMPONENT_Y;
10205       operand0.swizzleZ = VGPU10_COMPONENT_Z;
10206       operand0.swizzleW = VGPU10_COMPONENT_W;
10207       break;
10208 
10209    default:
10210       break;
10211    }
10212 
10213    emit_dword(emit, operand0.value);
10214    emit_dword(emit, uav_index);
10215 }
10216 
10217 
10218 /**
10219  * A helper function to emit the uav address.
10220  * For memory, buffer, and image resources, the address is simply the
10221  * specified address.  For a HW atomic counter, the address is the sum of the
10222  * address offset and the offset into the HW atomic buffer given by the
10223  * register index.  The counter index may also be specified as an indirect
10224  * address, in which case the uav address is the sum of the address offset
10225  * and the counter index taken from the indirect address.
10226  */
10227 static
10228 struct tgsi_full_src_register
10229 emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
10230                      enum tgsi_file_type resourceType,
10231                      unsigned resourceIndex,
10232                      unsigned resourceIndirect,
10233                      unsigned resourceIndirectIndex,
10234                      const struct tgsi_full_src_register *addr_reg)
10235 {
10236    unsigned addr_tmp;
10237    struct tgsi_full_dst_register addr_dst;
10238    struct tgsi_full_src_register addr_src;
10239    struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
10240    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
10241 
10242    addr_tmp = get_temp_index(emit);
10243    addr_dst = make_dst_temp_reg(addr_tmp);
10244    addr_src = make_src_temp_reg(addr_tmp);
10245 
10246    /* specified address offset */
10247    if (addr_reg)
10248       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
10249    else
10250       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero);
10251 
10252    /* For HW atomic counter, we need to find the index to the
10253     * HW atomic buffer.
10254     */
10255    if (resourceType == TGSI_FILE_HW_ATOMIC) {
10256       if (resourceIndirect) {
10257 
10258          /**
10259           * uav addr offset  = counter layout offset +
10260           *                    counter indirect index address + address offset
10261           */
10262 
10263          /* counter layout offset */
10264          struct tgsi_full_src_register layout_offset;
10265          layout_offset =
10266             make_immediate_reg_int(emit, resourceIndex);
10267 
10268          /* counter layout offset + address offset */
10269          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10270                               &addr_src, &layout_offset);
10271 
10272          /* counter indirect index address */
10273          unsigned indirect_addr =
10274             emit->address_reg_index[resourceIndirectIndex];
10275 
10276          struct tgsi_full_src_register indirect_addr_src =
10277             make_src_temp_reg(indirect_addr);
10278 
10279          indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10280 
10281          /* counter layout offset + address offset + counter indirect address */
10282          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10283                               &addr_src, &indirect_addr_src);
10284 
10285       } else {
10286          struct tgsi_full_src_register index_src;
10287 
10288          index_src = make_immediate_reg_int(emit, resourceIndex);
10289 
10290          /* uav addr offset  = counter index address + address offset */
10291          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst,
10292                               &addr_src, &index_src);
10293       }
10294 
10295       /* The HW atomic buffer is declared as a raw buffer, so the buffer address
10296        * is a byte offset; multiply the counter address offset by 4 (shift by 2).
10297        */
10298       emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
10299                            &addr_src, &two);
10300    }
10301    else if (resourceType == TGSI_FILE_IMAGE) {
10302       if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
10303              && emit->key.images[resourceIndex].is_single_layer) {
10304 
10305          struct tgsi_full_dst_register addr_dst_z =
10306             writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
10307 
10308          /* For non-layered 3D texture image view, we have to make sure the z
10309           * component of the address offset is set to 0.
10310           */
10311          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
10312                               &zero);
10313       }
10314    }
10315 
10316    return addr_src;
10317 }
10318 
10319 
10320 
10321 /**
10322  * A helper function to expand indirect indexing of a uav resource by
10323  * looping through the resource array, comparing the indirect index against
10324  * each resource index, and emitting the instruction for the matching resource.
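 *
 * For example, with count == 3 the emitted code has the shape:
 *   IF (addr == 0) ... ELSE IF (addr == 1) ... ELSE IF (addr == 2) ...
 *   ENDIF ENDIF ENDIF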
10325  */
10326 static void
10327 loop_instruction(unsigned index, unsigned count,
10328                  struct tgsi_full_src_register *addr_index,
10329                  void (*fb)(struct svga_shader_emitter_v10 *,
10330                             const struct tgsi_full_instruction *, unsigned),
10331                  struct svga_shader_emitter_v10 *emit,
10332                  const struct tgsi_full_instruction *inst)
10333 {
10334    if (count == 0)
10335       return;
10336 
10337    if (index > 0) {
10338       /* ELSE */
10339       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10340    }
10341 
10342    struct tgsi_full_src_register index_src =
10343                                     make_immediate_reg_int(emit, index);
10344 
10345    unsigned tmp_index = get_temp_index(emit);
10346    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10347    struct tgsi_full_src_register tmp_src_x =
10348                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10349    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
10350 
10351    /* IEQ tmp, addr_tmp_index, index */
10352    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
10353                         addr_index, &index_src);
10354 
10355    /* IF tmp */
10356    emit_if(emit, &tmp_src_x);
10357 
10358    free_temp_indexes(emit);
10359 
10360    (*fb)(emit, inst, index);
10361 
10362    loop_instruction(index+1, count-1, addr_index, fb, emit, inst);
10363 
10364    /* ENDIF */
10365    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10366 }
10367 
10368 
10369 /**
10370  * A helper function to emit the load instruction.
10371  */
10372 static void
10373 emit_load_instruction(struct svga_shader_emitter_v10 *emit,
10374                       const struct tgsi_full_instruction *inst,
10375                       unsigned resourceIndex)
10376 {
10377    VGPU10OpcodeToken0 token0;
10378    struct tgsi_full_src_register addr_src;
10379    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10380 
10381    /* Resolve the resource address for this resource first */
10382    addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
10383                                    inst->Src[0].Register.Indirect,
10384                                    inst->Src[0].Indirect.Index,
10385                                    &inst->Src[1]);
10386 
10387    /* LOAD resource, address, src */
10388    begin_emit_instruction(emit);
10389 
10390    token0.value = 0;
10391 
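   /* TGSI_FILE_MEMORY, TGSI_FILE_BUFFER and TGSI_FILE_HW_ATOMIC resources
    * are loaded with LD_RAW and a scalar byte address; typed image
    * resources use LD_UAV_TYPED.
    */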
10392    if (resourceType == TGSI_FILE_MEMORY ||
10393        resourceType == TGSI_FILE_BUFFER ||
10394        resourceType == TGSI_FILE_HW_ATOMIC) {
10395       token0.opcodeType = VGPU10_OPCODE_LD_RAW;
10396       addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10397    }
10398    else {
10399       token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10400    }
10401 
10402    token0.saturate = inst->Instruction.Saturate;
10403    emit_dword(emit, token0.value);
10404 
10405    emit_dst_register(emit, &inst->Dst[0]);
10406    emit_src_register(emit, &addr_src);
10407 
10408    if (resourceType == TGSI_FILE_MEMORY) {
10409       emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
10410    } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
10411       emit_uav_register(emit, inst->Src[0].Dimension.Index,
10412                         UAV_LOAD, inst->Src[0].Register.File, 0);
10413    } else if (resourceType == TGSI_FILE_BUFFER) {
10414       if (emit->raw_shaderbufs & (1 << resourceIndex))
10415          emit_resource_register(emit, resourceIndex +
10416                                       emit->raw_shaderbuf_srv_start_index);
10417       else
10418          emit_uav_register(emit, resourceIndex,
10419                            UAV_LOAD, inst->Src[0].Register.File, 0);
10420    } else {
10421       emit_uav_register(emit, resourceIndex,
10422                         UAV_LOAD, inst->Src[0].Register.File, 0);
10423    }
10424 
10425    end_emit_instruction(emit);
10426 
10427    free_temp_indexes(emit);
10428 }
10429 
10430 
10431 /**
10432  * Emit uav / memory load instruction
10433  */
10434 static bool
10435 emit_load(struct svga_shader_emitter_v10 *emit,
10436           const struct tgsi_full_instruction *inst)
10437 {
10438    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10439    unsigned resourceIndex = inst->Src[0].Register.Index;
10440 
10441    /* If the resource register has an indirect index, we need to expand it
10442     * since the SM5 device does not support indirect indexing for UAVs.
10443     */
10445    if (inst->Src[0].Register.Indirect &&
10446        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10447 
10448       unsigned indirect_index = inst->Src[0].Indirect.Index;
10449       unsigned num_resources =
10450          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10451                                             emit->num_images;
10452 
10453       /* indirect index tmp register */
10454       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10455       struct tgsi_full_src_register indirect_addr_src =
10456          make_src_temp_reg(indirect_addr);
10457       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10458 
10459       /* Add offset to the indirect index */
10460       if (inst->Src[0].Register.Index != 0) {
10461          struct tgsi_full_src_register offset =
10462             make_immediate_reg_int(emit, inst->Src[0].Register.Index);
10463          struct tgsi_full_dst_register indirect_addr_dst =
10464             make_dst_temp_reg(indirect_addr);
10465          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10466                               &indirect_addr_src, &offset);
10467       }
10468 
10469       /* Loop through the resource array to find which resource to use.
10470        */
10471       loop_instruction(0, num_resources, &indirect_addr_src,
10472                        emit_load_instruction, emit, inst);
10473    }
10474    else {
10475       emit_load_instruction(emit, inst, resourceIndex);
10476    }
10477 
10478    free_temp_indexes(emit);
10479 
10480    return true;
10481 }
10482 
10483 
10484 /**
10485  * A helper function to emit a store instruction.
10486  */
10487 static void
10488 emit_store_instruction(struct svga_shader_emitter_v10 *emit,
10489                        const struct tgsi_full_instruction *inst,
10490                        unsigned resourceIndex)
10491 {
10492    VGPU10OpcodeToken0 token0;
10493    enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10494    unsigned writemask = inst->Dst[0].Register.WriteMask;
10495    struct tgsi_full_src_register addr_src;
10496 
10497    unsigned tmp_index = get_temp_index(emit);
10498    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10499    struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
10500    struct tgsi_full_dst_register tmp_dst;
10501 
10502    struct tgsi_full_src_register src = inst->Src[1];
10503    struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
10504 
10505    bool needLoad = false;
10506    bool needPerComponentStore = false;
10507    unsigned swizzles = 0;
10508 
10509    /* Resolve the resource address for this resource first */
10510    addr_src = emit_uav_addr_offset(emit, resourceType,
10511                                    inst->Dst[0].Register.Index,
10512                                    inst->Dst[0].Register.Indirect,
10513                                    inst->Dst[0].Indirect.Index,
10514                                    &inst->Src[0]);
10515 
10516    /* First check the writemask to see if it can be supported
10517     * by the store instruction.
10518     * store_raw only allows .x, .xy, .xyz, or .xyzw writemasks.  For typeless
10519     * (raw) memory we can adjust the address offset and do a per-component store.
10520     * store_uav_typed only allows .xyzw. In this case, we need to
10521     * do a load first, update the temporary and then issue the
10522     * store. This does have a small risk that if different threads
10523     * update different components of the same address, data might not be
10524     * in sync.
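    * (For example, a .xz writemask on a raw buffer results in two stores:
    * the x component at the original byte address and the z component at
    * that address + 8, since each component is 4 bytes.)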
10525     */
10526    if (resourceType == TGSI_FILE_IMAGE) {
10527       needLoad = (writemask != TGSI_WRITEMASK_XYZW);
10528    }
10529    else if (resourceType == TGSI_FILE_BUFFER ||
10530             resourceType == TGSI_FILE_MEMORY) {
10531       if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
10532             writemask == TGSI_WRITEMASK_XYZ ||
10533             writemask == TGSI_WRITEMASK_XYZW)) {
10534          needPerComponentStore = true;
10535       }
10536    }
10537 
10538    if (needLoad) {
10539       assert(resourceType == TGSI_FILE_IMAGE);
10540 
10541       /* LOAD resource, address, src */
10542       begin_emit_instruction(emit);
10543 
10544       token0.value = 0;
10545       token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10546       token0.saturate = inst->Instruction.Saturate;
10547       emit_dword(emit, token0.value);
10548 
10549       emit_dst_register(emit, &tmp_dst_xyzw);
10550       emit_src_register(emit, &addr_src);
10551       emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);
10552 
10553       end_emit_instruction(emit);
10554 
10555       /* MOV tmp(writemask) src */
10556       tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
10557       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);
10558 
10559       /* Now set the writemask to xyzw for the store_uav_typed instruction */
10560       writemask = TGSI_WRITEMASK_XYZW;
10561    }
10562    else if (needPerComponentStore) {
10563       /* Save the src swizzles */
10564       swizzles = src.Register.SwizzleX |
10565                  src.Register.SwizzleY << 2 |
10566                  src.Register.SwizzleZ << 4 |
10567                  src.Register.SwizzleW << 6;
10568    }
10569 
10570    bool storeDone = false;
10571    unsigned perComponentWritemask = writemask;
10572    unsigned shift = 0;
10573    struct tgsi_full_src_register shift_src;
10574 
10575    while (!storeDone) {
10576 
10577       if (needPerComponentStore) {
10578          assert(perComponentWritemask);
10579          while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
10580             shift++;
10581             perComponentWritemask >>= 1;
10582          }
10583 
10584          /* First adjust the addr_src to the next component */
10585          if (shift != 0) {
10586             struct tgsi_full_dst_register addr_dst =
10587                make_dst_temp_reg(addr_src.Register.Index);
10588             shift_src = make_immediate_reg_int(emit, shift);
10589             emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
10590                                  &shift_src, &addr_src);
10591 
10592             /* Adjust the src swizzle as well */
10593             swizzles >>= (shift * 2);
10594          }
10595 
10596          /* Now the address offset is set to the next component,
10597           * we can set the writemask to .x and make sure to set
10598           * the src swizzle as well.
10599           */
10600          src.Register.SwizzleX = swizzles & 0x3;
10601          writemask = TGSI_WRITEMASK_X;
10602 
10603          /* Shift for the next component check */
10604          perComponentWritemask >>= 1;
10605          shift = 1;
10606       }
10607 
10608       /* STORE resource, address, src */
10609       begin_emit_instruction(emit);
10610 
10611       token0.value = 0;
10612       token0.saturate = inst->Instruction.Saturate;
10613 
10614       if (resourceType == TGSI_FILE_MEMORY) {
10615          token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10616          addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10617          emit_dword(emit, token0.value);
10618          emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
10619       }
10620       else if (resourceType == TGSI_FILE_BUFFER ||
10621                resourceType == TGSI_FILE_HW_ATOMIC) {
10622          token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10623          addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10624          emit_dword(emit, token0.value);
10625          emit_uav_register(emit, resourceIndex, UAV_STORE,
10626                            resourceType, writemask);
10627       }
10628       else {
10629          token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
10630          emit_dword(emit, token0.value);
10631          emit_uav_register(emit, resourceIndex, UAV_STORE,
10632                            resourceType, writemask);
10633       }
10634 
10635       emit_src_register(emit, &addr_src);
10636 
10637       if (needLoad)
10638          emit_src_register(emit, &tmp_src);
10639       else
10640          emit_src_register(emit, &src);
10641 
10642       end_emit_instruction(emit);
10643 
10644       if (!needPerComponentStore || !perComponentWritemask)
10645          storeDone = true;
10646    }
10647 
10648    free_temp_indexes(emit);
10649 }
10650 
10651 
10652 /**
10653  * Emit uav / memory store instruction
10654  */
10655 static bool
10656 emit_store(struct svga_shader_emitter_v10 *emit,
10657            const struct tgsi_full_instruction *inst)
10658 {
10659    enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10660    unsigned resourceIndex = inst->Dst[0].Register.Index;
10661 
10662    /* If the resource register has an indirect index, we need to expand it
10663     * since the SM5 device does not support indirect indexing for UAVs.
10664     */
10666    if (inst->Dst[0].Register.Indirect &&
10667        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10668 
10669       unsigned indirect_index = inst->Dst[0].Indirect.Index;
10670       unsigned num_resources =
10671          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10672                                             emit->num_images;
10673 
10674       /* Indirect index tmp register */
10675       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10676       struct tgsi_full_src_register indirect_addr_src =
10677          make_src_temp_reg(indirect_addr);
10678       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10679 
10680       /* Add offset to the indirect index */
10681       if (inst->Dst[0].Register.Index != 0) {
10682          struct tgsi_full_src_register offset =
10683             make_immediate_reg_int(emit, inst->Dst[0].Register.Index);
10684          struct tgsi_full_dst_register indirect_addr_dst =
10685             make_dst_temp_reg(indirect_addr);
10686          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10687                               &indirect_addr_src, &offset);
10688       }
10689 
10690       /* Loop through the resource array to find which resource to use.
10691        */
10692       loop_instruction(0, num_resources, &indirect_addr_src,
10693                        emit_store_instruction, emit, inst);
10694    }
10695    else {
10696       emit_store_instruction(emit, inst, resourceIndex);
10697    }
10698 
10699    free_temp_indexes(emit);
10700 
10701    return true;
10702 }
10703 
10704 
10705 /**
10706  * A helper function to emit an atomic instruction.
10707  */
10708 
10709 static void
10710 emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
10711                         const struct tgsi_full_instruction *inst,
10712                         unsigned resourceIndex)
10713 {
10714    VGPU10OpcodeToken0 token0;
10715    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10716    struct tgsi_full_src_register addr_src;
10717    VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
10718    const struct tgsi_full_src_register *offset;
10719 
10720    /* ntt does not specify an offset for HWATOMIC, so just pass NULL as the offset. */
10721    offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1];
10722 
10723    /* Resolve the resource address */
10724    addr_src = emit_uav_addr_offset(emit, resourceType,
10725                                    inst->Src[0].Register.Index,
10726                                    inst->Src[0].Register.Indirect,
10727                                    inst->Src[0].Indirect.Index,
10728                                    offset);
10729 
10730    /* Emit the atomic operation */
10731    begin_emit_instruction(emit);
10732 
10733    token0.value = 0;
10734    token0.opcodeType = opcode;
10735    token0.saturate = inst->Instruction.Saturate;
10736    emit_dword(emit, token0.value);
10737 
10738    emit_dst_register(emit, &inst->Dst[0]);
10739 
10740    if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
10741       emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
10742    } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) {
10743       assert(inst->Src[0].Register.Dimension == 1);
10744       emit_uav_register(emit, inst->Src[0].Dimension.Index,
10745                         UAV_ATOMIC, inst->Src[0].Register.File, 0);
10746    } else {
10747       emit_uav_register(emit, resourceIndex,
10748                         UAV_ATOMIC, inst->Src[0].Register.File, 0);
10749    }
10750 
10751    /* resource address offset */
10752    emit_src_register(emit, &addr_src);
10753 
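   /* The atomic source value (src[2]): replicate its .x component */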
10754    struct tgsi_full_src_register src0_x =
10755          swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10756                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10757    emit_src_register(emit, &src0_x);
10758 
10759    if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
10760       struct tgsi_full_src_register src1_x =
10761          swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10762                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10763 
10764       emit_src_register(emit, &src1_x);
10765    }
10766 
10767    end_emit_instruction(emit);
10768 
10769    free_temp_indexes(emit);
10770 }
10771 
10772 
10773 /**
10774  * Emit atomic instruction
10775  */
10776 static bool
10777 emit_atomic(struct svga_shader_emitter_v10 *emit,
10778             const struct tgsi_full_instruction *inst,
10779             VGPU10_OPCODE_TYPE opcode)
10780 {
10781    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10782    unsigned resourceIndex = inst->Src[0].Register.Index;
10783 
10784    emit->cur_atomic_opcode = opcode;
10785 
10786    /* If the resource register has an indirect index, we need to expand it
10787     * since the SM5 device does not support indirect indexing for UAVs.
10788     */
10790    if (inst->Dst[0].Register.Indirect &&
10791        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10792 
10793       unsigned indirect_index = inst->Dst[0].Indirect.Index;
10794       unsigned num_resources =
10795          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10796                                             emit->num_images;
10797 
10798       /* indirect index tmp register */
10799       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10800       struct tgsi_full_src_register indirect_addr_src =
10801          make_src_temp_reg(indirect_addr);
10802       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10803 
10804       /* Loop through the resource array to find which resource to use.
10805        */
10806       loop_instruction(0, num_resources, &indirect_addr_src,
10807                        emit_atomic_instruction, emit, inst);
10808    }
10809    else {
10810       emit_atomic_instruction(emit, inst, resourceIndex);
10811    }
10812 
10813    free_temp_indexes(emit);
10814 
10815    return true;
10816 }
10817 
10818 
10819 /**
10820  * Emit barrier instruction
10821  */
10822 static bool
10823 emit_barrier(struct svga_shader_emitter_v10 *emit,
10824              const struct tgsi_full_instruction *inst)
10825 {
10826    VGPU10OpcodeToken0 token0;
10827 
10828    assert(emit->version >= 50);
10829 
10830    token0.value = 0;
10831    token0.opcodeType = VGPU10_OPCODE_SYNC;
10832 
10833    if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
10834       /* The SM5 device doesn't support BARRIER in a tessellation control
10835        * shader.  If a barrier is used in the shader, do nothing for this
10836        * opcode and continue with the rest of the shader translation.
10837        */
10838       util_debug_message(&emit->svga_debug_callback, INFO,
10839                          "barrier instruction is not supported in tessellation control shader\n");
10840       return true;
10841    }
10842    else if (emit->unit == PIPE_SHADER_COMPUTE) {
10843       if (emit->cs.shared_memory_declared)
10844          token0.syncThreadGroupShared = 1;
10845 
10846       if (emit->uav_declared)
10847          token0.syncUAVMemoryGroup = 1;
10848 
10849       token0.syncThreadsInGroup = 1;
10850    } else {
10851       token0.syncUAVMemoryGlobal = 1;
10852    }
10853 
10854    assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10855           token0.syncThreadGroupShared);
10856 
10857    begin_emit_instruction(emit);
10858    emit_dword(emit, token0.value);
10859    end_emit_instruction(emit);
10860 
10861    return true;
10862 }
10863 
10864 /**
10865  * Emit memory barrier instruction
10866  */
10867 static bool
10868 emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
10869                     const struct tgsi_full_instruction *inst)
10870 {
10871    unsigned index = inst->Src[0].Register.Index;
10872    unsigned swizzle = inst->Src[0].Register.SwizzleX;
10873    unsigned bartype = emit->immediates[index][swizzle].Int;
10874    VGPU10OpcodeToken0 token0;
10875 
10876    token0.value = 0;
10877    token0.opcodeType = VGPU10_OPCODE_SYNC;
10878 
10879    if (emit->unit == PIPE_SHADER_COMPUTE) {
10880 
10881       /* For compute shader, issue sync opcode with different options
10882        * depending on the memory barrier type.
10883        *
10884        * Bit 0: Shader storage buffers
10885        * Bit 1: Atomic buffers
10886        * Bit 2: Images
10887        * Bit 3: Shared memory
10888        * Bit 4: Thread group
10889        */
10890 
10891       if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10892                      TGSI_MEMBAR_SHADER_IMAGE))
10893          token0.syncUAVMemoryGlobal = 1;
10894       else if (bartype & TGSI_MEMBAR_THREAD_GROUP)
10895          token0.syncUAVMemoryGroup = 1;
10896 
10897       if (bartype & TGSI_MEMBAR_SHARED)
10898          token0.syncThreadGroupShared = 1;
10899    }
10900    else {
10901       /**
10902        * For graphics stages, only sync_uglobal is available.
10903        */
10904       if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10905                      TGSI_MEMBAR_SHADER_IMAGE))
10906          token0.syncUAVMemoryGlobal = 1;
10907    }
10908 
10909    assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10910           token0.syncThreadGroupShared);
10911 
10912    begin_emit_instruction(emit);
10913    emit_dword(emit, token0.value);
10914    end_emit_instruction(emit);
10915 
10916    return true;
10917 }
10918 
10919 
10920 /**
10921  * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
10922  */
10923 static bool
10924 emit_resq(struct svga_shader_emitter_v10 *emit,
10925           const struct tgsi_full_instruction *inst)
10926 {
10927    struct tgsi_full_src_register zero =
10928       make_immediate_reg_int(emit, 0);
10929 
10930    unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource;
10931 
10932    if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) {
10933       struct tgsi_full_src_register image_src;
10934 
10935       image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index);
10936 
10937       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src);
10938       return true;
10939    }
10940 
10941    begin_emit_instruction(emit);
10942    if (uav_resource == TGSI_TEXTURE_BUFFER) {
10943       emit_opcode(emit, VGPU10_OPCODE_BUFINFO, false);
10944       emit_dst_register(emit, &inst->Dst[0]);
10945    }
10946    else {
10947       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
10948       emit_dst_register(emit, &inst->Dst[0]);
10949       emit_src_register(emit, &zero);
10950    }
10951    emit_uav_register(emit, inst->Src[0].Register.Index,
10952                      UAV_RESQ, inst->Src[0].Register.File, 0);
10953    end_emit_instruction(emit);
10954 
10955    return true;
10956 }
10957 
10958 
10959 static bool
10960 emit_instruction(struct svga_shader_emitter_v10 *emit,
10961                  unsigned inst_number,
10962                  const struct tgsi_full_instruction *inst)
10963 {
10964    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10965 
10966    switch (opcode) {
10967    case TGSI_OPCODE_ADD:
10968    case TGSI_OPCODE_AND:
10969    case TGSI_OPCODE_BGNLOOP:
10970    case TGSI_OPCODE_BRK:
10971    case TGSI_OPCODE_CEIL:
10972    case TGSI_OPCODE_CONT:
10973    case TGSI_OPCODE_DDX:
10974    case TGSI_OPCODE_DDY:
10975    case TGSI_OPCODE_DIV:
10976    case TGSI_OPCODE_DP2:
10977    case TGSI_OPCODE_DP3:
10978    case TGSI_OPCODE_DP4:
10979    case TGSI_OPCODE_ELSE:
10980    case TGSI_OPCODE_ENDIF:
10981    case TGSI_OPCODE_ENDLOOP:
10982    case TGSI_OPCODE_ENDSUB:
10983    case TGSI_OPCODE_F2I:
10984    case TGSI_OPCODE_F2U:
10985    case TGSI_OPCODE_FLR:
10986    case TGSI_OPCODE_FRC:
10987    case TGSI_OPCODE_FSEQ:
10988    case TGSI_OPCODE_FSGE:
10989    case TGSI_OPCODE_FSLT:
10990    case TGSI_OPCODE_FSNE:
10991    case TGSI_OPCODE_I2F:
10992    case TGSI_OPCODE_IMAX:
10993    case TGSI_OPCODE_IMIN:
10994    case TGSI_OPCODE_INEG:
10995    case TGSI_OPCODE_ISGE:
10996    case TGSI_OPCODE_ISHR:
10997    case TGSI_OPCODE_ISLT:
10998    case TGSI_OPCODE_MAD:
10999    case TGSI_OPCODE_MAX:
11000    case TGSI_OPCODE_MIN:
11001    case TGSI_OPCODE_MUL:
11002    case TGSI_OPCODE_NOP:
11003    case TGSI_OPCODE_NOT:
11004    case TGSI_OPCODE_OR:
11005    case TGSI_OPCODE_UADD:
11006    case TGSI_OPCODE_USEQ:
11007    case TGSI_OPCODE_USGE:
11008    case TGSI_OPCODE_USLT:
11009    case TGSI_OPCODE_UMIN:
11010    case TGSI_OPCODE_UMAD:
11011    case TGSI_OPCODE_UMAX:
11012    case TGSI_OPCODE_ROUND:
11013    case TGSI_OPCODE_SQRT:
11014    case TGSI_OPCODE_SHL:
11015    case TGSI_OPCODE_TRUNC:
11016    case TGSI_OPCODE_U2F:
11017    case TGSI_OPCODE_UCMP:
11018    case TGSI_OPCODE_USHR:
11019    case TGSI_OPCODE_USNE:
11020    case TGSI_OPCODE_XOR:
11021    /* Begin SM5 opcodes */
11022    case TGSI_OPCODE_F2D:
11023    case TGSI_OPCODE_D2F:
11024    case TGSI_OPCODE_DADD:
11025    case TGSI_OPCODE_DMUL:
11026    case TGSI_OPCODE_DMAX:
11027    case TGSI_OPCODE_DMIN:
11028    case TGSI_OPCODE_DSGE:
11029    case TGSI_OPCODE_DSLT:
11030    case TGSI_OPCODE_DSEQ:
11031    case TGSI_OPCODE_DSNE:
11032    case TGSI_OPCODE_BREV:
11033    case TGSI_OPCODE_POPC:
11034    case TGSI_OPCODE_LSB:
11035    case TGSI_OPCODE_INTERP_CENTROID:
11036    case TGSI_OPCODE_INTERP_SAMPLE:
11037       /* simple instructions */
11038       return emit_simple(emit, inst);
11039    case TGSI_OPCODE_RET:
11040       if (emit->unit == PIPE_SHADER_TESS_CTRL &&
11041           !emit->tcs.control_point_phase) {
11042 
11043          /* store the tessellation levels in the patch constant phase only */
11044          store_tesslevels(emit);
11045       }
11046       return emit_simple(emit, inst);
11047 
11048    case TGSI_OPCODE_IMSB:
11049    case TGSI_OPCODE_UMSB:
11050       return emit_msb(emit, inst);
11051    case TGSI_OPCODE_IBFE:
11052    case TGSI_OPCODE_UBFE:
11053       return emit_bfe(emit, inst);
11054    case TGSI_OPCODE_BFI:
11055       return emit_bfi(emit, inst);
11056    case TGSI_OPCODE_MOV:
11057       return emit_mov(emit, inst);
11058    case TGSI_OPCODE_EMIT:
11059       return emit_vertex(emit, inst);
11060    case TGSI_OPCODE_ENDPRIM:
11061       return emit_endprim(emit, inst);
11062    case TGSI_OPCODE_IABS:
11063       return emit_iabs(emit, inst);
11064    case TGSI_OPCODE_ARL:
11065       FALLTHROUGH;
11066    case TGSI_OPCODE_UARL:
11067       return emit_arl_uarl(emit, inst);
11068    case TGSI_OPCODE_BGNSUB:
11069       /* no-op */
11070       return true;
11071    case TGSI_OPCODE_CAL:
11072       return emit_cal(emit, inst);
11073    case TGSI_OPCODE_CMP:
11074       return emit_cmp(emit, inst);
11075    case TGSI_OPCODE_COS:
11076       return emit_sincos(emit, inst);
11077    case TGSI_OPCODE_DST:
11078       return emit_dst(emit, inst);
11079    case TGSI_OPCODE_EX2:
11080       return emit_ex2(emit, inst);
11081    case TGSI_OPCODE_EXP:
11082       return emit_exp(emit, inst);
11083    case TGSI_OPCODE_IF:
11084       return emit_if(emit, &inst->Src[0]);
11085    case TGSI_OPCODE_KILL:
11086       return emit_discard(emit, inst);
11087    case TGSI_OPCODE_KILL_IF:
11088       return emit_cond_discard(emit, inst);
11089    case TGSI_OPCODE_LG2:
11090       return emit_lg2(emit, inst);
11091    case TGSI_OPCODE_LIT:
11092       return emit_lit(emit, inst);
11093    case TGSI_OPCODE_LODQ:
11094       return emit_lodq(emit, inst);
11095    case TGSI_OPCODE_LOG:
11096       return emit_log(emit, inst);
11097    case TGSI_OPCODE_LRP:
11098       return emit_lrp(emit, inst);
11099    case TGSI_OPCODE_POW:
11100       return emit_pow(emit, inst);
11101    case TGSI_OPCODE_RCP:
11102       return emit_rcp(emit, inst);
11103    case TGSI_OPCODE_RSQ:
11104       return emit_rsq(emit, inst);
11105    case TGSI_OPCODE_SAMPLE:
11106       return emit_sample(emit, inst);
11107    case TGSI_OPCODE_SEQ:
11108       return emit_seq(emit, inst);
11109    case TGSI_OPCODE_SGE:
11110       return emit_sge(emit, inst);
11111    case TGSI_OPCODE_SGT:
11112       return emit_sgt(emit, inst);
11113    case TGSI_OPCODE_SIN:
11114       return emit_sincos(emit, inst);
11115    case TGSI_OPCODE_SLE:
11116       return emit_sle(emit, inst);
11117    case TGSI_OPCODE_SLT:
11118       return emit_slt(emit, inst);
11119    case TGSI_OPCODE_SNE:
11120       return emit_sne(emit, inst);
11121    case TGSI_OPCODE_SSG:
11122       return emit_ssg(emit, inst);
11123    case TGSI_OPCODE_ISSG:
11124       return emit_issg(emit, inst);
11125    case TGSI_OPCODE_TEX:
11126       return emit_tex(emit, inst);
11127    case TGSI_OPCODE_TG4:
11128       return emit_tg4(emit, inst);
11129    case TGSI_OPCODE_TEX2:
11130       return emit_tex2(emit, inst);
11131    case TGSI_OPCODE_TXP:
11132       return emit_txp(emit, inst);
11133    case TGSI_OPCODE_TXB:
11134    case TGSI_OPCODE_TXB2:
11135    case TGSI_OPCODE_TXL:
11136       return emit_txl_txb(emit, inst);
11137    case TGSI_OPCODE_TXD:
11138       return emit_txd(emit, inst);
11139    case TGSI_OPCODE_TXF:
11140       return emit_txf(emit, inst);
11141    case TGSI_OPCODE_TXL2:
11142       return emit_txl2(emit, inst);
11143    case TGSI_OPCODE_TXQ:
11144       return emit_txq(emit, inst);
11145    case TGSI_OPCODE_UIF:
11146       return emit_if(emit, &inst->Src[0]);
11147    case TGSI_OPCODE_UMUL_HI:
11148    case TGSI_OPCODE_IMUL_HI:
11149    case TGSI_OPCODE_UDIV:
11150       /* These cases use only the FIRST of two destination registers */
11151       return emit_simple_1dst(emit, inst, 2, 0);
11152    case TGSI_OPCODE_IDIV:
11153       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
11154    case TGSI_OPCODE_UMUL:
11155    case TGSI_OPCODE_UMOD:
11156    case TGSI_OPCODE_MOD:
11157       /* These cases use only the SECOND of two destination registers */
11158       return emit_simple_1dst(emit, inst, 2, 1);
11159 
11160    /* Begin SM5 opcodes */
11161    case TGSI_OPCODE_DABS:
11162       return emit_dabs(emit, inst);
11163    case TGSI_OPCODE_DNEG:
11164       return emit_dneg(emit, inst);
11165    case TGSI_OPCODE_DRCP:
11166       return emit_simple(emit, inst);
11167    case TGSI_OPCODE_DSQRT:
11168       return emit_dsqrt(emit, inst);
11169    case TGSI_OPCODE_DMAD:
11170       return emit_dmad(emit, inst);
11171    case TGSI_OPCODE_DFRAC:
11172       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
11173    case TGSI_OPCODE_D2I:
11174    case TGSI_OPCODE_D2U:
11175       return emit_simple(emit, inst);
11176    case TGSI_OPCODE_I2D:
11177    case TGSI_OPCODE_U2D:
11178       return emit_simple(emit, inst);
11179    case TGSI_OPCODE_DRSQ:
11180       return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
11181    case TGSI_OPCODE_DDIV:
11182       return emit_simple(emit, inst);
11183    case TGSI_OPCODE_INTERP_OFFSET:
11184       return emit_interp_offset(emit, inst);
11185    case TGSI_OPCODE_FMA:
11186    case TGSI_OPCODE_DFMA:
11187       return emit_simple(emit, inst);
11188 
11189    case TGSI_OPCODE_DTRUNC:
11190       return emit_dtrunc(emit, inst);
11191 
11192    /* The following opcodes should never be seen here.  We return zero
11193     * for PIPE_CAP_TGSI_DROUND_SUPPORTED.
11194     */
11195    case TGSI_OPCODE_LDEXP:
11196    case TGSI_OPCODE_DSSG:
11197    case TGSI_OPCODE_DLDEXP:
11198    case TGSI_OPCODE_DCEIL:
11199    case TGSI_OPCODE_DFLR:
11200       debug_printf("Unexpected TGSI opcode %s.  "
11201                    "Should have been translated away by the GLSL compiler.\n",
11202                    tgsi_get_opcode_name(opcode));
11203       return false;
11204 
11205    case TGSI_OPCODE_LOAD:
11206       return emit_load(emit, inst);
11207 
11208    case TGSI_OPCODE_STORE:
11209       return emit_store(emit, inst);
11210 
11211    case TGSI_OPCODE_ATOMAND:
11212       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);
11213 
11214    case TGSI_OPCODE_ATOMCAS:
11215       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
11216 
11217    case TGSI_OPCODE_ATOMIMAX:
11218       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);
11219 
11220    case TGSI_OPCODE_ATOMIMIN:
11221       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);
11222 
11223    case TGSI_OPCODE_ATOMOR:
11224       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);
11225 
11226    case TGSI_OPCODE_ATOMUADD:
11227       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);
11228 
11229    case TGSI_OPCODE_ATOMUMAX:
11230       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);
11231 
11232    case TGSI_OPCODE_ATOMUMIN:
11233       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);
11234 
11235    case TGSI_OPCODE_ATOMXCHG:
11236       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);
11237 
11238    case TGSI_OPCODE_ATOMXOR:
11239       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);
11240 
11241    case TGSI_OPCODE_BARRIER:
11242       return emit_barrier(emit, inst);
11243 
11244    case TGSI_OPCODE_MEMBAR:
11245       return emit_memory_barrier(emit, inst);
11246 
11247    case TGSI_OPCODE_RESQ:
11248       return emit_resq(emit, inst);
11249 
11250    case TGSI_OPCODE_END:
11251       if (!emit_post_helpers(emit))
11252          return false;
11253       return emit_simple(emit, inst);
11254 
11255    default:
11256       debug_printf("Unimplemented tgsi instruction %s\n",
11257                    tgsi_get_opcode_name(opcode));
11258       return false;
11259    }
11260 
11261    return true;
11262 }
11263 
11264 
11265 /**
11266  * Translate a single TGSI instruction to VGPU10.
11267  */
11268 static bool
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)11269 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
11270                         unsigned inst_number,
11271                         const struct tgsi_full_instruction *inst)
11272 {
11273    if (emit->skip_instruction)
11274       return true;
11275 
11276    bool ret = true;
11277    unsigned start_token = emit_get_num_tokens(emit);
11278 
11279    emit->reemit_tgsi_instruction = false;
11280 
11281    ret = emit_instruction(emit, inst_number, inst);
11282 
11283    if (emit->reemit_tgsi_instruction) {
11284       /**
11285        * Reset emit->ptr to where the translation of this tgsi instruction
11286        * started.
11287        */
11288       VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
11289       emit->ptr = (char *) (tokens + start_token);
11290 
11291       emit->reemit_tgsi_instruction = false;
11292    }
11293    return ret;
11294 }
11295 
11296 
11297 /**
11298  * Emit the extra instructions to adjust the vertex position.
11299  * There are two possible adjustments:
11300  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
11301  *    "prescale" and "pretranslate" values.
11302  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
11303  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
11304  */
11305 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)11306 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
11307 {
11308    struct tgsi_full_src_register tmp_pos_src;
11309    struct tgsi_full_dst_register pos_dst;
11310    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
11311 
11312    /* Don't bother to emit any extra vertex instructions if vertex position is
11313     * not written out
11314     */
11315    if (emit->vposition.out_index == INVALID_INDEX)
11316       return;
11317 
11318    /**
11319     * Reset the temporary vertex position register index
11320     * so that emit_dst_register() will use the real vertex position output
11321     */
11322    emit->vposition.tmp_index = INVALID_INDEX;
11323 
11324    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
11325    pos_dst = make_dst_output_reg(emit->vposition.out_index);
11326 
11327    /* If non-adjusted vertex position register index
11328     * is valid, copy the vertex position from the temporary
11329     * vertex position register before it is modified by the
11330     * prescale computation.
11331     */
11332    if (emit->vposition.so_index != INVALID_INDEX) {
11333       struct tgsi_full_dst_register pos_so_dst =
11334          make_dst_output_reg(emit->vposition.so_index);
11335 
11336       /* MOV pos_so, tmp_pos */
11337       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
11338    }
11339 
11340    if (emit->vposition.need_prescale) {
11341       /* This code adjusts the vertex position to match the VGPU10 convention.
11342        * If p is the position computed by the shader (usually by applying the
11343        * modelview and projection matrices), the new position q is computed by:
11344        *
11345        * q.x = p.w * trans.x + p.x * scale.x
11346        * q.y = p.w * trans.y + p.y * scale.y
11347        * q.z = p.w * trans.z + p.z * scale.z;
11348        * q.w = p.w * trans.w + p.w;
11349        */
11350       struct tgsi_full_src_register tmp_pos_src_w =
11351          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11352       struct tgsi_full_dst_register tmp_pos_dst =
11353          make_dst_temp_reg(vs_pos_tmp_index);
11354       struct tgsi_full_dst_register tmp_pos_dst_xyz =
11355          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
11356 
11357       struct tgsi_full_src_register prescale_scale =
11358          make_src_temp_reg(emit->vposition.prescale_scale_index);
11359       struct tgsi_full_src_register prescale_trans =
11360          make_src_temp_reg(emit->vposition.prescale_trans_index);
11361 
11362       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
11363       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
11364                            &tmp_pos_src, &prescale_scale);
11365 
11366       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
11367       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
11368                            &prescale_trans, &tmp_pos_src);
11369    }
11370    else if (emit->key.vs.undo_viewport) {
11371       /* This code computes the final vertex position from the temporary
11372        * vertex position by undoing the viewport transformation and the
11373        * divide-by-W operation (we convert window coords back to clip coords).
11374        * This is needed when we use the 'draw' module for fallbacks.
11375        * If p is the temp pos in window coords, then the NDC coord q is:
11376        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
11377        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
11378        *   q.z = p.z * p.w
11379        *   q.w = p.w
11380        * CONST[vs_viewport_index] contains:
11381        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
11382        */
11383       struct tgsi_full_dst_register tmp_pos_dst =
11384          make_dst_temp_reg(vs_pos_tmp_index);
11385       struct tgsi_full_dst_register tmp_pos_dst_xy =
11386          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
11387       struct tgsi_full_src_register tmp_pos_src_wwww =
11388          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11389 
11390       struct tgsi_full_dst_register pos_dst_xyz =
11391          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
11392       struct tgsi_full_dst_register pos_dst_w =
11393          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
11394 
11395       struct tgsi_full_src_register vp_xyzw =
11396          make_src_const_reg(emit->vs.viewport_index);
11397       struct tgsi_full_src_register vp_zwww =
11398          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
11399                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
11400 
11401       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
11402       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
11403                            &tmp_pos_src, &vp_zwww);
11404 
11405       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
11406       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
11407                            &tmp_pos_src, &vp_xyzw);
11408 
11409       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
11410       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
11411                            &tmp_pos_src, &tmp_pos_src_wwww);
11412 
11413       /* MOV pos.w, tmp_pos.w */
11414       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
11415    }
11416    else if (vs_pos_tmp_index != INVALID_INDEX) {
11417       /* This code is to handle the case where the temporary vertex
11418        * position register is created when the vertex shader has stream
11419        * output and prescale is disabled because rasterization is to be
11420        * discarded.
11421        */
11422       struct tgsi_full_dst_register pos_dst =
11423          make_dst_output_reg(emit->vposition.out_index);
11424 
11425       /* MOV pos, tmp_pos */
11426       begin_emit_instruction(emit);
11427       emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11428       emit_dst_register(emit, &pos_dst);
11429       emit_src_register(emit, &tmp_pos_src);
11430       end_emit_instruction(emit);
11431    }
11432 
11433    /* Restore original vposition.tmp_index value for the next GS vertex.
11434     * It doesn't matter for VS.
11435     */
11436    emit->vposition.tmp_index = vs_pos_tmp_index;
11437 }
11438 
11439 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)11440 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
11441 {
11442    if (emit->clip_mode == CLIP_DISTANCE) {
11443       /* Copy from the clip distance temporary to CLIPDIST & the shadow copy */
11444       emit_clip_distance_instructions(emit);
11445 
11446    } else if (emit->clip_mode == CLIP_VERTEX &&
11447               emit->key.last_vertex_stage) {
11448       /* Convert TGSI CLIPVERTEX to CLIPDIST */
11449       emit_clip_vertex_instructions(emit);
11450    }
11451 
11452    /**
11453     * Emit the vertex position and handle legacy user-defined clip planes
11454     * only if there is a valid vertex position register index.
11455     * This handles the case where the shader doesn't output a
11456     * vertex position; in that case, don't bother to emit any more
11457     * vertex instructions.
11458     */
11459    if (emit->vposition.out_index == INVALID_INDEX)
11460       return;
11461 
11462    /**
11463     * Emit per-vertex clipping instructions for legacy user defined clip planes.
11464     * NOTE: we must emit the clip distance instructions before the
11465     * emit_vpos_instructions() call since the latter function will change
11466     * the TEMP[vs_pos_tmp_index] value.
11467     */
11468    if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
11469       /* Emit CLIPDIST for legacy user defined clip planes */
11470       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
11471    }
11472 }
11473 
11474 
11475 /**
11476  * Emit extra per-vertex instructions.  This includes clip-coordinate
11477  * space conversion and computing clip distances.  This is called for
11478  * each GS emit-vertex instruction and at the end of VS translation.
11479  */
11480 static void
emit_vertex_instructions(struct svga_shader_emitter_v10 * emit)11481 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
11482 {
11483    /* Emit clipping instructions based on clipping mode */
11484    emit_clipping_instructions(emit);
11485 
11486    /* Emit vertex position instructions */
11487    emit_vpos_instructions(emit);
11488 }
11489 
11490 
11491 /**
11492  * Translate the TGSI_OPCODE_EMIT GS instruction.
11493  */
11494 static bool
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)11495 emit_vertex(struct svga_shader_emitter_v10 *emit,
11496             const struct tgsi_full_instruction *inst)
11497 {
11498    bool ret = true;
11499 
11500    assert(emit->unit == PIPE_SHADER_GEOMETRY);
11501 
11502    /**
11503     * Emit the viewport array index for the first vertex.
11504     */
11505    if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
11506       struct tgsi_full_dst_register viewport_index_out =
11507          make_dst_output_reg(emit->gs.viewport_index_out_index);
11508       struct tgsi_full_dst_register viewport_index_out_x =
11509          writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
11510       struct tgsi_full_src_register viewport_index_tmp =
11511          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11512 
11513       /* Set the out index to INVALID_INDEX, so it will not
11514        * be assigned to a temp again in emit_dst_register, and
11515        * the viewport index will not be assigned again in the
11516        * subsequent vertices.
11517        */
11518       emit->gs.viewport_index_out_index = INVALID_INDEX;
11519       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11520                            &viewport_index_out_x, &viewport_index_tmp);
11521    }
11522 
11523    /**
11524     * Find the stream index associated with this emit vertex instruction.
11525     */
11526    assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
11527    unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
11528 
11529    /**
11530     * According to the ARB_gpu_shader5 spec, the built-in geometry shader
11531     * outputs are always associated with vertex stream zero.
11532     * So emit the extra vertex instructions for position or clip distance
11533     * for stream zero only.
11534     */
11535    if (streamIndex == 0) {
11536       /**
11537        * Before emitting vertex instructions, emit the temporaries for
11538        * the prescale constants based on the viewport index if needed.
11539        */
11540       if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
11541          emit_temp_prescale_instructions(emit);
11542 
11543       emit_vertex_instructions(emit);
11544    }
11545 
11546    begin_emit_instruction(emit);
11547    if (emit->version >= 50) {
11548       if (emit->info.num_stream_output_components[streamIndex] == 0) {
11549          /**
11550           * If there is no output for this stream, discard this instruction.
11551           */
11552          emit->discard_instruction = true;
11553       }
11554       else {
11555          emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, false);
11556          emit_stream_register(emit, streamIndex);
11557       }
11558    }
11559    else {
11560       emit_opcode(emit, VGPU10_OPCODE_EMIT, false);
11561    }
11562    end_emit_instruction(emit);
11563 
11564    return ret;
11565 }
11566 
11567 
11568 /**
11569  * Emit the extra code to convert from VGPU10's boolean front-face
11570  * register to TGSI's signed front-face register.
11571  *
11572  * TODO: Make temporary front-face register a scalar.
11573  */
11574 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)11575 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
11576 {
11577    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11578 
11579    if (emit->fs.face_input_index != INVALID_INDEX) {
11580       /* convert vgpu10 boolean face register to gallium +/-1 value */
11581       struct tgsi_full_dst_register tmp_dst =
11582          make_dst_temp_reg(emit->fs.face_tmp_index);
11583       struct tgsi_full_src_register one =
11584          make_immediate_reg_float(emit, 1.0f);
11585       struct tgsi_full_src_register neg_one =
11586          make_immediate_reg_float(emit, -1.0f);
11587 
11588       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
11589       begin_emit_instruction(emit);
11590       emit_opcode(emit, VGPU10_OPCODE_MOVC, false);
11591       emit_dst_register(emit, &tmp_dst);
11592       emit_face_register(emit);
11593       emit_src_register(emit, &one);
11594       emit_src_register(emit, &neg_one);
11595       end_emit_instruction(emit);
11596    }
11597 }
11598 
11599 
11600 /**
11601  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
11602  */
11603 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)11604 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
11605 {
11606    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11607 
11608    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
11609       struct tgsi_full_dst_register tmp_dst =
11610          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
11611       struct tgsi_full_dst_register tmp_dst_xyz =
11612          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
11613       struct tgsi_full_dst_register tmp_dst_w =
11614          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11615       struct tgsi_full_src_register one =
11616          make_immediate_reg_float(emit, 1.0f);
11617       struct tgsi_full_src_register fragcoord =
11618          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
11619 
11620       /* save the input index */
11621       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
11622       /* set to invalid to prevent substitution in emit_src_register() */
11623       emit->fs.fragcoord_input_index = INVALID_INDEX;
11624 
11625       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
11626       begin_emit_instruction(emit);
11627       emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11628       emit_dst_register(emit, &tmp_dst_xyz);
11629       emit_src_register(emit, &fragcoord);
11630       end_emit_instruction(emit);
11631 
11632       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
11633       begin_emit_instruction(emit);
11634       emit_opcode(emit, VGPU10_OPCODE_DIV, false);
11635       emit_dst_register(emit, &tmp_dst_w);
11636       emit_src_register(emit, &one);
11637       emit_src_register(emit, &fragcoord);
11638       end_emit_instruction(emit);
11639 
11640       /* restore saved value */
11641       emit->fs.fragcoord_input_index = fragcoord_input_index;
11642    }
11643 }
11644 
11645 
11646 /**
11647  * Emit the extra code to get the current sample position value and
11648  * put it into a temp register.
11649  */
11650 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)11651 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
11652 {
11653    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11654 
11655    if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
11656       assert(emit->version >= 41);
11657 
11658       struct tgsi_full_dst_register tmp_dst =
11659          make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
11660       struct tgsi_full_src_register half =
11661          make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
11662 
11663       struct tgsi_full_src_register tmp_src =
11664          make_src_temp_reg(emit->fs.sample_pos_tmp_index);
11665       struct tgsi_full_src_register sample_index_reg =
11666          make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
11667                              emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
11668 
11669       /* The first src register is a shader resource (if we want a
11670        * multisampled resource sample position) or the rasterizer register
11671        * (if we want the current sample position in the color buffer).  We
11672        * want the latter.
11673        */
11674 
11675       /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
11676       begin_emit_instruction(emit);
11677       emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, false);
11678       emit_dst_register(emit, &tmp_dst);
11679       emit_rasterizer_register(emit);
11680       emit_src_register(emit, &sample_index_reg);
11681       end_emit_instruction(emit);
11682 
11683       /* Convert from D3D coords to GL coords by adding 0.5 bias */
11684       /* ADD dst, dst, half */
11685       begin_emit_instruction(emit);
11686       emit_opcode(emit, VGPU10_OPCODE_ADD, false);
11687       emit_dst_register(emit, &tmp_dst);
11688       emit_src_register(emit, &tmp_src);
11689       emit_src_register(emit, &half);
11690       end_emit_instruction(emit);
11691    }
11692 }
11693 
11694 
11695 /**
11696  * Emit extra instructions to adjust VS inputs/attributes.  This can
11697  * mean casting a vertex attribute from int to float or setting the
11698  * W component to 1, or both.
11699  */
11700 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)11701 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
11702 {
11703    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
11704    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
11705    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
11706    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
11707    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
11708    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
11709    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
11710 
11711    unsigned adjust_mask = (save_w_1_mask |
11712                            save_itof_mask |
11713                            save_utof_mask |
11714                            save_is_bgra_mask |
11715                            save_puint_to_snorm_mask |
11716                            save_puint_to_uscaled_mask |
11717                            save_puint_to_sscaled_mask);
11718 
11719    assert(emit->unit == PIPE_SHADER_VERTEX);
11720 
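   /* For each flagged attribute we emit a small fix-up sequence into a
    * per-attribute temporary.  For example, a BGRA attribute that also
    * needs w forced to 1 becomes roughly:
    *
    *   MOV tmp, INPUT[i]
    *   ... swap tmp's R and B components ...
    *   MOV tmp.w, 1.0
    *
    * Later reads of INPUT[i] are then expected to be redirected to the
    * adjusted temp recorded in emit->vs.adjusted_input[i].
    */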
11721    if (adjust_mask) {
11722       struct tgsi_full_src_register one =
11723          make_immediate_reg_float(emit, 1.0f);
11724 
11725       struct tgsi_full_src_register one_int =
11726          make_immediate_reg_int(emit, 1);
11727 
11728       /* We need to turn off these bitmasks while emitting the
11729        * instructions below, then restore them afterward.
11730        */
11731       emit->key.vs.adjust_attrib_w_1 = 0;
11732       emit->key.vs.adjust_attrib_itof = 0;
11733       emit->key.vs.adjust_attrib_utof = 0;
11734       emit->key.vs.attrib_is_bgra = 0;
11735       emit->key.vs.attrib_puint_to_snorm = 0;
11736       emit->key.vs.attrib_puint_to_uscaled = 0;
11737       emit->key.vs.attrib_puint_to_sscaled = 0;
11738 
11739       while (adjust_mask) {
11740          unsigned index = u_bit_scan(&adjust_mask);
11741 
11742          /* skip the instruction if this vertex attribute is not being used */
11743          if (emit->info.input_usage_mask[index] == 0)
11744             continue;
11745 
11746          unsigned tmp = emit->vs.adjusted_input[index];
11747          struct tgsi_full_src_register input_src =
11748             make_src_reg(TGSI_FILE_INPUT, index);
11749 
11750          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11751          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11752          struct tgsi_full_dst_register tmp_dst_w =
11753             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11754 
11755          /* ITOF/UTOF/MOV tmp, input[index] */
11756          if (save_itof_mask & (1 << index)) {
11757             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
11758                                  &tmp_dst, &input_src);
11759          }
11760          else if (save_utof_mask & (1 << index)) {
11761             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
11762                                  &tmp_dst, &input_src);
11763          }
11764          else if (save_puint_to_snorm_mask & (1 << index)) {
11765             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
11766          }
11767          else if (save_puint_to_uscaled_mask & (1 << index)) {
11768             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
11769          }
11770          else if (save_puint_to_sscaled_mask & (1 << index)) {
11771             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
11772          }
11773          else {
11774             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
11775             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11776                                  &tmp_dst, &input_src);
11777          }
11778 
11779          if (save_is_bgra_mask & (1 << index)) {
11780             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
11781          }
11782 
11783          if (save_w_1_mask & (1 << index)) {
11784             /* MOV tmp.w, 1.0 */
11785             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
11786                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11787                                     &tmp_dst_w, &one_int);
11788             }
11789             else {
11790                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11791                                     &tmp_dst_w, &one);
11792             }
11793          }
11794       }
11795 
11796       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
11797       emit->key.vs.adjust_attrib_itof = save_itof_mask;
11798       emit->key.vs.adjust_attrib_utof = save_utof_mask;
11799       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
11800       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
11801       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
11802       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
11803    }
11804 }
11805 
11806 
11807 /* Find the zero-value immediate for the default layer index */
11808 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)11809 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
11810 {
11811    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11812 
11813    /* immediate for default layer index 0 */
11814    if (emit->fs.layer_input_index != INVALID_INDEX) {
11815       union tgsi_immediate_data imm;
11816       imm.Int = 0;
11817       emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
11818    }
11819 }
11820 
11821 
11822 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)11823 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11824                              unsigned cbuf_index,
11825                              struct tgsi_full_dst_register *scale,
11826                              struct tgsi_full_dst_register *translate)
11827 {
11828    struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
11829    struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
11830 
11831    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
11832    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
11833 }
11834 
11835 
11836 /**
11837  * A recursive helper function to find the prescale from the constant buffer
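 *
 * For example, with num_prescale == 2 this emits roughly the following
 * sequence, where CB[] is the prescale constant range starting at
 * emit->vposition.prescale_cbuf_index:
 *
 *   GE  tmp, vp_index, 0
 *   IF  tmp
 *     MOV scale, CB[0]
 *     MOV trans, CB[1]
 *   ELSE
 *     EQ  tmp, vp_index, 1
 *     IF  tmp
 *       MOV scale, CB[2]
 *       MOV trans, CB[3]
 *     ENDIF
 *   ENDIF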
11838  */
11839 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)11840 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11841                         unsigned index, unsigned num_prescale,
11842                         struct tgsi_full_src_register *vp_index,
11843                         struct tgsi_full_dst_register *scale,
11844                         struct tgsi_full_dst_register *translate,
11845                         struct tgsi_full_src_register *tmp_src,
11846                         struct tgsi_full_dst_register *tmp_dst)
11847 {
11848    if (num_prescale == 0)
11849       return;
11850 
11851    if (index > 0) {
11852       /* ELSE */
11853       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
11854    }
11855 
11856    struct tgsi_full_src_register index_src =
11857 	                            make_immediate_reg_int(emit, index);
11858 
11859    if (index == 0) {
11860       /* GE tmp, vp_index, index */
11861       emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
11862                            vp_index, &index_src);
11863    } else {
11864       /* EQ tmp, vp_index, index */
11865       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
11866                            vp_index, &index_src);
11867    }
11868 
11869    /* IF tmp */
11870    emit_if(emit, tmp_src);
11871    emit_temp_prescale_from_cbuf(emit,
11872                                 emit->vposition.prescale_cbuf_index + 2 * index,
11873                                 scale, translate);
11874 
11875    find_prescale_from_cbuf(emit, index+1, num_prescale-1,
11876                            vp_index, scale, translate,
11877                            tmp_src, tmp_dst);
11878 
11879    /* ENDIF */
11880    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
11881 }
11882 
11883 
11884 /**
11885  * This helper function emits instructions to set the prescale
11886  * and translate temporaries to the correct constants from the
11887  * constant buffer according to the designated viewport.
11888  */
11889 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)11890 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
11891 {
11892    struct tgsi_full_dst_register prescale_scale =
11893          make_dst_temp_reg(emit->vposition.prescale_scale_index);
11894    struct tgsi_full_dst_register prescale_translate =
11895          make_dst_temp_reg(emit->vposition.prescale_trans_index);
11896 
11897    unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
11898 
11899    if (emit->vposition.num_prescale == 1) {
11900       emit_temp_prescale_from_cbuf(emit,
11901                                    prescale_cbuf_index,
11902                                    &prescale_scale, &prescale_translate);
11903    } else {
11904       /**
11905        * Since the SM5 device does not support dynamic indexing, we need
11906        * an if-else chain to select the prescale constants for the
11907        * specified viewport.
11908        */
11909       struct tgsi_full_src_register vp_index_src =
11910          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11911 
11912       struct tgsi_full_src_register vp_index_src_x =
11913          scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
11914 
11915       unsigned tmp = get_temp_index(emit);
11916       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11917       struct tgsi_full_src_register tmp_src_x =
11918                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
11919       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11920 
11921       find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
11922                               &vp_index_src_x,
11923 		              &prescale_scale, &prescale_translate,
11924                               &tmp_src_x, &tmp_dst);
11925    }
11926 
11927    /* Mark that the prescale temporaries have been emitted */
11928    emit->vposition.have_prescale = 1;
11929 }
11930 
11931 
11932 /**
11933  * A helper function to emit an instruction in a vertex shader to add a bias
11934  * to the VertexID system value. This patches the VertexID in the SVGA vertex
11935  * shader to include the base vertex of an indexed primitive or the start index
11936  * of a non-indexed primitive.
11937  */
11938 static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 * emit)11939 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
11940 {
11941    struct tgsi_full_src_register vertex_id_bias_index =
11942       make_src_const_reg(emit->vs.vertex_id_bias_index);
11943    struct tgsi_full_src_register vertex_id_sys_src =
11944       make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
11945    struct tgsi_full_src_register vertex_id_sys_src_x =
11946       scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
11947    struct tgsi_full_dst_register vertex_id_tmp_dst =
11948       make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
11949 
11950    /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
11951    unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
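   /* Temporarily clear the temp index while emitting the IADD so that
    * register emission does not substitute the temp for the VertexID
    * source (mirroring the fragcoord handling above); it is restored
    * right after.
    */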
11952    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11953    emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
11954                         &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, false,
11955                         false);
11956    emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
11957 }
11958 
11959 /**
11960  * A hull shader must have control point outputs, but a tessellation
11961  * control shader may return without writing to a control point output.
11962  * In that case the control point output is assumed to be passed through
11963  * from the control point input.
11964  * This helper writes out a control point output up front in case the
11965  * tessellation control shader returns before writing a control point
11966  * output itself.
11967  */
11968 static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 * emit)11969 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
11970 {
11971    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
11972    assert(emit->tcs.control_point_phase);
11973    assert(emit->tcs.control_point_out_index != INVALID_INDEX);
11974    assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
11975 
11976    struct tgsi_full_dst_register output_control_point;
11977    output_control_point =
11978       make_dst_output_reg(emit->tcs.control_point_out_index);
11979 
11980    if (emit->tcs.control_point_input_index == INVALID_INDEX) {
11981       /* MOV OUTPUT 0.0f */
11982       struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
11983       begin_emit_instruction(emit);
11984       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
11985       emit_dst_register(emit, &output_control_point);
11986       emit_src_register(emit, &zero);
11987       end_emit_instruction(emit);
11988    }
11989    else {
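      /* Pass through: copy this invocation's control point input to the
       * output.  The invocation ID is first loaded into an address temp
       * so it can be used to index the control point input array.
       */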
11990       /* UARL ADDR[INDEX].x INVOCATION.xxxx */
11991 
11992       struct tgsi_full_src_register invocation_src;
11993       struct tgsi_full_dst_register addr_dst;
11994       struct tgsi_full_dst_register addr_dst_x;
11995       unsigned addr_tmp;
11996 
11997       addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
11998       addr_dst = make_dst_temp_reg(addr_tmp);
11999       addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
12000 
12001       invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
12002                                     emit->tcs.invocation_id_sys_index);
12003 
12004       begin_emit_instruction(emit);
12005       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12006       emit_dst_register(emit, &addr_dst_x);
12007       emit_src_register(emit, &invocation_src);
12008       end_emit_instruction(emit);
12009 
12010 
12011       /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
12012 
12013       struct tgsi_full_src_register input_control_point;
12014       input_control_point = make_src_reg(TGSI_FILE_INPUT,
12015                                          emit->tcs.control_point_input_index);
12016       input_control_point.Register.Dimension = 1;
12017       input_control_point.Dimension.Indirect = 1;
12018       input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
12019       input_control_point.DimIndirect.Index =
12020          emit->tcs.control_point_addr_index;
12021 
12022       begin_emit_instruction(emit);
12023       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12024       emit_dst_register(emit, &output_control_point);
12025       emit_src_register(emit, &input_control_point);
12026       end_emit_instruction(emit);
12027    }
12028 }
12029 
12030 /**
12031  * This function constructs temporary tessfactors from the VGPU10*_TESSFACTOR
12032  * values in the domain shader. SM5 exposes the tessfactors as scalar floating
12033  * point values whereas TGSI emits them as vectors. This function builds a
12034  * temporary tessfactor vector, analogous to TGSI_SEMANTIC_TESSINNER/OUTER,
12035  * filled with the VGPU10*_TESSFACTOR values. This constructed vector is used
12036  * wherever TGSI_SEMANTIC_TESSINNER/OUTER is referenced in the shader.
12037  */
12038 static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 * emit)12039 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
12040 {
12041    struct tgsi_full_src_register src;
12042    struct tgsi_full_dst_register dst;
12043 
12044    if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
12045       dst = make_dst_temp_reg(emit->tes.inner.temp_index);
12046 
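      /* The cases below intentionally fall through: quads copy both inner
       * factors (y then x), triangles copy only x, and for isolines the
       * inner factor is unused so 1.0 is returned instead.
       */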
12047       switch (emit->tes.prim_mode) {
12048       case MESA_PRIM_QUADS:
12049          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12050                   emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
12051          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12052          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12053          FALLTHROUGH;
12054       case MESA_PRIM_TRIANGLES:
12055          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12056                   emit->tes.inner.in_index, TGSI_SWIZZLE_X);
12057          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12058          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12059          break;
12060       case MESA_PRIM_LINES:
12061          /**
12062           * Per the SM5 spec, the InsideTessFactor is unused for isolines.
12063           * In fact GLSL's tessInnerLevel is meaningless for isolines, but if
12064           * an application tries to read tessInnerLevel in the TES when the
12065           * primitive type is isolines, return vec(1.0f) rather than having
12066           * the driver fault on the access.
12067           */
12068          src = make_immediate_reg_float(emit, 1.0f);
12069          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12070          break;
12071       default:
12072          break;
12073       }
12074    }
12075 
12076    if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
12077       dst = make_dst_temp_reg(emit->tes.outer.temp_index);
12078 
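      /* These cases also fall through intentionally: quads copy four outer
       * factors (w, z, y, x), triangles copy three (z, y, x), and isolines
       * copy two (y, x).
       */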
12079       switch (emit->tes.prim_mode) {
12080       case MESA_PRIM_QUADS:
12081          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12082                   emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
12083          dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
12084          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12085          FALLTHROUGH;
12086       case MESA_PRIM_TRIANGLES:
12087          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12088                   emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
12089          dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
12090          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12091          FALLTHROUGH;
12092       case MESA_PRIM_LINES:
12093          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12094                   emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
12095          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12096          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12097 
12098          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12099                   emit->tes.outer.in_index , TGSI_SWIZZLE_X);
12100          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12101          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12102 
12103          break;
12104       default:
12105          break;
12106       }
12107    }
12108 }
12109 
12110 
12111 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)12112 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
12113 {
12114    struct tgsi_full_src_register src;
12115    struct tgsi_full_dst_register dst;
12116    unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
12117                                                  emit->initialize_temp_index);
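   /* MOV temp, 0.0f -- zero-initialize the flagged temporary register */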
12118    src = make_immediate_reg_float(emit, 0.0f);
12119    dst = make_dst_temp_reg(vgpu10_temp_index);
12120    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12121    emit->temp_map[emit->initialize_temp_index].initialized = true;
12122    emit->initialize_temp_index = INVALID_INDEX;
12123 }
12124 
12125 
12126 /**
12127  * Emit any extra/helper declarations/code that we might need between
12128  * the declaration section and code section.
12129  */
12130 static bool
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)12131 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
12132 {
12133    /* Properties */
12134    if (emit->unit == PIPE_SHADER_GEOMETRY)
12135       emit_property_instructions(emit);
12136    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12137       emit_hull_shader_declarations(emit);
12138 
12139       /* Save the position of the first instruction token so that we can
12140        * do a second pass of the instructions for the patch constant phase.
12141        */
12142       emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
12143       emit->tcs.fork_phase_add_signature = false;
12144 
12145       if (!emit_hull_shader_control_point_phase(emit)) {
12146          emit->skip_instruction = true;
12147          return true;
12148       }
12149 
12150       /* Set the current tcs phase to control point phase */
12151       emit->tcs.control_point_phase = true;
12152    }
12153    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12154       emit_domain_shader_declarations(emit);
12155    }
12156    else if (emit->unit == PIPE_SHADER_COMPUTE) {
12157       emit_compute_shader_declarations(emit);
12158    }
12159 
12160    /* Declare inputs */
12161    if (!emit_input_declarations(emit))
12162       return false;
12163 
12164    /* Declare outputs */
12165    if (!emit_output_declarations(emit))
12166       return false;
12167 
12168    /* Declare temporary registers */
12169    emit_temporaries_declaration(emit);
12170 
12171    /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
12172     * will already be declared in hs_decls (emit_hull_shader_declarations)
12173     */
12174    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12175 
12176       alloc_common_immediates(emit);
12177 
12178       /* Declare constant registers */
12179       emit_constant_declaration(emit);
12180 
12181       /* Declare samplers and resources */
12182       emit_sampler_declarations(emit);
12183       emit_resource_declarations(emit);
12184 
12185       /* Declare images */
12186       emit_image_declarations(emit);
12187 
12188       /* Declare shader buffers */
12189       emit_shader_buf_declarations(emit);
12190 
12191       /* Declare atomic buffers */
12192       emit_atomic_buf_declarations(emit);
12193    }
12194 
12195    if (emit->unit != PIPE_SHADER_FRAGMENT &&
12196        emit->unit != PIPE_SHADER_COMPUTE) {
12197       /*
12198        * Declare clip distance output registers for ClipVertex or
12199        * user defined planes
12200        */
12201       emit_clip_distance_declarations(emit);
12202    }
12203 
12204    if (emit->unit == PIPE_SHADER_COMPUTE) {
12205       emit_memory_declarations(emit);
12206 
12207       if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
12208          emit->cs.grid_size.imm_index =
12209             alloc_immediate_int4(emit,
12210                                  emit->key.cs.grid_size[0],
12211                                  emit->key.cs.grid_size[1],
12212                                  emit->key.cs.grid_size[2], 0);
12213       }
12214    }
12215 
12216    if (emit->unit == PIPE_SHADER_FRAGMENT &&
12217        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12218       float alpha = emit->key.fs.alpha_ref;
12219       emit->fs.alpha_ref_index =
12220          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
12221    }
12222 
12223    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12224       /**
12225        * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
12226        * hs_decls
12227        */
12228       emit_vgpu10_immediates_block(emit);
12229    }
12230    else {
12231       emit_tcs_default_control_point_output(emit);
12232    }
12233 
12234    if (emit->unit == PIPE_SHADER_FRAGMENT) {
12235       emit_frontface_instructions(emit);
12236       emit_fragcoord_instructions(emit);
12237       emit_sample_position_instructions(emit);
12238       emit_default_layer_instructions(emit);
12239    }
12240    else if (emit->unit == PIPE_SHADER_VERTEX) {
12241       emit_vertex_attrib_instructions(emit);
12242 
12243       if (emit->info.uses_vertexid)
12244          emit_vertex_id_nobase_instruction(emit);
12245    }
12246    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12247       emit_temp_tessfactor_instructions(emit);
12248    }
12249 
12250    /**
12251     * For a geometry shader that writes the viewport index, the prescale
12252     * temporaries are set up at the first vertex emission instead.
12253     */
12254    if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
12255       emit_temp_prescale_instructions(emit);
12256 
12257    return true;
12258 }
12259 
12260 
12261 /**
12262  * The device has no direct support for the pipe_blend_state::alpha_to_one
12263  * option so we implement it here with shader code.
12264  *
12265  * Note that this is kind of pointless, actually.  Here we're clobbering
12266  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
12267  * up with 100% coverage.  That's almost certainly not what the user wants.
12268  * The work-around is to add extra shader code to compute coverage from alpha
12269  * and write it to the coverage output register (if the user's shader doesn't
12270  * do so already).  We'll probably do that in the future.
12271  */
12272 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12273 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
12274                                unsigned fs_color_tmp_index)
12275 {
12276    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
12277    unsigned i;
12278 
12279    /* Note: it's not 100% clear from the spec if we're supposed to clobber
12280     * the alpha for all render targets.  But that's what NVIDIA does and
12281     * that's what Piglit tests.
12282     */
12283    for (i = 0; i < emit->fs.num_color_outputs; i++) {
12284       struct tgsi_full_dst_register color_dst;
12285 
12286       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
12287          /* write to the temp color register */
12288          color_dst = make_dst_temp_reg(fs_color_tmp_index);
12289       }
12290       else {
12291          /* write directly to the color[i] output */
12292          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
12293       }
12294 
12295       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
12296 
12297       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
12298    }
12299 }
12300 
12301 
12302 /**
12303  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
12304  * against the alpha reference value and discards the fragment if the
12305  * comparison fails.
12306  */
12307 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12308 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
12309                              unsigned fs_color_tmp_index)
12310 {
12311    /* compare output color's alpha to alpha ref and discard if comparison
12312     * fails.
12313     */
12314    unsigned tmp = get_temp_index(emit);
12315    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
12316    struct tgsi_full_src_register tmp_src_x =
12317       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
12318    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
12319    struct tgsi_full_src_register color_src =
12320       make_src_temp_reg(fs_color_tmp_index);
12321    struct tgsi_full_src_register color_src_w =
12322       scalar_src(&color_src, TGSI_SWIZZLE_W);
12323    struct tgsi_full_src_register ref_src =
12324       make_src_immediate_reg(emit->fs.alpha_ref_index);
12325    struct tgsi_full_dst_register color_dst =
12326       make_dst_output_reg(emit->fs.color_out_index[0]);
12327 
12328    assert(emit->unit == PIPE_SHADER_FRAGMENT);
12329 
12330    /* dst = src0 'alpha_func' src1 */
12331    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
12332                    &color_src_w, &ref_src);
12333 
12334    /* DISCARD if dst.x == 0 */
12335    begin_emit_instruction(emit);
12336    emit_discard_opcode(emit, false);  /* discard if src0.x is zero */
12337    emit_src_register(emit, &tmp_src_x);
12338    end_emit_instruction(emit);
12339 
12340    /* If we don't need to broadcast the color below, emit the final color here.
12341     */
12342    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
12343       /* MOV output.color, tempcolor */
12344       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12345    }
12346 
12347    free_temp_indexes(emit);
12348 }
12349 
12350 
12351 /**
12352  * Emit instructions for writing a single color output to multiple
12353  * color buffers.
12354  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
12355  * is set (or when key.fs.white_fragments is true) and the number of
12356  * render targets is greater than one.
12357  * \param fs_color_tmp_index  index of the temp register that holds the
12358  *                            color to broadcast.
12359  */
12360 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12361 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
12362                                  unsigned fs_color_tmp_index)
12363 {
12364    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
12365    unsigned i;
12366    struct tgsi_full_src_register color_src;
12367 
12368    if (emit->key.fs.white_fragments) {
12369       /* set all color outputs to white */
12370       color_src = make_immediate_reg_float(emit, 1.0f);
12371    }
12372    else {
12373       /* set all color outputs to TEMP[fs_color_tmp_index] */
12374       assert(fs_color_tmp_index != INVALID_INDEX);
12375       color_src = make_src_temp_reg(fs_color_tmp_index);
12376    }
12377 
12378    assert(emit->unit == PIPE_SHADER_FRAGMENT);
12379 
12380    for (i = 0; i < n; i++) {
12381       unsigned output_reg = emit->fs.color_out_index[i];
12382       struct tgsi_full_dst_register color_dst =
12383          make_dst_output_reg(output_reg);
12384 
12385       /* Fill in this semantic here since we'll use it later in
12386        * emit_dst_register().
12387        */
12388       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
12389 
12390       /* MOV output.color[i], tempcolor */
12391       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12392    }
12393 }
12394 
12395 
12396 /**
12397  * Emit extra helper code after the original shader code, but before the
12398  * last END/RET instruction.
12399  * For vertex shaders this means emitting the extra code to apply the
12400  * prescale scale/translation.
12401  */
12402 static bool
emit_post_helpers(struct svga_shader_emitter_v10 * emit)12403 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
12404 {
12405    if (emit->unit == PIPE_SHADER_VERTEX) {
12406       emit_vertex_instructions(emit);
12407    }
12408    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
12409       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
12410 
12411       assert(!(emit->key.fs.white_fragments &&
12412                emit->key.fs.write_color0_to_n_cbufs == 0));
12413 
12414       /* We no longer want emit_dst_register() to substitute the
12415        * temporary fragment color register for the real color output.
12416        */
12417       emit->fs.color_tmp_index = INVALID_INDEX;
12418 
12419       if (emit->key.fs.alpha_to_one) {
12420          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
12421       }
12422       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12423          emit_alpha_test_instructions(emit, fs_color_tmp_index);
12424       }
12425       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
12426           emit->key.fs.white_fragments) {
12427          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
12428       }
12429    }
12430    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12431       if (!emit->tcs.control_point_phase) {
12432          /* store the tessellation levels in the patch constant phase only */
12433          store_tesslevels(emit);
12434       }
12435       else {
12436          emit_clipping_instructions(emit);
12437       }
12438    }
12439    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12440       emit_vertex_instructions(emit);
12441    }
12442 
12443    return true;
12444 }
12445 
12446 
12447 /**
12448  * Re-emit an instruction that references raw buffers.
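 *
 * For each raw-buffer reference recorded for this instruction we emit,
 * roughly:
 *
 *   ISHL   offset, element_index, 4      ; element index -> byte offset
 *   LD_RAW TEMP[raw_buf_tmp_index + i], offset.x, SRV[buffer]
 *
 * and then re-emit the original instruction so that it reads the loaded
 * temporaries instead of the raw buffer.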
12449  */
12450 static bool
emit_rawbuf_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)12451 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
12452                         unsigned inst_number,
12453                         const struct tgsi_full_instruction *inst)
12454 {
12455    bool ret;
12456 
12457    /* For all the rawbuf references in this instruction,
12458     * load the rawbuf reference and assign it to the designated temporary.
12459     * Then re-emit the instruction.
12460     */
12461    emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;
12462 
12463    unsigned offset_tmp = get_temp_index(emit);
12464    struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
12465    struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
12466    struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
12467 
12468    for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
12469       struct tgsi_full_src_register element_src;
12470 
12471       /* First get the element index register. */
12472 
12473       if (emit->raw_buf_tmp[i].indirect) {
12474          unsigned tmp = get_temp_index(emit);
12475          struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
12476          struct tgsi_full_src_register element_index =
12477             make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12478          struct tgsi_full_src_register element_rel =
12479             make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);
12480 
12481          element_src = make_src_temp_reg(tmp);
12482          element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
12483          element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);
12484 
12485          /* element index from the indirect register */
12486          element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12487          element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);
12488 
12489          /* IADD element_src element_index element_index_relative */
12490          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
12491                               &element_index, &element_rel);
12492       }
12493       else {
12494          unsigned element_index = emit->raw_buf_tmp[i].element_index;
12495          union tgsi_immediate_data imm;
12496          imm.Int = element_index;
12497          int immpos = find_immediate(emit, imm, 0);
12498          if (immpos < 0) {
12499             UNUSED unsigned element_index_imm =
12500                add_immediate_int(emit, element_index);
12501          }
12502          element_src = make_immediate_reg_int(emit, element_index);
12503       }
12504 
12505       /* byte offset = element index << 4 (16 bytes per vec4 element) */
12506       emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
12507                            &element_src, &four);
12508 
12509       struct tgsi_full_dst_register dst_tmp =
12510          make_dst_temp_reg(i + emit->raw_buf_tmp_index);
12511 
12512       /* LD_RAW tmp, rawbuf byte offset, rawbuf */
12513 
12514       begin_emit_instruction(emit);
12515       emit_opcode(emit, VGPU10_OPCODE_LD_RAW, false);
12516       emit_dst_register(emit, &dst_tmp);
12517 
12518       struct tgsi_full_src_register offset_x =
12519             scalar_src(&offset_src, TGSI_SWIZZLE_X);
12520       emit_src_register(emit, &offset_x);
12521 
12522       emit_resource_register(emit,
12523          emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
12524       end_emit_instruction(emit);
12525    }
12526 
12527    emit->raw_buf_cur_tmp_index = 0;
12528 
12529    ret = emit_vgpu10_instruction(emit, inst_number, inst);
12530 
12531    /* reset raw buf state */
12532    emit->raw_buf_cur_tmp_index = 0;
12533    emit->reemit_rawbuf_instruction = REEMIT_FALSE;
12534 
12535    free_temp_indexes(emit);
12536 
12537    return ret;
12538 }
12539 
12540 
12541 /**
12542  * Translate the TGSI tokens into VGPU10 tokens.
12543  */
12544 static bool
12545 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
12546                          const struct tgsi_token *tokens)
12547 {
12548    struct tgsi_parse_context parse;
12549    bool ret = true;
12550    bool pre_helpers_emitted = false;
12551    unsigned inst_number = 0;
12552 
12553    tgsi_parse_init(&parse, tokens);
12554 
12555    while (!tgsi_parse_end_of_tokens(&parse)) {
12556 
12557       /* Save the current tgsi token starting position */
12558       emit->cur_tgsi_token = parse.Position;
12559 
12560       tgsi_parse_token(&parse);
12561 
12562       switch (parse.FullToken.Token.Type) {
12563       case TGSI_TOKEN_TYPE_IMMEDIATE:
12564          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
12565          if (!ret)
12566             goto done;
12567          break;
12568 
12569       case TGSI_TOKEN_TYPE_DECLARATION:
12570          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
12571          if (!ret)
12572             goto done;
12573          break;
12574 
12575       case TGSI_TOKEN_TYPE_INSTRUCTION:
12576          if (!pre_helpers_emitted) {
12577             ret = emit_pre_helpers(emit);
12578             if (!ret)
12579                goto done;
12580             pre_helpers_emitted = true;
12581          }
12582          ret = emit_vgpu10_instruction(emit, inst_number++,
12583                                        &parse.FullToken.FullInstruction);
12584 
12585          /* This applies to TCS only. If the shader reads control point
12586           * outputs in the control point phase, we reemit all instructions
12587           * that write to control point outputs in that phase, so that the
12588           * results are also stored in temporaries.
12589           */
12590          if (emit->reemit_instruction) {
12591             assert(emit->unit == PIPE_SHADER_TESS_CTRL);
12592             ret = emit_vgpu10_instruction(emit, inst_number,
12593                                           &parse.FullToken.FullInstruction);
12594          }
12595          else if (emit->initialize_temp_index != INVALID_INDEX) {
12596             emit_initialize_temp_instruction(emit);
12597             emit->initialize_temp_index = INVALID_INDEX;
12598             ret = emit_vgpu10_instruction(emit, inst_number - 1,
12599                                           &parse.FullToken.FullInstruction);
12600          }
12601          else if (emit->reemit_rawbuf_instruction) {
12602             ret = emit_rawbuf_instruction(emit, inst_number - 1,
12603                                           &parse.FullToken.FullInstruction);
12604          }
12605 
12606          if (!ret)
12607             goto done;
12608          break;
12609 
12610       case TGSI_TOKEN_TYPE_PROPERTY:
12611          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
12612          if (!ret)
12613             goto done;
12614          break;
12615 
12616       default:
12617          break;
12618       }
12619    }
12620 
12621    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12622       ret = emit_hull_shader_patch_constant_phase(emit, &parse);
12623    }
12624 
12625 done:
12626    tgsi_parse_free(&parse);
12627    return ret;
12628 }
12629 
12630 
12631 /**
12632  * Emit the first VGPU10 shader tokens.
12633  */
12634 static bool
12635 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
12636 {
12637    VGPU10ProgramToken ptoken;
12638 
12639    /* First token: VGPU10ProgramToken (version info, program type: VS, GS, PS, etc.) */
12640 
12641    /* Maximum supported shader version is 50 */
12642    unsigned version = MIN2(emit->version, 50);
12643 
12644    ptoken.value = 0; /* init whole token to zero */
12645    ptoken.majorVersion = version / 10;
12646    ptoken.minorVersion = version % 10;
12647    ptoken.programType = translate_shader_type(emit->unit);
12648    if (!emit_dword(emit, ptoken.value))
12649       return false;
12650 
12651    /* Second token: total length of shader, in tokens.  We can't fill this
12652     * in until we're all done.  Emit zero for now.
12653     */
12654    if (!emit_dword(emit, 0))
12655       return false;
12656 
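   /* Example header for a 4.0 vertex shader (illustrative values only):
    *    token[0]: majorVersion = 4, minorVersion = 0, programType = VS
    *    token[1]: 0, later patched with the total token count in
    *              emit_vgpu10_tail()
    */
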
12657    if (emit->version >= 50) {
12658       VGPU10OpcodeToken0 token;
12659 
12660       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12661          /* For the hull shader, we need to start the declarations phase
12662           * before emitting any declarations, including the global flags.
12663           */
12664          token.value = 0;
12665          token.opcodeType = VGPU10_OPCODE_HS_DECLS;
12666          begin_emit_instruction(emit);
12667          emit_dword(emit, token.value);
12668          end_emit_instruction(emit);
12669       }
12670 
12671       /* Emit global flags */
12672       token.value = 0;    /* init whole token to zero */
12673       token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12674       token.enableDoublePrecisionFloatOps = 1;  /* set bit */
12675       token.instructionLength = 1;
12676       if (!emit_dword(emit, token.value))
12677          return false;
12678    }
12679 
12680    if (emit->version >= 40) {
12681       VGPU10OpcodeToken0 token;
12682 
12683       /* Reserve a token for a global flag such as refactoringAllowed.
12684        * If the shader does not use the precise qualifier, we will set the
12685        * refactoringAllowed global flag; otherwise, we leave the reserved
12686        * token as a NOP.
12687        */
12688       emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
12689       token.value = 0;
12690       token.opcodeType = VGPU10_OPCODE_NOP;
12691       token.instructionLength = 1;
12692       if (!emit_dword(emit, token.value))
12693          return false;
12694    }
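
   /* Note: emit_vgpu10_tail() later rewrites the reserved NOP above into a
    * DCL_GLOBAL_FLAGS token, setting either refactoringAllowed (when the
    * shader uses no precise qualifier) or forceEarlyDepthStencil (for SM5
    * fragment shaders that request it).
    */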
12695 
12696    return true;
12697 }
12698 
12699 
12700 static bool
12701 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
12702 {
12703    VGPU10ProgramToken *tokens;
12704 
12705    /* Replace the second token with total shader length */
12706    tokens = (VGPU10ProgramToken *) emit->buf;
12707    tokens[1].value = emit_get_num_tokens(emit);
12708 
12709    if (emit->version >= 40 && !emit->uses_precise_qualifier) {
12710       /* Replace the reserved token with the RefactoringAllowed global flag */
12711       VGPU10OpcodeToken0 *ptoken;
12712 
12713       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12714       assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
12715       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12716       ptoken->refactoringAllowed = 1;
12717    }
12718 
12719    if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
12720       /* Replace the reserved token with the forceEarlyDepthStencil global flag */
12721       VGPU10OpcodeToken0 *ptoken;
12722 
12723       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12724       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12725       ptoken->forceEarlyDepthStencil = 1;
12726    }
12727 
12728    return true;
12729 }
12730 
12731 
12732 /**
12733  * Modify the FS to read the BCOLORs and use the FACE register
12734  * to choose between the front/back colors.
12735  */
12736 static const struct tgsi_token *
12737 transform_fs_twoside(const struct tgsi_token *tokens)
12738 {
12739    if (0) {
12740       debug_printf("Before tgsi_add_two_side ------------------\n");
12741       tgsi_dump(tokens,0);
12742    }
12743    tokens = tgsi_add_two_side(tokens);
12744    if (0) {
12745       debug_printf("After tgsi_add_two_side ------------------\n");
12746       tgsi_dump(tokens, 0);
12747    }
12748    return tokens;
12749 }
12750 
12751 
12752 /**
12753  * Modify the FS to do polygon stipple.
12754  */
12755 static const struct tgsi_token *
12756 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
12757                       const struct tgsi_token *tokens)
12758 {
12759    const struct tgsi_token *new_tokens;
12760    unsigned unit;
12761 
12762    if (0) {
12763       debug_printf("Before pstipple ------------------\n");
12764       tgsi_dump(tokens,0);
12765    }
12766 
12767    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
12768                                                      TGSI_FILE_INPUT);
12769 
12770    emit->fs.pstipple_sampler_unit = unit;
12771 
12772    /* The new sampler state is appended to the end of the samplers list */
12773    emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
12774 
12775    /* Setup texture state for stipple */
12776    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
12777    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
12778    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
12779    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
12780    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
12781    emit->key.tex[unit].target = PIPE_TEXTURE_2D;
12782    emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
12783 
12784    if (0) {
12785       debug_printf("After pstipple ------------------\n");
12786       tgsi_dump(new_tokens, 0);
12787    }
12788 
12789    return new_tokens;
12790 }
12791 
12792 /**
12793  * Modify the FS to support anti-aliased points.
12794  */
12795 static const struct tgsi_token *
12796 transform_fs_aapoint(struct svga_context *svga,
12797                      const struct tgsi_token *tokens,
12798                      int aa_coord_index)
12799 {
12800    bool need_texcoord_semantic =
12801       svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD);
12802 
12803    if (0) {
12804       debug_printf("Before tgsi_add_aa_point ------------------\n");
12805       tgsi_dump(tokens,0);
12806    }
12807    tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic);
12808    if (0) {
12809       debug_printf("After tgsi_add_aa_point ------------------\n");
12810       tgsi_dump(tokens, 0);
12811    }
12812    return tokens;
12813 }
12814 
12815 
12816 /**
12817  * A helper function to find the shader in the previous stage and then
12818  * call the linker to compute this shader's input mapping so that it
12819  * matches the output indices of the previous-stage shader.
12820  */
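/* For example (illustrative): when a geometry shader feeds the fragment
 * shader, the linker maps each FS input semantic to the register index the
 * GS assigned to the matching output, so both stages agree on the register
 * numbering.
 */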
12821 static void
12822 compute_input_mapping(struct svga_context *svga,
12823                       struct svga_shader_emitter_v10 *emit,
12824                       enum pipe_shader_type unit)
12825 {
12826    struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
12827 
12828    if (unit == PIPE_SHADER_FRAGMENT) {
12829       prevShader = svga->curr.gs ?
12830          &svga->curr.gs->base : (svga->curr.tes ?
12831          &svga->curr.tes->base : &svga->curr.vs->base);
12832    } else if (unit == PIPE_SHADER_GEOMETRY) {
12833       prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
12834    } else if (unit == PIPE_SHADER_TESS_EVAL) {
12835       assert(svga->curr.tcs);
12836       prevShader = &svga->curr.tcs->base;
12837    } else if (unit == PIPE_SHADER_TESS_CTRL) {
12838       assert(svga->curr.vs);
12839       prevShader = &svga->curr.vs->base;
12840    }
12841 
12842    if (prevShader != NULL) {
12843       svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage);
12844       emit->prevShaderInfo = &prevShader->tgsi_info;
12845    }
12846    else {
12847       /**
12848        * Since the vertex shader does not go through the linker to
12849        * establish the input map, make sure the highest input register
12850        * index is set properly here.
12851        */
12852       emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
12853                                          emit->info.file_max[TGSI_FILE_INPUT]);
12854    }
12855 }
12856 
12857 
12858 /**
12859  * Copies the shader signature info to the shader variant
12860  */
12861 static void
12862 copy_shader_signature(struct svga_shader_signature *sgn,
12863                       struct svga_shader_variant *variant)
12864 {
12865    SVGA3dDXShaderSignatureHeader *header = &sgn->header;
12866 
12867    /* Calculate the signature length */
12868    variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
12869                            (header->numInputSignatures +
12870                             header->numOutputSignatures +
12871                             header->numPatchConstantSignatures) *
12872                            sizeof(SVGA3dDXShaderSignatureEntry);
12873 
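   /* Resulting blob layout (sketch):
    *    SVGA3dDXShaderSignatureHeader
    *    numInputSignatures         x SVGA3dDXShaderSignatureEntry
    *    numOutputSignatures        x SVGA3dDXShaderSignatureEntry
    *    numPatchConstantSignatures x SVGA3dDXShaderSignatureEntry
    */
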
12874    /* Allocate buffer for the signature info */
12875    variant->signature =
12876       (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
12877 
12878    char *sgnBuf = (char *)variant->signature;
12879    unsigned sgnLen;
12880 
12881    /* Copy the signature info to the shader variant structure */
12882    memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
12883    sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
12884 
12885    if (header->numInputSignatures) {
12886       sgnLen =
12887          header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12888       memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
12889       sgnBuf += sgnLen;
12890    }
12891 
12892    if (header->numOutputSignatures) {
12893       sgnLen =
12894          header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12895       memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
12896       sgnBuf += sgnLen;
12897    }
12898 
12899    if (header->numPatchConstantSignatures) {
12900       sgnLen =
12901          header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12902       memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
12903    }
12904 }
12905 
12906 
12907 /**
12908  * This is the main entrypoint for the TGSI -> VGPU10 translator.
12909  */
12910 struct svga_shader_variant *
12911 svga_tgsi_vgpu10_translate(struct svga_context *svga,
12912                            const struct svga_shader *shader,
12913                            const struct svga_compile_key *key,
12914                            enum pipe_shader_type unit)
12915 {
12916    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
12917    struct svga_shader_variant *variant = NULL;
12918    struct svga_shader_emitter_v10 *emit;
12919    const struct tgsi_token *tokens = shader->tokens;
12920 
12921    (void) make_immediate_reg_double;   /* unused at this time */
12922 
12923    assert(unit == PIPE_SHADER_VERTEX ||
12924           unit == PIPE_SHADER_GEOMETRY ||
12925           unit == PIPE_SHADER_FRAGMENT ||
12926           unit == PIPE_SHADER_TESS_CTRL ||
12927           unit == PIPE_SHADER_TESS_EVAL ||
12928           unit == PIPE_SHADER_COMPUTE);
12929 
12930    /* These two flags cannot be used together */
12931    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
12932 
12933    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
12934    /*
12935     * Setup the code emitter
12936     */
12937    emit = alloc_emitter();
12938    if (!emit)
12939       goto done;
12940 
12941    emit->unit = unit;
12942    if (svga_have_gl43(svga)) {
12943       emit->version = 51;
12944    } else if (svga_have_sm5(svga)) {
12945       emit->version = 50;
12946    } else if (svga_have_sm4_1(svga)) {
12947       emit->version = 41;
12948    } else {
12949       emit->version = 40;
12950    }
12951 
12952    emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
12953 
12954    emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
12955 
12956    emit->key = *key;
12957 
12958    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
12959                                     emit->key.gs.need_prescale ||
12960                                     emit->key.tes.need_prescale);
12961 
12962    /* Determine how many prescale factors are in the constant buffer */
12963    emit->vposition.num_prescale = 1;
12964    if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
12965       assert(emit->unit == PIPE_SHADER_GEOMETRY);
12966       emit->vposition.num_prescale = emit->key.gs.num_prescale;
12967    }
12968 
12969    emit->vposition.tmp_index = INVALID_INDEX;
12970    emit->vposition.so_index = INVALID_INDEX;
12971    emit->vposition.out_index = INVALID_INDEX;
12972 
12973    emit->vs.vertex_id_sys_index = INVALID_INDEX;
12974    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
12975    emit->vs.vertex_id_bias_index = INVALID_INDEX;
12976 
12977    emit->fs.color_tmp_index = INVALID_INDEX;
12978    emit->fs.face_input_index = INVALID_INDEX;
12979    emit->fs.fragcoord_input_index = INVALID_INDEX;
12980    emit->fs.sample_id_sys_index = INVALID_INDEX;
12981    emit->fs.sample_pos_sys_index = INVALID_INDEX;
12982    emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
12983    emit->fs.layer_input_index = INVALID_INDEX;
12984    emit->fs.layer_imm_index = INVALID_INDEX;
12985 
12986    emit->gs.prim_id_index = INVALID_INDEX;
12987    emit->gs.invocation_id_sys_index = INVALID_INDEX;
12988    emit->gs.viewport_index_out_index = INVALID_INDEX;
12989    emit->gs.viewport_index_tmp_index = INVALID_INDEX;
12990 
12991    emit->tcs.vertices_per_patch_index = INVALID_INDEX;
12992    emit->tcs.invocation_id_sys_index = INVALID_INDEX;
12993    emit->tcs.control_point_input_index = INVALID_INDEX;
12994    emit->tcs.control_point_addr_index = INVALID_INDEX;
12995    emit->tcs.control_point_out_index = INVALID_INDEX;
12996    emit->tcs.control_point_tmp_index = INVALID_INDEX;
12997    emit->tcs.control_point_out_count = 0;
12998    emit->tcs.inner.out_index = INVALID_INDEX;
12999    emit->tcs.inner.temp_index = INVALID_INDEX;
13000    emit->tcs.inner.tgsi_index = INVALID_INDEX;
13001    emit->tcs.outer.out_index = INVALID_INDEX;
13002    emit->tcs.outer.temp_index = INVALID_INDEX;
13003    emit->tcs.outer.tgsi_index = INVALID_INDEX;
13004    emit->tcs.patch_generic_out_count = 0;
13005    emit->tcs.patch_generic_out_index = INVALID_INDEX;
13006    emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
13007    emit->tcs.prim_id_index = INVALID_INDEX;
13008 
13009    emit->tes.tesscoord_sys_index = INVALID_INDEX;
13010    emit->tes.inner.in_index = INVALID_INDEX;
13011    emit->tes.inner.temp_index = INVALID_INDEX;
13012    emit->tes.inner.tgsi_index = INVALID_INDEX;
13013    emit->tes.outer.in_index = INVALID_INDEX;
13014    emit->tes.outer.temp_index = INVALID_INDEX;
13015    emit->tes.outer.tgsi_index = INVALID_INDEX;
13016    emit->tes.prim_id_index = INVALID_INDEX;
13017 
13018    emit->cs.thread_id_index = INVALID_INDEX;
13019    emit->cs.block_id_index = INVALID_INDEX;
13020    emit->cs.grid_size.tgsi_index = INVALID_INDEX;
13021    emit->cs.grid_size.imm_index = INVALID_INDEX;
13022    emit->cs.block_width = 1;
13023    emit->cs.block_height = 1;
13024    emit->cs.block_depth = 1;
13025 
13026    emit->clip_dist_out_index = INVALID_INDEX;
13027    emit->clip_dist_tmp_index = INVALID_INDEX;
13028    emit->clip_dist_so_index = INVALID_INDEX;
13029    emit->clip_vertex_out_index = INVALID_INDEX;
13030    emit->clip_vertex_tmp_index = INVALID_INDEX;
13031    emit->svga_debug_callback = svga->debug.callback;
13032 
13033    emit->index_range.start_index = INVALID_INDEX;
13034    emit->index_range.count = 0;
13035    emit->index_range.required = false;
13036    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
13037    emit->index_range.dim = 0;
13038    emit->index_range.size = 0;
13039 
13040    emit->current_loop_depth = 0;
13041 
13042    emit->initialize_temp_index = INVALID_INDEX;
13043    emit->image_size_index = INVALID_INDEX;
13044 
13045    emit->max_vs_inputs  = svgascreen->max_vs_inputs;
13046    emit->max_vs_outputs = svgascreen->max_vs_outputs;
13047    emit->max_gs_inputs  = svgascreen->max_gs_inputs;
13048 
13049    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
13050       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
13051    }
13052 
13053    if (unit == PIPE_SHADER_FRAGMENT) {
13054       if (key->fs.light_twoside) {
13055          tokens = transform_fs_twoside(tokens);
13056       }
13057       if (key->fs.pstipple) {
13058          const struct tgsi_token *new_tokens =
13059             transform_fs_pstipple(emit, tokens);
13060          if (tokens != shader->tokens) {
13061             /* free the two-sided shader tokens */
13062             tgsi_free_tokens(tokens);
13063          }
13064          tokens = new_tokens;
13065       }
13066       if (key->fs.aa_point) {
13067          tokens = transform_fs_aapoint(svga, tokens,
13068                                        key->fs.aa_point_coord_index);
13069       }
13070    }
13071 
13072    if (SVGA_DEBUG & DEBUG_TGSI) {
13073       debug_printf("#####################################\n");
13074       debug_printf("### TGSI Shader %u\n", shader->id);
13075       tgsi_dump(tokens, 0);
13076    }
13077 
13078    /**
13079     * Rescan the shader if the token string differs from the one stored
13080     * in the shader; otherwise, the scan info is already up to date.
13081     */
13082    if (tokens != shader->tokens) {
13083       tgsi_scan_shader(tokens, &emit->info);
13084    } else {
13085       emit->info = shader->tgsi_info;
13086    }
13087 
13088    emit->num_outputs = emit->info.num_outputs;
13089 
13090    /**
13091     * Compute input mapping to match the outputs from shader
13092     * in the previous stage
13093     */
13094    compute_input_mapping(svga, emit, unit);
13095 
13096    determine_clipping_mode(emit);
13097 
13098    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
13099        unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
13100       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
13101          /* If there are stream output declarations associated with this
13102           * shader, or the shader writes to ClipDistance, then reserve
13103           * extra registers for the non-adjusted vertex position and the
13104           * ClipDistance shadow copy.
13105           */
13106          emit->vposition.so_index = emit->num_outputs++;
13107 
13108          if (emit->clip_mode == CLIP_DISTANCE) {
13109             emit->clip_dist_so_index = emit->num_outputs++;
13110             if (emit->info.num_written_clipdistance > 4)
13111                emit->num_outputs++;
13112          }
13113       }
13114    }
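
   /* For example (illustrative): a vertex shader with stream output that
    * writes 8 clip distances gets three extra output registers here: one for
    * the unadjusted position and two for the clip-distance shadow copies.
    */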
13115 
13116    /* Determine if constbuf to rawbuf translation is needed */
13117    emit->raw_buf_srv_start_index = emit->key.srv_raw_constbuf_index;
13118    if (emit->info.const_buffers_declared)
13119       emit->raw_bufs = emit->key.raw_constbufs;
13120 
13121    emit->raw_shaderbuf_srv_start_index = emit->key.srv_raw_shaderbuf_index;
13122    if (emit->info.shader_buffers_declared)
13123       emit->raw_shaderbufs = emit->key.raw_shaderbufs;
13124 
13125    /*
13126     * Do actual shader translation.
13127     */
13128    if (!emit_vgpu10_header(emit)) {
13129       debug_printf("svga: emit VGPU10 header failed\n");
13130       goto cleanup;
13131    }
13132 
13133    if (!emit_vgpu10_instructions(emit, tokens)) {
13134       debug_printf("svga: emit VGPU10 instructions failed\n");
13135       goto cleanup;
13136    }
13137 
13138    if (emit->num_new_immediates > 0) {
13139       reemit_immediates_block(emit);
13140    }
13141 
13142    if (!emit_vgpu10_tail(emit)) {
13143       debug_printf("svga: emit VGPU10 tail failed\n");
13144       goto cleanup;
13145    }
13146 
13147    if (emit->register_overflow) {
13148       goto cleanup;
13149    }
13150 
13151    /*
13152     * Create, initialize the 'variant' object.
13153     */
13154    variant = svga_new_shader_variant(svga, unit);
13155    if (!variant)
13156       goto cleanup;
13157 
13158    variant->shader = shader;
13159    variant->nr_tokens = emit_get_num_tokens(emit);
13160    variant->tokens = (const unsigned *)emit->buf;
13161 
13162    /* Copy shader signature info to the shader variant */
13163    if (svga_have_sm5(svga)) {
13164       copy_shader_signature(&emit->signature, variant);
13165    }
13166 
13167    emit->buf = NULL;  /* buffer is no longer owned by the emitter context */
13168    memcpy(&variant->key, key, sizeof(*key));
13169    variant->id = UTIL_BITMASK_INVALID_INDEX;
13170 
13171    /* The starting offset for extra constants is the number of shader
13172     * constants declared in the shader.
13173     */
13174    variant->extra_const_start = emit->num_shader_consts[0];
13175    if (key->gs.wide_point) {
13176       /**
13177        * The extra constant added in the transformed shader
13178        * for inverse viewport scale is to be supplied by the driver.
13179        * So the extra constant starting offset needs to be reduced by 1.
13180        */
13181       assert(variant->extra_const_start > 0);
13182       variant->extra_const_start--;
13183    }
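
   /* Example (illustrative): a transformed wide-point GS that declares 5
    * constants, one of which is the inverse-viewport-scale constant added by
    * the transformation, reports extra_const_start = 4 so the driver-supplied
    * value lands in that slot.
    */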
13184 
13185    if (unit == PIPE_SHADER_FRAGMENT) {
13186       struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
13187 
13188       fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
13189       fs_variant->pstipple_sampler_state_index =
13190          emit->fs.pstipple_sampler_state_index;
13191 
13192       /* If there was exactly one write to a fragment shader output register
13193        * and it came from a constant buffer, we know all fragments will have
13194        * the same color (except for blending).
13195        */
13196       fs_variant->constant_color_output =
13197          emit->constant_color_output && emit->num_output_writes == 1;
13198 
13199       /** keep track in the variant if flat interpolation is used
13200        *  for any of the varyings.
13201        */
13202       fs_variant->uses_flat_interp = emit->uses_flat_interp;
13203 
13204       fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
13205    }
13206    else if (unit == PIPE_SHADER_TESS_EVAL) {
13207       struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
13208 
13209       /* Keep track in the tes variant some of the layout parameters.
13210        * These parameters will be referenced by the tcs to emit
13211        * the necessary declarations for the hull shader.
13212        */
13213       tes_variant->prim_mode = emit->tes.prim_mode;
13214       tes_variant->spacing = emit->tes.spacing;
13215       tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
13216       tes_variant->point_mode = emit->tes.point_mode;
13217    }
13218 
13219 
13220    if (tokens != shader->tokens) {
13221       tgsi_free_tokens(tokens);
13222    }
13223 
13224 cleanup:
13225    free_emitter(emit);
13226 
13227 done:
13228    SVGA_STATS_TIME_POP(svga_sws(svga));
13229    return variant;
13230 }
13231