xref: /aosp_15_r20/external/mesa3d/src/gallium/frontends/nine/nine_shader.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2011 Joakim Sindholt <[email protected]>
3  * Copyright 2013 Christoph Bumiller
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "nine_shader.h"
8 
9 #include "device9.h"
10 #include "nine_debug.h"
11 #include "nine_state.h"
12 #include "vertexdeclaration9.h"
13 
14 #include "util/bitscan.h"
15 #include "util/macros.h"
16 #include "util/u_memory.h"
17 #include "util/u_inlines.h"
18 #include "pipe/p_shader_tokens.h"
19 #include "tgsi/tgsi_ureg.h"
20 #include "tgsi/tgsi_dump.h"
21 #include "nir/tgsi_to_nir.h"
22 
23 #define DBG_CHANNEL DBG_SHADER
24 
25 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
26 
27 
28 struct shader_translator;
29 
30 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
31 
32 static inline const char *d3dsio_to_string(unsigned opcode);
33 
34 
35 #define NINED3D_SM1_VS 0xfffe
36 #define NINED3D_SM1_PS 0xffff
37 
38 #define NINE_MAX_COND_DEPTH 64
39 #define NINE_MAX_LOOP_DEPTH 64
40 
41 #define NINED3DSP_END 0x0000ffff
42 
43 #define NINED3DSPTYPE_FLOAT4  0
44 #define NINED3DSPTYPE_INT4    1
45 #define NINED3DSPTYPE_BOOL    2
46 
47 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
48 
49 #define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
50 #define NINED3DSP_WRITEMASK_SHIFT 16
51 
52 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
53 
54 #define NINED3DSHADER_REL_OP_GT 1
55 #define NINED3DSHADER_REL_OP_EQ 2
56 #define NINED3DSHADER_REL_OP_GE 3
57 #define NINED3DSHADER_REL_OP_LT 4
58 #define NINED3DSHADER_REL_OP_NE 5
59 #define NINED3DSHADER_REL_OP_LE 6
60 
61 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
62 #define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
63 
64 #define NINED3DSI_TEXLD_PROJECT 0x1
65 #define NINED3DSI_TEXLD_BIAS    0x2
66 
67 #define NINED3DSP_WRITEMASK_0   0x1
68 #define NINED3DSP_WRITEMASK_1   0x2
69 #define NINED3DSP_WRITEMASK_2   0x4
70 #define NINED3DSP_WRITEMASK_3   0x8
71 #define NINED3DSP_WRITEMASK_ALL 0xf
72 
73 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
74 
75 #define NINE_SWIZZLE4(x,y,z,w) \
76    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
77 
78 #define NINE_APPLY_SWIZZLE(src, s) \
79    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
80 
81 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
82 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
83 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
84 
85 /*
86  * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
87  * BIAS    <= PS 1.4 (x-0.5)
88  * BIASNEG <= PS 1.4 (-(x-0.5))
89  * SIGN    <= PS 1.4 (2(x-0.5))
90  * SIGNNEG <= PS 1.4 (-2(x-0.5))
91  * COMP    <= PS 1.4 (1-x)
92  * X2       = PS 1.4 (2x)
93  * X2NEG    = PS 1.4 (-2x)
94  * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
95  * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
96  * ABS     >= SM 3.0 (abs(x))
97  * ABSNEG  >= SM 3.0 (-abs(x))
98  * NOT     >= SM 2.0 predication only
99  */
100 #define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
101 #define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
102 #define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
103 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
104 #define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
105 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
106 #define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
107 #define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
108 #define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
109 #define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
110 #define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
111 #define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
112 #define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
113 #define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
114 
115 static const char *sm1_mod_str[] =
116 {
117     [NINED3DSPSM_NONE] = "",
118     [NINED3DSPSM_NEG] = "-",
119     [NINED3DSPSM_BIAS] = "bias",
120     [NINED3DSPSM_BIASNEG] = "biasneg",
121     [NINED3DSPSM_SIGN] = "sign",
122     [NINED3DSPSM_SIGNNEG] = "signneg",
123     [NINED3DSPSM_COMP] = "comp",
124     [NINED3DSPSM_X2] = "x2",
125     [NINED3DSPSM_X2NEG] = "x2neg",
126     [NINED3DSPSM_DZ] = "dz",
127     [NINED3DSPSM_DW] = "dw",
128     [NINED3DSPSM_ABS] = "abs",
129     [NINED3DSPSM_ABSNEG] = "-abs",
130     [NINED3DSPSM_NOT] = "not"
131 };
132 
133 static void
sm1_dump_writemask(BYTE mask)134 sm1_dump_writemask(BYTE mask)
135 {
136     if (mask & 1) DUMP("x"); else DUMP("_");
137     if (mask & 2) DUMP("y"); else DUMP("_");
138     if (mask & 4) DUMP("z"); else DUMP("_");
139     if (mask & 8) DUMP("w"); else DUMP("_");
140 }
141 
142 static void
sm1_dump_swizzle(BYTE s)143 sm1_dump_swizzle(BYTE s)
144 {
145     char c[4] = { 'x', 'y', 'z', 'w' };
146     DUMP("%c%c%c%c",
147          c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
148 }
149 
150 static const char sm1_file_char[] =
151 {
152     [D3DSPR_TEMP] = 'r',
153     [D3DSPR_INPUT] = 'v',
154     [D3DSPR_CONST] = 'c',
155     [D3DSPR_ADDR] = 'A',
156     [D3DSPR_RASTOUT] = 'R',
157     [D3DSPR_ATTROUT] = 'D',
158     [D3DSPR_OUTPUT] = 'o',
159     [D3DSPR_CONSTINT] = 'I',
160     [D3DSPR_COLOROUT] = 'C',
161     [D3DSPR_DEPTHOUT] = 'D',
162     [D3DSPR_SAMPLER] = 's',
163     [D3DSPR_CONST2] = 'c',
164     [D3DSPR_CONST3] = 'c',
165     [D3DSPR_CONST4] = 'c',
166     [D3DSPR_CONSTBOOL] = 'B',
167     [D3DSPR_LOOP] = 'L',
168     [D3DSPR_TEMPFLOAT16] = 'h',
169     [D3DSPR_MISCTYPE] = 'M',
170     [D3DSPR_LABEL] = 'X',
171     [D3DSPR_PREDICATE] = 'p'
172 };
173 
174 static void
sm1_dump_reg(BYTE file,INT index)175 sm1_dump_reg(BYTE file, INT index)
176 {
177     switch (file) {
178     case D3DSPR_LOOP:
179         DUMP("aL");
180         break;
181     case D3DSPR_COLOROUT:
182         DUMP("oC%i", index);
183         break;
184     case D3DSPR_DEPTHOUT:
185         DUMP("oDepth");
186         break;
187     case D3DSPR_RASTOUT:
188         DUMP("oRast%i", index);
189         break;
190     case D3DSPR_CONSTINT:
191         DUMP("iconst[%i]", index);
192         break;
193     case D3DSPR_CONSTBOOL:
194         DUMP("bconst[%i]", index);
195         break;
196     default:
197         DUMP("%c%i", sm1_file_char[file], index);
198         break;
199     }
200 }
201 
202 struct sm1_src_param
203 {
204     INT idx;
205     struct sm1_src_param *rel;
206     BYTE file;
207     BYTE swizzle;
208     BYTE mod;
209     BYTE type;
210     union {
211         DWORD d[4];
212         float f[4];
213         int i[4];
214         BOOL b;
215     } imm;
216 };
217 static void
218 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
219 
220 struct sm1_dst_param
221 {
222     INT idx;
223     struct sm1_src_param *rel;
224     BYTE file;
225     BYTE mask;
226     BYTE mod;
227     int8_t shift; /* sint4 */
228     BYTE type;
229 };
230 
231 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)232 assert_replicate_swizzle(const struct ureg_src *reg)
233 {
234     assert(reg->SwizzleY == reg->SwizzleX &&
235            reg->SwizzleZ == reg->SwizzleX &&
236            reg->SwizzleW == reg->SwizzleX);
237 }
238 
239 static void
sm1_dump_immediate(const struct sm1_src_param * param)240 sm1_dump_immediate(const struct sm1_src_param *param)
241 {
242     switch (param->type) {
243     case NINED3DSPTYPE_FLOAT4:
244         DUMP("{ %f %f %f %f }",
245              param->imm.f[0], param->imm.f[1],
246              param->imm.f[2], param->imm.f[3]);
247         break;
248     case NINED3DSPTYPE_INT4:
249         DUMP("{ %i %i %i %i }",
250              param->imm.i[0], param->imm.i[1],
251              param->imm.i[2], param->imm.i[3]);
252         break;
253     case NINED3DSPTYPE_BOOL:
254         DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
255         break;
256     default:
257         assert(0);
258         break;
259     }
260 }
261 
262 static void
sm1_dump_src_param(const struct sm1_src_param * param)263 sm1_dump_src_param(const struct sm1_src_param *param)
264 {
265     if (param->file == NINED3DSPR_IMMEDIATE) {
266         assert(!param->mod &&
267                !param->rel &&
268                param->swizzle == NINED3DSP_NOSWIZZLE);
269         sm1_dump_immediate(param);
270         return;
271     }
272 
273     if (param->mod)
274         DUMP("%s(", sm1_mod_str[param->mod]);
275     if (param->rel) {
276         DUMP("%c[", sm1_file_char[param->file]);
277         sm1_dump_src_param(param->rel);
278         DUMP("+%i]", param->idx);
279     } else {
280         sm1_dump_reg(param->file, param->idx);
281     }
282     if (param->mod)
283        DUMP(")");
284     if (param->swizzle != NINED3DSP_NOSWIZZLE) {
285        DUMP(".");
286        sm1_dump_swizzle(param->swizzle);
287     }
288 }
289 
290 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)291 sm1_dump_dst_param(const struct sm1_dst_param *param)
292 {
293    if (param->mod & NINED3DSPDM_SATURATE)
294       DUMP("sat ");
295    if (param->mod & NINED3DSPDM_PARTIALP)
296       DUMP("pp ");
297    if (param->mod & NINED3DSPDM_CENTROID)
298       DUMP("centroid ");
299    if (param->shift < 0)
300       DUMP("/%u ", 1 << -param->shift);
301    if (param->shift > 0)
302       DUMP("*%u ", 1 << param->shift);
303 
304    if (param->rel) {
305       DUMP("%c[", sm1_file_char[param->file]);
306       sm1_dump_src_param(param->rel);
307       DUMP("+%i]", param->idx);
308    } else {
309       sm1_dump_reg(param->file, param->idx);
310    }
311    if (param->mask != NINED3DSP_WRITEMASK_ALL) {
312       DUMP(".");
313       sm1_dump_writemask(param->mask);
314    }
315 }
316 
317 struct sm1_semantic
318 {
319    struct sm1_dst_param reg;
320    BYTE sampler_type;
321    D3DDECLUSAGE usage;
322    BYTE usage_idx;
323 };
324 
325 struct sm1_op_info
326 {
327     /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
328      * should be ignored completely */
329     unsigned sio;
330     unsigned opcode; /* TGSI_OPCODE_x */
331 
332     /* versions are still set even handler is set */
333     struct {
334         unsigned min;
335         unsigned max;
336     } vert_version, frag_version;
337 
338     /* number of regs parsed outside of special handler */
339     unsigned ndst;
340     unsigned nsrc;
341 
342     /* some instructions don't map perfectly, so use a special handler */
343     translate_instruction_func handler;
344 };
345 
346 struct sm1_instruction
347 {
348     D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
349     BYTE flags;
350     BOOL coissue;
351     BOOL predicated;
352     BYTE ndst;
353     BYTE nsrc;
354     struct sm1_src_param src[4];
355     struct sm1_src_param src_rel[4];
356     struct sm1_src_param pred;
357     struct sm1_src_param dst_rel[1];
358     struct sm1_dst_param dst[1];
359 
360     const struct sm1_op_info *info;
361 };
362 
363 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)364 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
365 {
366     unsigned i;
367 
368     /* no info stored for these: */
369     if (insn->opcode == D3DSIO_DCL)
370         return;
371     for (i = 0; i < indent; ++i)
372         DUMP("  ");
373 
374     if (insn->predicated) {
375         DUMP("@");
376         sm1_dump_src_param(&insn->pred);
377         DUMP(" ");
378     }
379     DUMP("%s", d3dsio_to_string(insn->opcode));
380     if (insn->flags) {
381         switch (insn->opcode) {
382         case D3DSIO_TEX:
383             DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
384             break;
385         default:
386             DUMP("_%x", insn->flags);
387             break;
388         }
389     }
390     if (insn->coissue)
391         DUMP("_co");
392     DUMP(" ");
393 
394     for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
395         sm1_dump_dst_param(&insn->dst[i]);
396         DUMP(" ");
397     }
398 
399     for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
400         sm1_dump_src_param(&insn->src[i]);
401         DUMP(" ");
402     }
403     if (insn->opcode == D3DSIO_DEF ||
404         insn->opcode == D3DSIO_DEFI ||
405         insn->opcode == D3DSIO_DEFB)
406         sm1_dump_immediate(&insn->src[0]);
407 
408     DUMP("\n");
409 }
410 
411 struct sm1_local_const
412 {
413     INT idx;
414     struct ureg_src reg;
415     float f[4]; /* for indirect addressing of float constants */
416 };
417 
418 struct shader_translator
419 {
420     const DWORD *byte_code;
421     const DWORD *parse;
422     const DWORD *parse_next;
423 
424     struct ureg_program *ureg;
425 
426     /* shader version */
427     struct {
428         BYTE major;
429         BYTE minor;
430     } version;
431     unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
432     unsigned num_constf_allowed;
433     unsigned num_consti_allowed;
434     unsigned num_constb_allowed;
435 
436     bool native_integers;
437     bool inline_subroutines;
438     bool want_texcoord;
439     bool shift_wpos;
440     bool wpos_is_sysval;
441     bool face_is_sysval_integer;
442     bool mul_zero_wins;
443     bool always_output_pointsize;
444     bool no_vs_window_space;
445     unsigned texcoord_sn;
446 
447     struct sm1_instruction insn; /* current instruction */
448 
449     struct {
450         struct ureg_dst *r;
451         struct ureg_dst oPos;
452         struct ureg_dst oPos_out; /* the real output when doing streamout or clipplane emulation */
453         struct ureg_dst oFog;
454         struct ureg_dst oPts;
455         struct ureg_dst oCol[4];
456         struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
457         struct ureg_dst oDepth;
458         struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
459         struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
460         struct ureg_src vPos;
461         struct ureg_src vFace;
462         struct ureg_src s;
463         struct ureg_dst p;
464         struct ureg_dst address;
465         struct ureg_dst a0;
466         struct ureg_dst predicate;
467         struct ureg_dst predicate_tmp;
468         struct ureg_dst predicate_dst;
469         struct ureg_dst tS[8]; /* texture stage registers */
470         struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
471         struct ureg_dst t[8]; /* scratch TEMPs */
472         struct ureg_src vC[2]; /* PS color in */
473         struct ureg_src vT[8]; /* PS texcoord in */
474         struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop/rep ctr */
475         struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* aL emulation */
476     } regs;
477     unsigned num_temp; /* ARRAY_SIZE(regs.r) */
478     unsigned num_scratch;
479     unsigned loop_depth;
480     unsigned loop_depth_max;
481     unsigned cond_depth;
482     unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
483     unsigned cond_labels[NINE_MAX_COND_DEPTH];
484     bool loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
485     bool predicated_activated;
486 
487     unsigned *inst_labels; /* LABEL op */
488     unsigned num_inst_labels;
489 
490     unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
491 
492     struct sm1_local_const *lconstf;
493     unsigned num_lconstf;
494     struct sm1_local_const *lconsti;
495     unsigned num_lconsti;
496     struct sm1_local_const *lconstb;
497     unsigned num_lconstb;
498 
499     bool slots_used[NINE_MAX_CONST_ALL_VS];
500     unsigned *slot_map;
501     unsigned num_slots;
502 
503     bool indirect_const_access;
504     bool failure;
505 
506     struct nine_vs_output_info output_info[16];
507     int num_outputs;
508 
509     struct nine_shader_info *info;
510 
511     int16_t op_info_map[D3DSIO_BREAKP + 1];
512 };
513 
514 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
515 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
516 
517 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
518 
519 static void
520 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
521 
522 static void
sm1_instruction_check(const struct sm1_instruction * insn)523 sm1_instruction_check(const struct sm1_instruction *insn)
524 {
525     if (insn->opcode == D3DSIO_CRS)
526     {
527         if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
528         {
529             DBG("CRS.mask.w\n");
530         }
531     }
532 }
533 
534 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)535 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
536                     int mask, int output_index)
537 {
538     tx->output_info[tx->num_outputs].output_semantic = Usage;
539     tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
540     tx->output_info[tx->num_outputs].mask = mask;
541     tx->output_info[tx->num_outputs].output_index = output_index;
542     tx->num_outputs++;
543 }
544 
nine_float_constant_src(struct shader_translator * tx,int idx)545 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
546 {
547     struct ureg_src src;
548 
549     if (tx->slot_map)
550         idx = tx->slot_map[idx];
551     /* vswp constant handling: we use two buffers
552      * to fit all the float constants. The special handling
553      * doesn't need to be elsewhere, because all the instructions
554      * accessing the constants directly are VS1, and swvp
555      * is VS >= 2 */
556     if (tx->info->swvp_on && idx >= 4096) {
557         /* TODO: swvp rel is broken if many constants are used */
558         src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
559         src = ureg_src_dimension(src, 1);
560     } else {
561         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
562         src = ureg_src_dimension(src, 0);
563     }
564 
565     if (!tx->info->swvp_on)
566         tx->slots_used[idx] = true;
567     if (tx->info->const_float_slots < (idx + 1))
568         tx->info->const_float_slots = idx + 1;
569     if (tx->num_slots < (idx + 1))
570         tx->num_slots = idx + 1;
571 
572     return src;
573 }
574 
nine_integer_constant_src(struct shader_translator * tx,int idx)575 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
576 {
577     struct ureg_src src;
578 
579     if (tx->info->swvp_on) {
580         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
581         src = ureg_src_dimension(src, 2);
582     } else {
583         unsigned slot_idx = tx->info->const_i_base + idx;
584         if (tx->slot_map)
585             slot_idx = tx->slot_map[slot_idx];
586         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
587         src = ureg_src_dimension(src, 0);
588         tx->slots_used[slot_idx] = true;
589         tx->info->int_slots_used[idx] = true;
590         if (tx->num_slots < (slot_idx + 1))
591             tx->num_slots = slot_idx + 1;
592     }
593 
594     if (tx->info->const_int_slots < (idx + 1))
595         tx->info->const_int_slots = idx + 1;
596 
597     return src;
598 }
599 
nine_boolean_constant_src(struct shader_translator * tx,int idx)600 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
601 {
602     struct ureg_src src;
603 
604     char r = idx / 4;
605     char s = idx & 3;
606 
607     if (tx->info->swvp_on) {
608         src = ureg_src_register(TGSI_FILE_CONSTANT, r);
609         src = ureg_src_dimension(src, 3);
610     } else {
611         unsigned slot_idx = tx->info->const_b_base + r;
612         if (tx->slot_map)
613             slot_idx = tx->slot_map[slot_idx];
614         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
615         src = ureg_src_dimension(src, 0);
616         tx->slots_used[slot_idx] = true;
617         tx->info->bool_slots_used[idx] = true;
618         if (tx->num_slots < (slot_idx + 1))
619             tx->num_slots = slot_idx + 1;
620     }
621     src = ureg_swizzle(src, s, s, s, s);
622 
623     if (tx->info->const_bool_slots < (idx + 1))
624         tx->info->const_bool_slots = idx + 1;
625 
626     return src;
627 }
628 
nine_special_constant_src(struct shader_translator * tx,int idx)629 static struct ureg_src nine_special_constant_src(struct shader_translator *tx, int idx)
630 {
631     struct ureg_src src;
632 
633     unsigned slot_idx = idx + (IS_PS ? NINE_MAX_CONST_PS_SPE_OFFSET :
634         (tx->info->swvp_on ? NINE_MAX_CONST_SWVP_SPE_OFFSET : NINE_MAX_CONST_VS_SPE_OFFSET));
635 
636     if (!tx->info->swvp_on && tx->slot_map)
637         slot_idx = tx->slot_map[slot_idx];
638     src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
639     src = ureg_src_dimension(src, 0);
640 
641     if (!tx->info->swvp_on)
642         tx->slots_used[slot_idx] = true;
643     if (tx->num_slots < (slot_idx + 1))
644         tx->num_slots = slot_idx + 1;
645 
646     return src;
647 }
648 
649 static bool
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)650 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
651 {
652    INT i;
653 
654    if (index < 0 || index >= tx->num_constf_allowed) {
655        tx->failure = true;
656        return false;
657    }
658    for (i = 0; i < tx->num_lconstf; ++i) {
659       if (tx->lconstf[i].idx == index) {
660          *src = tx->lconstf[i].reg;
661          return true;
662       }
663    }
664    return false;
665 }
666 static bool
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)667 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
668 {
669    int i;
670 
671    if (index < 0 || index >= tx->num_consti_allowed) {
672        tx->failure = true;
673        return false;
674    }
675    for (i = 0; i < tx->num_lconsti; ++i) {
676       if (tx->lconsti[i].idx == index) {
677          *src = tx->lconsti[i].reg;
678          return true;
679       }
680    }
681    return false;
682 }
683 static bool
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)684 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
685 {
686    int i;
687 
688    if (index < 0 || index >= tx->num_constb_allowed) {
689        tx->failure = true;
690        return false;
691    }
692    for (i = 0; i < tx->num_lconstb; ++i) {
693       if (tx->lconstb[i].idx == index) {
694          *src = tx->lconstb[i].reg;
695          return true;
696       }
697    }
698    return false;
699 }
700 
701 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])702 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
703 {
704     unsigned n;
705 
706     FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
707 
708     for (n = 0; n < tx->num_lconstf; ++n)
709         if (tx->lconstf[n].idx == index)
710             break;
711     if (n == tx->num_lconstf) {
712        if ((n % 8) == 0) {
713           tx->lconstf = REALLOC(tx->lconstf,
714                                 (n + 0) * sizeof(tx->lconstf[0]),
715                                 (n + 8) * sizeof(tx->lconstf[0]));
716           assert(tx->lconstf);
717        }
718        tx->num_lconstf++;
719     }
720     tx->lconstf[n].idx = index;
721     tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
722 
723     memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
724 }
725 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])726 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
727 {
728     unsigned n;
729 
730     FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
731 
732     for (n = 0; n < tx->num_lconsti; ++n)
733         if (tx->lconsti[n].idx == index)
734             break;
735     if (n == tx->num_lconsti) {
736        if ((n % 8) == 0) {
737           tx->lconsti = REALLOC(tx->lconsti,
738                                 (n + 0) * sizeof(tx->lconsti[0]),
739                                 (n + 8) * sizeof(tx->lconsti[0]));
740           assert(tx->lconsti);
741        }
742        tx->num_lconsti++;
743     }
744 
745     tx->lconsti[n].idx = index;
746     tx->lconsti[n].reg = tx->native_integers ?
747        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
748        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
749 }
750 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)751 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
752 {
753     unsigned n;
754 
755     FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
756 
757     for (n = 0; n < tx->num_lconstb; ++n)
758         if (tx->lconstb[n].idx == index)
759             break;
760     if (n == tx->num_lconstb) {
761        if ((n % 8) == 0) {
762           tx->lconstb = REALLOC(tx->lconstb,
763                                 (n + 0) * sizeof(tx->lconstb[0]),
764                                 (n + 8) * sizeof(tx->lconstb[0]));
765           assert(tx->lconstb);
766        }
767        tx->num_lconstb++;
768     }
769 
770     tx->lconstb[n].idx = index;
771     tx->lconstb[n].reg = tx->native_integers ?
772        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
773        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
774 }
775 
776 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)777 tx_scratch(struct shader_translator *tx)
778 {
779     if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
780         tx->failure = true;
781         return tx->regs.t[0];
782     }
783     if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
784         tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
785     return tx->regs.t[tx->num_scratch++];
786 }
787 
788 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)789 tx_scratch_scalar(struct shader_translator *tx)
790 {
791     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
792 }
793 
794 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)795 tx_src_scalar(struct ureg_dst dst)
796 {
797     struct ureg_src src = ureg_src(dst);
798     int c = ffs(dst.WriteMask) - 1;
799     if (dst.WriteMask == (1 << c))
800         src = ureg_scalar(src, c);
801     return src;
802 }
803 
804 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)805 tx_temp_alloc(struct shader_translator *tx, INT idx)
806 {
807     assert(idx >= 0);
808     if (idx >= tx->num_temp) {
809        unsigned k = tx->num_temp;
810        unsigned n = idx + 1;
811        tx->regs.r = REALLOC(tx->regs.r,
812                             k * sizeof(tx->regs.r[0]),
813                             n * sizeof(tx->regs.r[0]));
814        for (; k < n; ++k)
815           tx->regs.r[k] = ureg_dst_undef();
816        tx->num_temp = n;
817     }
818     if (ureg_dst_is_undef(tx->regs.r[idx]))
819         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
820 }
821 
822 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)823 tx_addr_alloc(struct shader_translator *tx, INT idx)
824 {
825     assert(idx == 0);
826     if (ureg_dst_is_undef(tx->regs.address))
827         tx->regs.address = ureg_DECL_address(tx->ureg);
828     if (ureg_dst_is_undef(tx->regs.a0))
829         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
830 }
831 
832 static inline bool
TEX_if_fetch4(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)833 TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
834               unsigned target, struct ureg_src src0,
835               struct ureg_src src1, INT idx)
836 {
837     struct ureg_dst tmp;
838     struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
839 
840     if (!(tx->info->fetch4 & (1 << idx)))
841         return false;
842 
843     /* TODO: needs more tests, but this feature is not much used at all */
844 
845     tmp = tx_scratch(tx);
846     ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
847                   NULL, 0, src_tg4, 3);
848     ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
849     return true;
850 }
851 
852 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
853  * the projection should be applied on the texture. It doesn't
854  * apply on texkill.
855  * The doc is very imprecise here (it says the projection is done
856  * before rasterization, thus in vs, which seems wrong since ps instructions
857  * are affected differently)
858  * For now we only apply to the ps TEX instruction and TEXBEM.
859  * Perhaps some other instructions would need it */
860 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)861 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
862                       struct ureg_src src, INT idx)
863 {
864     struct ureg_dst tmp;
865     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
866 
867     /* no projection */
868     if (dim == 1) {
869         ureg_MOV(tx->ureg, dst, src);
870     } else {
871         tmp = tx_scratch_scalar(tx);
872         ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
873         ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
874     }
875 }
876 
877 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)878 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
879                          unsigned target, struct ureg_src src0,
880                          struct ureg_src src1, INT idx)
881 {
882     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
883     struct ureg_dst tmp;
884     bool shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
885 
886     /* dim == 1: no projection
887      * Looks like must be disabled when it makes no
888      * sense according the texture dimensions
889      */
890     if (dim == 1 || (dim <= target && !shadow)) {
891         ureg_TEX(tx->ureg, dst, target, src0, src1);
892     } else if (dim == 4) {
893         ureg_TXP(tx->ureg, dst, target, src0, src1);
894     } else {
895         tmp = tx_scratch(tx);
896         apply_ps1x_projection(tx, tmp, src0, idx);
897         ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
898     }
899 }
900 
901 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)902 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
903 {
904     assert(IS_PS);
905     assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
906     if (ureg_src_is_undef(tx->regs.vT[idx]))
907        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
908                                              TGSI_INTERPOLATE_PERSPECTIVE);
909 }
910 
911 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)912 tx_bgnloop(struct shader_translator *tx)
913 {
914     tx->loop_depth++;
915     if (tx->loop_depth_max < tx->loop_depth)
916         tx->loop_depth_max = tx->loop_depth;
917     assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
918     return &tx->loop_labels[tx->loop_depth - 1];
919 }
920 
921 static inline unsigned *
tx_endloop(struct shader_translator * tx)922 tx_endloop(struct shader_translator *tx)
923 {
924     assert(tx->loop_depth);
925     tx->loop_depth--;
926     ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
927                      ureg_get_instruction_number(tx->ureg));
928     return &tx->loop_labels[tx->loop_depth];
929 }
930 
931 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,bool loop_or_rep)932 tx_get_loopctr(struct shader_translator *tx, bool loop_or_rep)
933 {
934     const unsigned l = tx->loop_depth - 1;
935 
936     if (!tx->loop_depth)
937     {
938         DBG("loop counter requested outside of loop\n");
939         return ureg_dst_undef();
940     }
941 
942     if (ureg_dst_is_undef(tx->regs.rL[l])) {
943         /* loop or rep ctr creation */
944         tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
945         if (loop_or_rep)
946             tx->regs.aL[l] = ureg_DECL_local_temporary(tx->ureg);
947         tx->loop_or_rep[l] = loop_or_rep;
948     }
949     /* loop - rep - endloop - endrep not allowed */
950     assert(tx->loop_or_rep[l] == loop_or_rep);
951 
952     return tx->regs.rL[l];
953 }
954 
955 static struct ureg_dst
tx_get_loopal(struct shader_translator * tx)956 tx_get_loopal(struct shader_translator *tx)
957 {
958     int loop_level = tx->loop_depth - 1;
959 
960     while (loop_level >= 0) {
961         /* handle loop - rep - endrep - endloop case */
962         if (tx->loop_or_rep[loop_level])
963             /* the aL value is in the Y component (nine implementation) */
964             return tx->regs.aL[loop_level];
965         loop_level--;
966     }
967 
968     DBG("aL counter requested outside of loop\n");
969     return ureg_dst_undef();
970 }
971 
972 static inline unsigned *
tx_cond(struct shader_translator * tx)973 tx_cond(struct shader_translator *tx)
974 {
975    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
976    tx->cond_depth++;
977    return &tx->cond_labels[tx->cond_depth - 1];
978 }
979 
980 static inline unsigned *
tx_elsecond(struct shader_translator * tx)981 tx_elsecond(struct shader_translator *tx)
982 {
983    assert(tx->cond_depth);
984    return &tx->cond_labels[tx->cond_depth - 1];
985 }
986 
987 static inline void
tx_endcond(struct shader_translator * tx)988 tx_endcond(struct shader_translator *tx)
989 {
990    assert(tx->cond_depth);
991    tx->cond_depth--;
992    ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
993                     ureg_get_instruction_number(tx->ureg));
994 }
995 
996 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)997 nine_ureg_dst_register(unsigned file, int index)
998 {
999     return ureg_dst(ureg_src_register(file, index));
1000 }
1001 
1002 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)1003 nine_get_position_input(struct shader_translator *tx)
1004 {
1005     struct ureg_program *ureg = tx->ureg;
1006 
1007     if (tx->wpos_is_sysval)
1008         return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1009     else
1010         return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1011                                   0, TGSI_INTERPOLATE_LINEAR);
1012 }
1013 
1014 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)1015 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1016 {
1017     struct ureg_program *ureg = tx->ureg;
1018     struct ureg_src src;
1019     struct ureg_dst tmp;
1020 
1021     assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1022         (param->file == D3DSPR_INPUT && tx->version.major == 3));
1023 
1024     switch (param->file)
1025     {
1026     case D3DSPR_TEMP:
1027         tx_temp_alloc(tx, param->idx);
1028         src = ureg_src(tx->regs.r[param->idx]);
1029         break;
1030  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1031     case D3DSPR_ADDR:
1032         if (IS_VS) {
1033             assert(param->idx == 0);
1034             /* the address register (vs only) must be
1035              * assigned before use */
1036             assert(!ureg_dst_is_undef(tx->regs.a0));
1037             /* Round to lowest for vs1.1 (contrary to the doc), else
1038              * round to nearest */
1039             if (tx->version.major < 2 && tx->version.minor < 2)
1040                 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1041             else
1042                 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1043             src = ureg_src(tx->regs.address);
1044         } else {
1045             if (tx->version.major < 2 && tx->version.minor < 4) {
1046                 /* no subroutines, so should be defined */
1047                 src = ureg_src(tx->regs.tS[param->idx]);
1048             } else {
1049                 tx_texcoord_alloc(tx, param->idx);
1050                 src = tx->regs.vT[param->idx];
1051             }
1052         }
1053         break;
1054     case D3DSPR_INPUT:
1055         if (IS_VS) {
1056             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1057         } else {
1058             if (tx->version.major < 3) {
1059                 src = ureg_DECL_fs_input_centroid(
1060                     ureg, TGSI_SEMANTIC_COLOR, param->idx,
1061                     tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE,
1062                     tx->info->force_color_in_centroid ?
1063                       TGSI_INTERPOLATE_LOC_CENTROID : 0,
1064                     0, 1);
1065             } else {
1066                 if(param->rel) {
1067                     /* Copy all inputs (non consecutive)
1068                      * to temp array (consecutive).
1069                      * This is not good for performance.
1070                      * A better way would be to have inputs
1071                      * consecutive (would need implement alternative
1072                      * way to match vs outputs and ps inputs).
1073                      * However even with the better way, the temp array
1074                      * copy would need to be used if some inputs
1075                      * are not GENERIC or if they have different
1076                      * interpolation flag. */
1077                     if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1078                         int i;
1079                         tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1080                         for (i = 0; i < 10; i++) {
1081                             if (!ureg_src_is_undef(tx->regs.v[i]))
1082                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1083                             else
1084                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1085                         }
1086                     }
1087                     src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1088                 } else {
1089                     assert(param->idx < ARRAY_SIZE(tx->regs.v));
1090                     src = tx->regs.v[param->idx];
1091                 }
1092             }
1093         }
1094         if (param->rel)
1095             src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1096         break;
1097     case D3DSPR_PREDICATE:
1098         if (ureg_dst_is_undef(tx->regs.predicate)) {
1099             /* Forbidden to use the predicate register before being set */
1100             tx->failure = true;
1101             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1102         }
1103         src = ureg_src(tx->regs.predicate);
1104         break;
1105     case D3DSPR_SAMPLER:
1106         assert(param->mod == NINED3DSPSM_NONE);
1107         /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1108         src = ureg_DECL_sampler(ureg, param->idx);
1109         break;
1110     case D3DSPR_CONST:
1111         if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1112             src = nine_float_constant_src(tx, param->idx);
1113             if (param->rel) {
1114                 tx->indirect_const_access = true;
1115                 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1116             }
1117         }
1118         if (!IS_VS && tx->version.major < 2) {
1119             /* ps 1.X clamps constants */
1120             tmp = tx_scratch(tx);
1121             ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1122             ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1123             src = ureg_src(tmp);
1124         }
1125         break;
1126     case D3DSPR_CONST2:
1127     case D3DSPR_CONST3:
1128     case D3DSPR_CONST4:
1129         DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1130         assert(!"CONST2/3/4");
1131         src = ureg_imm1f(ureg, 0.0f);
1132         break;
1133     case D3DSPR_CONSTINT:
1134         /* relative addressing only possible for float constants in vs */
1135         if (!tx_lconsti(tx, &src, param->idx))
1136             src = nine_integer_constant_src(tx, param->idx);
1137         break;
1138     case D3DSPR_CONSTBOOL:
1139         if (!tx_lconstb(tx, &src, param->idx))
1140             src = nine_boolean_constant_src(tx, param->idx);
1141         break;
1142     case D3DSPR_LOOP:
1143         if (ureg_dst_is_undef(tx->regs.address))
1144             tx->regs.address = ureg_DECL_address(ureg);
1145         if (!tx->native_integers)
1146             ureg_ARR(ureg, tx->regs.address,
1147                      ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1148         else
1149             ureg_UARL(ureg, tx->regs.address,
1150                       ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1151         src = ureg_src(tx->regs.address);
1152         break;
1153     case D3DSPR_MISCTYPE:
1154         switch (param->idx) {
1155         case D3DSMO_POSITION:
1156            if (ureg_src_is_undef(tx->regs.vPos))
1157               tx->regs.vPos = nine_get_position_input(tx);
1158            if (tx->shift_wpos) {
1159                /* TODO: do this only once */
1160                struct ureg_dst wpos = tx_scratch(tx);
1161                ureg_ADD(ureg, wpos, tx->regs.vPos,
1162                         ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1163                src = ureg_src(wpos);
1164            } else {
1165                src = tx->regs.vPos;
1166            }
1167            break;
1168         case D3DSMO_FACE:
1169            if (ureg_src_is_undef(tx->regs.vFace)) {
1170                if (tx->face_is_sysval_integer) {
1171                    tmp = ureg_DECL_temporary(ureg);
1172                    tx->regs.vFace =
1173                        ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1174 
1175                    /* convert bool to float */
1176                    ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1177                              ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1178                    tx->regs.vFace = ureg_src(tmp);
1179                } else {
1180                    tx->regs.vFace = ureg_DECL_fs_input(ureg,
1181                                                        TGSI_SEMANTIC_FACE, 0,
1182                                                        TGSI_INTERPOLATE_CONSTANT);
1183                }
1184                tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1185            }
1186            src = tx->regs.vFace;
1187            break;
1188         default:
1189             assert(!"invalid src D3DSMO");
1190             break;
1191         }
1192         break;
1193     case D3DSPR_TEMPFLOAT16:
1194         break;
1195     default:
1196         assert(!"invalid src D3DSPR");
1197     }
1198 
1199     switch (param->mod) {
1200     case NINED3DSPSM_DW:
1201         tmp = tx_scratch(tx);
1202         /* NOTE: app is not allowed to read w with this modifier */
1203         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1204         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1205         src = ureg_src(tmp);
1206         break;
1207     case NINED3DSPSM_DZ:
1208         tmp = tx_scratch(tx);
1209         /* NOTE: app is not allowed to read z with this modifier */
1210         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1211         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1212         src = ureg_src(tmp);
1213         break;
1214     default:
1215         break;
1216     }
1217 
1218     if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1219         src = ureg_swizzle(src,
1220                            (param->swizzle >> 0) & 0x3,
1221                            (param->swizzle >> 2) & 0x3,
1222                            (param->swizzle >> 4) & 0x3,
1223                            (param->swizzle >> 6) & 0x3);
1224 
1225     switch (param->mod) {
1226     case NINED3DSPSM_ABS:
1227         src = ureg_abs(src);
1228         break;
1229     case NINED3DSPSM_ABSNEG:
1230         src = ureg_negate(ureg_abs(src));
1231         break;
1232     case NINED3DSPSM_NEG:
1233         src = ureg_negate(src);
1234         break;
1235     case NINED3DSPSM_BIAS:
1236         tmp = tx_scratch(tx);
1237         ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1238         src = ureg_src(tmp);
1239         break;
1240     case NINED3DSPSM_BIASNEG:
1241         tmp = tx_scratch(tx);
1242         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1243         src = ureg_src(tmp);
1244         break;
1245     case NINED3DSPSM_NOT:
1246         if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1247             tmp = tx_scratch(tx);
1248             ureg_NOT(ureg, tmp, src);
1249             src = ureg_src(tmp);
1250             break;
1251         } else { /* predicate */
1252             tmp = tx_scratch(tx);
1253             ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1254             src = ureg_src(tmp);
1255         }
1256         FALLTHROUGH;
1257     case NINED3DSPSM_COMP:
1258         tmp = tx_scratch(tx);
1259         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1260         src = ureg_src(tmp);
1261         break;
1262     case NINED3DSPSM_DZ:
1263     case NINED3DSPSM_DW:
1264         /* Already handled*/
1265         break;
1266     case NINED3DSPSM_SIGN:
1267         tmp = tx_scratch(tx);
1268         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1269         src = ureg_src(tmp);
1270         break;
1271     case NINED3DSPSM_SIGNNEG:
1272         tmp = tx_scratch(tx);
1273         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1274         src = ureg_src(tmp);
1275         break;
1276     case NINED3DSPSM_X2:
1277         tmp = tx_scratch(tx);
1278         ureg_ADD(ureg, tmp, src, src);
1279         src = ureg_src(tmp);
1280         break;
1281     case NINED3DSPSM_X2NEG:
1282         tmp = tx_scratch(tx);
1283         ureg_ADD(ureg, tmp, src, src);
1284         src = ureg_negate(ureg_src(tmp));
1285         break;
1286     default:
1287         assert(param->mod == NINED3DSPSM_NONE);
1288         break;
1289     }
1290 
1291     return src;
1292 }
1293 
1294 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1295 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1296 {
1297     struct ureg_dst dst;
1298 
1299     switch (param->file)
1300     {
1301     case D3DSPR_TEMP:
1302         assert(!param->rel);
1303         tx_temp_alloc(tx, param->idx);
1304         dst = tx->regs.r[param->idx];
1305         break;
1306  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1307     case D3DSPR_ADDR:
1308         assert(!param->rel);
1309         if (tx->version.major < 2 && !IS_VS) {
1310             if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1311                 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1312             dst = tx->regs.tS[param->idx];
1313         } else
1314         if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1315             tx_texcoord_alloc(tx, param->idx);
1316             dst = ureg_dst(tx->regs.vT[param->idx]);
1317         } else {
1318             tx_addr_alloc(tx, param->idx);
1319             dst = tx->regs.a0;
1320         }
1321         break;
1322     case D3DSPR_RASTOUT:
1323         assert(!param->rel);
1324         switch (param->idx) {
1325         case 0:
1326             if (ureg_dst_is_undef(tx->regs.oPos)) {
1327                 if (tx->info->clip_plane_emulation > 0) {
1328                     tx->regs.oPos = ureg_DECL_temporary(tx->ureg);
1329                 } else {
1330                     tx->regs.oPos = tx->regs.oPos_out;
1331                 }
1332             }
1333             dst = tx->regs.oPos;
1334             break;
1335         case 1:
1336             if (ureg_dst_is_undef(tx->regs.oFog))
1337                 tx->regs.oFog =
1338                     ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1339             dst = tx->regs.oFog;
1340             break;
1341         case 2:
1342             if (ureg_dst_is_undef(tx->regs.oPts))
1343                 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1344             dst = tx->regs.oPts;
1345             break;
1346         default:
1347             assert(0);
1348             break;
1349         }
1350         break;
1351  /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1352     case D3DSPR_OUTPUT:
1353         if (tx->version.major < 3) {
1354             assert(!param->rel);
1355             dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1356         } else {
1357             assert(!param->rel); /* TODO */
1358             assert(param->idx < ARRAY_SIZE(tx->regs.o));
1359             dst = tx->regs.o[param->idx];
1360         }
1361         break;
1362     case D3DSPR_ATTROUT: /* VS */
1363     case D3DSPR_COLOROUT: /* PS */
1364         assert(param->idx >= 0 && param->idx < 4);
1365         assert(!param->rel);
1366         tx->info->rt_mask |= 1 << param->idx;
1367         if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1368             /* ps < 3: oCol[0] will have fog blending afterward
1369              * ps: oCol[0] might have alphatest afterward */
1370             if (!IS_VS && param->idx == 0) {
1371                 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1372             } else {
1373                 tx->regs.oCol[param->idx] =
1374                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1375             }
1376         }
1377         dst = tx->regs.oCol[param->idx];
1378         if (IS_VS && tx->version.major < 3)
1379             dst = ureg_saturate(dst);
1380         break;
1381     case D3DSPR_DEPTHOUT:
1382         assert(!param->rel);
1383         if (ureg_dst_is_undef(tx->regs.oDepth))
1384            tx->regs.oDepth =
1385               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1386                                       TGSI_WRITEMASK_Z, 0, 1);
1387         dst = tx->regs.oDepth; /* XXX: must write .z component */
1388         break;
1389     case D3DSPR_PREDICATE:
1390         if (ureg_dst_is_undef(tx->regs.predicate))
1391             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1392         dst = tx->regs.predicate;
1393         break;
1394     case D3DSPR_TEMPFLOAT16:
1395         DBG("unhandled D3DSPR: %u\n", param->file);
1396         break;
1397     default:
1398         assert(!"invalid dst D3DSPR");
1399         break;
1400     }
1401     if (param->rel)
1402         dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1403 
1404     if (param->mask != NINED3DSP_WRITEMASK_ALL)
1405         dst = ureg_writemask(dst, param->mask);
1406     if (param->mod & NINED3DSPDM_SATURATE)
1407         dst = ureg_saturate(dst);
1408 
1409     if (tx->predicated_activated) {
1410         tx->regs.predicate_dst = dst;
1411         dst = tx->regs.predicate_tmp;
1412     }
1413 
1414     return dst;
1415 }
1416 
1417 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1418 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1419 {
1420     if (param->shift) {
1421         tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1422         return tx->regs.tdst;
1423     }
1424     return _tx_dst_param(tx, param);
1425 }
1426 
1427 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1428 tx_apply_dst0_modifiers(struct shader_translator *tx)
1429 {
1430     struct ureg_dst rdst;
1431     float f;
1432 
1433     if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1434         return;
1435     rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1436 
1437     assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1438 
1439     if (tx->insn.dst[0].shift < 0)
1440         f = 1.0f / (1 << -tx->insn.dst[0].shift);
1441     else
1442         f = 1 << tx->insn.dst[0].shift;
1443 
1444     ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1445 }
1446 
1447 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1448 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1449 {
1450     struct ureg_src src;
1451 
1452     assert(!param->shift);
1453     assert(!(param->mod & NINED3DSPDM_SATURATE));
1454 
1455     switch (param->file) {
1456     case D3DSPR_INPUT:
1457         if (IS_VS) {
1458             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1459         } else {
1460             assert(!param->rel);
1461             assert(param->idx < ARRAY_SIZE(tx->regs.v));
1462             src = tx->regs.v[param->idx];
1463         }
1464         break;
1465     default:
1466         src = ureg_src(tx_dst_param(tx, param));
1467         break;
1468     }
1469     if (param->rel)
1470         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1471 
1472     if (!param->mask)
1473         WARN("mask is 0, using identity swizzle\n");
1474 
1475     if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1476         char s[4];
1477         int n;
1478         int c;
1479         for (n = 0, c = 0; c < 4; ++c)
1480             if (param->mask & (1 << c))
1481                 s[n++] = c;
1482         assert(n);
1483         for (c = n; c < 4; ++c)
1484             s[c] = s[n - 1];
1485         src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1486     }
1487     return src;
1488 }
1489 
1490 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1491 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1492 {
1493     struct ureg_program *ureg = tx->ureg;
1494     struct ureg_dst dst;
1495     struct ureg_src src[2];
1496     struct sm1_src_param *src_mat = &tx->insn.src[1];
1497     unsigned i;
1498 
1499     dst = tx_dst_param(tx, &tx->insn.dst[0]);
1500     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1501 
1502     for (i = 0; i < n; i++)
1503     {
1504         const unsigned m = (1 << i);
1505 
1506         src[1] = tx_src_param(tx, src_mat);
1507         src_mat->idx++;
1508 
1509         if (!(dst.WriteMask & m))
1510             continue;
1511 
1512         /* XXX: src == dst case ? */
1513 
1514         switch (k) {
1515         case 3:
1516             ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1517             break;
1518         case 4:
1519             ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1520             break;
1521         default:
1522             DBG("invalid operation: M%ux%u\n", m, n);
1523             break;
1524         }
1525     }
1526 
1527     return D3D_OK;
1528 }
1529 
/* VNOTSUPPORTED expands to the two comma-separated values "0, 0".
 * NOTE(review): presumably the (min, max) version pair of an instruction
 * that a shader stage does not support - confirm against the table that
 * uses it.
 */
#define VNOTSUPPORTED 0, 0

/* Pack a shader version into one integer: major in bits 8 and up, minor
 * in the low bits. */
#define V(major, minor) (((major) << 8) | (minor))
1532 
1533 static inline const char *
d3dsio_to_string(unsigned opcode)1534 d3dsio_to_string( unsigned opcode )
1535 {
1536     static const char *names[] = {
1537         "NOP",
1538         "MOV",
1539         "ADD",
1540         "SUB",
1541         "MAD",
1542         "MUL",
1543         "RCP",
1544         "RSQ",
1545         "DP3",
1546         "DP4",
1547         "MIN",
1548         "MAX",
1549         "SLT",
1550         "SGE",
1551         "EXP",
1552         "LOG",
1553         "LIT",
1554         "DST",
1555         "LRP",
1556         "FRC",
1557         "M4x4",
1558         "M4x3",
1559         "M3x4",
1560         "M3x3",
1561         "M3x2",
1562         "CALL",
1563         "CALLNZ",
1564         "LOOP",
1565         "RET",
1566         "ENDLOOP",
1567         "LABEL",
1568         "DCL",
1569         "POW",
1570         "CRS",
1571         "SGN",
1572         "ABS",
1573         "NRM",
1574         "SINCOS",
1575         "REP",
1576         "ENDREP",
1577         "IF",
1578         "IFC",
1579         "ELSE",
1580         "ENDIF",
1581         "BREAK",
1582         "BREAKC",
1583         "MOVA",
1584         "DEFB",
1585         "DEFI",
1586         NULL,
1587         NULL,
1588         NULL,
1589         NULL,
1590         NULL,
1591         NULL,
1592         NULL,
1593         NULL,
1594         NULL,
1595         NULL,
1596         NULL,
1597         NULL,
1598         NULL,
1599         NULL,
1600         NULL,
1601         "TEXCOORD",
1602         "TEXKILL",
1603         "TEX",
1604         "TEXBEM",
1605         "TEXBEML",
1606         "TEXREG2AR",
1607         "TEXREG2GB",
1608         "TEXM3x2PAD",
1609         "TEXM3x2TEX",
1610         "TEXM3x3PAD",
1611         "TEXM3x3TEX",
1612         NULL,
1613         "TEXM3x3SPEC",
1614         "TEXM3x3VSPEC",
1615         "EXPP",
1616         "LOGP",
1617         "CND",
1618         "DEF",
1619         "TEXREG2RGB",
1620         "TEXDP3TEX",
1621         "TEXM3x2DEPTH",
1622         "TEXDP3",
1623         "TEXM3x3",
1624         "TEXDEPTH",
1625         "CMP",
1626         "BEM",
1627         "DP2ADD",
1628         "DSX",
1629         "DSY",
1630         "TEXLDD",
1631         "SETP",
1632         "TEXLDL",
1633         "BREAKP"
1634     };
1635 
1636     if (opcode < ARRAY_SIZE(names)) return names[opcode];
1637 
1638     switch (opcode) {
1639     case D3DSIO_PHASE: return "PHASE";
1640     case D3DSIO_COMMENT: return "COMMENT";
1641     case D3DSIO_END: return "END";
1642     default:
1643         return NULL;
1644     }
1645 }
1646 
/* Initializer with every field zero / NULL. IS_VALID_INSTRUCTION() is zero
 * for an entry initialized this way. */
#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }

/* Bitwise OR of the four version bounds of an entry: non-zero as soon as
 * one of the vertex / fragment min / max versions is non-zero. */
#define IS_VALID_INSTRUCTION(insn) \
    ((insn).vert_version.min | (insn).vert_version.max | \
     (insn).frag_version.min | (insn).frag_version.max)
1652 
1653 #define SPECIAL(name) \
1654     NineTranslateInstruction_##name
1655 
1656 #define DECL_SPECIAL(name) \
1657     static HRESULT \
1658     NineTranslateInstruction_##name( struct shader_translator *tx )
1659 
1660 static HRESULT
1661 NineTranslateInstruction_Generic(struct shader_translator *);
1662 
DECL_SPECIAL(NOP)1663 DECL_SPECIAL(NOP)
1664 {
1665     /* Nothing to do. NOP was used to avoid hangs
1666      * with very old d3d drivers. */
1667     return D3D_OK;
1668 }
1669 
DECL_SPECIAL(SUB)1670 DECL_SPECIAL(SUB)
1671 {
1672     struct ureg_program *ureg = tx->ureg;
1673     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1674     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1675     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1676 
1677     ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1678     return D3D_OK;
1679 }
1680 
DECL_SPECIAL(ABS)1681 DECL_SPECIAL(ABS)
1682 {
1683     struct ureg_program *ureg = tx->ureg;
1684     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1685     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1686 
1687     ureg_MOV(ureg, dst, ureg_abs(src));
1688     return D3D_OK;
1689 }
1690 
DECL_SPECIAL(XPD)1691 DECL_SPECIAL(XPD)
1692 {
1693     struct ureg_program *ureg = tx->ureg;
1694     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1695     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1696     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1697 
1698     ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1699              ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1700                           TGSI_SWIZZLE_X, 0),
1701              ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1702                           TGSI_SWIZZLE_Y, 0));
1703     ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1704              ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1705                           TGSI_SWIZZLE_Y, 0),
1706              ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1707                                       TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1708              ureg_src(dst));
1709     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1710              ureg_imm1f(ureg, 1));
1711     return D3D_OK;
1712 }
1713 
DECL_SPECIAL(M4x4)1714 DECL_SPECIAL(M4x4)
1715 {
1716     return NineTranslateInstruction_Mkxn(tx, 4, 4);
1717 }
1718 
DECL_SPECIAL(M4x3)1719 DECL_SPECIAL(M4x3)
1720 {
1721     return NineTranslateInstruction_Mkxn(tx, 4, 3);
1722 }
1723 
DECL_SPECIAL(M3x4)1724 DECL_SPECIAL(M3x4)
1725 {
1726     return NineTranslateInstruction_Mkxn(tx, 3, 4);
1727 }
1728 
DECL_SPECIAL(M3x3)1729 DECL_SPECIAL(M3x3)
1730 {
1731     return NineTranslateInstruction_Mkxn(tx, 3, 3);
1732 }
1733 
DECL_SPECIAL(M3x2)1734 DECL_SPECIAL(M3x2)
1735 {
1736     return NineTranslateInstruction_Mkxn(tx, 3, 2);
1737 }
1738 
DECL_SPECIAL(CMP)1739 DECL_SPECIAL(CMP)
1740 {
1741     ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1742              tx_src_param(tx, &tx->insn.src[0]),
1743              tx_src_param(tx, &tx->insn.src[2]),
1744              tx_src_param(tx, &tx->insn.src[1]));
1745     return D3D_OK;
1746 }
1747 
DECL_SPECIAL(CND)1748 DECL_SPECIAL(CND)
1749 {
1750     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1751     struct ureg_dst cgt;
1752     struct ureg_src cnd;
1753 
1754     /* the coissue flag was a tip for compilers to advise to
1755      * execute two operations at the same time, in cases
1756      * the two executions had the same dst with different channels.
1757      * It has no effect on current hw. However it seems CND
1758      * is affected. The handling of this very specific case
1759      * handled below mimic wine behaviour */
1760     if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1761         ureg_MOV(tx->ureg,
1762                  dst, tx_src_param(tx, &tx->insn.src[1]));
1763         return D3D_OK;
1764     }
1765 
1766     cnd = tx_src_param(tx, &tx->insn.src[0]);
1767     cgt = tx_scratch(tx);
1768 
1769     if (tx->version.major == 1 && tx->version.minor < 4)
1770         cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1771 
1772     ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1773 
1774     ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1775              tx_src_param(tx, &tx->insn.src[1]),
1776              tx_src_param(tx, &tx->insn.src[2]));
1777     return D3D_OK;
1778 }
1779 
DECL_SPECIAL(CALL)1780 DECL_SPECIAL(CALL)
1781 {
1782     assert(tx->insn.src[0].idx < tx->num_inst_labels);
1783     ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1784     return D3D_OK;
1785 }
1786 
DECL_SPECIAL(CALLNZ)1787 DECL_SPECIAL(CALLNZ)
1788 {
1789     struct ureg_program *ureg = tx->ureg;
1790     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1791 
1792     if (!tx->native_integers)
1793         ureg_IF(ureg, src, tx_cond(tx));
1794     else
1795         ureg_UIF(ureg, src, tx_cond(tx));
1796     ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1797     tx_endcond(tx);
1798     ureg_ENDIF(ureg);
1799     return D3D_OK;
1800 }
1801 
DECL_SPECIAL(LOOP)1802 DECL_SPECIAL(LOOP)
1803 {
1804     struct ureg_program *ureg = tx->ureg;
1805     unsigned *label;
1806     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1807     struct ureg_dst ctr;
1808     struct ureg_dst aL;
1809     struct ureg_dst tmp;
1810     struct ureg_src ctrx;
1811 
1812     label = tx_bgnloop(tx);
1813     ctr = tx_get_loopctr(tx, true);
1814     aL = tx_get_loopal(tx);
1815     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1816 
1817     /* src: num_iterations*/
1818     ureg_MOV(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0),
1819              ureg_scalar(src, TGSI_SWIZZLE_X));
1820     /* al: unused - start_value of al - step for al - unused */
1821     ureg_MOV(ureg, aL, src);
1822     ureg_BGNLOOP(tx->ureg, label);
1823     tmp = tx_scratch_scalar(tx);
1824     /* Initially ctr.x contains the number of iterations.
1825      * ctr.y will contain the updated value of al.
1826      * We decrease ctr.x at the end of every iteration,
1827      * and stop when it reaches 0. */
1828 
1829     if (!tx->native_integers) {
1830         /* case src and ctr contain floats */
1831         /* to avoid precision issue, we stop when ctr <= 0.5 */
1832         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1833         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1834     } else {
1835         /* case src and ctr contain integers */
1836         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1837         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1838     }
1839     ureg_BRK(ureg);
1840     tx_endcond(tx);
1841     ureg_ENDIF(ureg);
1842     return D3D_OK;
1843 }
1844 
DECL_SPECIAL(RET)1845 DECL_SPECIAL(RET)
1846 {
1847     /* RET as a last instruction could be safely ignored.
1848      * Remove it to prevent crashes/warnings in case underlying
1849      * driver doesn't implement arbitrary returns.
1850      */
1851     if (*(tx->parse_next) != NINED3DSP_END) {
1852         ureg_RET(tx->ureg);
1853     }
1854     return D3D_OK;
1855 }
1856 
DECL_SPECIAL(ENDLOOP)1857 DECL_SPECIAL(ENDLOOP)
1858 {
1859     struct ureg_program *ureg = tx->ureg;
1860     struct ureg_dst ctr = tx_get_loopctr(tx, true);
1861     struct ureg_dst al = tx_get_loopal(tx);
1862     struct ureg_dst dst_ctrx, dst_al;
1863     struct ureg_src src_ctr, al_counter;
1864 
1865     dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1866     dst_al = ureg_writemask(al, NINED3DSP_WRITEMASK_1);
1867     src_ctr = ureg_src(ctr);
1868     al_counter = ureg_scalar(ureg_src(al), TGSI_SWIZZLE_Z);
1869 
1870     /* ctr.x -= 1
1871      * al.y (aL) += step */
1872     if (!tx->native_integers) {
1873         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1874         ureg_ADD(ureg, dst_al, ureg_src(al), al_counter);
1875     } else {
1876         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1877         ureg_UADD(ureg, dst_al, ureg_src(al), al_counter);
1878     }
1879     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1880     return D3D_OK;
1881 }
1882 
DECL_SPECIAL(LABEL)1883 DECL_SPECIAL(LABEL)
1884 {
1885     unsigned k = tx->num_inst_labels;
1886     unsigned n = tx->insn.src[0].idx;
1887     assert(n < 2048);
1888     if (n >= k)
1889        tx->inst_labels = REALLOC(tx->inst_labels,
1890                                  k * sizeof(tx->inst_labels[0]),
1891                                  n * sizeof(tx->inst_labels[0]));
1892 
1893     tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1894     return D3D_OK;
1895 }
1896 
DECL_SPECIAL(SINCOS)1897 DECL_SPECIAL(SINCOS)
1898 {
1899     struct ureg_program *ureg = tx->ureg;
1900     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1901     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1902     struct ureg_dst tmp = tx_scratch_scalar(tx);
1903 
1904     assert(!(dst.WriteMask & 0xc));
1905 
1906     /* Copying to a temporary register avoids src/dst aliasing.
1907      * src is supposed to have replicated swizzle. */
1908     ureg_MOV(ureg, tmp, src);
1909 
1910     /* z undefined, w untouched */
1911     ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1912              tx_src_scalar(tmp));
1913     ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1914              tx_src_scalar(tmp));
1915     return D3D_OK;
1916 }
1917 
DECL_SPECIAL(SGN)1918 DECL_SPECIAL(SGN)
1919 {
1920     ureg_SSG(tx->ureg,
1921              tx_dst_param(tx, &tx->insn.dst[0]),
1922              tx_src_param(tx, &tx->insn.src[0]));
1923     return D3D_OK;
1924 }
1925 
DECL_SPECIAL(REP)1926 DECL_SPECIAL(REP)
1927 {
1928     struct ureg_program *ureg = tx->ureg;
1929     unsigned *label;
1930     struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1931     struct ureg_dst ctr;
1932     struct ureg_dst tmp;
1933     struct ureg_src ctrx;
1934 
1935     label = tx_bgnloop(tx);
1936     ctr = ureg_writemask(tx_get_loopctr(tx, false), NINED3DSP_WRITEMASK_0);
1937     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1938 
1939     /* NOTE: rep must be constant, so we don't have to save the count */
1940     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1941 
1942     /* rep: num_iterations - 0 - 0 - 0 */
1943     ureg_MOV(ureg, ctr, rep);
1944     ureg_BGNLOOP(ureg, label);
1945     tmp = tx_scratch_scalar(tx);
1946     /* Initially ctr.x contains the number of iterations.
1947      * We decrease ctr.x at the end of every iteration,
1948      * and stop when it reaches 0. */
1949 
1950     if (!tx->native_integers) {
1951         /* case src and ctr contain floats */
1952         /* to avoid precision issue, we stop when ctr <= 0.5 */
1953         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1954         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1955     } else {
1956         /* case src and ctr contain integers */
1957         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1958         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1959     }
1960     ureg_BRK(ureg);
1961     tx_endcond(tx);
1962     ureg_ENDIF(ureg);
1963 
1964     return D3D_OK;
1965 }
1966 
DECL_SPECIAL(ENDREP)1967 DECL_SPECIAL(ENDREP)
1968 {
1969     struct ureg_program *ureg = tx->ureg;
1970     struct ureg_dst ctr = tx_get_loopctr(tx, false);
1971     struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1972     struct ureg_src src_ctr = ureg_src(ctr);
1973 
1974     /* ctr.x -= 1 */
1975     if (!tx->native_integers)
1976         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1977     else
1978         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1979 
1980     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1981     return D3D_OK;
1982 }
1983 
DECL_SPECIAL(ENDIF)1984 DECL_SPECIAL(ENDIF)
1985 {
1986     tx_endcond(tx);
1987     ureg_ENDIF(tx->ureg);
1988     return D3D_OK;
1989 }
1990 
DECL_SPECIAL(IF)1991 DECL_SPECIAL(IF)
1992 {
1993     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1994 
1995     if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1996         ureg_UIF(tx->ureg, src, tx_cond(tx));
1997     else
1998         ureg_IF(tx->ureg, src, tx_cond(tx));
1999 
2000     return D3D_OK;
2001 }
2002 
2003 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)2004 sm1_insn_flags_to_tgsi_setop(BYTE flags)
2005 {
2006     switch (flags) {
2007     case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
2008     case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
2009     case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
2010     case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
2011     case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
2012     case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
2013     default:
2014         assert(!"invalid comparison flags");
2015         return TGSI_OPCODE_SGT;
2016     }
2017 }
2018 
DECL_SPECIAL(IFC)2019 DECL_SPECIAL(IFC)
2020 {
2021     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2022     struct ureg_src src[2];
2023     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2024     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2025     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2026     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2027     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2028     return D3D_OK;
2029 }
2030 
DECL_SPECIAL(ELSE)2031 DECL_SPECIAL(ELSE)
2032 {
2033     ureg_ELSE(tx->ureg, tx_elsecond(tx));
2034     return D3D_OK;
2035 }
2036 
DECL_SPECIAL(BREAKC)2037 DECL_SPECIAL(BREAKC)
2038 {
2039     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2040     struct ureg_src src[2];
2041     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2042     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2043     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2044     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2045     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2046     ureg_BRK(tx->ureg);
2047     tx_endcond(tx);
2048     ureg_ENDIF(tx->ureg);
2049     return D3D_OK;
2050 }
2051 
2052 static const char *sm1_declusage_names[] =
2053 {
2054     [D3DDECLUSAGE_POSITION] = "POSITION",
2055     [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2056     [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2057     [D3DDECLUSAGE_NORMAL] = "NORMAL",
2058     [D3DDECLUSAGE_PSIZE] = "PSIZE",
2059     [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2060     [D3DDECLUSAGE_TANGENT] = "TANGENT",
2061     [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2062     [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2063     [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2064     [D3DDECLUSAGE_COLOR] = "COLOR",
2065     [D3DDECLUSAGE_FOG] = "FOG",
2066     [D3DDECLUSAGE_DEPTH] = "DEPTH",
2067     [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2068 };
2069 
2070 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)2071 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2072 {
2073     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2074 }
2075 
2076 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,bool tc,struct sm1_semantic * dcl)2077 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2078                       bool tc,
2079                       struct sm1_semantic *dcl)
2080 {
2081     BYTE index = dcl->usage_idx;
2082 
2083     /* For everything that is not matching to a TGSI_SEMANTIC_****,
2084      * we match to a TGSI_SEMANTIC_GENERIC with index.
2085      *
2086      * The index can be anything UINT16 and usage_idx is BYTE,
2087      * so we can fit everything. It doesn't matter if indices
2088      * are close together or low.
2089      *
2090      *
2091      * POSITION >= 1: 10 * index + 7
2092      * COLOR >= 2: 10 * (index-1) + 8
2093      * FOG: 16
2094      * TEXCOORD[0..15]: index
2095      * BLENDWEIGHT: 10 * index + 19
2096      * BLENDINDICES: 10 * index + 20
2097      * NORMAL: 10 * index + 21
2098      * TANGENT: 10 * index + 22
2099      * BINORMAL: 10 * index + 23
2100      * TESSFACTOR: 10 * index + 24
2101      */
2102 
2103     switch (dcl->usage) {
2104     case D3DDECLUSAGE_POSITION:
2105     case D3DDECLUSAGE_POSITIONT:
2106     case D3DDECLUSAGE_DEPTH:
2107         if (index == 0) {
2108             sem->Name = TGSI_SEMANTIC_POSITION;
2109             sem->Index = 0;
2110         } else {
2111             sem->Name = TGSI_SEMANTIC_GENERIC;
2112             sem->Index = 10 * index + 7;
2113         }
2114         break;
2115     case D3DDECLUSAGE_COLOR:
2116         if (index < 2) {
2117             sem->Name = TGSI_SEMANTIC_COLOR;
2118             sem->Index = index;
2119         } else {
2120             sem->Name = TGSI_SEMANTIC_GENERIC;
2121             sem->Index = 10 * (index-1) + 8;
2122         }
2123         break;
2124     case D3DDECLUSAGE_FOG:
2125         assert(index == 0);
2126         sem->Name = TGSI_SEMANTIC_GENERIC;
2127         sem->Index = 16;
2128         break;
2129     case D3DDECLUSAGE_PSIZE:
2130         assert(index == 0);
2131         sem->Name = TGSI_SEMANTIC_PSIZE;
2132         sem->Index = 0;
2133         break;
2134     case D3DDECLUSAGE_TEXCOORD:
2135         assert(index < 16);
2136         if (index < 8 && tc)
2137             sem->Name = TGSI_SEMANTIC_TEXCOORD;
2138         else
2139             sem->Name = TGSI_SEMANTIC_GENERIC;
2140         sem->Index = index;
2141         break;
2142     case D3DDECLUSAGE_BLENDWEIGHT:
2143         sem->Name = TGSI_SEMANTIC_GENERIC;
2144         sem->Index = 10 * index + 19;
2145         break;
2146     case D3DDECLUSAGE_BLENDINDICES:
2147         sem->Name = TGSI_SEMANTIC_GENERIC;
2148         sem->Index = 10 * index + 20;
2149         break;
2150     case D3DDECLUSAGE_NORMAL:
2151         sem->Name = TGSI_SEMANTIC_GENERIC;
2152         sem->Index = 10 * index + 21;
2153         break;
2154     case D3DDECLUSAGE_TANGENT:
2155         sem->Name = TGSI_SEMANTIC_GENERIC;
2156         sem->Index = 10 * index + 22;
2157         break;
2158     case D3DDECLUSAGE_BINORMAL:
2159         sem->Name = TGSI_SEMANTIC_GENERIC;
2160         sem->Index = 10 * index + 23;
2161         break;
2162     case D3DDECLUSAGE_TESSFACTOR:
2163         sem->Name = TGSI_SEMANTIC_GENERIC;
2164         sem->Index = 10 * index + 24;
2165         break;
2166     case D3DDECLUSAGE_SAMPLE:
2167         sem->Name = TGSI_SEMANTIC_COUNT;
2168         sem->Index = 0;
2169         break;
2170     default:
2171         unreachable("Invalid DECLUSAGE.");
2172         break;
2173     }
2174 }
2175 
/* D3DSTT_* sampler texture types shifted down by D3DSP_TEXTURETYPE_SHIFT. */
#define NINED3DSTT_1D     (D3DSTT_1D     >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D     >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE   >> D3DSP_TEXTURETYPE_SHIFT)
2180 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2181 d3dstt_to_tgsi_tex(BYTE sampler_type)
2182 {
2183     switch (sampler_type) {
2184     case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
2185     case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
2186     case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2187     case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
2188     default:
2189         assert(0);
2190         return TGSI_TEXTURE_UNKNOWN;
2191     }
2192 }
2193 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2194 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2195 {
2196     switch (sampler_type) {
2197     case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2198     case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2199     case NINED3DSTT_VOLUME:
2200     case NINED3DSTT_CUBE:
2201     default:
2202         assert(0);
2203         return TGSI_TEXTURE_UNKNOWN;
2204     }
2205 }
2206 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2207 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2208 {
2209     bool shadow = !!(info->sampler_mask_shadow & (1 << stage));
2210     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2211     case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2212     case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2213     case 3: return TGSI_TEXTURE_3D;
2214     default:
2215         return TGSI_TEXTURE_CUBE;
2216     }
2217 }
2218 
2219 static const char *
sm1_sampler_type_name(BYTE sampler_type)2220 sm1_sampler_type_name(BYTE sampler_type)
2221 {
2222     switch (sampler_type) {
2223     case NINED3DSTT_1D:     return "1D";
2224     case NINED3DSTT_2D:     return "2D";
2225     case NINED3DSTT_VOLUME: return "VOLUME";
2226     case NINED3DSTT_CUBE:   return "CUBE";
2227     default:
2228         return "(D3DSTT_?)";
2229     }
2230 }
2231 
2232 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2233 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2234 {
2235     switch (sem->Name) {
2236     case TGSI_SEMANTIC_POSITION:
2237     case TGSI_SEMANTIC_NORMAL:
2238         return TGSI_INTERPOLATE_LINEAR;
2239     case TGSI_SEMANTIC_BCOLOR:
2240     case TGSI_SEMANTIC_COLOR:
2241         return TGSI_INTERPOLATE_COLOR;
2242     case TGSI_SEMANTIC_FOG:
2243     case TGSI_SEMANTIC_GENERIC:
2244     case TGSI_SEMANTIC_TEXCOORD:
2245     case TGSI_SEMANTIC_CLIPDIST:
2246     case TGSI_SEMANTIC_CLIPVERTEX:
2247         return TGSI_INTERPOLATE_PERSPECTIVE;
2248     case TGSI_SEMANTIC_EDGEFLAG:
2249     case TGSI_SEMANTIC_FACE:
2250     case TGSI_SEMANTIC_INSTANCEID:
2251     case TGSI_SEMANTIC_PCOORD:
2252     case TGSI_SEMANTIC_PRIMID:
2253     case TGSI_SEMANTIC_PSIZE:
2254     case TGSI_SEMANTIC_VERTEXID:
2255         return TGSI_INTERPOLATE_CONSTANT;
2256     default:
2257         assert(0);
2258         return TGSI_INTERPOLATE_CONSTANT;
2259     }
2260 }
2261 
DECL_SPECIAL(DCL)2262 DECL_SPECIAL(DCL)
2263 {
2264     struct ureg_program *ureg = tx->ureg;
2265     bool is_input;
2266     bool is_sampler;
2267     struct tgsi_declaration_semantic tgsi;
2268     struct sm1_semantic sem;
2269     sm1_read_semantic(tx, &sem);
2270 
2271     is_input = sem.reg.file == D3DSPR_INPUT;
2272     is_sampler =
2273         sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2274 
2275     DUMP("DCL ");
2276     sm1_dump_dst_param(&sem.reg);
2277     if (is_sampler)
2278         DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2279     else
2280     if (tx->version.major >= 3)
2281         DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2282     else
2283     if (sem.usage | sem.usage_idx)
2284         DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2285     else
2286         DUMP("\n");
2287 
2288     if (is_sampler) {
2289         const unsigned m = 1 << sem.reg.idx;
2290         ureg_DECL_sampler(ureg, sem.reg.idx);
2291         tx->info->sampler_mask |= m;
2292         tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2293             d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2294             d3dstt_to_tgsi_tex(sem.sampler_type);
2295         return D3D_OK;
2296     }
2297 
2298     sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2299     if (IS_VS) {
2300         if (is_input) {
2301             /* linkage outside of shader with vertex declaration */
2302             ureg_DECL_vs_input(ureg, sem.reg.idx);
2303             assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2304             tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2305             tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2306             /* NOTE: preserving order in case of indirect access */
2307         } else
2308         if (tx->version.major >= 3) {
2309             /* SM2 output semantic determined by file */
2310             assert(sem.reg.mask != 0);
2311             if (sem.usage == D3DDECLUSAGE_POSITIONT)
2312                 tx->info->position_t = true;
2313             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2314             assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2315             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2316                 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2317             nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2318             if ((tx->info->process_vertices || tx->info->clip_plane_emulation > 0) &&
2319                 sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2320                 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; /* TODO: probably not good declare it twice */
2321                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2322                 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2323             }
2324 
2325             if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2326                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2327                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2328             }
2329         }
2330     } else {
2331         if (is_input && tx->version.major >= 3) {
2332             unsigned interp_flag;
2333             unsigned interp_location = 0;
2334             /* SM3 only, SM2 input semantic determined by file */
2335             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2336             assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2337             /* PositionT and tessfactor forbidden */
2338             if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2339                 return D3DERR_INVALIDCALL;
2340 
2341             if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2342                 /* Position0 is forbidden (likely because vPos already does that) */
2343                 if (sem.usage == D3DDECLUSAGE_POSITION)
2344                     return D3DERR_INVALIDCALL;
2345                 /* Following code is for depth */
2346                 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2347                 return D3D_OK;
2348             }
2349 
2350             if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2351                 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2352                 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2353             interp_flag = nine_tgsi_to_interp_mode(&tgsi);
2354             /* We replace TGSI_INTERPOLATE_COLOR because some drivers don't support it,
2355              * and those who support it do the same replacement we do */
2356             if (interp_flag == TGSI_INTERPOLATE_COLOR)
2357                 interp_flag = tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2358 
2359             tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2360                 ureg, tgsi.Name, tgsi.Index,
2361                 interp_flag,
2362                 interp_location, 0, 1);
2363         } else
2364         if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2365             /* FragColor or FragDepth */
2366             assert(sem.reg.mask != 0);
2367             ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2368                                     0, 1);
2369         }
2370     }
2371     return D3D_OK;
2372 }
2373 
DECL_SPECIAL(DEF)2374 DECL_SPECIAL(DEF)
2375 {
2376     tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2377     return D3D_OK;
2378 }
2379 
DECL_SPECIAL(DEFB)2380 DECL_SPECIAL(DEFB)
2381 {
2382     tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2383     return D3D_OK;
2384 }
2385 
DECL_SPECIAL(DEFI)2386 DECL_SPECIAL(DEFI)
2387 {
2388     tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2389     return D3D_OK;
2390 }
2391 
DECL_SPECIAL(POW)2392 DECL_SPECIAL(POW)
2393 {
2394     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2395     struct ureg_src src[2] = {
2396         tx_src_param(tx, &tx->insn.src[0]),
2397         tx_src_param(tx, &tx->insn.src[1])
2398     };
2399     /* Anything^0 is 1, including 0^0.
2400      * Assume mul_zero_wins drivers already have
2401      * this behaviour. Emulate for the others. */
2402     if (tx->mul_zero_wins) {
2403         ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2404     } else {
2405         struct ureg_dst tmp = tx_scratch_scalar(tx);
2406         ureg_POW(tx->ureg, tmp, ureg_abs(src[0]), src[1]);
2407         ureg_CMP(tx->ureg, dst,
2408              ureg_negate(ureg_abs(ureg_scalar(src[1], TGSI_SWIZZLE_X))),
2409              tx_src_scalar(tmp), ureg_imm1f(tx->ureg, 1.0f));
2410     }
2411     return D3D_OK;
2412 }
2413 
/* Test results on Windows 10:
 * NV (NVIDIA GeForce GT 635M)
 * AMD (AMD Radeon HD 7730M)
 * INTEL (Intel(R) HD Graphics 4000)
 * PS2 and PS3:
 * RCP and RSQ can generate inf on NV and AMD.
 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
 * NV: log is not clamped
 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
 * All devices have 0*anything = 0
 *
 * INTEL VS2 and VS3: same behaviour.
 * There are some differences between VS2 and VS3 for constants defined
 * with inf/NaN.
 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
 * VS2 seems to clamp to zero (may be a test failure).
 * AMD VS2: unknown, VS3: very likely the behaviour of PS3
 * NV VS2 and VS3: very likely the behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
 * "Very likely" because the test was less extensive.
 *
 * Thus all clamping can be removed for shaders 2 and 3,
 * as long as 0*anything = 0.
 * Otherwise, clamp to enforce 0*anything = 0 (anything then being
 * neither inf nor NaN, the user being unlikely to pass them
 * as constants).
 * The status for VS1 and PS1 is unknown.
 */
2442 
DECL_SPECIAL(RCP)2443 DECL_SPECIAL(RCP)
2444 {
2445     struct ureg_program *ureg = tx->ureg;
2446     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2447     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2448     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2449     ureg_RCP(ureg, tmp, src);
2450     if (!tx->mul_zero_wins) {
2451         /* FLT_MAX has issues with Rayman */
2452         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2453         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2454     }
2455     return D3D_OK;
2456 }
2457 
DECL_SPECIAL(RSQ)2458 DECL_SPECIAL(RSQ)
2459 {
2460     struct ureg_program *ureg = tx->ureg;
2461     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2462     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2463     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2464     ureg_RSQ(ureg, tmp, ureg_abs(src));
2465     if (!tx->mul_zero_wins)
2466         ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2467     return D3D_OK;
2468 }
2469 
DECL_SPECIAL(LOG)2470 DECL_SPECIAL(LOG)
2471 {
2472     struct ureg_program *ureg = tx->ureg;
2473     struct ureg_dst tmp = tx_scratch_scalar(tx);
2474     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2475     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2476     ureg_LG2(ureg, tmp, ureg_abs(src));
2477     if (tx->mul_zero_wins) {
2478         ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2479     } else {
2480         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2481     }
2482     return D3D_OK;
2483 }
2484 
DECL_SPECIAL(LIT)2485 DECL_SPECIAL(LIT)
2486 {
2487     struct ureg_program *ureg = tx->ureg;
2488     struct ureg_dst tmp = tx_scratch(tx);
2489     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2490     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2491     ureg_LIT(ureg, tmp, src);
2492     /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2493      * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2494      * it 0^0 if src.w=0, which value is driver dependent. */
2495     ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2496              ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2497              ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2498     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2499     return D3D_OK;
2500 }
2501 
DECL_SPECIAL(NRM)2502 DECL_SPECIAL(NRM)
2503 {
2504     struct ureg_program *ureg = tx->ureg;
2505     struct ureg_dst tmp = tx_scratch_scalar(tx);
2506     struct ureg_src nrm = tx_src_scalar(tmp);
2507     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2508     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2509     ureg_DP3(ureg, tmp, src, src);
2510     ureg_RSQ(ureg, tmp, nrm);
2511     if (!tx->mul_zero_wins)
2512         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2513     ureg_MUL(ureg, dst, src, nrm);
2514     return D3D_OK;
2515 }
2516 
DECL_SPECIAL(DP2ADD)2517 DECL_SPECIAL(DP2ADD)
2518 {
2519     struct ureg_dst tmp = tx_scratch_scalar(tx);
2520     struct ureg_src dp2 = tx_src_scalar(tmp);
2521     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2522     struct ureg_src src[3];
2523     int i;
2524     for (i = 0; i < 3; ++i)
2525         src[i] = tx_src_param(tx, &tx->insn.src[i]);
2526     assert_replicate_swizzle(&src[2]);
2527 
2528     ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2529     ureg_ADD(tx->ureg, dst, src[2], dp2);
2530 
2531     return D3D_OK;
2532 }
2533 
DECL_SPECIAL(TEXCOORD)2534 DECL_SPECIAL(TEXCOORD)
2535 {
2536     struct ureg_program *ureg = tx->ureg;
2537     const unsigned s = tx->insn.dst[0].idx;
2538     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2539 
2540     tx_texcoord_alloc(tx, s);
2541     ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2542     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2543 
2544     return D3D_OK;
2545 }
2546 
DECL_SPECIAL(TEXCOORD_ps14)2547 DECL_SPECIAL(TEXCOORD_ps14)
2548 {
2549     struct ureg_program *ureg = tx->ureg;
2550     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2551     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2552 
2553     assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2554 
2555     ureg_MOV(ureg, dst, src);
2556 
2557     return D3D_OK;
2558 }
2559 
DECL_SPECIAL(TEXKILL)2560 DECL_SPECIAL(TEXKILL)
2561 {
2562     struct ureg_src reg;
2563 
2564     if (tx->version.major > 1 || tx->version.minor > 3) {
2565         reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2566     } else {
2567         tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2568         reg = tx->regs.vT[tx->insn.dst[0].idx];
2569     }
2570     if (tx->version.major < 2)
2571         reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2572     ureg_KILL_IF(tx->ureg, reg);
2573 
2574     return D3D_OK;
2575 }
2576 
DECL_SPECIAL(TEXBEM)2577 DECL_SPECIAL(TEXBEM)
2578 {
2579     struct ureg_program *ureg = tx->ureg;
2580     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2581     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2582     struct ureg_dst tmp, tmp2, texcoord;
2583     struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2584     struct ureg_src bumpenvlscale, bumpenvloffset;
2585     const int m = tx->insn.dst[0].idx;
2586 
2587     assert(tx->version.major == 1);
2588 
2589     sample = ureg_DECL_sampler(ureg, m);
2590     tx->info->sampler_mask |= 1 << m;
2591 
2592     tx_texcoord_alloc(tx, m);
2593 
2594     tmp = tx_scratch(tx);
2595     tmp2 = tx_scratch(tx);
2596     texcoord = tx_scratch(tx);
2597     /*
2598      * Bump-env-matrix:
2599      * 00 is X
2600      * 01 is Y
2601      * 10 is Z
2602      * 11 is W
2603      */
2604     c8m = nine_special_constant_src(tx, m);
2605     c16m2 = nine_special_constant_src(tx, 8+m/2);
2606 
2607     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2608     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2609     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2610     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2611 
2612     /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2613     if (m % 2 == 0) {
2614         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2615         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2616     } else {
2617         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2618         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2619     }
2620 
2621     apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2622 
2623     /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
2624     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2625              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2626     /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2627     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2628              NINE_APPLY_SWIZZLE(src, Y),
2629              NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2630 
2631     /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2632     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2633              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2634     /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2635     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2636              NINE_APPLY_SWIZZLE(src, Y),
2637              NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2638 
2639     /* Now the texture coordinates are in tmp.xy */
2640 
2641     if (tx->insn.opcode == D3DSIO_TEXBEM) {
2642         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2643     } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2644         /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2645         ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2646         ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2647                  bumpenvlscale, bumpenvloffset);
2648         ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2649     }
2650 
2651     tx->info->bumpenvmat_needed = 1;
2652 
2653     return D3D_OK;
2654 }
2655 
DECL_SPECIAL(TEXREG2AR)2656 DECL_SPECIAL(TEXREG2AR)
2657 {
2658     struct ureg_program *ureg = tx->ureg;
2659     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2660     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2661     struct ureg_src sample;
2662     const int m = tx->insn.dst[0].idx;
2663     ASSERTED const int n = tx->insn.src[0].idx;
2664     assert(m >= 0 && m > n);
2665 
2666     sample = ureg_DECL_sampler(ureg, m);
2667     tx->info->sampler_mask |= 1 << m;
2668     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2669 
2670     return D3D_OK;
2671 }
2672 
DECL_SPECIAL(TEXREG2GB)2673 DECL_SPECIAL(TEXREG2GB)
2674 {
2675     struct ureg_program *ureg = tx->ureg;
2676     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2677     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2678     struct ureg_src sample;
2679     const int m = tx->insn.dst[0].idx;
2680     ASSERTED const int n = tx->insn.src[0].idx;
2681     assert(m >= 0 && m > n);
2682 
2683     sample = ureg_DECL_sampler(ureg, m);
2684     tx->info->sampler_mask |= 1 << m;
2685     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2686 
2687     return D3D_OK;
2688 }
2689 
DECL_SPECIAL(TEXM3x2PAD)2690 DECL_SPECIAL(TEXM3x2PAD)
2691 {
2692     return D3D_OK; /* this is just padding */
2693 }
2694 
DECL_SPECIAL(TEXM3x2TEX)2695 DECL_SPECIAL(TEXM3x2TEX)
2696 {
2697     struct ureg_program *ureg = tx->ureg;
2698     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2699     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2700     struct ureg_src sample;
2701     const int m = tx->insn.dst[0].idx - 1;
2702     ASSERTED const int n = tx->insn.src[0].idx;
2703     assert(m >= 0 && m > n);
2704 
2705     tx_texcoord_alloc(tx, m);
2706     tx_texcoord_alloc(tx, m+1);
2707 
2708     /* performs the matrix multiplication */
2709     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2710     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2711 
2712     sample = ureg_DECL_sampler(ureg, m + 1);
2713     tx->info->sampler_mask |= 1 << (m + 1);
2714     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2715 
2716     return D3D_OK;
2717 }
2718 
DECL_SPECIAL(TEXM3x3PAD)2719 DECL_SPECIAL(TEXM3x3PAD)
2720 {
2721     return D3D_OK; /* this is just padding */
2722 }
2723 
DECL_SPECIAL(TEXM3x3SPEC)2724 DECL_SPECIAL(TEXM3x3SPEC)
2725 {
2726     struct ureg_program *ureg = tx->ureg;
2727     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2728     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2729     struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2730     struct ureg_src sample;
2731     struct ureg_dst tmp;
2732     const int m = tx->insn.dst[0].idx - 2;
2733     ASSERTED const int n = tx->insn.src[0].idx;
2734     assert(m >= 0 && m > n);
2735 
2736     tx_texcoord_alloc(tx, m);
2737     tx_texcoord_alloc(tx, m+1);
2738     tx_texcoord_alloc(tx, m+2);
2739 
2740     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2741     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2742     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2743 
2744     sample = ureg_DECL_sampler(ureg, m + 2);
2745     tx->info->sampler_mask |= 1 << (m + 2);
2746     tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2747 
2748     /* At this step, dst = N = (u', w', z').
2749      * We want dst to be the texture sampled at (u'', w'', z''), with
2750      * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2751     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2752     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2753     /* at this step tmp.x = 1/N.N */
2754     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2755     /* at this step tmp.y = N.E */
2756     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2757     /* at this step tmp.x = N.E/N.N */
2758     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2759     ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2760     /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2761     ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2762     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2763 
2764     return D3D_OK;
2765 }
2766 
DECL_SPECIAL(TEXREG2RGB)2767 DECL_SPECIAL(TEXREG2RGB)
2768 {
2769     struct ureg_program *ureg = tx->ureg;
2770     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2771     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2772     struct ureg_src sample;
2773     const int m = tx->insn.dst[0].idx;
2774     ASSERTED const int n = tx->insn.src[0].idx;
2775     assert(m >= 0 && m > n);
2776 
2777     sample = ureg_DECL_sampler(ureg, m);
2778     tx->info->sampler_mask |= 1 << m;
2779     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2780 
2781     return D3D_OK;
2782 }
2783 
DECL_SPECIAL(TEXDP3TEX)2784 DECL_SPECIAL(TEXDP3TEX)
2785 {
2786     struct ureg_program *ureg = tx->ureg;
2787     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2788     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2789     struct ureg_dst tmp;
2790     struct ureg_src sample;
2791     const int m = tx->insn.dst[0].idx;
2792     ASSERTED const int n = tx->insn.src[0].idx;
2793     assert(m >= 0 && m > n);
2794 
2795     tx_texcoord_alloc(tx, m);
2796 
2797     tmp = tx_scratch(tx);
2798     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2799     ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2800 
2801     sample = ureg_DECL_sampler(ureg, m);
2802     tx->info->sampler_mask |= 1 << m;
2803     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2804 
2805     return D3D_OK;
2806 }
2807 
DECL_SPECIAL(TEXM3x2DEPTH)2808 DECL_SPECIAL(TEXM3x2DEPTH)
2809 {
2810     struct ureg_program *ureg = tx->ureg;
2811     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2812     struct ureg_dst tmp;
2813     const int m = tx->insn.dst[0].idx - 1;
2814     ASSERTED const int n = tx->insn.src[0].idx;
2815     assert(m >= 0 && m > n);
2816 
2817     tx_texcoord_alloc(tx, m);
2818     tx_texcoord_alloc(tx, m+1);
2819 
2820     tmp = tx_scratch(tx);
2821 
2822     /* performs the matrix multiplication */
2823     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2824     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2825 
2826     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2827     /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2828     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2829     /* res = 'w' == 0 ? 1.0 : z/w */
2830     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2831              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2832     /* replace the depth for depth testing with the result */
2833     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2834                                               TGSI_WRITEMASK_Z, 0, 1);
2835     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2836     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2837     return D3D_OK;
2838 }
2839 
DECL_SPECIAL(TEXDP3)2840 DECL_SPECIAL(TEXDP3)
2841 {
2842     struct ureg_program *ureg = tx->ureg;
2843     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2844     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2845     const int m = tx->insn.dst[0].idx;
2846     ASSERTED const int n = tx->insn.src[0].idx;
2847     assert(m >= 0 && m > n);
2848 
2849     tx_texcoord_alloc(tx, m);
2850 
2851     ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2852 
2853     return D3D_OK;
2854 }
2855 
DECL_SPECIAL(TEXM3x3)2856 DECL_SPECIAL(TEXM3x3)
2857 {
2858     struct ureg_program *ureg = tx->ureg;
2859     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2860     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2861     struct ureg_src sample;
2862     struct ureg_dst E, tmp;
2863     const int m = tx->insn.dst[0].idx - 2;
2864     ASSERTED const int n = tx->insn.src[0].idx;
2865     assert(m >= 0 && m > n);
2866 
2867     tx_texcoord_alloc(tx, m);
2868     tx_texcoord_alloc(tx, m+1);
2869     tx_texcoord_alloc(tx, m+2);
2870 
2871     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2872     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2873     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2874 
2875     switch (tx->insn.opcode) {
2876     case D3DSIO_TEXM3x3:
2877         ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2878         break;
2879     case D3DSIO_TEXM3x3TEX:
2880         sample = ureg_DECL_sampler(ureg, m + 2);
2881         tx->info->sampler_mask |= 1 << (m + 2);
2882         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2883         break;
2884     case D3DSIO_TEXM3x3VSPEC:
2885         sample = ureg_DECL_sampler(ureg, m + 2);
2886         tx->info->sampler_mask |= 1 << (m + 2);
2887         E = tx_scratch(tx);
2888         tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2889         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2890         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2891         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2892         /* At this step, dst = N = (u', w', z').
2893          * We want dst to be the texture sampled at (u'', w'', z''), with
2894          * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2895         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2896         ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2897         /* at this step tmp.x = 1/N.N */
2898         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2899         /* at this step tmp.y = N.E */
2900         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2901         /* at this step tmp.x = N.E/N.N */
2902         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2903         ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2904         /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2905         ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2906         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2907         break;
2908     default:
2909         return D3DERR_INVALIDCALL;
2910     }
2911     return D3D_OK;
2912 }
2913 
DECL_SPECIAL(TEXDEPTH)2914 DECL_SPECIAL(TEXDEPTH)
2915 {
2916     struct ureg_program *ureg = tx->ureg;
2917     struct ureg_dst r5;
2918     struct ureg_src r5r, r5g;
2919 
2920     assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2921 
2922     /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2923      * r5 won't be used afterward, thus we can use r5.ba */
2924     r5 = tx->regs.r[5];
2925     r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2926     r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2927 
2928     ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2929     ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2930     /* r5.r = r/g */
2931     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2932              r5r, ureg_imm1f(ureg, 1.0f));
2933     /* replace the depth for depth testing with the result */
2934     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2935                                               TGSI_WRITEMASK_Z, 0, 1);
2936     ureg_MOV(ureg, tx->regs.oDepth, r5r);
2937 
2938     return D3D_OK;
2939 }
2940 
DECL_SPECIAL(BEM)2941 DECL_SPECIAL(BEM)
2942 {
2943     struct ureg_program *ureg = tx->ureg;
2944     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2945     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2946     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2947     struct ureg_src m00, m01, m10, m11, c8m;
2948     const int m = tx->insn.dst[0].idx;
2949     struct ureg_dst tmp = tx_scratch(tx);
2950     /*
2951      * Bump-env-matrix:
2952      * 00 is X
2953      * 01 is Y
2954      * 10 is Z
2955      * 11 is W
2956      */
2957     c8m = nine_special_constant_src(tx, m);
2958     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2959     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2960     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2961     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2962     /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
2963     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2964              NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2965     /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2966     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2967              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2968 
2969     /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2970     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2971              NINE_APPLY_SWIZZLE(src1, X), src0);
2972     /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2973     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2974              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2975     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2976 
2977     tx->info->bumpenvmat_needed = 1;
2978 
2979     return D3D_OK;
2980 }
2981 
DECL_SPECIAL(TEXLD)2982 DECL_SPECIAL(TEXLD)
2983 {
2984     struct ureg_program *ureg = tx->ureg;
2985     unsigned target;
2986     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2987     struct ureg_src src[2] = {
2988         tx_src_param(tx, &tx->insn.src[0]),
2989         tx_src_param(tx, &tx->insn.src[1])
2990     };
2991     assert(tx->insn.src[1].idx >= 0 &&
2992            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2993     target = tx->sampler_targets[tx->insn.src[1].idx];
2994 
2995     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
2996         return D3D_OK;
2997 
2998     switch (tx->insn.flags) {
2999     case 0:
3000         ureg_TEX(ureg, dst, target, src[0], src[1]);
3001         break;
3002     case NINED3DSI_TEXLD_PROJECT:
3003         ureg_TXP(ureg, dst, target, src[0], src[1]);
3004         break;
3005     case NINED3DSI_TEXLD_BIAS:
3006         ureg_TXB(ureg, dst, target, src[0], src[1]);
3007         break;
3008     default:
3009         assert(0);
3010         return D3DERR_INVALIDCALL;
3011     }
3012     return D3D_OK;
3013 }
3014 
DECL_SPECIAL(TEXLD_14)3015 DECL_SPECIAL(TEXLD_14)
3016 {
3017     struct ureg_program *ureg = tx->ureg;
3018     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3019     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3020     const unsigned s = tx->insn.dst[0].idx;
3021     const unsigned t = ps1x_sampler_type(tx->info, s);
3022 
3023     tx->info->sampler_mask |= 1 << s;
3024     ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
3025 
3026     return D3D_OK;
3027 }
3028 
DECL_SPECIAL(TEX)3029 DECL_SPECIAL(TEX)
3030 {
3031     struct ureg_program *ureg = tx->ureg;
3032     const unsigned s = tx->insn.dst[0].idx;
3033     const unsigned t = ps1x_sampler_type(tx->info, s);
3034     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3035     struct ureg_src src[2];
3036 
3037     tx_texcoord_alloc(tx, s);
3038 
3039     src[0] = tx->regs.vT[s];
3040     src[1] = ureg_DECL_sampler(ureg, s);
3041     tx->info->sampler_mask |= 1 << s;
3042 
3043     TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3044 
3045     return D3D_OK;
3046 }
3047 
DECL_SPECIAL(TEXLDD)3048 DECL_SPECIAL(TEXLDD)
3049 {
3050     unsigned target;
3051     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052     struct ureg_src src[4] = {
3053         tx_src_param(tx, &tx->insn.src[0]),
3054         tx_src_param(tx, &tx->insn.src[1]),
3055         tx_src_param(tx, &tx->insn.src[2]),
3056         tx_src_param(tx, &tx->insn.src[3])
3057     };
3058     assert(tx->insn.src[1].idx >= 0 &&
3059            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3060     target = tx->sampler_targets[tx->insn.src[1].idx];
3061 
3062     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3063         return D3D_OK;
3064 
3065     ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3066     return D3D_OK;
3067 }
3068 
DECL_SPECIAL(TEXLDL)3069 DECL_SPECIAL(TEXLDL)
3070 {
3071     unsigned target;
3072     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3073     struct ureg_src src[2] = {
3074        tx_src_param(tx, &tx->insn.src[0]),
3075        tx_src_param(tx, &tx->insn.src[1])
3076     };
3077     assert(tx->insn.src[1].idx >= 0 &&
3078            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3079     target = tx->sampler_targets[tx->insn.src[1].idx];
3080 
3081     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3082         return D3D_OK;
3083 
3084     ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3085     return D3D_OK;
3086 }
3087 
DECL_SPECIAL(SETP)3088 DECL_SPECIAL(SETP)
3089 {
3090     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3091     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3092     struct ureg_src src[2] = {
3093        tx_src_param(tx, &tx->insn.src[0]),
3094        tx_src_param(tx, &tx->insn.src[1])
3095     };
3096     ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3097     return D3D_OK;
3098 }
3099 
DECL_SPECIAL(BREAKP)3100 DECL_SPECIAL(BREAKP)
3101 {
3102     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3103     ureg_IF(tx->ureg, src, tx_cond(tx));
3104     ureg_BRK(tx->ureg);
3105     tx_endcond(tx);
3106     ureg_ENDIF(tx->ureg);
3107     return D3D_OK;
3108 }
3109 
DECL_SPECIAL(PHASE)3110 DECL_SPECIAL(PHASE)
3111 {
3112     return D3D_OK; /* we don't care about phase */
3113 }
3114 
DECL_SPECIAL(COMMENT)3115 DECL_SPECIAL(COMMENT)
3116 {
3117     return D3D_OK; /* nothing to do */
3118 }
3119 
3120 
/*
 * _OPI builds one struct sm1_op_info initializer:
 *   o        - D3D opcode name, expanded to D3DSIO_<o>
 *   t        - TGSI opcode name, expanded to TGSI_OPCODE_<t>
 *   vv1,vv2  - first version pair (presumably the min/max vertex-shader
 *              versions, i.e. vert_version - confirm against the struct)
 *   pv1,pv2  - second version pair (presumably the min/max pixel-shader
 *              versions, i.e. frag_version - confirm against the struct)
 *   d, s     - presumably the number of destination / source parameters
 *   h        - SPECIAL(...) handler, or NULL (presumably meaning the
 *              generic translation is used - confirm at the call site)
 */
3121 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
3122     { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
3123
/*
 * Instruction descriptor table. The same D3DSIO opcode may appear in several
 * rows with different version ranges (e.g. SINCOS, TEX, TEXCOORD, EXPP).
 * create_op_info_map() walks the rows in order and records the index of
 * every row whose range contains the shader version, so when several rows
 * of one opcode match, the last one wins.
 */
3124 static const struct sm1_op_info inst_table[] =
3125 {
3126     _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3127     _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3128     _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3129     _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3130     _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3131     _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3132     _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3133     _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3134     _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3135     _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3136     _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3137     _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3138     _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3139     _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3140     _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3141     _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3142     _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3143     _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3144     _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3145     _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3146
    /* Matrix multiply macro-instructions, all translated by handlers */
3147     _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3148     _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3149     _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3150     _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3151     _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3152
    /* Subroutines and loops */
3153     _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3154     _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3155     _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3156     _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3157     _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3158     _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3159
3160     _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3161
3162     _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3163     _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3164     _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3165     _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3166     _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3167
    /* Two SINCOS rows: 3 sources for versions V(2,0)..V(2,1), 1 source for V(3,0) */
3168     _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3169     _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3170
3171     /* More flow control */
3172     _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3173     _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3174     _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3175     _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3176     _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3177     _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3178     _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3179     _OPI(BREAKC, NOP,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3180     /* we don't write to the address register, but a normal register (copied
3181      * when needed to the address register), thus we don't use ARR */
3182     _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3183
3184     _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3185     _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3186
    /* Texture instructions. TEXCOORD and TEX have several rows with distinct
     * version ranges and operand counts. */
3187     _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3188     _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3189     _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3190     _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3191     _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3192     _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3193     _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3194     _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3195     _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3196     _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3197     _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3198     _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3199     _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3200     _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3201     _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3202     _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3203
3204     _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3205     _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3206     _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3207     _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3208
3209     _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3210
3211     /* More tex stuff */
3212     _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3213     _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3214     _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3215     _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3216     _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3217     _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3218
3219     /* Misc */
3220     _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3221     _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3222     _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3223     _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3224     _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3225     _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3226     _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3227     _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3228     _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3229 };
3230 
/* Stand-alone descriptor for the PHASE instruction, kept outside inst_table
 * (presumably because its opcode value does not fit op_info_map - confirm at
 * the lookup site). No parameters are parsed; the handler emits nothing. */
3231 static const struct sm1_op_info inst_phase =
3232     _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3233 
/* Stand-alone descriptor for the COMMENT pseudo-instruction, kept outside
 * inst_table (presumably because its opcode value does not fit op_info_map -
 * confirm at the lookup site). The handler emits nothing. */
3234 static const struct sm1_op_info inst_comment =
3235     _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3236 
3237 static void
create_op_info_map(struct shader_translator * tx)3238 create_op_info_map(struct shader_translator *tx)
3239 {
3240     const unsigned version = (tx->version.major << 8) | tx->version.minor;
3241     unsigned i;
3242 
3243     for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3244         tx->op_info_map[i] = -1;
3245 
3246     if (tx->processor == PIPE_SHADER_VERTEX) {
3247         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3248             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3249             if (inst_table[i].vert_version.min <= version &&
3250                 inst_table[i].vert_version.max >= version)
3251                 tx->op_info_map[inst_table[i].sio] = i;
3252         }
3253     } else {
3254         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3255             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3256             if (inst_table[i].frag_version.min <= version &&
3257                 inst_table[i].frag_version.max >= version)
3258                 tx->op_info_map[inst_table[i].sio] = i;
3259         }
3260     }
3261 }
3262 
3263 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3264 NineTranslateInstruction_Generic(struct shader_translator *tx)
3265 {
3266     struct ureg_dst dst[1];
3267     struct ureg_src src[4];
3268     unsigned i;
3269 
3270     for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3271         dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3272     for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3273         src[i] = tx_src_param(tx, &tx->insn.src[i]);
3274 
3275     ureg_insn(tx->ureg, tx->insn.info->opcode,
3276               dst, tx->insn.ndst,
3277               src, tx->insn.nsrc, 0);
3278     return D3D_OK;
3279 }
3280 
3281 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3282 TOKEN_PEEK(struct shader_translator *tx)
3283 {
3284     return *(tx->parse);
3285 }
3286 
3287 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3288 TOKEN_NEXT(struct shader_translator *tx)
3289 {
3290     return *(tx->parse)++;
3291 }
3292 
3293 static inline void
TOKEN_JUMP(struct shader_translator * tx)3294 TOKEN_JUMP(struct shader_translator *tx)
3295 {
3296     if (tx->parse_next && tx->parse != tx->parse_next) {
3297         WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3298         tx->parse = tx->parse_next;
3299     }
3300 }
3301 
3302 static inline bool
sm1_parse_eof(struct shader_translator * tx)3303 sm1_parse_eof(struct shader_translator *tx)
3304 {
3305     return TOKEN_PEEK(tx) == NINED3DSP_END;
3306 }
3307 
3308 static void
sm1_read_version(struct shader_translator * tx)3309 sm1_read_version(struct shader_translator *tx)
3310 {
3311     const DWORD tok = TOKEN_NEXT(tx);
3312 
3313     tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3314     tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3315 
3316     switch (tok >> 16) {
3317     case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3318     case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3319     default:
3320        DBG("Invalid shader type: %x\n", tok);
3321        tx->processor = ~0;
3322        break;
3323     }
3324 }
3325 
3326 /* This is just to check if we parsed the instruction properly. */
3327 static void
sm1_parse_get_skip(struct shader_translator * tx)3328 sm1_parse_get_skip(struct shader_translator *tx)
3329 {
3330     const DWORD tok = TOKEN_PEEK(tx);
3331 
3332     if (tx->version.major >= 2) {
3333         tx->parse_next = tx->parse + 1 /* this */ +
3334             ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3335     } else {
3336         tx->parse_next = NULL; /* TODO: determine from param count */
3337     }
3338 }
3339 
3340 static void
sm1_print_comment(const char * comment,UINT size)3341 sm1_print_comment(const char *comment, UINT size)
3342 {
3343     if (!size)
3344         return;
3345     /* TODO */
3346 }
3347 
3348 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3349 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3350 {
3351     DWORD tok = TOKEN_PEEK(tx);
3352 
3353     while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3354     {
3355         const char *comment = "";
3356         UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3357         tx->parse += size + 1;
3358 
3359         if (print)
3360             sm1_print_comment(comment, size);
3361 
3362         tok = TOKEN_PEEK(tx);
3363     }
3364 }
3365 
3366 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3367 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3368 {
3369     *reg = TOKEN_NEXT(tx);
3370 
3371     if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3372     {
3373         if (tx->version.major < 2)
3374             *rel = (1 << 31) |
3375                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3376                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
3377                 D3DSP_NOSWIZZLE;
3378         else
3379             *rel = TOKEN_NEXT(tx);
3380     }
3381 }
3382 
3383 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3384 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3385 {
3386     int8_t shift;
3387     dst->file =
3388         (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
3389         (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3390     dst->type = TGSI_RETURN_TYPE_FLOAT;
3391     dst->idx = tok & D3DSP_REGNUM_MASK;
3392     dst->rel = NULL;
3393     dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3394     dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3395     shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3396     dst->shift = (shift & 0x7) - (shift & 0x8);
3397 }
3398 
3399 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3400 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3401 {
3402     src->file =
3403         ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
3404         ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3405     src->type = TGSI_RETURN_TYPE_FLOAT;
3406     src->idx = tok & D3DSP_REGNUM_MASK;
3407     src->rel = NULL;
3408     src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3409     src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3410 
3411     switch (src->file) {
3412     case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3413     case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3414     case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3415     default:
3416         break;
3417     }
3418 }
3419 
3420 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3421 sm1_parse_immediate(struct shader_translator *tx,
3422                     struct sm1_src_param *imm)
3423 {
3424     imm->file = NINED3DSPR_IMMEDIATE;
3425     imm->idx = INT_MIN;
3426     imm->rel = NULL;
3427     imm->swizzle = NINED3DSP_NOSWIZZLE;
3428     imm->mod = 0;
3429     switch (tx->insn.opcode) {
3430     case D3DSIO_DEF:
3431         imm->type = NINED3DSPTYPE_FLOAT4;
3432         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3433         tx->parse += 4;
3434         break;
3435     case D3DSIO_DEFI:
3436         imm->type = NINED3DSPTYPE_INT4;
3437         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3438         tx->parse += 4;
3439         break;
3440     case D3DSIO_DEFB:
3441         imm->type = NINED3DSPTYPE_BOOL;
3442         memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3443         tx->parse += 1;
3444         break;
3445     default:
3446        assert(0);
3447        break;
3448     }
3449 }
3450 
3451 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3452 sm1_read_dst_param(struct shader_translator *tx,
3453                    struct sm1_dst_param *dst,
3454                    struct sm1_src_param *rel)
3455 {
3456     DWORD tok_dst, tok_rel = 0;
3457 
3458     sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3459     sm1_parse_dst_param(dst, tok_dst);
3460     if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3461         sm1_parse_src_param(rel, tok_rel);
3462         dst->rel = rel;
3463     }
3464 }
3465 
3466 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3467 sm1_read_src_param(struct shader_translator *tx,
3468                    struct sm1_src_param *src,
3469                    struct sm1_src_param *rel)
3470 {
3471     DWORD tok_src, tok_rel = 0;
3472 
3473     sm1_parse_get_param(tx, &tok_src, &tok_rel);
3474     sm1_parse_src_param(src, tok_src);
3475     if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3476         assert(rel);
3477         sm1_parse_src_param(rel, tok_rel);
3478         src->rel = rel;
3479     }
3480 }
3481 
3482 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3483 sm1_read_semantic(struct shader_translator *tx,
3484                   struct sm1_semantic *sem)
3485 {
3486     const DWORD tok_usg = TOKEN_NEXT(tx);
3487     const DWORD tok_dst = TOKEN_NEXT(tx);
3488 
3489     sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3490     sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3491     sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3492 
3493     sm1_parse_dst_param(&sem->reg, tok_dst);
3494 }
3495 
3496 static void
sm1_parse_instruction(struct shader_translator * tx)3497 sm1_parse_instruction(struct shader_translator *tx)
3498 {
3499     struct sm1_instruction *insn = &tx->insn;
3500     HRESULT hr;
3501     DWORD tok;
3502     const struct sm1_op_info *info = NULL;
3503     unsigned i;
3504 
3505     sm1_parse_comments(tx, true);
3506     sm1_parse_get_skip(tx);
3507 
3508     tok = TOKEN_NEXT(tx);
3509 
3510     insn->opcode = tok & D3DSI_OPCODE_MASK;
3511     insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3512     insn->coissue = !!(tok & D3DSI_COISSUE);
3513     insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3514 
3515     if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3516         int k = tx->op_info_map[insn->opcode];
3517         if (k >= 0) {
3518             assert(k < ARRAY_SIZE(inst_table));
3519             info = &inst_table[k];
3520         }
3521     } else {
3522        if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
3523        if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3524     }
3525     if (!info) {
3526        DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3527        TOKEN_JUMP(tx);
3528        return;
3529     }
3530     insn->info = info;
3531     insn->ndst = info->ndst;
3532     insn->nsrc = info->nsrc;
3533 
3534     /* check version */
3535     {
3536         unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3537         unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3538         unsigned ver = (tx->version.major << 8) | tx->version.minor;
3539         if (ver < min || ver > max) {
3540             DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3541                 min, ver, max);
3542             return;
3543         }
3544     }
3545 
3546     for (i = 0; i < insn->ndst; ++i)
3547         sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3548     if (insn->predicated)
3549         sm1_read_src_param(tx, &insn->pred, NULL);
3550     for (i = 0; i < insn->nsrc; ++i)
3551         sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3552 
3553     /* parse here so we can dump them before processing */
3554     if (insn->opcode == D3DSIO_DEF ||
3555         insn->opcode == D3DSIO_DEFI ||
3556         insn->opcode == D3DSIO_DEFB)
3557         sm1_parse_immediate(tx, &tx->insn.src[0]);
3558 
3559     sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3560     sm1_instruction_check(insn);
3561 
3562     if (insn->predicated) {
3563         tx->predicated_activated = true;
3564         if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3565             tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3566             tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3567         }
3568     }
3569 
3570     if (info->handler)
3571         hr = info->handler(tx);
3572     else
3573         hr = NineTranslateInstruction_Generic(tx);
3574     tx_apply_dst0_modifiers(tx);
3575 
3576     if (insn->predicated) {
3577         tx->predicated_activated = false;
3578         /* TODO: predicate might be allowed on outputs,
3579          * which cannot be src. Workaround it. */
3580         ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3581                  ureg_negate(tx_src_param(tx, &insn->pred)),
3582                  ureg_src(tx->regs.predicate_tmp),
3583                  ureg_src(tx->regs.predicate_dst));
3584     }
3585 
3586     if (hr != D3D_OK)
3587         tx->failure = true;
3588     tx->num_scratch = 0; /* reset */
3589 
3590     TOKEN_JUMP(tx);
3591 }
3592 
/* Query a screen capability; expects a `screen` variable in scope. */
#define GET_CAP(n) \
    screen->get_param(screen, PIPE_CAP_##n)
/* Query a shader capability for info->type; expects `screen` and `info` in scope. */
#define GET_SHADER_CAP(n) \
    screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
3597 
3598 static HRESULT
tx_ctor(struct shader_translator * tx,struct pipe_screen * screen,struct nine_shader_info * info)3599 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3600 {
3601     unsigned i;
3602 
3603     memset(tx, 0, sizeof(*tx));
3604 
3605     tx->info = info;
3606 
3607     tx->byte_code = info->byte_code;
3608     tx->parse = info->byte_code;
3609 
3610     for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3611         info->input_map[i] = NINE_DECLUSAGE_NONE;
3612     info->num_inputs = 0;
3613 
3614     info->position_t = false;
3615     info->point_size = false;
3616 
3617     memset(tx->slots_used, 0, sizeof(tx->slots_used));
3618     memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3619     memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3620 
3621     tx->info->const_float_slots = 0;
3622     tx->info->const_int_slots = 0;
3623     tx->info->const_bool_slots = 0;
3624 
3625     info->sampler_mask = 0x0;
3626     info->rt_mask = 0x0;
3627 
3628     info->lconstf.data = NULL;
3629     info->lconstf.ranges = NULL;
3630 
3631     info->bumpenvmat_needed = 0;
3632 
3633     for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3634         tx->regs.rL[i] = ureg_dst_undef();
3635     }
3636     tx->regs.address = ureg_dst_undef();
3637     tx->regs.a0 = ureg_dst_undef();
3638     tx->regs.p = ureg_dst_undef();
3639     tx->regs.oDepth = ureg_dst_undef();
3640     tx->regs.vPos = ureg_src_undef();
3641     tx->regs.vFace = ureg_src_undef();
3642     for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3643         tx->regs.o[i] = ureg_dst_undef();
3644     for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3645         tx->regs.oCol[i] = ureg_dst_undef();
3646     for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3647         tx->regs.vC[i] = ureg_src_undef();
3648     for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3649         tx->regs.vT[i] = ureg_src_undef();
3650 
3651     sm1_read_version(tx);
3652 
3653     info->version = (tx->version.major << 4) | tx->version.minor;
3654 
3655     tx->num_outputs = 0;
3656 
3657     create_op_info_map(tx);
3658 
3659     tx->ureg = ureg_create(info->type);
3660     if (!tx->ureg) {
3661         return E_OUTOFMEMORY;
3662     }
3663 
3664     tx->native_integers = GET_SHADER_CAP(INTEGERS);
3665     tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3666     tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3667     tx->shift_wpos = !GET_CAP(FS_COORD_PIXEL_CENTER_INTEGER);
3668     tx->texcoord_sn = tx->want_texcoord ?
3669         TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3670     tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL);
3671     tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL);
3672     tx->no_vs_window_space = !GET_CAP(VS_WINDOW_SPACE_POSITION);
3673     tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES);
3674 
3675     if (info->emulate_features) {
3676         tx->shift_wpos = true;
3677         tx->no_vs_window_space = true;
3678         tx->mul_zero_wins = false;
3679     }
3680 
3681     if (IS_VS) {
3682         tx->num_constf_allowed = NINE_MAX_CONST_F;
3683     } else if (tx->version.major < 2) {/* IS_PS v1 */
3684         tx->num_constf_allowed = 8;
3685     } else if (tx->version.major == 2) {/* IS_PS v2 */
3686         tx->num_constf_allowed = 32;
3687     } else {/* IS_PS v3 */
3688         tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3689     }
3690 
3691     if (tx->version.major < 2) {
3692         tx->num_consti_allowed = 0;
3693         tx->num_constb_allowed = 0;
3694     } else {
3695         tx->num_consti_allowed = NINE_MAX_CONST_I;
3696         tx->num_constb_allowed = NINE_MAX_CONST_B;
3697     }
3698 
3699     if (info->swvp_on) {
3700         /* TODO: The values tx->version.major == 1 */
3701         tx->num_constf_allowed = 8192;
3702         tx->num_consti_allowed = 2048;
3703         tx->num_constb_allowed = 2048;
3704     }
3705 
3706     /* VS must always write position. Declare it here to make it the 1st output.
3707      * (Some drivers like nv50 are buggy and rely on that.)
3708      */
3709     if (IS_VS) {
3710         tx->regs.oPos_out = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3711     } else {
3712         ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3713         if (!tx->shift_wpos)
3714             ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3715     }
3716 
3717     if (tx->mul_zero_wins)
3718        ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
3719 
3720     /* Add additional definition of constants */
3721     if (info->add_constants_defs.c_combination) {
3722         unsigned i;
3723 
3724         assert(info->add_constants_defs.int_const_added);
3725         assert(info->add_constants_defs.bool_const_added);
3726         /* We only add constants that are used by the shader
3727          * and that are not defined in the shader */
3728         for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3729             if ((*info->add_constants_defs.int_const_added)[i]) {
3730                 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3731                     info->add_constants_defs.c_combination->const_i[i][0],
3732                     info->add_constants_defs.c_combination->const_i[i][1],
3733                     info->add_constants_defs.c_combination->const_i[i][2],
3734                     info->add_constants_defs.c_combination->const_i[i][3]);
3735                 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3736             }
3737         }
3738         for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3739             if ((*info->add_constants_defs.bool_const_added)[i]) {
3740                 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3741                 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3742             }
3743         }
3744     }
3745     return D3D_OK;
3746 }
3747 
3748 static void
tx_dtor(struct shader_translator * tx)3749 tx_dtor(struct shader_translator *tx)
3750 {
3751     if (tx->slot_map)
3752         FREE(tx->slot_map);
3753     if (tx->num_inst_labels)
3754         FREE(tx->inst_labels);
3755     FREE(tx->lconstf);
3756     FREE(tx->regs.r);
3757     FREE(tx);
3758 }
3759 
3760 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3761  * CONST[1].xyz = x+width/2, y+height/2, zmin */
3762 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3763 shader_add_vs_viewport_transform(struct shader_translator *tx)
3764 {
3765     struct ureg_program *ureg = tx->ureg;
3766     struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3767     struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3768     /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3769 
3770     c0 = ureg_src_dimension(c0, 4);
3771     c1 = ureg_src_dimension(c1, 4);
3772     /* TODO: find out when we need to apply the viewport transformation or not.
3773      * Likely will be XYZ vs XYZRHW in vdecl_out
3774      * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3775      * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3776      */
3777     ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3778 }
3779 
3780 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_dst dst_col,struct ureg_src src_col)3781 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_dst dst_col, struct ureg_src src_col)
3782 {
3783     struct ureg_program *ureg = tx->ureg;
3784     struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3785     struct ureg_src fog_vs, fog_color;
3786     struct ureg_dst fog_factor, depth;
3787 
3788     if (!tx->info->fog_enable) {
3789         ureg_MOV(ureg, dst_col, src_col);
3790         return;
3791     }
3792 
3793     if (tx->info->fog_mode != D3DFOG_NONE) {
3794         depth = tx_scratch_scalar(tx);
3795         if (tx->info->zfog)
3796             ureg_MOV(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3797         else /* wfog: use w. position's w contains 1/w */
3798             ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3799     }
3800 
3801     fog_color = nine_special_constant_src(tx, 12);
3802     fog_params = nine_special_constant_src(tx, 13);
3803     fog_factor = tx_scratch_scalar(tx);
3804 
3805     if (tx->info->fog_mode == D3DFOG_LINEAR) {
3806         fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3807         fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3808         ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3809         ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3810     } else if (tx->info->fog_mode == D3DFOG_EXP) {
3811         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3812         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3813         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3814         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3815     } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3816         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3817         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3818         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3819         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3820         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3821     } else {
3822         fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3823                                             TGSI_INTERPOLATE_PERSPECTIVE),
3824                                             TGSI_SWIZZLE_X);
3825         ureg_MOV(ureg, fog_factor, fog_vs);
3826     }
3827 
3828     ureg_LRP(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_XYZ),
3829              tx_src_scalar(fog_factor), src_col, fog_color);
3830     ureg_MOV(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_W), src_col);
3831 }
3832 
3833 static void
shader_add_ps_alpha_test_stage(struct shader_translator * tx,struct ureg_src src_color)3834 shader_add_ps_alpha_test_stage(struct shader_translator *tx, struct ureg_src src_color)
3835 {
3836     struct ureg_program *ureg = tx->ureg;
3837     unsigned cmp_op;
3838     struct ureg_src src[2];
3839     struct ureg_dst tmp = tx_scratch(tx);
3840     if (tx->info->alpha_test_emulation == PIPE_FUNC_ALWAYS)
3841         return;
3842     if (tx->info->alpha_test_emulation == PIPE_FUNC_NEVER) {
3843         ureg_KILL(ureg);
3844         return;
3845     }
3846     cmp_op = pipe_comp_to_tgsi_opposite(tx->info->alpha_test_emulation);
3847     src[0] = ureg_scalar(src_color, TGSI_SWIZZLE_W); /* Read color alpha channel */
3848     src[1] = ureg_scalar(nine_special_constant_src(tx, 14), TGSI_SWIZZLE_X); /* Read alphatest */
3849     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
3850     ureg_KILL_IF(tx->ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
3851 }
3852 
parse_shader(struct shader_translator * tx)3853 static void parse_shader(struct shader_translator *tx)
3854 {
3855     struct nine_shader_info *info = tx->info;
3856 
3857     while (!sm1_parse_eof(tx) && !tx->failure)
3858         sm1_parse_instruction(tx);
3859     tx->parse++; /* for byte_size */
3860 
3861     if (tx->failure)
3862         return;
3863 
3864     if (IS_PS) {
3865         struct ureg_dst oCol0 = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0);
3866         struct ureg_dst tmp_oCol0;
3867         if (tx->version.major < 3) {
3868             tmp_oCol0 = ureg_DECL_temporary(tx->ureg);
3869             if (tx->version.major < 2) {
3870                 assert(tx->num_temp); /* there must be color output */
3871                 info->rt_mask |= 0x1;
3872                 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.r[0]));
3873             } else {
3874                 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.oCol[0]));
3875             }
3876         } else {
3877             assert(!ureg_dst_is_undef(tx->regs.oCol[0]));
3878             tmp_oCol0 = tx->regs.oCol[0];
3879         }
3880         shader_add_ps_alpha_test_stage(tx, ureg_src(tmp_oCol0));
3881         ureg_MOV(tx->ureg, oCol0, ureg_src(tmp_oCol0));
3882     }
3883 
3884     if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3885         tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3886         ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3887     }
3888 
3889     if (info->position_t) {
3890         if (tx->no_vs_window_space) {
3891             ERR("POSITIONT is not yet implemented for your device.\n");
3892         } else {
3893             ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
3894         }
3895     }
3896 
3897     if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3898         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3899         ureg_MAX(tx->ureg, ureg_writemask(tx->regs.oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3900         ureg_MIN(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3901         info->point_size = true;
3902     } else if (IS_VS && tx->always_output_pointsize) {
3903         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3904         ureg_MOV(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), nine_special_constant_src(tx, 8));
3905         info->point_size = true;
3906     }
3907 
3908     if (IS_VS && tx->info->clip_plane_emulation > 0) {
3909         struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
3910         int num_clipdist = ffs(tx->info->clip_plane_emulation);
3911         int i;
3912         /* TODO: handle undefined channels of oPos (w is not always written to I think. default is 1) *
3913          * Note in d3d9 it's not possible to output clipvert, so we don't need to check
3914          * for its existence */
3915         clipdist[0] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 0, ((1 << num_clipdist) - 1) & 0xf, 0, 1);
3916         if (num_clipdist >= 5)
3917             clipdist[1] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 1, ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
3918         ureg_property(tx->ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
3919         for (i = 0; i < num_clipdist; i++) {
3920             assert(!ureg_dst_is_undef(clipdist[i>>2]));
3921             if (!(tx->info->clip_plane_emulation & (1 << i)))
3922                 ureg_MOV(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(tx->ureg, 0.f));
3923             else
3924                 ureg_DP4(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
3925                          ureg_src(tx->regs.oPos), nine_special_constant_src(tx, i));
3926         }
3927 
3928         ureg_MOV(tx->ureg, tx->regs.oPos_out, ureg_src(tx->regs.oPos));
3929     }
3930 
3931     if (info->process_vertices)
3932         shader_add_vs_viewport_transform(tx);
3933 
3934     ureg_END(tx->ureg);
3935 }
3936 
3937 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS        (1 << 2)
3938 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS        (1 << 3)
3939 #define NINE_SHADER_DEBUG_OPTION_DUMP_NIR         (1 << 4)
3940 #define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI        (1 << 5)
3941 
3942 static const struct debug_named_value nine_shader_debug_options[] = {
3943     { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3944     { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3945     { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3946     { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
3947     DEBUG_NAMED_VALUE_END /* must be last */
3948 };
3949 
3950 static inline bool
nine_shader_get_debug_flag(uint64_t flag)3951 nine_shader_get_debug_flag(uint64_t flag)
3952 {
3953     static uint64_t flags = 0;
3954     static bool first_run = true;
3955 
3956     if (unlikely(first_run)) {
3957         first_run = false;
3958         flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3959 
3960         // Check old TGSI dump envvar too
3961         if (debug_get_bool_option("NINE_TGSI_DUMP", false)) {
3962             flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3963         }
3964     }
3965 
3966     return !!(flags & flag);
3967 }
3968 
3969 static void
nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state * state,const struct tgsi_token * tgsi_tokens,struct pipe_screen * screen)3970 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3971                                      struct pipe_screen *screen)
3972 {
3973     struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3974 
3975     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3976         nir_print_shader(nir, stdout);
3977     }
3978 
3979     state->type = PIPE_SHADER_IR_NIR;
3980     state->tokens = NULL;
3981     state->ir.nir = nir;
3982     memset(&state->stream_output, 0, sizeof(state->stream_output));
3983 }
3984 
3985 static void *
nine_ureg_create_shader(struct ureg_program * ureg,struct pipe_context * pipe,const struct pipe_stream_output_info * so)3986 nine_ureg_create_shader(struct ureg_program                  *ureg,
3987                         struct pipe_context                  *pipe,
3988                         const struct pipe_stream_output_info   *so)
3989 {
3990     struct pipe_shader_state state;
3991     const struct tgsi_token *tgsi_tokens;
3992     struct pipe_screen *screen = pipe->screen;
3993 
3994     tgsi_tokens = ureg_finalize(ureg);
3995     if (!tgsi_tokens)
3996         return NULL;
3997 
3998     assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3999     enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
4000 
4001     bool use_nir = true;
4002 
4003     /* Allow user to override preferred IR, this is very useful for debugging */
4004     if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
4005         use_nir = false;
4006     if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
4007         use_nir = false;
4008 
4009     DUMP("shader type: %s, selected IR: %s\n",
4010          shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
4011          use_nir ? "NIR" : "TGSI");
4012 
4013     if (use_nir) {
4014         nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
4015     } else {
4016         pipe_shader_state_from_tgsi(&state, tgsi_tokens);
4017     }
4018 
4019     assert(state.tokens || state.ir.nir);
4020 
4021     if (so)
4022         state.stream_output = *so;
4023 
4024     switch (shader_type) {
4025     case PIPE_SHADER_VERTEX:
4026         return pipe->create_vs_state(pipe, &state);
4027     case PIPE_SHADER_FRAGMENT:
4028         return pipe->create_fs_state(pipe, &state);
4029     default:
4030         unreachable("unsupported shader type");
4031     }
4032 }
4033 
4034 
/*
 * Create a shader object from the ureg program `p` (with optional stream
 * output info `so`) and destroy the program afterwards, whether or not the
 * creation succeeded. Returns whatever nine_ureg_create_shader returned.
 */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program                   *p,
                                       struct pipe_context                *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *cso;

    cso = nine_ureg_create_shader(p, pipe, so);
    ureg_destroy(p);

    return cso;
}
4044 
4045 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)4046 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
4047 {
4048     struct shader_translator *tx;
4049     HRESULT hr = D3D_OK;
4050     const unsigned processor = info->type;
4051     struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
4052     unsigned *const_ranges = NULL;
4053 
4054     user_assert(processor != ~0, D3DERR_INVALIDCALL);
4055 
4056     tx = MALLOC_STRUCT(shader_translator);
4057     if (!tx)
4058         return E_OUTOFMEMORY;
4059 
4060     info->emulate_features = device->driver_caps.shader_emulate_features;
4061 
4062     if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4063         hr = E_OUTOFMEMORY;
4064         goto out;
4065     }
4066     tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4067 
4068     assert(IS_VS || !info->swvp_on);
4069 
4070     if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
4071         hr = D3DERR_INVALIDCALL;
4072         DBG("Unsupported shader version: %u.%u !\n",
4073             tx->version.major, tx->version.minor);
4074         goto out;
4075     }
4076     if (tx->processor != processor) {
4077         hr = D3DERR_INVALIDCALL;
4078         DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
4079         goto out;
4080     }
4081     DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
4082          tx->version.major, tx->version.minor);
4083 
4084     parse_shader(tx);
4085 
4086     if (tx->failure) {
4087         /* For VS shaders, we print the warning later,
4088          * we first try with swvp. */
4089         if (IS_PS)
4090             ERR("Encountered buggy shader\n");
4091         ureg_destroy(tx->ureg);
4092         hr = D3DERR_INVALIDCALL;
4093         goto out;
4094     }
4095 
4096     /* Recompile after compacting constant slots if possible */
4097     if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
4098         unsigned *slot_map;
4099         unsigned c;
4100         int i, j, num_ranges, prev;
4101 
4102         DBG("Recompiling shader for constant compaction\n");
4103         ureg_destroy(tx->ureg);
4104 
4105         if (tx->num_inst_labels)
4106             FREE(tx->inst_labels);
4107         FREE(tx->lconstf);
4108         FREE(tx->regs.r);
4109 
4110         num_ranges = 0;
4111         prev = -2;
4112         for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4113             if (tx->slots_used[i]) {
4114                 if (prev != i - 1)
4115                     num_ranges++;
4116                 prev = i;
4117             }
4118         }
4119         slot_map = MALLOC(NINE_MAX_CONST_ALL_VS * sizeof(unsigned));
4120         const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4121         if (!slot_map || !const_ranges) {
4122             hr = E_OUTOFMEMORY;
4123             goto out;
4124         }
4125         c = 0;
4126         j = -1;
4127         prev = -2;
4128         for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4129             if (tx->slots_used[i]) {
4130                 if (prev != i - 1)
4131                     j++;
4132                 /* Initialize first slot of the range */
4133                 if (!const_ranges[2*j+1])
4134                     const_ranges[2*j] = i;
4135                 const_ranges[2*j+1]++;
4136                 prev = i;
4137                 slot_map[i] = c++;
4138             }
4139         }
4140 
4141         if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4142             hr = E_OUTOFMEMORY;
4143             goto out;
4144         }
4145         tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4146         tx->slot_map = slot_map;
4147         parse_shader(tx);
4148         assert(!tx->failure);
4149 #if !defined(NDEBUG)
4150         i = 0;
4151         j = 0;
4152         while (const_ranges[i*2+1] != 0) {
4153             j += const_ranges[i*2+1];
4154             i++;
4155         }
4156         assert(j == tx->num_slots);
4157 #endif
4158     }
4159 
4160     /* record local constants */
4161     if (tx->num_lconstf && tx->indirect_const_access) {
4162         struct nine_range *ranges;
4163         float *data;
4164         int *indices;
4165         unsigned i, k, n;
4166 
4167         hr = E_OUTOFMEMORY;
4168 
4169         data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4170         if (!data)
4171             goto out;
4172         info->lconstf.data = data;
4173 
4174         indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4175         if (!indices)
4176             goto out;
4177 
4178         /* lazy sort, num_lconstf should be small */
4179         for (n = 0; n < tx->num_lconstf; ++n) {
4180             for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4181                 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4182                     k = i;
4183             }
4184             indices[n] = tx->lconstf[k].idx;
4185             memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4186             tx->lconstf[k].idx = INT_MAX;
4187         }
4188 
4189         /* count ranges */
4190         for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4191             if (indices[i] != indices[i - 1] + 1)
4192                 ++n;
4193         ranges = MALLOC(n * sizeof(ranges[0]));
4194         if (!ranges) {
4195             FREE(indices);
4196             goto out;
4197         }
4198         info->lconstf.ranges = ranges;
4199 
4200         k = 0;
4201         ranges[k].bgn = indices[0];
4202         for (i = 1; i < tx->num_lconstf; ++i) {
4203             if (indices[i] != indices[i - 1] + 1) {
4204                 ranges[k].next = &ranges[k + 1];
4205                 ranges[k].end = indices[i - 1] + 1;
4206                 ++k;
4207                 ranges[k].bgn = indices[i];
4208             }
4209         }
4210         ranges[k].end = indices[i - 1] + 1;
4211         ranges[k].next = NULL;
4212         assert(n == (k + 1));
4213 
4214         FREE(indices);
4215         hr = D3D_OK;
4216     }
4217 
4218     /* r500 */
4219     if (info->const_float_slots > device->max_vs_const_f &&
4220         (info->const_int_slots || info->const_bool_slots) &&
4221         !info->swvp_on)
4222         ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4223 
4224 
4225     if (tx->indirect_const_access) { /* vs only */
4226         info->const_float_slots = device->max_vs_const_f;
4227         tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4228     }
4229 
4230     if (!info->swvp_on) {
4231         info->const_used_size = sizeof(float[4]) * tx->num_slots;
4232         if (tx->num_slots)
4233             ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4234     } else {
4235          ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4236          ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4237          ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4238          ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4239     }
4240 
4241     if (info->process_vertices)
4242         ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4243 
4244     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4245         const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4246         tgsi_dump(toks, 0);
4247         ureg_free_tokens(toks);
4248     }
4249 
4250     if (info->process_vertices) {
4251         NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4252                                                     tx->output_info,
4253                                                     tx->num_outputs,
4254                                                     &(info->so));
4255         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4256     } else
4257         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4258     if (!info->cso) {
4259         hr = D3DERR_DRIVERINTERNALERROR;
4260         FREE(info->lconstf.data);
4261         FREE(info->lconstf.ranges);
4262         goto out;
4263     }
4264 
4265     info->const_ranges = const_ranges;
4266     const_ranges = NULL;
4267     info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4268 out:
4269     if (const_ranges)
4270         FREE(const_ranges);
4271     tx_dtor(tx);
4272     return hr;
4273 }
4274