1 /*
2 * Copyright 2011 Joakim Sindholt <[email protected]>
3 * Copyright 2013 Christoph Bumiller
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "nine_shader.h"
8
9 #include "device9.h"
10 #include "nine_debug.h"
11 #include "nine_state.h"
12 #include "vertexdeclaration9.h"
13
14 #include "util/bitscan.h"
15 #include "util/macros.h"
16 #include "util/u_memory.h"
17 #include "util/u_inlines.h"
18 #include "pipe/p_shader_tokens.h"
19 #include "tgsi/tgsi_ureg.h"
20 #include "tgsi/tgsi_dump.h"
21 #include "nir/tgsi_to_nir.h"
22
23 #define DBG_CHANNEL DBG_SHADER
24
25 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
26
27
28 struct shader_translator;
29
30 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
31
32 static inline const char *d3dsio_to_string(unsigned opcode);
33
34
35 #define NINED3D_SM1_VS 0xfffe
36 #define NINED3D_SM1_PS 0xffff
37
38 #define NINE_MAX_COND_DEPTH 64
39 #define NINE_MAX_LOOP_DEPTH 64
40
41 #define NINED3DSP_END 0x0000ffff
42
43 #define NINED3DSPTYPE_FLOAT4 0
44 #define NINED3DSPTYPE_INT4 1
45 #define NINED3DSPTYPE_BOOL 2
46
47 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
48
49 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
50 #define NINED3DSP_WRITEMASK_SHIFT 16
51
52 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
53
54 #define NINED3DSHADER_REL_OP_GT 1
55 #define NINED3DSHADER_REL_OP_EQ 2
56 #define NINED3DSHADER_REL_OP_GE 3
57 #define NINED3DSHADER_REL_OP_LT 4
58 #define NINED3DSHADER_REL_OP_NE 5
59 #define NINED3DSHADER_REL_OP_LE 6
60
61 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
62 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
63
64 #define NINED3DSI_TEXLD_PROJECT 0x1
65 #define NINED3DSI_TEXLD_BIAS 0x2
66
67 #define NINED3DSP_WRITEMASK_0 0x1
68 #define NINED3DSP_WRITEMASK_1 0x2
69 #define NINED3DSP_WRITEMASK_2 0x4
70 #define NINED3DSP_WRITEMASK_3 0x8
71 #define NINED3DSP_WRITEMASK_ALL 0xf
72
73 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
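/* Illustration: component selectors are packed into 2-bit fields, so the
 * identity swizzle above evaluates to 0xe4 and simply selects .xyzw unchanged. */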
74
75 #define NINE_SWIZZLE4(x,y,z,w) \
76 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
77
78 #define NINE_APPLY_SWIZZLE(src, s) \
79 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
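/* For example, NINE_APPLY_SWIZZLE(src, W) expands to
 * ureg_swizzle(src, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W),
 * i.e. it replicates the selected component across all four channels. */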
80
81 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
82 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
83 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
84
85 /*
86 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
87 * BIAS <= PS 1.4 (x-0.5)
88 * BIASNEG <= PS 1.4 (-(x-0.5))
89 * SIGN <= PS 1.4 (2(x-0.5))
90 * SIGNNEG <= PS 1.4 (-2(x-0.5))
91 * COMP <= PS 1.4 (1-x)
92 * X2 = PS 1.4 (2x)
93 * X2NEG = PS 1.4 (-2x)
94 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
95 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
96 * ABS >= SM 3.0 (abs(x))
97 * ABSNEG >= SM 3.0 (-abs(x))
98 * NOT >= SM 2.0 predication only
99 */
100 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
101 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
102 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
103 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
104 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
105 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
106 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
107 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
108 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
109 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
110 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
111 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
112 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
113 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
114
115 static const char *sm1_mod_str[] =
116 {
117 [NINED3DSPSM_NONE] = "",
118 [NINED3DSPSM_NEG] = "-",
119 [NINED3DSPSM_BIAS] = "bias",
120 [NINED3DSPSM_BIASNEG] = "biasneg",
121 [NINED3DSPSM_SIGN] = "sign",
122 [NINED3DSPSM_SIGNNEG] = "signneg",
123 [NINED3DSPSM_COMP] = "comp",
124 [NINED3DSPSM_X2] = "x2",
125 [NINED3DSPSM_X2NEG] = "x2neg",
126 [NINED3DSPSM_DZ] = "dz",
127 [NINED3DSPSM_DW] = "dw",
128 [NINED3DSPSM_ABS] = "abs",
129 [NINED3DSPSM_ABSNEG] = "-abs",
130 [NINED3DSPSM_NOT] = "not"
131 };
132
133 static void
134 sm1_dump_writemask(BYTE mask)
135 {
136 if (mask & 1) DUMP("x"); else DUMP("_");
137 if (mask & 2) DUMP("y"); else DUMP("_");
138 if (mask & 4) DUMP("z"); else DUMP("_");
139 if (mask & 8) DUMP("w"); else DUMP("_");
140 }
141
142 static void
143 sm1_dump_swizzle(BYTE s)
144 {
145 char c[4] = { 'x', 'y', 'z', 'w' };
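/* e.g. s == 0x1b decodes to "wzyx": bits 1:0 select the first printed
 * component and bits 7:6 the last. */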
146 DUMP("%c%c%c%c",
147 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
148 }
149
150 static const char sm1_file_char[] =
151 {
152 [D3DSPR_TEMP] = 'r',
153 [D3DSPR_INPUT] = 'v',
154 [D3DSPR_CONST] = 'c',
155 [D3DSPR_ADDR] = 'A',
156 [D3DSPR_RASTOUT] = 'R',
157 [D3DSPR_ATTROUT] = 'D',
158 [D3DSPR_OUTPUT] = 'o',
159 [D3DSPR_CONSTINT] = 'I',
160 [D3DSPR_COLOROUT] = 'C',
161 [D3DSPR_DEPTHOUT] = 'D',
162 [D3DSPR_SAMPLER] = 's',
163 [D3DSPR_CONST2] = 'c',
164 [D3DSPR_CONST3] = 'c',
165 [D3DSPR_CONST4] = 'c',
166 [D3DSPR_CONSTBOOL] = 'B',
167 [D3DSPR_LOOP] = 'L',
168 [D3DSPR_TEMPFLOAT16] = 'h',
169 [D3DSPR_MISCTYPE] = 'M',
170 [D3DSPR_LABEL] = 'X',
171 [D3DSPR_PREDICATE] = 'p'
172 };
173
174 static void
175 sm1_dump_reg(BYTE file, INT index)
176 {
177 switch (file) {
178 case D3DSPR_LOOP:
179 DUMP("aL");
180 break;
181 case D3DSPR_COLOROUT:
182 DUMP("oC%i", index);
183 break;
184 case D3DSPR_DEPTHOUT:
185 DUMP("oDepth");
186 break;
187 case D3DSPR_RASTOUT:
188 DUMP("oRast%i", index);
189 break;
190 case D3DSPR_CONSTINT:
191 DUMP("iconst[%i]", index);
192 break;
193 case D3DSPR_CONSTBOOL:
194 DUMP("bconst[%i]", index);
195 break;
196 default:
197 DUMP("%c%i", sm1_file_char[file], index);
198 break;
199 }
200 }
201
202 struct sm1_src_param
203 {
204 INT idx;
205 struct sm1_src_param *rel;
206 BYTE file;
207 BYTE swizzle;
208 BYTE mod;
209 BYTE type;
210 union {
211 DWORD d[4];
212 float f[4];
213 int i[4];
214 BOOL b;
215 } imm;
216 };
217 static void
218 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
219
220 struct sm1_dst_param
221 {
222 INT idx;
223 struct sm1_src_param *rel;
224 BYTE file;
225 BYTE mask;
226 BYTE mod;
227 int8_t shift; /* sint4 */
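/* (the 4-bit shift field of the dst token is treated as signed here,
 * so e.g. a raw 0xf would mean shift == -1, i.e. divide the result by 2) */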
228 BYTE type;
229 };
230
231 static inline void
232 assert_replicate_swizzle(const struct ureg_src *reg)
233 {
234 assert(reg->SwizzleY == reg->SwizzleX &&
235 reg->SwizzleZ == reg->SwizzleX &&
236 reg->SwizzleW == reg->SwizzleX);
237 }
238
239 static void
240 sm1_dump_immediate(const struct sm1_src_param *param)
241 {
242 switch (param->type) {
243 case NINED3DSPTYPE_FLOAT4:
244 DUMP("{ %f %f %f %f }",
245 param->imm.f[0], param->imm.f[1],
246 param->imm.f[2], param->imm.f[3]);
247 break;
248 case NINED3DSPTYPE_INT4:
249 DUMP("{ %i %i %i %i }",
250 param->imm.i[0], param->imm.i[1],
251 param->imm.i[2], param->imm.i[3]);
252 break;
253 case NINED3DSPTYPE_BOOL:
254 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
255 break;
256 default:
257 assert(0);
258 break;
259 }
260 }
261
262 static void
263 sm1_dump_src_param(const struct sm1_src_param *param)
264 {
265 if (param->file == NINED3DSPR_IMMEDIATE) {
266 assert(!param->mod &&
267 !param->rel &&
268 param->swizzle == NINED3DSP_NOSWIZZLE);
269 sm1_dump_immediate(param);
270 return;
271 }
272
273 if (param->mod)
274 DUMP("%s(", sm1_mod_str[param->mod]);
275 if (param->rel) {
276 DUMP("%c[", sm1_file_char[param->file]);
277 sm1_dump_src_param(param->rel);
278 DUMP("+%i]", param->idx);
279 } else {
280 sm1_dump_reg(param->file, param->idx);
281 }
282 if (param->mod)
283 DUMP(")");
284 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
285 DUMP(".");
286 sm1_dump_swizzle(param->swizzle);
287 }
288 }
289
290 static void
291 sm1_dump_dst_param(const struct sm1_dst_param *param)
292 {
293 if (param->mod & NINED3DSPDM_SATURATE)
294 DUMP("sat ");
295 if (param->mod & NINED3DSPDM_PARTIALP)
296 DUMP("pp ");
297 if (param->mod & NINED3DSPDM_CENTROID)
298 DUMP("centroid ");
299 if (param->shift < 0)
300 DUMP("/%u ", 1 << -param->shift);
301 if (param->shift > 0)
302 DUMP("*%u ", 1 << param->shift);
303
304 if (param->rel) {
305 DUMP("%c[", sm1_file_char[param->file]);
306 sm1_dump_src_param(param->rel);
307 DUMP("+%i]", param->idx);
308 } else {
309 sm1_dump_reg(param->file, param->idx);
310 }
311 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
312 DUMP(".");
313 sm1_dump_writemask(param->mask);
314 }
315 }
316
317 struct sm1_semantic
318 {
319 struct sm1_dst_param reg;
320 BYTE sampler_type;
321 D3DDECLUSAGE usage;
322 BYTE usage_idx;
323 };
324
325 struct sm1_op_info
326 {
327 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
328 * should be ignored completely */
329 unsigned sio;
330 unsigned opcode; /* TGSI_OPCODE_x */
331
332 /* versions are still set even if handler is set */
333 struct {
334 unsigned min;
335 unsigned max;
336 } vert_version, frag_version;
337
338 /* number of regs parsed outside of special handler */
339 unsigned ndst;
340 unsigned nsrc;
341
342 /* some instructions don't map perfectly, so use a special handler */
343 translate_instruction_func handler;
344 };
345
346 struct sm1_instruction
347 {
348 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
349 BYTE flags;
350 BOOL coissue;
351 BOOL predicated;
352 BYTE ndst;
353 BYTE nsrc;
354 struct sm1_src_param src[4];
355 struct sm1_src_param src_rel[4];
356 struct sm1_src_param pred;
357 struct sm1_src_param dst_rel[1];
358 struct sm1_dst_param dst[1];
359
360 const struct sm1_op_info *info;
361 };
362
363 static void
364 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
365 {
366 unsigned i;
367
368 /* no info stored for these: */
369 if (insn->opcode == D3DSIO_DCL)
370 return;
371 for (i = 0; i < indent; ++i)
372 DUMP(" ");
373
374 if (insn->predicated) {
375 DUMP("@");
376 sm1_dump_src_param(&insn->pred);
377 DUMP(" ");
378 }
379 DUMP("%s", d3dsio_to_string(insn->opcode));
380 if (insn->flags) {
381 switch (insn->opcode) {
382 case D3DSIO_TEX:
383 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
384 break;
385 default:
386 DUMP("_%x", insn->flags);
387 break;
388 }
389 }
390 if (insn->coissue)
391 DUMP("_co");
392 DUMP(" ");
393
394 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
395 sm1_dump_dst_param(&insn->dst[i]);
396 DUMP(" ");
397 }
398
399 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
400 sm1_dump_src_param(&insn->src[i]);
401 DUMP(" ");
402 }
403 if (insn->opcode == D3DSIO_DEF ||
404 insn->opcode == D3DSIO_DEFI ||
405 insn->opcode == D3DSIO_DEFB)
406 sm1_dump_immediate(&insn->src[0]);
407
408 DUMP("\n");
409 }
410
411 struct sm1_local_const
412 {
413 INT idx;
414 struct ureg_src reg;
415 float f[4]; /* for indirect addressing of float constants */
416 };
417
418 struct shader_translator
419 {
420 const DWORD *byte_code;
421 const DWORD *parse;
422 const DWORD *parse_next;
423
424 struct ureg_program *ureg;
425
426 /* shader version */
427 struct {
428 BYTE major;
429 BYTE minor;
430 } version;
431 unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
432 unsigned num_constf_allowed;
433 unsigned num_consti_allowed;
434 unsigned num_constb_allowed;
435
436 bool native_integers;
437 bool inline_subroutines;
438 bool want_texcoord;
439 bool shift_wpos;
440 bool wpos_is_sysval;
441 bool face_is_sysval_integer;
442 bool mul_zero_wins;
443 bool always_output_pointsize;
444 bool no_vs_window_space;
445 unsigned texcoord_sn;
446
447 struct sm1_instruction insn; /* current instruction */
448
449 struct {
450 struct ureg_dst *r;
451 struct ureg_dst oPos;
452 struct ureg_dst oPos_out; /* the real output when doing streamout or clipplane emulation */
453 struct ureg_dst oFog;
454 struct ureg_dst oPts;
455 struct ureg_dst oCol[4];
456 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
457 struct ureg_dst oDepth;
458 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
459 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
460 struct ureg_src vPos;
461 struct ureg_src vFace;
462 struct ureg_src s;
463 struct ureg_dst p;
464 struct ureg_dst address;
465 struct ureg_dst a0;
466 struct ureg_dst predicate;
467 struct ureg_dst predicate_tmp;
468 struct ureg_dst predicate_dst;
469 struct ureg_dst tS[8]; /* texture stage registers */
470 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
471 struct ureg_dst t[8]; /* scratch TEMPs */
472 struct ureg_src vC[2]; /* PS color in */
473 struct ureg_src vT[8]; /* PS texcoord in */
474 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop/rep ctr */
475 struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* aL emulation */
476 } regs;
477 unsigned num_temp; /* ARRAY_SIZE(regs.r) */
478 unsigned num_scratch;
479 unsigned loop_depth;
480 unsigned loop_depth_max;
481 unsigned cond_depth;
482 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
483 unsigned cond_labels[NINE_MAX_COND_DEPTH];
484 bool loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
485 bool predicated_activated;
486
487 unsigned *inst_labels; /* LABEL op */
488 unsigned num_inst_labels;
489
490 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
491
492 struct sm1_local_const *lconstf;
493 unsigned num_lconstf;
494 struct sm1_local_const *lconsti;
495 unsigned num_lconsti;
496 struct sm1_local_const *lconstb;
497 unsigned num_lconstb;
498
499 bool slots_used[NINE_MAX_CONST_ALL_VS];
500 unsigned *slot_map;
501 unsigned num_slots;
502
503 bool indirect_const_access;
504 bool failure;
505
506 struct nine_vs_output_info output_info[16];
507 int num_outputs;
508
509 struct nine_shader_info *info;
510
511 int16_t op_info_map[D3DSIO_BREAKP + 1];
512 };
513
514 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
515 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
516
517 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
518
519 static void
520 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
521
522 static void
523 sm1_instruction_check(const struct sm1_instruction *insn)
524 {
525 if (insn->opcode == D3DSIO_CRS)
526 {
527 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
528 {
529 DBG("CRS.mask.w\n");
530 }
531 }
532 }
533
534 static void
535 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
536 int mask, int output_index)
537 {
538 tx->output_info[tx->num_outputs].output_semantic = Usage;
539 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
540 tx->output_info[tx->num_outputs].mask = mask;
541 tx->output_info[tx->num_outputs].output_index = output_index;
542 tx->num_outputs++;
543 }
544
545 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
546 {
547 struct ureg_src src;
548
549 if (tx->slot_map)
550 idx = tx->slot_map[idx];
551 /* swvp constant handling: we use two buffers
552 * to fit all the float constants. The special handling
553 * doesn't need to be elsewhere, because all the instructions
554 * accessing the constants directly are VS1, and swvp
555 * is VS >= 2 */
556 if (tx->info->swvp_on && idx >= 4096) {
557 /* TODO: swvp rel is broken if many constants are used */
558 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
559 src = ureg_src_dimension(src, 1);
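/* e.g. with swvp, float constant c4100 ends up in constant buffer 1
 * at element 4 (4100 - 4096). */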
560 } else {
561 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
562 src = ureg_src_dimension(src, 0);
563 }
564
565 if (!tx->info->swvp_on)
566 tx->slots_used[idx] = true;
567 if (tx->info->const_float_slots < (idx + 1))
568 tx->info->const_float_slots = idx + 1;
569 if (tx->num_slots < (idx + 1))
570 tx->num_slots = idx + 1;
571
572 return src;
573 }
574
575 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
576 {
577 struct ureg_src src;
578
579 if (tx->info->swvp_on) {
580 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
581 src = ureg_src_dimension(src, 2);
582 } else {
583 unsigned slot_idx = tx->info->const_i_base + idx;
584 if (tx->slot_map)
585 slot_idx = tx->slot_map[slot_idx];
586 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
587 src = ureg_src_dimension(src, 0);
588 tx->slots_used[slot_idx] = true;
589 tx->info->int_slots_used[idx] = true;
590 if (tx->num_slots < (slot_idx + 1))
591 tx->num_slots = slot_idx + 1;
592 }
593
594 if (tx->info->const_int_slots < (idx + 1))
595 tx->info->const_int_slots = idx + 1;
596
597 return src;
598 }
599
600 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
601 {
602 struct ureg_src src;
603
604 char r = idx / 4;
605 char s = idx & 3;
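/* Four booleans are packed per constant vec4: e.g. idx == 5 gives
 * r == 1 (second vec4) and s == 1 (the .y component). */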
606
607 if (tx->info->swvp_on) {
608 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
609 src = ureg_src_dimension(src, 3);
610 } else {
611 unsigned slot_idx = tx->info->const_b_base + r;
612 if (tx->slot_map)
613 slot_idx = tx->slot_map[slot_idx];
614 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
615 src = ureg_src_dimension(src, 0);
616 tx->slots_used[slot_idx] = true;
617 tx->info->bool_slots_used[idx] = true;
618 if (tx->num_slots < (slot_idx + 1))
619 tx->num_slots = slot_idx + 1;
620 }
621 src = ureg_swizzle(src, s, s, s, s);
622
623 if (tx->info->const_bool_slots < (idx + 1))
624 tx->info->const_bool_slots = idx + 1;
625
626 return src;
627 }
628
629 static struct ureg_src nine_special_constant_src(struct shader_translator *tx, int idx)
630 {
631 struct ureg_src src;
632
633 unsigned slot_idx = idx + (IS_PS ? NINE_MAX_CONST_PS_SPE_OFFSET :
634 (tx->info->swvp_on ? NINE_MAX_CONST_SWVP_SPE_OFFSET : NINE_MAX_CONST_VS_SPE_OFFSET));
635
636 if (!tx->info->swvp_on && tx->slot_map)
637 slot_idx = tx->slot_map[slot_idx];
638 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
639 src = ureg_src_dimension(src, 0);
640
641 if (!tx->info->swvp_on)
642 tx->slots_used[slot_idx] = true;
643 if (tx->num_slots < (slot_idx + 1))
644 tx->num_slots = slot_idx + 1;
645
646 return src;
647 }
648
649 static bool
650 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
651 {
652 INT i;
653
654 if (index < 0 || index >= tx->num_constf_allowed) {
655 tx->failure = true;
656 return false;
657 }
658 for (i = 0; i < tx->num_lconstf; ++i) {
659 if (tx->lconstf[i].idx == index) {
660 *src = tx->lconstf[i].reg;
661 return true;
662 }
663 }
664 return false;
665 }
666 static bool
667 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
668 {
669 int i;
670
671 if (index < 0 || index >= tx->num_consti_allowed) {
672 tx->failure = true;
673 return false;
674 }
675 for (i = 0; i < tx->num_lconsti; ++i) {
676 if (tx->lconsti[i].idx == index) {
677 *src = tx->lconsti[i].reg;
678 return true;
679 }
680 }
681 return false;
682 }
683 static bool
684 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
685 {
686 int i;
687
688 if (index < 0 || index >= tx->num_constb_allowed) {
689 tx->failure = true;
690 return false;
691 }
692 for (i = 0; i < tx->num_lconstb; ++i) {
693 if (tx->lconstb[i].idx == index) {
694 *src = tx->lconstb[i].reg;
695 return true;
696 }
697 }
698 return false;
699 }
700
701 static void
702 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
703 {
704 unsigned n;
705
706 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
707
708 for (n = 0; n < tx->num_lconstf; ++n)
709 if (tx->lconstf[n].idx == index)
710 break;
711 if (n == tx->num_lconstf) {
712 if ((n % 8) == 0) {
713 tx->lconstf = REALLOC(tx->lconstf,
714 (n + 0) * sizeof(tx->lconstf[0]),
715 (n + 8) * sizeof(tx->lconstf[0]));
716 assert(tx->lconstf);
717 }
718 tx->num_lconstf++;
719 }
720 tx->lconstf[n].idx = index;
721 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
722
723 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
724 }
725 static void
726 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
727 {
728 unsigned n;
729
730 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
731
732 for (n = 0; n < tx->num_lconsti; ++n)
733 if (tx->lconsti[n].idx == index)
734 break;
735 if (n == tx->num_lconsti) {
736 if ((n % 8) == 0) {
737 tx->lconsti = REALLOC(tx->lconsti,
738 (n + 0) * sizeof(tx->lconsti[0]),
739 (n + 8) * sizeof(tx->lconsti[0]));
740 assert(tx->lconsti);
741 }
742 tx->num_lconsti++;
743 }
744
745 tx->lconsti[n].idx = index;
746 tx->lconsti[n].reg = tx->native_integers ?
747 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
748 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
749 }
750 static void
751 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
752 {
753 unsigned n;
754
755 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
756
757 for (n = 0; n < tx->num_lconstb; ++n)
758 if (tx->lconstb[n].idx == index)
759 break;
760 if (n == tx->num_lconstb) {
761 if ((n % 8) == 0) {
762 tx->lconstb = REALLOC(tx->lconstb,
763 (n + 0) * sizeof(tx->lconstb[0]),
764 (n + 8) * sizeof(tx->lconstb[0]));
765 assert(tx->lconstb);
766 }
767 tx->num_lconstb++;
768 }
769
770 tx->lconstb[n].idx = index;
771 tx->lconstb[n].reg = tx->native_integers ?
772 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
773 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
774 }
775
776 static inline struct ureg_dst
777 tx_scratch(struct shader_translator *tx)
778 {
779 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
780 tx->failure = true;
781 return tx->regs.t[0];
782 }
783 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
784 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
785 return tx->regs.t[tx->num_scratch++];
786 }
787
788 static inline struct ureg_dst
789 tx_scratch_scalar(struct shader_translator *tx)
790 {
791 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
792 }
793
794 static inline struct ureg_src
795 tx_src_scalar(struct ureg_dst dst)
796 {
797 struct ureg_src src = ureg_src(dst);
798 int c = ffs(dst.WriteMask) - 1;
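/* e.g. a WriteMask of TGSI_WRITEMASK_Z (0x4) gives c == 2, so the
 * source gets replicated as .zzzz below. */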
799 if (dst.WriteMask == (1 << c))
800 src = ureg_scalar(src, c);
801 return src;
802 }
803
804 static inline void
805 tx_temp_alloc(struct shader_translator *tx, INT idx)
806 {
807 assert(idx >= 0);
808 if (idx >= tx->num_temp) {
809 unsigned k = tx->num_temp;
810 unsigned n = idx + 1;
811 tx->regs.r = REALLOC(tx->regs.r,
812 k * sizeof(tx->regs.r[0]),
813 n * sizeof(tx->regs.r[0]));
814 for (; k < n; ++k)
815 tx->regs.r[k] = ureg_dst_undef();
816 tx->num_temp = n;
817 }
818 if (ureg_dst_is_undef(tx->regs.r[idx]))
819 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
820 }
821
822 static inline void
823 tx_addr_alloc(struct shader_translator *tx, INT idx)
824 {
825 assert(idx == 0);
826 if (ureg_dst_is_undef(tx->regs.address))
827 tx->regs.address = ureg_DECL_address(tx->ureg);
828 if (ureg_dst_is_undef(tx->regs.a0))
829 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
830 }
831
832 static inline bool
833 TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
834 unsigned target, struct ureg_src src0,
835 struct ureg_src src1, INT idx)
836 {
837 struct ureg_dst tmp;
838 struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
839
840 if (!(tx->info->fetch4 & (1 << idx)))
841 return false;
842
843 /* TODO: needs more tests, but this feature is rarely used */
844
845 tmp = tx_scratch(tx);
846 ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
847 NULL, 0, src_tg4, 3);
848 ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
849 return true;
850 }
851
852 /* NOTE: It's not very clear to which ps1.1-ps1.3 texture instructions
853 * the projection should be applied. It doesn't
854 * apply to texkill.
855 * The doc is very imprecise here (it says the projection is done
856 * before rasterization, thus in the vs, which seems wrong since ps instructions
857 * are affected differently).
858 * For now we only apply it to the ps TEX instruction and TEXBEM.
859 * Perhaps some other instructions would need it as well. */
860 static inline void
861 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
862 struct ureg_src src, INT idx)
863 {
864 struct ureg_dst tmp;
865 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
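/* tx->info->projected stores a 2-bit field per texture stage: e.g. a
 * value of 3 for this stage gives dim == 4, i.e. divide by .w below. */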
866
867 /* no projection */
868 if (dim == 1) {
869 ureg_MOV(tx->ureg, dst, src);
870 } else {
871 tmp = tx_scratch_scalar(tx);
872 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
873 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
874 }
875 }
876
877 static inline void
878 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
879 unsigned target, struct ureg_src src0,
880 struct ureg_src src1, INT idx)
881 {
882 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
883 struct ureg_dst tmp;
884 bool shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
885
886 /* dim == 1: no projection
887 * Projection looks like it must be disabled when it makes no
888 * sense according to the texture dimensions
889 */
890 if (dim == 1 || (dim <= target && !shadow)) {
891 ureg_TEX(tx->ureg, dst, target, src0, src1);
892 } else if (dim == 4) {
893 ureg_TXP(tx->ureg, dst, target, src0, src1);
894 } else {
895 tmp = tx_scratch(tx);
896 apply_ps1x_projection(tx, tmp, src0, idx);
897 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
898 }
899 }
900
901 static inline void
902 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
903 {
904 assert(IS_PS);
905 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
906 if (ureg_src_is_undef(tx->regs.vT[idx]))
907 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
908 TGSI_INTERPOLATE_PERSPECTIVE);
909 }
910
911 static inline unsigned *
912 tx_bgnloop(struct shader_translator *tx)
913 {
914 tx->loop_depth++;
915 if (tx->loop_depth_max < tx->loop_depth)
916 tx->loop_depth_max = tx->loop_depth;
917 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
918 return &tx->loop_labels[tx->loop_depth - 1];
919 }
920
921 static inline unsigned *
922 tx_endloop(struct shader_translator *tx)
923 {
924 assert(tx->loop_depth);
925 tx->loop_depth--;
926 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
927 ureg_get_instruction_number(tx->ureg));
928 return &tx->loop_labels[tx->loop_depth];
929 }
930
931 static struct ureg_dst
932 tx_get_loopctr(struct shader_translator *tx, bool loop_or_rep)
933 {
934 const unsigned l = tx->loop_depth - 1;
935
936 if (!tx->loop_depth)
937 {
938 DBG("loop counter requested outside of loop\n");
939 return ureg_dst_undef();
940 }
941
942 if (ureg_dst_is_undef(tx->regs.rL[l])) {
943 /* loop or rep ctr creation */
944 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
945 if (loop_or_rep)
946 tx->regs.aL[l] = ureg_DECL_local_temporary(tx->ureg);
947 tx->loop_or_rep[l] = loop_or_rep;
948 }
949 /* loop - rep - endloop - endrep not allowed */
950 assert(tx->loop_or_rep[l] == loop_or_rep);
951
952 return tx->regs.rL[l];
953 }
954
955 static struct ureg_dst
956 tx_get_loopal(struct shader_translator *tx)
957 {
958 int loop_level = tx->loop_depth - 1;
959
960 while (loop_level >= 0) {
961 /* handle loop - rep - endrep - endloop case */
962 if (tx->loop_or_rep[loop_level])
963 /* the aL value is in the Y component (nine implementation) */
964 return tx->regs.aL[loop_level];
965 loop_level--;
966 }
967
968 DBG("aL counter requested outside of loop\n");
969 return ureg_dst_undef();
970 }
971
972 static inline unsigned *
973 tx_cond(struct shader_translator *tx)
974 {
975 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
976 tx->cond_depth++;
977 return &tx->cond_labels[tx->cond_depth - 1];
978 }
979
980 static inline unsigned *
981 tx_elsecond(struct shader_translator *tx)
982 {
983 assert(tx->cond_depth);
984 return &tx->cond_labels[tx->cond_depth - 1];
985 }
986
987 static inline void
988 tx_endcond(struct shader_translator *tx)
989 {
990 assert(tx->cond_depth);
991 tx->cond_depth--;
992 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
993 ureg_get_instruction_number(tx->ureg));
994 }
995
996 static inline struct ureg_dst
997 nine_ureg_dst_register(unsigned file, int index)
998 {
999 return ureg_dst(ureg_src_register(file, index));
1000 }
1001
1002 static inline struct ureg_src
1003 nine_get_position_input(struct shader_translator *tx)
1004 {
1005 struct ureg_program *ureg = tx->ureg;
1006
1007 if (tx->wpos_is_sysval)
1008 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1009 else
1010 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1011 0, TGSI_INTERPOLATE_LINEAR);
1012 }
1013
1014 static struct ureg_src
1015 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1016 {
1017 struct ureg_program *ureg = tx->ureg;
1018 struct ureg_src src;
1019 struct ureg_dst tmp;
1020
1021 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1022 (param->file == D3DSPR_INPUT && tx->version.major == 3));
1023
1024 switch (param->file)
1025 {
1026 case D3DSPR_TEMP:
1027 tx_temp_alloc(tx, param->idx);
1028 src = ureg_src(tx->regs.r[param->idx]);
1029 break;
1030 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1031 case D3DSPR_ADDR:
1032 if (IS_VS) {
1033 assert(param->idx == 0);
1034 /* the address register (vs only) must be
1035 * assigned before use */
1036 assert(!ureg_dst_is_undef(tx->regs.a0));
1037 /* Round to lowest for vs1.1 (contrary to the doc), else
1038 * round to nearest */
1039 if (tx->version.major < 2 && tx->version.minor < 2)
1040 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1041 else
1042 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1043 src = ureg_src(tx->regs.address);
1044 } else {
1045 if (tx->version.major < 2 && tx->version.minor < 4) {
1046 /* no subroutines, so should be defined */
1047 src = ureg_src(tx->regs.tS[param->idx]);
1048 } else {
1049 tx_texcoord_alloc(tx, param->idx);
1050 src = tx->regs.vT[param->idx];
1051 }
1052 }
1053 break;
1054 case D3DSPR_INPUT:
1055 if (IS_VS) {
1056 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1057 } else {
1058 if (tx->version.major < 3) {
1059 src = ureg_DECL_fs_input_centroid(
1060 ureg, TGSI_SEMANTIC_COLOR, param->idx,
1061 tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE,
1062 tx->info->force_color_in_centroid ?
1063 TGSI_INTERPOLATE_LOC_CENTROID : 0,
1064 0, 1);
1065 } else {
1066 if(param->rel) {
1067 /* Copy all inputs (non consecutive)
1068 * to a temp array (consecutive).
1069 * This is not good for performance.
1070 * A better way would be to have the inputs
1071 * consecutive (would need to implement an alternative
1072 * way to match vs outputs and ps inputs).
1073 * However even with the better way, the temp array
1074 * copy would still be needed if some inputs
1075 * are not GENERIC or if they have different
1076 * interpolation flags. */
1077 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1078 int i;
1079 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1080 for (i = 0; i < 10; i++) {
1081 if (!ureg_src_is_undef(tx->regs.v[i]))
1082 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1083 else
1084 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1085 }
1086 }
1087 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1088 } else {
1089 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1090 src = tx->regs.v[param->idx];
1091 }
1092 }
1093 }
1094 if (param->rel)
1095 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1096 break;
1097 case D3DSPR_PREDICATE:
1098 if (ureg_dst_is_undef(tx->regs.predicate)) {
1099 /* Forbidden to use the predicate register before being set */
1100 tx->failure = true;
1101 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1102 }
1103 src = ureg_src(tx->regs.predicate);
1104 break;
1105 case D3DSPR_SAMPLER:
1106 assert(param->mod == NINED3DSPSM_NONE);
1107 /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1108 src = ureg_DECL_sampler(ureg, param->idx);
1109 break;
1110 case D3DSPR_CONST:
1111 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1112 src = nine_float_constant_src(tx, param->idx);
1113 if (param->rel) {
1114 tx->indirect_const_access = true;
1115 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1116 }
1117 }
1118 if (!IS_VS && tx->version.major < 2) {
1119 /* ps 1.X clamps constants */
1120 tmp = tx_scratch(tx);
1121 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1122 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1123 src = ureg_src(tmp);
1124 }
1125 break;
1126 case D3DSPR_CONST2:
1127 case D3DSPR_CONST3:
1128 case D3DSPR_CONST4:
1129 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1130 assert(!"CONST2/3/4");
1131 src = ureg_imm1f(ureg, 0.0f);
1132 break;
1133 case D3DSPR_CONSTINT:
1134 /* relative addressing only possible for float constants in vs */
1135 if (!tx_lconsti(tx, &src, param->idx))
1136 src = nine_integer_constant_src(tx, param->idx);
1137 break;
1138 case D3DSPR_CONSTBOOL:
1139 if (!tx_lconstb(tx, &src, param->idx))
1140 src = nine_boolean_constant_src(tx, param->idx);
1141 break;
1142 case D3DSPR_LOOP:
1143 if (ureg_dst_is_undef(tx->regs.address))
1144 tx->regs.address = ureg_DECL_address(ureg);
1145 if (!tx->native_integers)
1146 ureg_ARR(ureg, tx->regs.address,
1147 ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1148 else
1149 ureg_UARL(ureg, tx->regs.address,
1150 ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1151 src = ureg_src(tx->regs.address);
1152 break;
1153 case D3DSPR_MISCTYPE:
1154 switch (param->idx) {
1155 case D3DSMO_POSITION:
1156 if (ureg_src_is_undef(tx->regs.vPos))
1157 tx->regs.vPos = nine_get_position_input(tx);
1158 if (tx->shift_wpos) {
1159 /* TODO: do this only once */
1160 struct ureg_dst wpos = tx_scratch(tx);
1161 ureg_ADD(ureg, wpos, tx->regs.vPos,
1162 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1163 src = ureg_src(wpos);
1164 } else {
1165 src = tx->regs.vPos;
1166 }
1167 break;
1168 case D3DSMO_FACE:
1169 if (ureg_src_is_undef(tx->regs.vFace)) {
1170 if (tx->face_is_sysval_integer) {
1171 tmp = ureg_DECL_temporary(ureg);
1172 tx->regs.vFace =
1173 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1174
1175 /* convert bool to float */
1176 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1177 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1178 tx->regs.vFace = ureg_src(tmp);
1179 } else {
1180 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1181 TGSI_SEMANTIC_FACE, 0,
1182 TGSI_INTERPOLATE_CONSTANT);
1183 }
1184 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1185 }
1186 src = tx->regs.vFace;
1187 break;
1188 default:
1189 assert(!"invalid src D3DSMO");
1190 break;
1191 }
1192 break;
1193 case D3DSPR_TEMPFLOAT16:
1194 break;
1195 default:
1196 assert(!"invalid src D3DSPR");
1197 }
1198
1199 switch (param->mod) {
1200 case NINED3DSPSM_DW:
1201 tmp = tx_scratch(tx);
1202 /* NOTE: app is not allowed to read w with this modifier */
1203 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1204 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1205 src = ureg_src(tmp);
1206 break;
1207 case NINED3DSPSM_DZ:
1208 tmp = tx_scratch(tx);
1209 /* NOTE: app is not allowed to read z with this modifier */
1210 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1211 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1212 src = ureg_src(tmp);
1213 break;
1214 default:
1215 break;
1216 }
1217
1218 if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1219 src = ureg_swizzle(src,
1220 (param->swizzle >> 0) & 0x3,
1221 (param->swizzle >> 2) & 0x3,
1222 (param->swizzle >> 4) & 0x3,
1223 (param->swizzle >> 6) & 0x3);
1224
1225 switch (param->mod) {
1226 case NINED3DSPSM_ABS:
1227 src = ureg_abs(src);
1228 break;
1229 case NINED3DSPSM_ABSNEG:
1230 src = ureg_negate(ureg_abs(src));
1231 break;
1232 case NINED3DSPSM_NEG:
1233 src = ureg_negate(src);
1234 break;
1235 case NINED3DSPSM_BIAS:
1236 tmp = tx_scratch(tx);
1237 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1238 src = ureg_src(tmp);
1239 break;
1240 case NINED3DSPSM_BIASNEG:
1241 tmp = tx_scratch(tx);
1242 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1243 src = ureg_src(tmp);
1244 break;
1245 case NINED3DSPSM_NOT:
1246 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1247 tmp = tx_scratch(tx);
1248 ureg_NOT(ureg, tmp, src);
1249 src = ureg_src(tmp);
1250 break;
1251 } else { /* predicate */
1252 tmp = tx_scratch(tx);
1253 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1254 src = ureg_src(tmp);
1255 }
1256 FALLTHROUGH;
1257 case NINED3DSPSM_COMP:
1258 tmp = tx_scratch(tx);
1259 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1260 src = ureg_src(tmp);
1261 break;
1262 case NINED3DSPSM_DZ:
1263 case NINED3DSPSM_DW:
1264 /* Already handled*/
1265 break;
1266 case NINED3DSPSM_SIGN:
1267 tmp = tx_scratch(tx);
1268 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1269 src = ureg_src(tmp);
1270 break;
1271 case NINED3DSPSM_SIGNNEG:
1272 tmp = tx_scratch(tx);
1273 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1274 src = ureg_src(tmp);
1275 break;
1276 case NINED3DSPSM_X2:
1277 tmp = tx_scratch(tx);
1278 ureg_ADD(ureg, tmp, src, src);
1279 src = ureg_src(tmp);
1280 break;
1281 case NINED3DSPSM_X2NEG:
1282 tmp = tx_scratch(tx);
1283 ureg_ADD(ureg, tmp, src, src);
1284 src = ureg_negate(ureg_src(tmp));
1285 break;
1286 default:
1287 assert(param->mod == NINED3DSPSM_NONE);
1288 break;
1289 }
1290
1291 return src;
1292 }
1293
1294 static struct ureg_dst
1295 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1296 {
1297 struct ureg_dst dst;
1298
1299 switch (param->file)
1300 {
1301 case D3DSPR_TEMP:
1302 assert(!param->rel);
1303 tx_temp_alloc(tx, param->idx);
1304 dst = tx->regs.r[param->idx];
1305 break;
1306 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1307 case D3DSPR_ADDR:
1308 assert(!param->rel);
1309 if (tx->version.major < 2 && !IS_VS) {
1310 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1311 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1312 dst = tx->regs.tS[param->idx];
1313 } else
1314 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1315 tx_texcoord_alloc(tx, param->idx);
1316 dst = ureg_dst(tx->regs.vT[param->idx]);
1317 } else {
1318 tx_addr_alloc(tx, param->idx);
1319 dst = tx->regs.a0;
1320 }
1321 break;
1322 case D3DSPR_RASTOUT:
1323 assert(!param->rel);
1324 switch (param->idx) {
1325 case 0:
1326 if (ureg_dst_is_undef(tx->regs.oPos)) {
1327 if (tx->info->clip_plane_emulation > 0) {
1328 tx->regs.oPos = ureg_DECL_temporary(tx->ureg);
1329 } else {
1330 tx->regs.oPos = tx->regs.oPos_out;
1331 }
1332 }
1333 dst = tx->regs.oPos;
1334 break;
1335 case 1:
1336 if (ureg_dst_is_undef(tx->regs.oFog))
1337 tx->regs.oFog =
1338 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1339 dst = tx->regs.oFog;
1340 break;
1341 case 2:
1342 if (ureg_dst_is_undef(tx->regs.oPts))
1343 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1344 dst = tx->regs.oPts;
1345 break;
1346 default:
1347 assert(0);
1348 break;
1349 }
1350 break;
1351 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1352 case D3DSPR_OUTPUT:
1353 if (tx->version.major < 3) {
1354 assert(!param->rel);
1355 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1356 } else {
1357 assert(!param->rel); /* TODO */
1358 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1359 dst = tx->regs.o[param->idx];
1360 }
1361 break;
1362 case D3DSPR_ATTROUT: /* VS */
1363 case D3DSPR_COLOROUT: /* PS */
1364 assert(param->idx >= 0 && param->idx < 4);
1365 assert(!param->rel);
1366 tx->info->rt_mask |= 1 << param->idx;
1367 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1368 /* ps < 3: oCol[0] will have fog blending afterward
1369 * ps: oCol[0] might have alphatest afterward */
1370 if (!IS_VS && param->idx == 0) {
1371 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1372 } else {
1373 tx->regs.oCol[param->idx] =
1374 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1375 }
1376 }
1377 dst = tx->regs.oCol[param->idx];
1378 if (IS_VS && tx->version.major < 3)
1379 dst = ureg_saturate(dst);
1380 break;
1381 case D3DSPR_DEPTHOUT:
1382 assert(!param->rel);
1383 if (ureg_dst_is_undef(tx->regs.oDepth))
1384 tx->regs.oDepth =
1385 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1386 TGSI_WRITEMASK_Z, 0, 1);
1387 dst = tx->regs.oDepth; /* XXX: must write .z component */
1388 break;
1389 case D3DSPR_PREDICATE:
1390 if (ureg_dst_is_undef(tx->regs.predicate))
1391 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1392 dst = tx->regs.predicate;
1393 break;
1394 case D3DSPR_TEMPFLOAT16:
1395 DBG("unhandled D3DSPR: %u\n", param->file);
1396 break;
1397 default:
1398 assert(!"invalid dst D3DSPR");
1399 break;
1400 }
1401 if (param->rel)
1402 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1403
1404 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1405 dst = ureg_writemask(dst, param->mask);
1406 if (param->mod & NINED3DSPDM_SATURATE)
1407 dst = ureg_saturate(dst);
1408
1409 if (tx->predicated_activated) {
1410 tx->regs.predicate_dst = dst;
1411 dst = tx->regs.predicate_tmp;
1412 }
1413
1414 return dst;
1415 }
1416
1417 static struct ureg_dst
1418 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1419 {
1420 if (param->shift) {
1421 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1422 return tx->regs.tdst;
1423 }
1424 return _tx_dst_param(tx, param);
1425 }
1426
1427 static void
1428 tx_apply_dst0_modifiers(struct shader_translator *tx)
1429 {
1430 struct ureg_dst rdst;
1431 float f;
1432
1433 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1434 return;
1435 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1436
1437 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1438
1439 if (tx->insn.dst[0].shift < 0)
1440 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1441 else
1442 f = 1 << tx->insn.dst[0].shift;
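/* e.g. an _x2 destination modifier has shift == 1, giving f == 2.0,
 * while _d4 has shift == -2, giving f == 0.25. */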
1443
1444 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1445 }
1446
1447 static struct ureg_src
1448 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1449 {
1450 struct ureg_src src;
1451
1452 assert(!param->shift);
1453 assert(!(param->mod & NINED3DSPDM_SATURATE));
1454
1455 switch (param->file) {
1456 case D3DSPR_INPUT:
1457 if (IS_VS) {
1458 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1459 } else {
1460 assert(!param->rel);
1461 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1462 src = tx->regs.v[param->idx];
1463 }
1464 break;
1465 default:
1466 src = ureg_src(tx_dst_param(tx, param));
1467 break;
1468 }
1469 if (param->rel)
1470 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1471
1472 if (!param->mask)
1473 WARN("mask is 0, using identity swizzle\n");
1474
1475 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1476 char s[4];
1477 int n;
1478 int c;
1479 for (n = 0, c = 0; c < 4; ++c)
1480 if (param->mask & (1 << c))
1481 s[n++] = c;
1482 assert(n);
1483 for (c = n; c < 4; ++c)
1484 s[c] = s[n - 1];
1485 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1486 }
1487 return src;
1488 }
1489
1490 static HRESULT
1491 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1492 {
1493 struct ureg_program *ureg = tx->ureg;
1494 struct ureg_dst dst;
1495 struct ureg_src src[2];
1496 struct sm1_src_param *src_mat = &tx->insn.src[1];
1497 unsigned i;
1498
1499 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1500 src[0] = tx_src_param(tx, &tx->insn.src[0]);
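/* e.g. m3x2 (k == 3, n == 2) expands to two DP3s writing dst.x and dst.y
 * against two consecutive matrix rows starting at src[1]. */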
1501
1502 for (i = 0; i < n; i++)
1503 {
1504 const unsigned m = (1 << i);
1505
1506 src[1] = tx_src_param(tx, src_mat);
1507 src_mat->idx++;
1508
1509 if (!(dst.WriteMask & m))
1510 continue;
1511
1512 /* XXX: src == dst case ? */
1513
1514 switch (k) {
1515 case 3:
1516 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1517 break;
1518 case 4:
1519 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1520 break;
1521 default:
1522 DBG("invalid operation: M%ux%u\n", m, n);
1523 break;
1524 }
1525 }
1526
1527 return D3D_OK;
1528 }
1529
1530 #define VNOTSUPPORTED 0, 0
1531 #define V(maj, min) (((maj) << 8) | (min))
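/* e.g. V(2, 0) encodes shader model 2.0 as 0x200, used for the
 * vert_version/frag_version ranges in struct sm1_op_info. */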
1532
1533 static inline const char *
1534 d3dsio_to_string( unsigned opcode )
1535 {
1536 static const char *names[] = {
1537 "NOP",
1538 "MOV",
1539 "ADD",
1540 "SUB",
1541 "MAD",
1542 "MUL",
1543 "RCP",
1544 "RSQ",
1545 "DP3",
1546 "DP4",
1547 "MIN",
1548 "MAX",
1549 "SLT",
1550 "SGE",
1551 "EXP",
1552 "LOG",
1553 "LIT",
1554 "DST",
1555 "LRP",
1556 "FRC",
1557 "M4x4",
1558 "M4x3",
1559 "M3x4",
1560 "M3x3",
1561 "M3x2",
1562 "CALL",
1563 "CALLNZ",
1564 "LOOP",
1565 "RET",
1566 "ENDLOOP",
1567 "LABEL",
1568 "DCL",
1569 "POW",
1570 "CRS",
1571 "SGN",
1572 "ABS",
1573 "NRM",
1574 "SINCOS",
1575 "REP",
1576 "ENDREP",
1577 "IF",
1578 "IFC",
1579 "ELSE",
1580 "ENDIF",
1581 "BREAK",
1582 "BREAKC",
1583 "MOVA",
1584 "DEFB",
1585 "DEFI",
1586 NULL,
1587 NULL,
1588 NULL,
1589 NULL,
1590 NULL,
1591 NULL,
1592 NULL,
1593 NULL,
1594 NULL,
1595 NULL,
1596 NULL,
1597 NULL,
1598 NULL,
1599 NULL,
1600 NULL,
1601 "TEXCOORD",
1602 "TEXKILL",
1603 "TEX",
1604 "TEXBEM",
1605 "TEXBEML",
1606 "TEXREG2AR",
1607 "TEXREG2GB",
1608 "TEXM3x2PAD",
1609 "TEXM3x2TEX",
1610 "TEXM3x3PAD",
1611 "TEXM3x3TEX",
1612 NULL,
1613 "TEXM3x3SPEC",
1614 "TEXM3x3VSPEC",
1615 "EXPP",
1616 "LOGP",
1617 "CND",
1618 "DEF",
1619 "TEXREG2RGB",
1620 "TEXDP3TEX",
1621 "TEXM3x2DEPTH",
1622 "TEXDP3",
1623 "TEXM3x3",
1624 "TEXDEPTH",
1625 "CMP",
1626 "BEM",
1627 "DP2ADD",
1628 "DSX",
1629 "DSY",
1630 "TEXLDD",
1631 "SETP",
1632 "TEXLDL",
1633 "BREAKP"
1634 };
1635
1636 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1637
1638 switch (opcode) {
1639 case D3DSIO_PHASE: return "PHASE";
1640 case D3DSIO_COMMENT: return "COMMENT";
1641 case D3DSIO_END: return "END";
1642 default:
1643 return NULL;
1644 }
1645 }
1646
1647 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1648 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1649 (inst).vert_version.max | \
1650 (inst).frag_version.min | \
1651 (inst).frag_version.max)
1652
1653 #define SPECIAL(name) \
1654 NineTranslateInstruction_##name
1655
1656 #define DECL_SPECIAL(name) \
1657 static HRESULT \
1658 NineTranslateInstruction_##name( struct shader_translator *tx )
1659
1660 static HRESULT
1661 NineTranslateInstruction_Generic(struct shader_translator *);
1662
1663 DECL_SPECIAL(NOP)
1664 {
1665 /* Nothing to do. NOP was used to avoid hangs
1666 * with very old d3d drivers. */
1667 return D3D_OK;
1668 }
1669
1670 DECL_SPECIAL(SUB)
1671 {
1672 struct ureg_program *ureg = tx->ureg;
1673 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1674 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1675 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1676
1677 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1678 return D3D_OK;
1679 }
1680
1681 DECL_SPECIAL(ABS)
1682 {
1683 struct ureg_program *ureg = tx->ureg;
1684 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1685 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1686
1687 ureg_MOV(ureg, dst, ureg_abs(src));
1688 return D3D_OK;
1689 }
1690
1691 DECL_SPECIAL(XPD)
1692 {
1693 struct ureg_program *ureg = tx->ureg;
1694 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1695 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1696 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1697
1698 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1699 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1700 TGSI_SWIZZLE_X, 0),
1701 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1702 TGSI_SWIZZLE_Y, 0));
1703 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1704 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1705 TGSI_SWIZZLE_Y, 0),
1706 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1707 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1708 ureg_src(dst));
1709 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1710 ureg_imm1f(ureg, 1));
1711 return D3D_OK;
1712 }
1713
1714 DECL_SPECIAL(M4x4)
1715 {
1716 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1717 }
1718
1719 DECL_SPECIAL(M4x3)
1720 {
1721 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1722 }
1723
1724 DECL_SPECIAL(M3x4)
1725 {
1726 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1727 }
1728
1729 DECL_SPECIAL(M3x3)
1730 {
1731 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1732 }
1733
1734 DECL_SPECIAL(M3x2)
1735 {
1736 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1737 }
1738
1739 DECL_SPECIAL(CMP)
1740 {
1741 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1742 tx_src_param(tx, &tx->insn.src[0]),
1743 tx_src_param(tx, &tx->insn.src[2]),
1744 tx_src_param(tx, &tx->insn.src[1]));
1745 return D3D_OK;
1746 }
1747
1748 DECL_SPECIAL(CND)
1749 {
1750 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1751 struct ureg_dst cgt;
1752 struct ureg_src cnd;
1753
1754 /* the coissue flag was a hint advising compilers to
1755 * execute two operations at the same time, in cases where
1756 * the two executions had the same dst with different channels.
1757 * It has no effect on current hw. However it seems CND
1758 * is affected. The handling of this very specific case
1759 * below mimics wine behaviour */
1760 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1761 ureg_MOV(tx->ureg,
1762 dst, tx_src_param(tx, &tx->insn.src[1]));
1763 return D3D_OK;
1764 }
1765
1766 cnd = tx_src_param(tx, &tx->insn.src[0]);
1767 cgt = tx_scratch(tx);
1768
1769 if (tx->version.major == 1 && tx->version.minor < 4)
1770 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1771
1772 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1773
1774 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1775 tx_src_param(tx, &tx->insn.src[1]),
1776 tx_src_param(tx, &tx->insn.src[2]));
1777 return D3D_OK;
1778 }
1779
1780 DECL_SPECIAL(CALL)
1781 {
1782 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1783 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1784 return D3D_OK;
1785 }
1786
1787 DECL_SPECIAL(CALLNZ)
1788 {
1789 struct ureg_program *ureg = tx->ureg;
1790 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1791
1792 if (!tx->native_integers)
1793 ureg_IF(ureg, src, tx_cond(tx));
1794 else
1795 ureg_UIF(ureg, src, tx_cond(tx));
1796 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1797 tx_endcond(tx);
1798 ureg_ENDIF(ureg);
1799 return D3D_OK;
1800 }
1801
1802 DECL_SPECIAL(LOOP)
1803 {
1804 struct ureg_program *ureg = tx->ureg;
1805 unsigned *label;
1806 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1807 struct ureg_dst ctr;
1808 struct ureg_dst aL;
1809 struct ureg_dst tmp;
1810 struct ureg_src ctrx;
1811
1812 label = tx_bgnloop(tx);
1813 ctr = tx_get_loopctr(tx, true);
1814 aL = tx_get_loopal(tx);
1815 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1816
1817 /* src: num_iterations*/
1818 ureg_MOV(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0),
1819 ureg_scalar(src, TGSI_SWIZZLE_X));
1820 /* al: unused - start_value of al - step for al - unused */
1821 ureg_MOV(ureg, aL, src);
1822 ureg_BGNLOOP(tx->ureg, label);
1823 tmp = tx_scratch_scalar(tx);
1824 /* Initially ctr.x contains the number of iterations.
1825 * ctr.y will contain the updated value of al.
1826 * We decrease ctr.x at the end of every iteration,
1827 * and stop when it reaches 0. */
1828
1829 if (!tx->native_integers) {
1830 /* case src and ctr contain floats */
1831 /* to avoid precision issue, we stop when ctr <= 0.5 */
1832 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1833 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1834 } else {
1835 /* case src and ctr contain integers */
1836 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1837 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1838 }
1839 ureg_BRK(ureg);
1840 tx_endcond(tx);
1841 ureg_ENDIF(ureg);
1842 return D3D_OK;
1843 }
1844
1845 DECL_SPECIAL(RET)
1846 {
1847 /* RET as the last instruction can be safely ignored.
1848 * Remove it to prevent crashes/warnings in case the underlying
1849 * driver doesn't implement arbitrary returns.
1850 */
1851 if (*(tx->parse_next) != NINED3DSP_END) {
1852 ureg_RET(tx->ureg);
1853 }
1854 return D3D_OK;
1855 }
1856
1857 DECL_SPECIAL(ENDLOOP)
1858 {
1859 struct ureg_program *ureg = tx->ureg;
1860 struct ureg_dst ctr = tx_get_loopctr(tx, true);
1861 struct ureg_dst al = tx_get_loopal(tx);
1862 struct ureg_dst dst_ctrx, dst_al;
1863 struct ureg_src src_ctr, al_counter;
1864
1865 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1866 dst_al = ureg_writemask(al, NINED3DSP_WRITEMASK_1);
1867 src_ctr = ureg_src(ctr);
1868 al_counter = ureg_scalar(ureg_src(al), TGSI_SWIZZLE_Z);
1869
1870 /* ctr.x -= 1
1871 * al.y (aL) += step */
1872 if (!tx->native_integers) {
1873 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1874 ureg_ADD(ureg, dst_al, ureg_src(al), al_counter);
1875 } else {
1876 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1877 ureg_UADD(ureg, dst_al, ureg_src(al), al_counter);
1878 }
1879 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1880 return D3D_OK;
1881 }
1882
1883 DECL_SPECIAL(LABEL)
1884 {
1885 unsigned k = tx->num_inst_labels;
1886 unsigned n = tx->insn.src[0].idx;
1887 assert(n < 2048);
1888 if (n >= k)
1889 tx->inst_labels = REALLOC(tx->inst_labels,
1890 k * sizeof(tx->inst_labels[0]),
1891 (n + 1) * sizeof(tx->inst_labels[0]));
1892
1893 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1894 return D3D_OK;
1895 }
1896
1897 DECL_SPECIAL(SINCOS)
1898 {
1899 struct ureg_program *ureg = tx->ureg;
1900 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1901 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1902 struct ureg_dst tmp = tx_scratch_scalar(tx);
1903
1904 assert(!(dst.WriteMask & 0xc));
1905
1906 /* Copying to a temporary register avoids src/dst aliasing.
1907 * src is supposed to have replicated swizzle. */
1908 ureg_MOV(ureg, tmp, src);
1909
1910 /* z undefined, w untouched */
1911 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1912 tx_src_scalar(tmp));
1913 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1914 tx_src_scalar(tmp));
1915 return D3D_OK;
1916 }
1917
1918 DECL_SPECIAL(SGN)
1919 {
1920 ureg_SSG(tx->ureg,
1921 tx_dst_param(tx, &tx->insn.dst[0]),
1922 tx_src_param(tx, &tx->insn.src[0]));
1923 return D3D_OK;
1924 }
1925
1926 DECL_SPECIAL(REP)
1927 {
1928 struct ureg_program *ureg = tx->ureg;
1929 unsigned *label;
1930 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1931 struct ureg_dst ctr;
1932 struct ureg_dst tmp;
1933 struct ureg_src ctrx;
1934
1935 label = tx_bgnloop(tx);
1936 ctr = ureg_writemask(tx_get_loopctr(tx, false), NINED3DSP_WRITEMASK_0);
1937 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1938
1939 /* NOTE: rep must be constant, so we don't have to save the count */
1940 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1941
1942 /* rep: num_iterations - 0 - 0 - 0 */
1943 ureg_MOV(ureg, ctr, rep);
1944 ureg_BGNLOOP(ureg, label);
1945 tmp = tx_scratch_scalar(tx);
1946 /* Initially ctr.x contains the number of iterations.
1947 * We decrease ctr.x at the end of every iteration,
1948 * and stop when it reaches 0. */
1949
1950 if (!tx->native_integers) {
1951 /* case where src and ctr contain floats */
1952 /* to avoid precision issues, we stop when ctr <= 0.5 */
1953 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1954 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1955 } else {
1956 /* case where src and ctr contain integers */
1957 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1958 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1959 }
1960 ureg_BRK(ureg);
1961 tx_endcond(tx);
1962 ureg_ENDIF(ureg);
1963
1964 return D3D_OK;
1965 }
1966
1967 DECL_SPECIAL(ENDREP)
1968 {
1969 struct ureg_program *ureg = tx->ureg;
1970 struct ureg_dst ctr = tx_get_loopctr(tx, false);
1971 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1972 struct ureg_src src_ctr = ureg_src(ctr);
1973
1974 /* ctr.x -= 1 */
1975 if (!tx->native_integers)
1976 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1977 else
1978 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1979
1980 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1981 return D3D_OK;
1982 }
1983
1984 DECL_SPECIAL(ENDIF)
1985 {
1986 tx_endcond(tx);
1987 ureg_ENDIF(tx->ureg);
1988 return D3D_OK;
1989 }
1990
1991 DECL_SPECIAL(IF)
1992 {
1993 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1994
1995 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1996 ureg_UIF(tx->ureg, src, tx_cond(tx));
1997 else
1998 ureg_IF(tx->ureg, src, tx_cond(tx));
1999
2000 return D3D_OK;
2001 }
2002
2003 static inline unsigned
2004 sm1_insn_flags_to_tgsi_setop(BYTE flags)
2005 {
2006 switch (flags) {
2007 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
2008 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
2009 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
2010 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
2011 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
2012 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
2013 default:
2014 assert(!"invalid comparison flags");
2015 return TGSI_OPCODE_SGT;
2016 }
2017 }
2018
2019 DECL_SPECIAL(IFC)
2020 {
2021 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2022 struct ureg_src src[2];
2023 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2024 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2025 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2026 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2027 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2028 return D3D_OK;
2029 }
2030
2031 DECL_SPECIAL(ELSE)
2032 {
2033 ureg_ELSE(tx->ureg, tx_elsecond(tx));
2034 return D3D_OK;
2035 }
2036
2037 DECL_SPECIAL(BREAKC)
2038 {
2039 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2040 struct ureg_src src[2];
2041 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2042 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2043 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2044 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2045 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2046 ureg_BRK(tx->ureg);
2047 tx_endcond(tx);
2048 ureg_ENDIF(tx->ureg);
2049 return D3D_OK;
2050 }
2051
2052 static const char *sm1_declusage_names[] =
2053 {
2054 [D3DDECLUSAGE_POSITION] = "POSITION",
2055 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2056 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2057 [D3DDECLUSAGE_NORMAL] = "NORMAL",
2058 [D3DDECLUSAGE_PSIZE] = "PSIZE",
2059 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2060 [D3DDECLUSAGE_TANGENT] = "TANGENT",
2061 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2062 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2063 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2064 [D3DDECLUSAGE_COLOR] = "COLOR",
2065 [D3DDECLUSAGE_FOG] = "FOG",
2066 [D3DDECLUSAGE_DEPTH] = "DEPTH",
2067 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2068 };
2069
2070 static inline unsigned
2071 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2072 {
2073 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2074 }
2075
2076 static void
2077 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2078 bool tc,
2079 struct sm1_semantic *dcl)
2080 {
2081 BYTE index = dcl->usage_idx;
2082
2083 /* Everything that does not map to a specific TGSI_SEMANTIC_**** is
2084 * mapped to TGSI_SEMANTIC_GENERIC with an index.
2085 *
2086 * The index can be any UINT16 value and usage_idx is a BYTE,
2087 * so we can fit everything. It doesn't matter whether the indices
2088 * are close together or low.
2089 *
2090 *
2091 * POSITION >= 1: 10 * index + 7
2092 * COLOR >= 2: 10 * (index-1) + 8
2093 * FOG: 16
2094 * TEXCOORD[0..15]: index
2095 * BLENDWEIGHT: 10 * index + 19
2096 * BLENDINDICES: 10 * index + 20
2097 * NORMAL: 10 * index + 21
2098 * TANGENT: 10 * index + 22
2099 * BINORMAL: 10 * index + 23
2100 * TESSFACTOR: 10 * index + 24
2101 */
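/* For example, BLENDWEIGHT with usage_idx 0 maps to GENERIC[19] and
 * TANGENT with usage_idx 1 maps to GENERIC[32] (10 * 1 + 22). */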
2102
2103 switch (dcl->usage) {
2104 case D3DDECLUSAGE_POSITION:
2105 case D3DDECLUSAGE_POSITIONT:
2106 case D3DDECLUSAGE_DEPTH:
2107 if (index == 0) {
2108 sem->Name = TGSI_SEMANTIC_POSITION;
2109 sem->Index = 0;
2110 } else {
2111 sem->Name = TGSI_SEMANTIC_GENERIC;
2112 sem->Index = 10 * index + 7;
2113 }
2114 break;
2115 case D3DDECLUSAGE_COLOR:
2116 if (index < 2) {
2117 sem->Name = TGSI_SEMANTIC_COLOR;
2118 sem->Index = index;
2119 } else {
2120 sem->Name = TGSI_SEMANTIC_GENERIC;
2121 sem->Index = 10 * (index-1) + 8;
2122 }
2123 break;
2124 case D3DDECLUSAGE_FOG:
2125 assert(index == 0);
2126 sem->Name = TGSI_SEMANTIC_GENERIC;
2127 sem->Index = 16;
2128 break;
2129 case D3DDECLUSAGE_PSIZE:
2130 assert(index == 0);
2131 sem->Name = TGSI_SEMANTIC_PSIZE;
2132 sem->Index = 0;
2133 break;
2134 case D3DDECLUSAGE_TEXCOORD:
2135 assert(index < 16);
2136 if (index < 8 && tc)
2137 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2138 else
2139 sem->Name = TGSI_SEMANTIC_GENERIC;
2140 sem->Index = index;
2141 break;
2142 case D3DDECLUSAGE_BLENDWEIGHT:
2143 sem->Name = TGSI_SEMANTIC_GENERIC;
2144 sem->Index = 10 * index + 19;
2145 break;
2146 case D3DDECLUSAGE_BLENDINDICES:
2147 sem->Name = TGSI_SEMANTIC_GENERIC;
2148 sem->Index = 10 * index + 20;
2149 break;
2150 case D3DDECLUSAGE_NORMAL:
2151 sem->Name = TGSI_SEMANTIC_GENERIC;
2152 sem->Index = 10 * index + 21;
2153 break;
2154 case D3DDECLUSAGE_TANGENT:
2155 sem->Name = TGSI_SEMANTIC_GENERIC;
2156 sem->Index = 10 * index + 22;
2157 break;
2158 case D3DDECLUSAGE_BINORMAL:
2159 sem->Name = TGSI_SEMANTIC_GENERIC;
2160 sem->Index = 10 * index + 23;
2161 break;
2162 case D3DDECLUSAGE_TESSFACTOR:
2163 sem->Name = TGSI_SEMANTIC_GENERIC;
2164 sem->Index = 10 * index + 24;
2165 break;
2166 case D3DDECLUSAGE_SAMPLE:
2167 sem->Name = TGSI_SEMANTIC_COUNT;
2168 sem->Index = 0;
2169 break;
2170 default:
2171 unreachable("Invalid DECLUSAGE.");
2172 break;
2173 }
2174 }
2175
2176 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2177 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2178 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2179 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2180 static inline unsigned
2181 d3dstt_to_tgsi_tex(BYTE sampler_type)
2182 {
2183 switch (sampler_type) {
2184 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2185 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2186 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2187 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2188 default:
2189 assert(0);
2190 return TGSI_TEXTURE_UNKNOWN;
2191 }
2192 }
2193 static inline unsigned
2194 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2195 {
2196 switch (sampler_type) {
2197 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2198 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2199 case NINED3DSTT_VOLUME:
2200 case NINED3DSTT_CUBE:
2201 default:
2202 assert(0);
2203 return TGSI_TEXTURE_UNKNOWN;
2204 }
2205 }
2206 static inline unsigned
2207 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2208 {
2209 bool shadow = !!(info->sampler_mask_shadow & (1 << stage));
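/* Each stage's texture-target code is packed as 2 bits per stage in
 * sampler_ps1xtypes, hence the (stage * 2) shift below. */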
2210 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2211 case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2212 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2213 case 3: return TGSI_TEXTURE_3D;
2214 default:
2215 return TGSI_TEXTURE_CUBE;
2216 }
2217 }
2218
2219 static const char *
2220 sm1_sampler_type_name(BYTE sampler_type)
2221 {
2222 switch (sampler_type) {
2223 case NINED3DSTT_1D: return "1D";
2224 case NINED3DSTT_2D: return "2D";
2225 case NINED3DSTT_VOLUME: return "VOLUME";
2226 case NINED3DSTT_CUBE: return "CUBE";
2227 default:
2228 return "(D3DSTT_?)";
2229 }
2230 }
2231
2232 static inline unsigned
2233 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2234 {
2235 switch (sem->Name) {
2236 case TGSI_SEMANTIC_POSITION:
2237 case TGSI_SEMANTIC_NORMAL:
2238 return TGSI_INTERPOLATE_LINEAR;
2239 case TGSI_SEMANTIC_BCOLOR:
2240 case TGSI_SEMANTIC_COLOR:
2241 return TGSI_INTERPOLATE_COLOR;
2242 case TGSI_SEMANTIC_FOG:
2243 case TGSI_SEMANTIC_GENERIC:
2244 case TGSI_SEMANTIC_TEXCOORD:
2245 case TGSI_SEMANTIC_CLIPDIST:
2246 case TGSI_SEMANTIC_CLIPVERTEX:
2247 return TGSI_INTERPOLATE_PERSPECTIVE;
2248 case TGSI_SEMANTIC_EDGEFLAG:
2249 case TGSI_SEMANTIC_FACE:
2250 case TGSI_SEMANTIC_INSTANCEID:
2251 case TGSI_SEMANTIC_PCOORD:
2252 case TGSI_SEMANTIC_PRIMID:
2253 case TGSI_SEMANTIC_PSIZE:
2254 case TGSI_SEMANTIC_VERTEXID:
2255 return TGSI_INTERPOLATE_CONSTANT;
2256 default:
2257 assert(0);
2258 return TGSI_INTERPOLATE_CONSTANT;
2259 }
2260 }
2261
2262 DECL_SPECIAL(DCL)
2263 {
2264 struct ureg_program *ureg = tx->ureg;
2265 bool is_input;
2266 bool is_sampler;
2267 struct tgsi_declaration_semantic tgsi;
2268 struct sm1_semantic sem;
2269 sm1_read_semantic(tx, &sem);
2270
2271 is_input = sem.reg.file == D3DSPR_INPUT;
2272 is_sampler =
2273 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2274
2275 DUMP("DCL ");
2276 sm1_dump_dst_param(&sem.reg);
2277 if (is_sampler)
2278 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2279 else
2280 if (tx->version.major >= 3)
2281 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2282 else
2283 if (sem.usage | sem.usage_idx)
2284 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2285 else
2286 DUMP("\n");
2287
2288 if (is_sampler) {
2289 const unsigned m = 1 << sem.reg.idx;
2290 ureg_DECL_sampler(ureg, sem.reg.idx);
2291 tx->info->sampler_mask |= m;
2292 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2293 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2294 d3dstt_to_tgsi_tex(sem.sampler_type);
2295 return D3D_OK;
2296 }
2297
2298 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2299 if (IS_VS) {
2300 if (is_input) {
2301 /* linkage outside of shader with vertex declaration */
2302 ureg_DECL_vs_input(ureg, sem.reg.idx);
2303 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2304 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2305 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2306 /* NOTE: preserving order in case of indirect access */
2307 } else
2308 if (tx->version.major >= 3) {
2309 /* SM2 output semantic determined by file */
2310 assert(sem.reg.mask != 0);
2311 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2312 tx->info->position_t = true;
2313 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2314 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't yet support packing");
2315 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2316 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2317 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2318 if ((tx->info->process_vertices || tx->info->clip_plane_emulation > 0) &&
2319 sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2320 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; /* TODO: probably not good to declare it twice */
2321 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2322 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2323 }
2324
2325 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2326 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2327 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2328 }
2329 }
2330 } else {
2331 if (is_input && tx->version.major >= 3) {
2332 unsigned interp_flag;
2333 unsigned interp_location = 0;
2334 /* SM3 only, SM2 input semantic determined by file */
2335 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2336 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't yet support packing");
2337 /* PositionT and tessfactor forbidden */
2338 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2339 return D3DERR_INVALIDCALL;
2340
2341 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2342 /* Position0 is forbidden (likely because vPos already does that) */
2343 if (sem.usage == D3DDECLUSAGE_POSITION)
2344 return D3DERR_INVALIDCALL;
2345 /* Following code is for depth */
2346 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2347 return D3D_OK;
2348 }
2349
2350 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2351 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2352 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2353 interp_flag = nine_tgsi_to_interp_mode(&tgsi);
2354 /* We replace TGSI_INTERPOLATE_COLOR because some drivers don't support it,
2355 * and those that do support it apply the same replacement we do */
2356 if (interp_flag == TGSI_INTERPOLATE_COLOR)
2357 interp_flag = tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2358
2359 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2360 ureg, tgsi.Name, tgsi.Index,
2361 interp_flag,
2362 interp_location, 0, 1);
2363 } else
2364 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2365 /* FragColor or FragDepth */
2366 assert(sem.reg.mask != 0);
2367 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2368 0, 1);
2369 }
2370 }
2371 return D3D_OK;
2372 }
2373
2374 DECL_SPECIAL(DEF)
2375 {
2376 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2377 return D3D_OK;
2378 }
2379
2380 DECL_SPECIAL(DEFB)
2381 {
2382 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2383 return D3D_OK;
2384 }
2385
2386 DECL_SPECIAL(DEFI)
2387 {
2388 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2389 return D3D_OK;
2390 }
2391
2392 DECL_SPECIAL(POW)
2393 {
2394 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2395 struct ureg_src src[2] = {
2396 tx_src_param(tx, &tx->insn.src[0]),
2397 tx_src_param(tx, &tx->insn.src[1])
2398 };
2399 /* Anything^0 is 1, including 0^0.
2400 * Assume mul_zero_wins drivers already have
2401 * this behaviour. Emulate for the others. */
2402 if (tx->mul_zero_wins) {
2403 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2404 } else {
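/* CMP computes dst = (src0 < 0) ? src1 : src2. With src0 = -|exponent|,
 * the POW result is selected for a non-zero exponent and 1.0 is selected
 * when the exponent is 0, which yields 0^0 = 1 as required above. */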
2405 struct ureg_dst tmp = tx_scratch_scalar(tx);
2406 ureg_POW(tx->ureg, tmp, ureg_abs(src[0]), src[1]);
2407 ureg_CMP(tx->ureg, dst,
2408 ureg_negate(ureg_abs(ureg_scalar(src[1], TGSI_SWIZZLE_X))),
2409 tx_src_scalar(tmp), ureg_imm1f(tx->ureg, 1.0f));
2410 }
2411 return D3D_OK;
2412 }
2413
2414 /* Tests results on Win 10:
2415 * NV (NVIDIA GeForce GT 635M)
2416 * AMD (AMD Radeon HD 7730M)
2417 * INTEL (Intel(R) HD Graphics 4000)
2418 * PS2 and PS3:
2419 * RCP and RSQ can generate inf on NV and AMD.
2420 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2421 * NV: log not clamped
2422 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2423 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2424 * All devices have 0*anything = 0
2425 *
2426 * INTEL VS2 and VS3: same behaviour.
2427 * Some differences between VS2 and VS3 for constants defined with inf/NaN.
2428 * While PS3, VS3 and PS2 keep NaN and Inf shader constants unchanged,
2429 * VS2 seems to clamp them to zero (may be a test failure).
2430 * AMD VS2: unknown, VS3: very likely the behaviour of PS3
2431 * NV VS2 and VS3: very likely the behaviour of PS3
2432 * For both, Inf in VS becomes NaN in PS.
2433 * "Very likely" because the test was less extensive.
2434 *
2435 * Thus all clamping can be removed for shaders 2 and 3,
2436 * as long as 0*anything = 0.
2437 * Otherwise we clamp to enforce 0*anything = 0 (anything being then
2438 * neither inf nor NaN, since the user is unlikely to pass them
2439 * as constants).
2440 * The status for VS1 and PS1 is unknown.
2441 */
2442
2443 DECL_SPECIAL(RCP)
2444 {
2445 struct ureg_program *ureg = tx->ureg;
2446 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2447 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2448 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2449 ureg_RCP(ureg, tmp, src);
2450 if (!tx->mul_zero_wins) {
2451 /* FLT_MAX has issues with Rayman */
2452 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2453 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2454 }
2455 return D3D_OK;
2456 }
2457
2458 DECL_SPECIAL(RSQ)
2459 {
2460 struct ureg_program *ureg = tx->ureg;
2461 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2462 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2463 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2464 ureg_RSQ(ureg, tmp, ureg_abs(src));
2465 if (!tx->mul_zero_wins)
2466 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2467 return D3D_OK;
2468 }
2469
2470 DECL_SPECIAL(LOG)
2471 {
2472 struct ureg_program *ureg = tx->ureg;
2473 struct ureg_dst tmp = tx_scratch_scalar(tx);
2474 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2475 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2476 ureg_LG2(ureg, tmp, ureg_abs(src));
2477 if (tx->mul_zero_wins) {
2478 ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2479 } else {
2480 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2481 }
2482 return D3D_OK;
2483 }
2484
2485 DECL_SPECIAL(LIT)
2486 {
2487 struct ureg_program *ureg = tx->ureg;
2488 struct ureg_dst tmp = tx_scratch(tx);
2489 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2490 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2491 ureg_LIT(ureg, tmp, src);
2492 /* d3d9 LIT is the same as gallium LIT. One difference is that d3d9
2493 * states that dst.z is 0 when src.y <= 0, whereas the gallium definition
2494 * can assign it 0^0 if src.w=0, whose value is driver dependent. */
2495 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2496 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2497 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2498 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2499 return D3D_OK;
2500 }
2501
2502 DECL_SPECIAL(NRM)
2503 {
2504 struct ureg_program *ureg = tx->ureg;
2505 struct ureg_dst tmp = tx_scratch_scalar(tx);
2506 struct ureg_src nrm = tx_src_scalar(tmp);
2507 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2508 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2509 ureg_DP3(ureg, tmp, src, src);
2510 ureg_RSQ(ureg, tmp, nrm);
2511 if (!tx->mul_zero_wins)
2512 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2513 ureg_MUL(ureg, dst, src, nrm);
2514 return D3D_OK;
2515 }
2516
2517 DECL_SPECIAL(DP2ADD)
2518 {
2519 struct ureg_dst tmp = tx_scratch_scalar(tx);
2520 struct ureg_src dp2 = tx_src_scalar(tmp);
2521 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2522 struct ureg_src src[3];
2523 int i;
2524 for (i = 0; i < 3; ++i)
2525 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2526 assert_replicate_swizzle(&src[2]);
2527
2528 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2529 ureg_ADD(tx->ureg, dst, src[2], dp2);
2530
2531 return D3D_OK;
2532 }
2533
2534 DECL_SPECIAL(TEXCOORD)
2535 {
2536 struct ureg_program *ureg = tx->ureg;
2537 const unsigned s = tx->insn.dst[0].idx;
2538 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2539
2540 tx_texcoord_alloc(tx, s);
2541 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2542 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2543
2544 return D3D_OK;
2545 }
2546
2547 DECL_SPECIAL(TEXCOORD_ps14)
2548 {
2549 struct ureg_program *ureg = tx->ureg;
2550 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2551 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2552
2553 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2554
2555 ureg_MOV(ureg, dst, src);
2556
2557 return D3D_OK;
2558 }
2559
2560 DECL_SPECIAL(TEXKILL)
2561 {
2562 struct ureg_src reg;
2563
2564 if (tx->version.major > 1 || tx->version.minor > 3) {
2565 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2566 } else {
2567 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2568 reg = tx->regs.vT[tx->insn.dst[0].idx];
2569 }
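/* For SM < 2 only the first three components of the coordinate are tested,
 * so replicate .z into .w to keep .w from influencing KILL_IF. */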
2570 if (tx->version.major < 2)
2571 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2572 ureg_KILL_IF(tx->ureg, reg);
2573
2574 return D3D_OK;
2575 }
2576
2577 DECL_SPECIAL(TEXBEM)
2578 {
2579 struct ureg_program *ureg = tx->ureg;
2580 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2581 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2582 struct ureg_dst tmp, tmp2, texcoord;
2583 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2584 struct ureg_src bumpenvlscale, bumpenvloffset;
2585 const int m = tx->insn.dst[0].idx;
2586
2587 assert(tx->version.major == 1);
2588
2589 sample = ureg_DECL_sampler(ureg, m);
2590 tx->info->sampler_mask |= 1 << m;
2591
2592 tx_texcoord_alloc(tx, m);
2593
2594 tmp = tx_scratch(tx);
2595 tmp2 = tx_scratch(tx);
2596 texcoord = tx_scratch(tx);
2597 /*
2598 * Bump-env-matrix:
2599 * 00 is X
2600 * 01 is Y
2601 * 10 is Z
2602 * 11 is W
2603 */
2604 c8m = nine_special_constant_src(tx, m);
2605 c16m2 = nine_special_constant_src(tx, 8+m/2);
2606
2607 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2608 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2609 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2610 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2611
2612 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2613 if (m % 2 == 0) {
2614 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2615 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2616 } else {
2617 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2618 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2619 }
2620
2621 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2622
2623 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2624 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2625 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2626 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2627 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2628 NINE_APPLY_SWIZZLE(src, Y),
2629 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2630
2631 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2632 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2633 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2634 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2635 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2636 NINE_APPLY_SWIZZLE(src, Y),
2637 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2638
2639 /* Now the texture coordinates are in tmp.xy */
2640
2641 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2642 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2643 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2644 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2645 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2646 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2647 bumpenvlscale, bumpenvloffset);
2648 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2649 }
2650
2651 tx->info->bumpenvmat_needed = 1;
2652
2653 return D3D_OK;
2654 }
2655
2656 DECL_SPECIAL(TEXREG2AR)
2657 {
2658 struct ureg_program *ureg = tx->ureg;
2659 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2660 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2661 struct ureg_src sample;
2662 const int m = tx->insn.dst[0].idx;
2663 ASSERTED const int n = tx->insn.src[0].idx;
2664 assert(m >= 0 && m > n);
2665
2666 sample = ureg_DECL_sampler(ureg, m);
2667 tx->info->sampler_mask |= 1 << m;
2668 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2669
2670 return D3D_OK;
2671 }
2672
2673 DECL_SPECIAL(TEXREG2GB)
2674 {
2675 struct ureg_program *ureg = tx->ureg;
2676 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2677 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2678 struct ureg_src sample;
2679 const int m = tx->insn.dst[0].idx;
2680 ASSERTED const int n = tx->insn.src[0].idx;
2681 assert(m >= 0 && m > n);
2682
2683 sample = ureg_DECL_sampler(ureg, m);
2684 tx->info->sampler_mask |= 1 << m;
2685 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2686
2687 return D3D_OK;
2688 }
2689
2690 DECL_SPECIAL(TEXM3x2PAD)
2691 {
2692 return D3D_OK; /* this is just padding */
2693 }
2694
2695 DECL_SPECIAL(TEXM3x2TEX)
2696 {
2697 struct ureg_program *ureg = tx->ureg;
2698 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2699 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2700 struct ureg_src sample;
2701 const int m = tx->insn.dst[0].idx - 1;
2702 ASSERTED const int n = tx->insn.src[0].idx;
2703 assert(m >= 0 && m > n);
2704
2705 tx_texcoord_alloc(tx, m);
2706 tx_texcoord_alloc(tx, m+1);
2707
2708 /* performs the matrix multiplication */
2709 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2710 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2711
2712 sample = ureg_DECL_sampler(ureg, m + 1);
2713 tx->info->sampler_mask |= 1 << (m + 1);
2714 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2715
2716 return D3D_OK;
2717 }
2718
2719 DECL_SPECIAL(TEXM3x3PAD)
2720 {
2721 return D3D_OK; /* this is just padding */
2722 }
2723
2724 DECL_SPECIAL(TEXM3x3SPEC)
2725 {
2726 struct ureg_program *ureg = tx->ureg;
2727 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2728 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2729 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2730 struct ureg_src sample;
2731 struct ureg_dst tmp;
2732 const int m = tx->insn.dst[0].idx - 2;
2733 ASSERTED const int n = tx->insn.src[0].idx;
2734 assert(m >= 0 && m > n);
2735
2736 tx_texcoord_alloc(tx, m);
2737 tx_texcoord_alloc(tx, m+1);
2738 tx_texcoord_alloc(tx, m+2);
2739
2740 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2741 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2742 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2743
2744 sample = ureg_DECL_sampler(ureg, m + 2);
2745 tx->info->sampler_mask |= 1 << (m + 2);
2746 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2747
2748 /* At this step, dst = N = (u', w', z').
2749 * We want dst to be the texture sampled at (u'', w'', z''), with
2750 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2751 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2752 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2753 /* at this step tmp.x = 1/N.N */
2754 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2755 /* at this step tmp.y = N.E */
2756 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2757 /* at this step tmp.x = N.E/N.N */
2758 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2759 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2760 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2761 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2762 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2763
2764 return D3D_OK;
2765 }
2766
2767 DECL_SPECIAL(TEXREG2RGB)
2768 {
2769 struct ureg_program *ureg = tx->ureg;
2770 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2771 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2772 struct ureg_src sample;
2773 const int m = tx->insn.dst[0].idx;
2774 ASSERTED const int n = tx->insn.src[0].idx;
2775 assert(m >= 0 && m > n);
2776
2777 sample = ureg_DECL_sampler(ureg, m);
2778 tx->info->sampler_mask |= 1 << m;
2779 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2780
2781 return D3D_OK;
2782 }
2783
2784 DECL_SPECIAL(TEXDP3TEX)
2785 {
2786 struct ureg_program *ureg = tx->ureg;
2787 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2788 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2789 struct ureg_dst tmp;
2790 struct ureg_src sample;
2791 const int m = tx->insn.dst[0].idx;
2792 ASSERTED const int n = tx->insn.src[0].idx;
2793 assert(m >= 0 && m > n);
2794
2795 tx_texcoord_alloc(tx, m);
2796
2797 tmp = tx_scratch(tx);
2798 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2799 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2800
2801 sample = ureg_DECL_sampler(ureg, m);
2802 tx->info->sampler_mask |= 1 << m;
2803 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2804
2805 return D3D_OK;
2806 }
2807
2808 DECL_SPECIAL(TEXM3x2DEPTH)
2809 {
2810 struct ureg_program *ureg = tx->ureg;
2811 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2812 struct ureg_dst tmp;
2813 const int m = tx->insn.dst[0].idx - 1;
2814 ASSERTED const int n = tx->insn.src[0].idx;
2815 assert(m >= 0 && m > n);
2816
2817 tx_texcoord_alloc(tx, m);
2818 tx_texcoord_alloc(tx, m+1);
2819
2820 tmp = tx_scratch(tx);
2821
2822 /* performs the matrix multiplication */
2823 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2824 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2825
2826 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2827 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2828 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2829 /* res = 'w' == 0 ? 1.0 : z/w */
2830 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2831 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2832 /* replace the depth for depth testing with the result */
2833 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2834 TGSI_WRITEMASK_Z, 0, 1);
2835 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2836 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2837 return D3D_OK;
2838 }
2839
2840 DECL_SPECIAL(TEXDP3)
2841 {
2842 struct ureg_program *ureg = tx->ureg;
2843 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2844 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2845 const int m = tx->insn.dst[0].idx;
2846 ASSERTED const int n = tx->insn.src[0].idx;
2847 assert(m >= 0 && m > n);
2848
2849 tx_texcoord_alloc(tx, m);
2850
2851 ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2852
2853 return D3D_OK;
2854 }
2855
2856 DECL_SPECIAL(TEXM3x3)
2857 {
2858 struct ureg_program *ureg = tx->ureg;
2859 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2860 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2861 struct ureg_src sample;
2862 struct ureg_dst E, tmp;
2863 const int m = tx->insn.dst[0].idx - 2;
2864 ASSERTED const int n = tx->insn.src[0].idx;
2865 assert(m >= 0 && m > n);
2866
2867 tx_texcoord_alloc(tx, m);
2868 tx_texcoord_alloc(tx, m+1);
2869 tx_texcoord_alloc(tx, m+2);
2870
2871 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2872 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2873 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2874
2875 switch (tx->insn.opcode) {
2876 case D3DSIO_TEXM3x3:
2877 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2878 break;
2879 case D3DSIO_TEXM3x3TEX:
2880 sample = ureg_DECL_sampler(ureg, m + 2);
2881 tx->info->sampler_mask |= 1 << (m + 2);
2882 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2883 break;
2884 case D3DSIO_TEXM3x3VSPEC:
2885 sample = ureg_DECL_sampler(ureg, m + 2);
2886 tx->info->sampler_mask |= 1 << (m + 2);
2887 E = tx_scratch(tx);
2888 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2889 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2890 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2891 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2892 /* At this step, dst = N = (u', w', z').
2893 * We want dst to be the texture sampled at (u'', w'', z''), with
2894 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2895 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2896 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2897 /* at this step tmp.x = 1/N.N */
2898 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2899 /* at this step tmp.y = N.E */
2900 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2901 /* at this step tmp.x = N.E/N.N */
2902 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2903 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2904 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2905 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2906 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2907 break;
2908 default:
2909 return D3DERR_INVALIDCALL;
2910 }
2911 return D3D_OK;
2912 }
2913
2914 DECL_SPECIAL(TEXDEPTH)
2915 {
2916 struct ureg_program *ureg = tx->ureg;
2917 struct ureg_dst r5;
2918 struct ureg_src r5r, r5g;
2919
2920 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2921
2922 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2923 * r5 won't be used afterward, thus we can use r5.ba */
2924 r5 = tx->regs.r[5];
2925 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2926 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2927
2928 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2929 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2930 /* r5.r = r/g */
2931 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2932 r5r, ureg_imm1f(ureg, 1.0f));
2933 /* replace the depth for depth testing with the result */
2934 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2935 TGSI_WRITEMASK_Z, 0, 1);
2936 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2937
2938 return D3D_OK;
2939 }
2940
2941 DECL_SPECIAL(BEM)
2942 {
2943 struct ureg_program *ureg = tx->ureg;
2944 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2945 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2946 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2947 struct ureg_src m00, m01, m10, m11, c8m;
2948 const int m = tx->insn.dst[0].idx;
2949 struct ureg_dst tmp = tx_scratch(tx);
2950 /*
2951 * Bump-env-matrix:
2952 * 00 is X
2953 * 01 is Y
2954 * 10 is Z
2955 * 11 is W
2956 */
2957 c8m = nine_special_constant_src(tx, m);
2958 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2959 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2960 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2961 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2962 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2963 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2964 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2965 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2966 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2967 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2968
2969 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2970 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2971 NINE_APPLY_SWIZZLE(src1, X), src0);
2972 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2973 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2974 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2975 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2976
2977 tx->info->bumpenvmat_needed = 1;
2978
2979 return D3D_OK;
2980 }
2981
2982 DECL_SPECIAL(TEXLD)
2983 {
2984 struct ureg_program *ureg = tx->ureg;
2985 unsigned target;
2986 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2987 struct ureg_src src[2] = {
2988 tx_src_param(tx, &tx->insn.src[0]),
2989 tx_src_param(tx, &tx->insn.src[1])
2990 };
2991 assert(tx->insn.src[1].idx >= 0 &&
2992 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2993 target = tx->sampler_targets[tx->insn.src[1].idx];
2994
2995 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
2996 return D3D_OK;
2997
2998 switch (tx->insn.flags) {
2999 case 0:
3000 ureg_TEX(ureg, dst, target, src[0], src[1]);
3001 break;
3002 case NINED3DSI_TEXLD_PROJECT:
3003 ureg_TXP(ureg, dst, target, src[0], src[1]);
3004 break;
3005 case NINED3DSI_TEXLD_BIAS:
3006 ureg_TXB(ureg, dst, target, src[0], src[1]);
3007 break;
3008 default:
3009 assert(0);
3010 return D3DERR_INVALIDCALL;
3011 }
3012 return D3D_OK;
3013 }
3014
3015 DECL_SPECIAL(TEXLD_14)
3016 {
3017 struct ureg_program *ureg = tx->ureg;
3018 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3019 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3020 const unsigned s = tx->insn.dst[0].idx;
3021 const unsigned t = ps1x_sampler_type(tx->info, s);
3022
3023 tx->info->sampler_mask |= 1 << s;
3024 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
3025
3026 return D3D_OK;
3027 }
3028
3029 DECL_SPECIAL(TEX)
3030 {
3031 struct ureg_program *ureg = tx->ureg;
3032 const unsigned s = tx->insn.dst[0].idx;
3033 const unsigned t = ps1x_sampler_type(tx->info, s);
3034 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3035 struct ureg_src src[2];
3036
3037 tx_texcoord_alloc(tx, s);
3038
3039 src[0] = tx->regs.vT[s];
3040 src[1] = ureg_DECL_sampler(ureg, s);
3041 tx->info->sampler_mask |= 1 << s;
3042
3043 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3044
3045 return D3D_OK;
3046 }
3047
3048 DECL_SPECIAL(TEXLDD)
3049 {
3050 unsigned target;
3051 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052 struct ureg_src src[4] = {
3053 tx_src_param(tx, &tx->insn.src[0]),
3054 tx_src_param(tx, &tx->insn.src[1]),
3055 tx_src_param(tx, &tx->insn.src[2]),
3056 tx_src_param(tx, &tx->insn.src[3])
3057 };
3058 assert(tx->insn.src[1].idx >= 0 &&
3059 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3060 target = tx->sampler_targets[tx->insn.src[1].idx];
3061
3062 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3063 return D3D_OK;
3064
3065 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3066 return D3D_OK;
3067 }
3068
3069 DECL_SPECIAL(TEXLDL)
3070 {
3071 unsigned target;
3072 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3073 struct ureg_src src[2] = {
3074 tx_src_param(tx, &tx->insn.src[0]),
3075 tx_src_param(tx, &tx->insn.src[1])
3076 };
3077 assert(tx->insn.src[1].idx >= 0 &&
3078 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3079 target = tx->sampler_targets[tx->insn.src[1].idx];
3080
3081 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3082 return D3D_OK;
3083
3084 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3085 return D3D_OK;
3086 }
3087
3088 DECL_SPECIAL(SETP)
3089 {
3090 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3091 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3092 struct ureg_src src[2] = {
3093 tx_src_param(tx, &tx->insn.src[0]),
3094 tx_src_param(tx, &tx->insn.src[1])
3095 };
3096 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3097 return D3D_OK;
3098 }
3099
3100 DECL_SPECIAL(BREAKP)
3101 {
3102 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3103 ureg_IF(tx->ureg, src, tx_cond(tx));
3104 ureg_BRK(tx->ureg);
3105 tx_endcond(tx);
3106 ureg_ENDIF(tx->ureg);
3107 return D3D_OK;
3108 }
3109
3110 DECL_SPECIAL(PHASE)
3111 {
3112 return D3D_OK; /* we don't care about phase */
3113 }
3114
3115 DECL_SPECIAL(COMMENT)
3116 {
3117 return D3D_OK; /* nothing to do */
3118 }
3119
3120
3121 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
3122 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
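/* _OPI fields: D3D opcode, TGSI opcode, {min, max} vertex shader version,
 * {min, max} pixel shader version, number of dst params, number of src
 * params, and the special handler (NULL when the generic translation
 * path applies). */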
3123
3124 static const struct sm1_op_info inst_table[] =
3125 {
3126 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3127 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3128 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3129 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3130 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3131 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3132 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3133 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3134 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3135 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3136 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3137 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3138 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3139 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3140 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3141 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3142 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3143 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3144 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3145 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3146
3147 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3148 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3149 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3150 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3151 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3152
3153 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3154 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3155 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3156 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3157 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3158 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3159
3160 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3161
3162 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3163 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3164 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3165 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3166 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3167
3168 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3169 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3170
3171 /* More flow control */
3172 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3173 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3174 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3175 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3176 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3177 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3178 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3179 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3180 /* we don't write to the address register, but to a normal register (copied
3181 * to the address register when needed), thus we don't use ARR */
3182 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3183
3184 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3185 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3186
3187 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3188 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3189 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3190 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3191 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3192 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3193 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3194 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3195 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3196 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3197 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3198 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3199 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3200 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3201 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3202 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3203
3204 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3205 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3206 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3207 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3208
3209 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3210
3211 /* More tex stuff */
3212 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3213 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3214 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3215 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3216 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3217 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3218
3219 /* Misc */
3220 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3221 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3222 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3223 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3224 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3225 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3226 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3227 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3228 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3229 };
3230
3231 static const struct sm1_op_info inst_phase =
3232 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3233
3234 static const struct sm1_op_info inst_comment =
3235 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3236
3237 static void
3238 create_op_info_map(struct shader_translator *tx)
3239 {
3240 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3241 unsigned i;
3242
3243 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3244 tx->op_info_map[i] = -1;
3245
3246 if (tx->processor == PIPE_SHADER_VERTEX) {
3247 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3248 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3249 if (inst_table[i].vert_version.min <= version &&
3250 inst_table[i].vert_version.max >= version)
3251 tx->op_info_map[inst_table[i].sio] = i;
3252 }
3253 } else {
3254 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3255 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3256 if (inst_table[i].frag_version.min <= version &&
3257 inst_table[i].frag_version.max >= version)
3258 tx->op_info_map[inst_table[i].sio] = i;
3259 }
3260 }
3261 }
3262
3263 static inline HRESULT
3264 NineTranslateInstruction_Generic(struct shader_translator *tx)
3265 {
3266 struct ureg_dst dst[1];
3267 struct ureg_src src[4];
3268 unsigned i;
3269
3270 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3271 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3272 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3273 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3274
3275 ureg_insn(tx->ureg, tx->insn.info->opcode,
3276 dst, tx->insn.ndst,
3277 src, tx->insn.nsrc, 0);
3278 return D3D_OK;
3279 }
3280
3281 static inline DWORD
3282 TOKEN_PEEK(struct shader_translator *tx)
3283 {
3284 return *(tx->parse);
3285 }
3286
3287 static inline DWORD
3288 TOKEN_NEXT(struct shader_translator *tx)
3289 {
3290 return *(tx->parse)++;
3291 }
3292
3293 static inline void
3294 TOKEN_JUMP(struct shader_translator *tx)
3295 {
3296 if (tx->parse_next && tx->parse != tx->parse_next) {
3297 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3298 tx->parse = tx->parse_next;
3299 }
3300 }
3301
3302 static inline bool
3303 sm1_parse_eof(struct shader_translator *tx)
3304 {
3305 return TOKEN_PEEK(tx) == NINED3DSP_END;
3306 }
3307
3308 static void
3309 sm1_read_version(struct shader_translator *tx)
3310 {
3311 const DWORD tok = TOKEN_NEXT(tx);
3312
3313 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3314 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3315
3316 switch (tok >> 16) {
3317 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3318 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3319 default:
3320 DBG("Invalid shader type: %x\n", tok);
3321 tx->processor = ~0;
3322 break;
3323 }
3324 }
3325
3326 /* This is just to check if we parsed the instruction properly. */
3327 static void
3328 sm1_parse_get_skip(struct shader_translator *tx)
3329 {
3330 const DWORD tok = TOKEN_PEEK(tx);
3331
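/* For SM2+, the opcode token encodes in D3DSI_INSTLENGTH the number of
 * tokens that follow it, which lets us verify later that we consumed
 * exactly one whole instruction. */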
3332 if (tx->version.major >= 2) {
3333 tx->parse_next = tx->parse + 1 /* this */ +
3334 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3335 } else {
3336 tx->parse_next = NULL; /* TODO: determine from param count */
3337 }
3338 }
3339
3340 static void
3341 sm1_print_comment(const char *comment, UINT size)
3342 {
3343 if (!size)
3344 return;
3345 /* TODO */
3346 }
3347
3348 static void
3349 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3350 {
3351 DWORD tok = TOKEN_PEEK(tx);
3352
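/* Comment tokens carry their payload size (in DWORDs) in D3DSI_COMMENTSIZE;
 * skip over the payload and keep looking for the next real instruction. */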
3353 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3354 {
3355 const char *comment = "";
3356 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3357 tx->parse += size + 1;
3358
3359 if (print)
3360 sm1_print_comment(comment, size);
3361
3362 tok = TOKEN_PEEK(tx);
3363 }
3364 }
3365
3366 static void
3367 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3368 {
3369 *reg = TOKEN_NEXT(tx);
3370
3371 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3372 {
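/* SM1 has no separate relative-addressing token, so synthesize one that
 * selects the address register a0 (D3DSPR_ADDR) with no swizzle. */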
3373 if (tx->version.major < 2)
3374 *rel = (1 << 31) |
3375 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3376 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3377 D3DSP_NOSWIZZLE;
3378 else
3379 *rel = TOKEN_NEXT(tx);
3380 }
3381 }
3382
3383 static void
3384 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3385 {
3386 int8_t shift;
3387 dst->file =
3388 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3389 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3390 dst->type = TGSI_RETURN_TYPE_FLOAT;
3391 dst->idx = tok & D3DSP_REGNUM_MASK;
3392 dst->rel = NULL;
3393 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3394 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
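/* The shift field is a 4-bit two's-complement value; the expression below
 * sign-extends it, e.g. 0xf becomes -1. */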
3395 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3396 dst->shift = (shift & 0x7) - (shift & 0x8);
3397 }
3398
3399 static void
3400 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3401 {
3402 src->file =
3403 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3404 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3405 src->type = TGSI_RETURN_TYPE_FLOAT;
3406 src->idx = tok & D3DSP_REGNUM_MASK;
3407 src->rel = NULL;
3408 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3409 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3410
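/* The CONST2/3/4 register files address float constants beyond c2047;
 * fold them into a single D3DSPR_CONST file by offsetting the index. */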
3411 switch (src->file) {
3412 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3413 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3414 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3415 default:
3416 break;
3417 }
3418 }
3419
3420 static void
3421 sm1_parse_immediate(struct shader_translator *tx,
3422 struct sm1_src_param *imm)
3423 {
3424 imm->file = NINED3DSPR_IMMEDIATE;
3425 imm->idx = INT_MIN;
3426 imm->rel = NULL;
3427 imm->swizzle = NINED3DSP_NOSWIZZLE;
3428 imm->mod = 0;
3429 switch (tx->insn.opcode) {
3430 case D3DSIO_DEF:
3431 imm->type = NINED3DSPTYPE_FLOAT4;
3432 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3433 tx->parse += 4;
3434 break;
3435 case D3DSIO_DEFI:
3436 imm->type = NINED3DSPTYPE_INT4;
3437 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3438 tx->parse += 4;
3439 break;
3440 case D3DSIO_DEFB:
3441 imm->type = NINED3DSPTYPE_BOOL;
3442 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3443 tx->parse += 1;
3444 break;
3445 default:
3446 assert(0);
3447 break;
3448 }
3449 }
3450
3451 static void
3452 sm1_read_dst_param(struct shader_translator *tx,
3453 struct sm1_dst_param *dst,
3454 struct sm1_src_param *rel)
3455 {
3456 DWORD tok_dst, tok_rel = 0;
3457
3458 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3459 sm1_parse_dst_param(dst, tok_dst);
3460 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3461 sm1_parse_src_param(rel, tok_rel);
3462 dst->rel = rel;
3463 }
3464 }
3465
3466 static void
3467 sm1_read_src_param(struct shader_translator *tx,
3468 struct sm1_src_param *src,
3469 struct sm1_src_param *rel)
3470 {
3471 DWORD tok_src, tok_rel = 0;
3472
3473 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3474 sm1_parse_src_param(src, tok_src);
3475 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3476 assert(rel);
3477 sm1_parse_src_param(rel, tok_rel);
3478 src->rel = rel;
3479 }
3480 }
3481
3482 static void
3483 sm1_read_semantic(struct shader_translator *tx,
3484 struct sm1_semantic *sem)
3485 {
3486 const DWORD tok_usg = TOKEN_NEXT(tx);
3487 const DWORD tok_dst = TOKEN_NEXT(tx);
3488
3489 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3490 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3491 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3492
3493 sm1_parse_dst_param(&sem->reg, tok_dst);
3494 }
3495
3496 static void
3497 sm1_parse_instruction(struct shader_translator *tx)
3498 {
3499 struct sm1_instruction *insn = &tx->insn;
3500 HRESULT hr;
3501 DWORD tok;
3502 const struct sm1_op_info *info = NULL;
3503 unsigned i;
3504
3505 sm1_parse_comments(tx, true);
3506 sm1_parse_get_skip(tx);
3507
3508 tok = TOKEN_NEXT(tx);
3509
3510 insn->opcode = tok & D3DSI_OPCODE_MASK;
3511 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3512 insn->coissue = !!(tok & D3DSI_COISSUE);
3513 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3514
3515 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3516 int k = tx->op_info_map[insn->opcode];
3517 if (k >= 0) {
3518 assert(k < ARRAY_SIZE(inst_table));
3519 info = &inst_table[k];
3520 }
3521 } else {
3522 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3523 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3524 }
3525 if (!info) {
3526 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3527 TOKEN_JUMP(tx);
3528 return;
3529 }
3530 insn->info = info;
3531 insn->ndst = info->ndst;
3532 insn->nsrc = info->nsrc;
3533
3534 /* check version */
3535 {
3536 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3537 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3538 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3539 if (ver < min || ver > max) {
3540 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3541 min, ver, max);
3542 return;
3543 }
3544 }
3545
3546 for (i = 0; i < insn->ndst; ++i)
3547 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3548 if (insn->predicated)
3549 sm1_read_src_param(tx, &insn->pred, NULL);
3550 for (i = 0; i < insn->nsrc; ++i)
3551 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3552
3553 /* parse here so we can dump them before processing */
3554 if (insn->opcode == D3DSIO_DEF ||
3555 insn->opcode == D3DSIO_DEFI ||
3556 insn->opcode == D3DSIO_DEFB)
3557 sm1_parse_immediate(tx, &tx->insn.src[0]);
3558
3559 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3560 sm1_instruction_check(insn);
3561
3562 if (insn->predicated) {
3563 tx->predicated_activated = true;
3564 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3565 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3566 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3567 }
3568 }
3569
3570 if (info->handler)
3571 hr = info->handler(tx);
3572 else
3573 hr = NineTranslateInstruction_Generic(tx);
3574 tx_apply_dst0_modifiers(tx);
3575
3576 if (insn->predicated) {
3577 tx->predicated_activated = false;
3578 /* TODO: predication might be allowed on output registers,
3579 * which cannot be read as a src. Work around that. */
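/* TGSI CMP writes src1 where src0 < 0 and src2 elsewhere, so with -pred as
 * src0 the instruction result (predicate_tmp) is selected on the components
 * where the predicate is set, and predicate_dst keeps its old value elsewhere. */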
3580 ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3581 ureg_negate(tx_src_param(tx, &insn->pred)),
3582 ureg_src(tx->regs.predicate_tmp),
3583 ureg_src(tx->regs.predicate_dst));
3584 }
3585
3586 if (hr != D3D_OK)
3587 tx->failure = true;
3588 tx->num_scratch = 0; /* reset */
3589
3590 TOKEN_JUMP(tx);
3591 }
3592
3593 #define GET_CAP(n) screen->get_param( \
3594 screen, PIPE_CAP_##n)
3595 #define GET_SHADER_CAP(n) screen->get_shader_param( \
3596 screen, info->type, PIPE_SHADER_CAP_##n)
3597
3598 static HRESULT
3599 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3600 {
3601 unsigned i;
3602
3603 memset(tx, 0, sizeof(*tx));
3604
3605 tx->info = info;
3606
3607 tx->byte_code = info->byte_code;
3608 tx->parse = info->byte_code;
3609
3610 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3611 info->input_map[i] = NINE_DECLUSAGE_NONE;
3612 info->num_inputs = 0;
3613
3614 info->position_t = false;
3615 info->point_size = false;
3616
3617 memset(tx->slots_used, 0, sizeof(tx->slots_used));
3618 memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3619 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3620
3621 tx->info->const_float_slots = 0;
3622 tx->info->const_int_slots = 0;
3623 tx->info->const_bool_slots = 0;
3624
3625 info->sampler_mask = 0x0;
3626 info->rt_mask = 0x0;
3627
3628 info->lconstf.data = NULL;
3629 info->lconstf.ranges = NULL;
3630
3631 info->bumpenvmat_needed = 0;
3632
3633 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3634 tx->regs.rL[i] = ureg_dst_undef();
3635 }
3636 tx->regs.address = ureg_dst_undef();
3637 tx->regs.a0 = ureg_dst_undef();
3638 tx->regs.p = ureg_dst_undef();
3639 tx->regs.oDepth = ureg_dst_undef();
3640 tx->regs.vPos = ureg_src_undef();
3641 tx->regs.vFace = ureg_src_undef();
3642 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3643 tx->regs.o[i] = ureg_dst_undef();
3644 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3645 tx->regs.oCol[i] = ureg_dst_undef();
3646 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3647 tx->regs.vC[i] = ureg_src_undef();
3648 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3649 tx->regs.vT[i] = ureg_src_undef();
3650
3651 sm1_read_version(tx);
3652
3653 info->version = (tx->version.major << 4) | tx->version.minor;
3654
3655 tx->num_outputs = 0;
3656
3657 create_op_info_map(tx);
3658
3659 tx->ureg = ureg_create(info->type);
3660 if (!tx->ureg) {
3661 return E_OUTOFMEMORY;
3662 }
3663
3664 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3665 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3666 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3667 tx->shift_wpos = !GET_CAP(FS_COORD_PIXEL_CENTER_INTEGER);
3668 tx->texcoord_sn = tx->want_texcoord ?
3669 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3670 tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL);
3671 tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL);
3672 tx->no_vs_window_space = !GET_CAP(VS_WINDOW_SPACE_POSITION);
3673 tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES);
3674
3675 if (info->emulate_features) {
3676 tx->shift_wpos = true;
3677 tx->no_vs_window_space = true;
3678 tx->mul_zero_wins = false;
3679 }
3680
3681 if (IS_VS) {
3682 tx->num_constf_allowed = NINE_MAX_CONST_F;
3683 } else if (tx->version.major < 2) {/* IS_PS v1 */
3684 tx->num_constf_allowed = 8;
3685 } else if (tx->version.major == 2) {/* IS_PS v2 */
3686 tx->num_constf_allowed = 32;
3687 } else {/* IS_PS v3 */
3688 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3689 }
3690
3691 if (tx->version.major < 2) {
3692 tx->num_consti_allowed = 0;
3693 tx->num_constb_allowed = 0;
3694 } else {
3695 tx->num_consti_allowed = NINE_MAX_CONST_I;
3696 tx->num_constb_allowed = NINE_MAX_CONST_B;
3697 }
3698
3699 if (info->swvp_on) {
3700 /* TODO: check these limits for tx->version.major == 1 */
3701 tx->num_constf_allowed = 8192;
3702 tx->num_consti_allowed = 2048;
3703 tx->num_constb_allowed = 2048;
3704 }
3705
3706 /* VS must always write position. Declare it here to make it the 1st output.
3707 * (Some drivers like nv50 are buggy and rely on that.)
3708 */
3709 if (IS_VS) {
3710 tx->regs.oPos_out = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3711 } else {
3712 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3713 if (!tx->shift_wpos)
3714 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3715 }
3716
3717 if (tx->mul_zero_wins)
3718 ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
3719
3720 /* Add additional definition of constants */
3721 if (info->add_constants_defs.c_combination) {
3722 unsigned i;
3723
3724 assert(info->add_constants_defs.int_const_added);
3725 assert(info->add_constants_defs.bool_const_added);
3726 /* We only add constants that are used by the shader
3727 * and that are not defined in the shader */
3728 for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3729 if ((*info->add_constants_defs.int_const_added)[i]) {
3730 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3731 info->add_constants_defs.c_combination->const_i[i][0],
3732 info->add_constants_defs.c_combination->const_i[i][1],
3733 info->add_constants_defs.c_combination->const_i[i][2],
3734 info->add_constants_defs.c_combination->const_i[i][3]);
3735 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3736 }
3737 }
3738 for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3739 if ((*info->add_constants_defs.bool_const_added)[i]) {
3740 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3741 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3742 }
3743 }
3744 }
3745 return D3D_OK;
3746 }
3747
3748 static void
3749 tx_dtor(struct shader_translator *tx)
3750 {
3751 if (tx->slot_map)
3752 FREE(tx->slot_map);
3753 if (tx->num_inst_labels)
3754 FREE(tx->inst_labels);
3755 FREE(tx->lconstf);
3756 FREE(tx->regs.r);
3757 FREE(tx);
3758 }
3759
3760 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3761 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3762 static void
3763 shader_add_vs_viewport_transform(struct shader_translator *tx)
3764 {
3765 struct ureg_program *ureg = tx->ureg;
3766 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3767 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3768 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3769
3770 c0 = ureg_src_dimension(c0, 4);
3771 c1 = ureg_src_dimension(c1, 4);
3772 /* TODO: find out when we need to apply the viewport transformation or not.
3773 * Likely will be XYZ vs XYZRHW in vdecl_out
3774 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3775 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3776 */
3777 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3778 }
3779
3780 static void
3781 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_dst dst_col, struct ureg_src src_col)
3782 {
3783 struct ureg_program *ureg = tx->ureg;
3784 struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3785 struct ureg_src fog_vs, fog_color;
3786 struct ureg_dst fog_factor, depth;
3787
3788 if (!tx->info->fog_enable) {
3789 ureg_MOV(ureg, dst_col, src_col);
3790 return;
3791 }
3792
3793 if (tx->info->fog_mode != D3DFOG_NONE) {
3794 depth = tx_scratch_scalar(tx);
3795 if (tx->info->zfog)
3796 ureg_MOV(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3797 else /* wfog: use w. position's w contains 1/w */
3798 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3799 }
3800
3801 fog_color = nine_special_constant_src(tx, 12);
3802 fog_params = nine_special_constant_src(tx, 13);
3803 fog_factor = tx_scratch_scalar(tx);
3804
3805 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3806 fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3807 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3808 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3809 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3810 } else if (tx->info->fog_mode == D3DFOG_EXP) {
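/* fog = exp(-d * density), computed as exp2(-d * density * log2(e));
 * 1.442695 is log2(e). */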
3811 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3812 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3813 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3814 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3815 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
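/* fog = exp(-(d * density)^2): square the product first, then apply the
 * same exp2(x * -log2(e)) trick as the EXP mode above. */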
3816 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3817 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3818 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3819 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3820 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3821 } else {
3822 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3823 TGSI_INTERPOLATE_PERSPECTIVE),
3824 TGSI_SWIZZLE_X);
3825 ureg_MOV(ureg, fog_factor, fog_vs);
3826 }
3827
3828 ureg_LRP(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_XYZ),
3829 tx_src_scalar(fog_factor), src_col, fog_color);
3830 ureg_MOV(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_W), src_col);
3831 }
3832
3833 static void
3834 shader_add_ps_alpha_test_stage(struct shader_translator *tx, struct ureg_src src_color)
3835 {
3836 struct ureg_program *ureg = tx->ureg;
3837 unsigned cmp_op;
3838 struct ureg_src src[2];
3839 struct ureg_dst tmp = tx_scratch(tx);
3840 if (tx->info->alpha_test_emulation == PIPE_FUNC_ALWAYS)
3841 return;
3842 if (tx->info->alpha_test_emulation == PIPE_FUNC_NEVER) {
3843 ureg_KILL(ureg);
3844 return;
3845 }
3846 cmp_op = pipe_comp_to_tgsi_opposite(tx->info->alpha_test_emulation);
3847 src[0] = ureg_scalar(src_color, TGSI_SWIZZLE_W); /* Read color alpha channel */
3848 src[1] = ureg_scalar(nine_special_constant_src(tx, 14), TGSI_SWIZZLE_X); /* Read alphatest */
3849 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
3850 ureg_KILL_IF(tx->ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
3851 }
3852
3853 static void parse_shader(struct shader_translator *tx)
3854 {
3855 struct nine_shader_info *info = tx->info;
3856
3857 while (!sm1_parse_eof(tx) && !tx->failure)
3858 sm1_parse_instruction(tx);
3859 tx->parse++; /* for byte_size */
3860
3861 if (tx->failure)
3862 return;
3863
3864 if (IS_PS) {
3865 struct ureg_dst oCol0 = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0);
3866 struct ureg_dst tmp_oCol0;
3867 if (tx->version.major < 3) {
3868 tmp_oCol0 = ureg_DECL_temporary(tx->ureg);
3869 if (tx->version.major < 2) {
3870 assert(tx->num_temp); /* there must be color output */
3871 info->rt_mask |= 0x1;
3872 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.r[0]));
3873 } else {
3874 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.oCol[0]));
3875 }
3876 } else {
3877 assert(!ureg_dst_is_undef(tx->regs.oCol[0]));
3878 tmp_oCol0 = tx->regs.oCol[0];
3879 }
3880 shader_add_ps_alpha_test_stage(tx, ureg_src(tmp_oCol0));
3881 ureg_MOV(tx->ureg, oCol0, ureg_src(tmp_oCol0));
3882 }
3883
3884 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3885 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3886 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3887 }
3888
3889 if (info->position_t) {
3890 if (tx->no_vs_window_space) {
3891 ERR("POSITIONT is not yet implemented for your device.\n");
3892 } else {
3893 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
3894 }
3895 }
3896
3897 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3898 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3899 ureg_MAX(tx->ureg, ureg_writemask(tx->regs.oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3900 ureg_MIN(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3901 info->point_size = true;
3902 } else if (IS_VS && tx->always_output_pointsize) {
3903 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3904 ureg_MOV(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), nine_special_constant_src(tx, 8));
3905 info->point_size = true;
3906 }
3907
3908 if (IS_VS && tx->info->clip_plane_emulation > 0) {
3909 struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
3910 int num_clipdist = ffs(tx->info->clip_plane_emulation);
3911 int i;
3912 /* TODO: handle undefined channels of oPos (w is not always written to; the default is 1).
3913 * Note: in d3d9 it's not possible to output clipvert, so we don't need to check
3914 * for its existence. */
3915 clipdist[0] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 0, ((1 << num_clipdist) - 1) & 0xf, 0, 1);
3916 if (num_clipdist >= 5)
3917 clipdist[1] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 1, ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
3918 ureg_property(tx->ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
3919 for (i = 0; i < num_clipdist; i++) {
3920 assert(!ureg_dst_is_undef(clipdist[i>>2]));
3921 if (!(tx->info->clip_plane_emulation & (1 << i)))
3922 ureg_MOV(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x3)), ureg_imm1f(tx->ureg, 0.f));
3923 else
3924 ureg_DP4(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x3)),
3925 ureg_src(tx->regs.oPos), nine_special_constant_src(tx, i));
3926 }
3927
3928 ureg_MOV(tx->ureg, tx->regs.oPos_out, ureg_src(tx->regs.oPos));
3929 }
3930
3931 if (info->process_vertices)
3932 shader_add_vs_viewport_transform(tx);
3933
3934 ureg_END(tx->ureg);
3935 }
3936
3937 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
3938 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
3939 #define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
3940 #define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
3941
3942 static const struct debug_named_value nine_shader_debug_options[] = {
3943 { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3944 { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3945 { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3946 { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
3947 DEBUG_NAMED_VALUE_END /* must be last */
3948 };
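/* These flags come from the NINE_SHADER environment variable; something like
 * NINE_SHADER=dump_tgsi,no_nir_ps should work, since debug_get_flags_option
 * takes a comma-separated list of the names above. */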
3949
3950 static inline bool
3951 nine_shader_get_debug_flag(uint64_t flag)
3952 {
3953 static uint64_t flags = 0;
3954 static bool first_run = true;
3955
3956 if (unlikely(first_run)) {
3957 first_run = false;
3958 flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3959
3960 // Check old TGSI dump envvar too
3961 if (debug_get_bool_option("NINE_TGSI_DUMP", false)) {
3962 flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3963 }
3964 }
3965
3966 return !!(flags & flag);
3967 }
3968
3969 static void
3970 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3971 struct pipe_screen *screen)
3972 {
3973 struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3974
3975 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3976 nir_print_shader(nir, stdout);
3977 }
3978
3979 state->type = PIPE_SHADER_IR_NIR;
3980 state->tokens = NULL;
3981 state->ir.nir = nir;
3982 memset(&state->stream_output, 0, sizeof(state->stream_output));
3983 }
3984
3985 static void *
3986 nine_ureg_create_shader(struct ureg_program *ureg,
3987 struct pipe_context *pipe,
3988 const struct pipe_stream_output_info *so)
3989 {
3990 struct pipe_shader_state state;
3991 const struct tgsi_token *tgsi_tokens;
3992 struct pipe_screen *screen = pipe->screen;
3993
3994 tgsi_tokens = ureg_finalize(ureg);
3995 if (!tgsi_tokens)
3996 return NULL;
3997
3998 assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
3999 enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
4000
4001 bool use_nir = true;
4002
4003 /* Allow the user to override the preferred IR; this is very useful for debugging. */
4004 if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
4005 use_nir = false;
4006 if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
4007 use_nir = false;
4008
4009 DUMP("shader type: %s, selected IR: %s\n",
4010 shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
4011 use_nir ? "NIR" : "TGSI");
4012
4013 if (use_nir) {
4014 nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
4015 } else {
4016 pipe_shader_state_from_tgsi(&state, tgsi_tokens);
4017 }
4018
4019 assert(state.tokens || state.ir.nir);
4020
4021 if (so)
4022 state.stream_output = *so;
4023
4024 switch (shader_type) {
4025 case PIPE_SHADER_VERTEX:
4026 return pipe->create_vs_state(pipe, &state);
4027 case PIPE_SHADER_FRAGMENT:
4028 return pipe->create_fs_state(pipe, &state);
4029 default:
4030 unreachable("unsupported shader type");
4031 }
4032 }
4033
4034
4035 void *
4036 nine_create_shader_with_so_and_destroy(struct ureg_program *p,
4037 struct pipe_context *pipe,
4038 const struct pipe_stream_output_info *so)
4039 {
4040 void *result = nine_ureg_create_shader(p, pipe, so);
4041 ureg_destroy(p);
4042 return result;
4043 }
4044
4045 HRESULT
4046 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
4047 {
4048 struct shader_translator *tx;
4049 HRESULT hr = D3D_OK;
4050 const unsigned processor = info->type;
4051 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
4052 unsigned *const_ranges = NULL;
4053
4054 user_assert(processor != ~0, D3DERR_INVALIDCALL);
4055
4056 tx = MALLOC_STRUCT(shader_translator);
4057 if (!tx)
4058 return E_OUTOFMEMORY;
4059
4060 info->emulate_features = device->driver_caps.shader_emulate_features;
4061
4062 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4063 hr = E_OUTOFMEMORY;
4064 goto out;
4065 }
4066 tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4067
4068 assert(IS_VS || !info->swvp_on);
4069
4070 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
4071 hr = D3DERR_INVALIDCALL;
4072 DBG("Unsupported shader version: %u.%u !\n",
4073 tx->version.major, tx->version.minor);
4074 goto out;
4075 }
4076 if (tx->processor != processor) {
4077 hr = D3DERR_INVALIDCALL;
4078 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
4079 goto out;
4080 }
4081 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
4082 tx->version.major, tx->version.minor);
4083
4084 parse_shader(tx);
4085
4086 if (tx->failure) {
4087 /* For VS shaders we print the warning later,
4088 * because we first retry with swvp. */
4089 if (IS_PS)
4090 ERR("Encountered buggy shader\n");
4091 ureg_destroy(tx->ureg);
4092 hr = D3DERR_INVALIDCALL;
4093 goto out;
4094 }
4095
4096 /* Recompile after compacting constant slots if possible */
4097 if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
4098 unsigned *slot_map;
4099 unsigned c;
4100 int i, j, num_ranges, prev;
4101
4102 DBG("Recompiling shader for constant compaction\n");
4103 ureg_destroy(tx->ureg);
4104
4105 if (tx->num_inst_labels)
4106 FREE(tx->inst_labels);
4107 FREE(tx->lconstf);
4108 FREE(tx->regs.r);
4109
4110 num_ranges = 0;
4111 prev = -2;
4112 for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4113 if (tx->slots_used[i]) {
4114 if (prev != i - 1)
4115 num_ranges++;
4116 prev = i;
4117 }
4118 }
4119 slot_map = MALLOC(NINE_MAX_CONST_ALL_VS * sizeof(unsigned));
4120 const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4121 if (!slot_map || !const_ranges) {
4122 hr = E_OUTOFMEMORY;
4123 goto out;
4124 }
4125 c = 0;
4126 j = -1;
4127 prev = -2;
4128 for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4129 if (tx->slots_used[i]) {
4130 if (prev != i - 1)
4131 j++;
4132 /* Initialize first slot of the range */
4133 if (!const_ranges[2*j+1])
4134 const_ranges[2*j] = i;
4135 const_ranges[2*j+1]++;
4136 prev = i;
4137 slot_map[i] = c++;
4138 }
4139 }
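/* Illustrative example (hypothetical slot usage): if slots_used marks
 * slots {0,1,2,10,11}, the two passes above give num_ranges = 2,
 * const_ranges = {0,3, 10,2, 0,0} (start/length pairs, zero-length
 * terminated), and slot_map[0..2] = {0,1,2}, slot_map[10..11] = {3,4}
 * (unused entries stay uninitialized), so c10/c11 are fetched from packed
 * constant slots 3 and 4 when the shader is recompiled below. */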
4140
4141 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4142 hr = E_OUTOFMEMORY;
4143 goto out;
4144 }
4145 tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4146 tx->slot_map = slot_map;
4147 parse_shader(tx);
4148 assert(!tx->failure);
4149 #if !defined(NDEBUG)
4150 i = 0;
4151 j = 0;
4152 while (const_ranges[i*2+1] != 0) {
4153 j += const_ranges[i*2+1];
4154 i++;
4155 }
4156 assert(j == tx->num_slots);
4157 #endif
4158 }
4159
4160 /* record local constants */
4161 if (tx->num_lconstf && tx->indirect_const_access) {
4162 struct nine_range *ranges;
4163 float *data;
4164 int *indices;
4165 unsigned i, k, n;
4166
4167 hr = E_OUTOFMEMORY;
4168
4169 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4170 if (!data)
4171 goto out;
4172 info->lconstf.data = data;
4173
4174 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4175 if (!indices)
4176 goto out;
4177
4178 /* lazy sort, num_lconstf should be small */
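/* Selection sort: each outer pass copies the entry with the smallest
 * remaining idx into data[]/indices[], then retires it by setting its
 * idx to INT_MAX. */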
4179 for (n = 0; n < tx->num_lconstf; ++n) {
4180 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4181 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4182 k = i;
4183 }
4184 indices[n] = tx->lconstf[k].idx;
4185 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4186 tx->lconstf[k].idx = INT_MAX;
4187 }
4188
4189 /* count ranges */
4190 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4191 if (indices[i] != indices[i - 1] + 1)
4192 ++n;
4193 ranges = MALLOC(n * sizeof(ranges[0]));
4194 if (!ranges) {
4195 FREE(indices);
4196 goto out;
4197 }
4198 info->lconstf.ranges = ranges;
4199
4200 k = 0;
4201 ranges[k].bgn = indices[0];
4202 for (i = 1; i < tx->num_lconstf; ++i) {
4203 if (indices[i] != indices[i - 1] + 1) {
4204 ranges[k].next = &ranges[k + 1];
4205 ranges[k].end = indices[i - 1] + 1;
4206 ++k;
4207 ranges[k].bgn = indices[i];
4208 }
4209 }
4210 ranges[k].end = indices[i - 1] + 1;
4211 ranges[k].next = NULL;
4212 assert(n == (k + 1));
4213
4214 FREE(indices);
4215 hr = D3D_OK;
4216 }
4217
4218 /* r500 */
4219 if (info->const_float_slots > device->max_vs_const_f &&
4220 (info->const_int_slots || info->const_bool_slots) &&
4221 !info->swvp_on)
4222 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4223
4224
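/* With indirect addressing the shader may read any float constant at run
 * time, so reserve the device's full float constant range instead of just
 * the slots seen while parsing. */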
4225 if (tx->indirect_const_access) { /* vs only */
4226 info->const_float_slots = device->max_vs_const_f;
4227 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4228 }
4229
4230 if (!info->swvp_on) {
4231 info->const_used_size = sizeof(float[4]) * tx->num_slots;
4232 if (tx->num_slots)
4233 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4234 } else {
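/* Software vertex processing keeps the large constant banks addressable:
 * presumably buffers 0 and 1 cover the 8192 float constants, buffer 2 the
 * 2048 int constants, and buffer 3 the bool constants (512 vec4 slots). */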
4235 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4236 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4237 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4238 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4239 }
4240
4241 if (info->process_vertices)
4242 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4243
4244 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4245 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4246 tgsi_dump(toks, 0);
4247 ureg_free_tokens(toks);
4248 }
4249
4250 if (info->process_vertices) {
4251 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4252 tx->output_info,
4253 tx->num_outputs,
4254 &(info->so));
4255 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4256 } else
4257 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4258 if (!info->cso) {
4259 hr = D3DERR_DRIVERINTERNALERROR;
4260 FREE(info->lconstf.data);
4261 FREE(info->lconstf.ranges);
4262 goto out;
4263 }
4264
4265 info->const_ranges = const_ranges;
4266 const_ranges = NULL;
4267 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4268 out:
4269 if (const_ranges)
4270 FREE(const_ranges);
4271 tx_dtor(tx);
4272 return hr;
4273 }
4274