1 /*
2 * Copyright 2011 Joakim Sindholt <[email protected]>
3 * Copyright Axel Davy <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "device9.h"
8 #include "basetexture9.h"
9 #include "vertexdeclaration9.h"
10 #include "vertexshader9.h"
11 #include "pixelshader9.h"
12 #include "nine_ff.h"
13 #include "nine_defines.h"
14 #include "nine_helpers.h"
15 #include "nine_pipe.h"
16 #include "nine_dump.h"
17
18 #include "pipe/p_context.h"
19 #include "tgsi/tgsi_ureg.h"
20 #include "tgsi/tgsi_dump.h"
21 #include "util/bitscan.h"
22 #include "util/box.h"
23 #include "util/u_hash_table.h"
24 #include "util/u_upload_mgr.h"
25
26 #define DBG_CHANNEL DBG_FF
27
28 #define NINE_FF_NUM_VS_CONST 204
29 #define NINE_FF_NUM_PS_CONST 24
30
/* Plain four-component float vector. */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
35
/* Key describing one fixed-function vertex shader variant.
 *
 * The bit-field view and the value64/value32 views alias the same storage;
 * the hash and compare helpers in this file operate on the value views, so
 * the bit-fields (including padding) have to add up to exactly
 * 6 x 32 bit.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* --- word 0: general state flags (28 bit + 4 bit padding) --- */
            uint32_t position_t : 1;
            uint32_t lighting : 1;
            uint32_t darkness : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3;
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            /* material sources: 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_diffuse : 2;
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_zero : 1;
            uint32_t has_normal : 1;
            uint32_t fog : 1;
            uint32_t normalizenormals : 1;
            uint32_t ucp : 1;
            uint32_t pad1 : 4;

            /* --- word 1: texcoord input dimensions, 8 * 2 bits --- */
            uint32_t tc_dim_input : 16;
            uint32_t pad2 : 16;

            /* --- word 2: texcoord output dimensions, 8 * 3 bits --- */
            uint32_t tc_dim_output : 24;
            uint32_t pad3 : 8;

            /* --- word 3: texcoord generation mode, 8 * 3 bits --- */
            uint32_t tc_gen : 24;
            uint32_t pad4 : 8;

            /* --- word 4: texcoord index (8 * 3 bits) + clip plane emulation --- */
            uint32_t tc_idx : 24;
            uint32_t clipplane_emulate : 8;

            /* --- word 5: bitmask tested against (1 << NINE_DECLUSAGE_*) --- */
            uint32_t passthrough;
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
76
/* Texture stage state, as packed per stage into nine_ff_ps_key:
 *
 * COLOROP       D3DTOP 5 bit
 * ALPHAOP       D3DTOP 5 bit
 * COLORARG0     D3DTA  3 bit
 * COLORARG1     D3DTA  3 bit
 * COLORARG2     D3DTA  3 bit
 * ALPHAARG0     D3DTA  3 bit
 * ALPHAARG1     D3DTA  3 bit
 * ALPHAARG2     D3DTA  3 bit
 * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET            2 bit (1D/2D/3D/CUBE)
 * (padding)            1 bit
 * ===========================
 *                     32 bit per stage
 */
/* Key describing one fixed-function pixel shader variant.
 *
 * The structured view and the value64/value32 views alias the same storage;
 * the hash and compare helpers in this file operate on the value views, so
 * the structured view has to fill exactly 12 x 32 bit.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            /* words 0..7: one 32-bit word per texture stage */
            struct {
                uint32_t colorop : 5;
                uint32_t alphaop : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad : 1;
                /* that's 32 bit exactly */
            } ts[8];

            /* word 8: global flags */
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t fog_source : 1; /* 0: Z, 1: W */
            uint32_t specular : 1;
            uint32_t alpha_test_emulation : 3;
            uint32_t flatshade : 1;
            uint32_t pad1 : 7; /* 9 32-bit words with this */

            /* words 9..11: 9 bytes of extra argument bits + 3 bytes padding */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
127
nine_ff_vs_key_hash(const void * key)128 static uint32_t nine_ff_vs_key_hash(const void *key)
129 {
130 const struct nine_ff_vs_key *vs = key;
131 unsigned i;
132 uint32_t hash = vs->value32[0];
133 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
134 hash ^= vs->value32[i];
135 return hash;
136 }
nine_ff_vs_key_comp(const void * key1,const void * key2)137 static bool nine_ff_vs_key_comp(const void *key1, const void *key2)
138 {
139 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
140 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
141
142 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
143 }
nine_ff_ps_key_hash(const void * key)144 static uint32_t nine_ff_ps_key_hash(const void *key)
145 {
146 const struct nine_ff_ps_key *ps = key;
147 unsigned i;
148 uint32_t hash = ps->value32[0];
149 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
150 hash ^= ps->value32[i];
151 return hash;
152 }
nine_ff_ps_key_comp(const void * key1,const void * key2)153 static bool nine_ff_ps_key_comp(const void *key1, const void *key2)
154 {
155 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
156 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
157
158 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
159 }
nine_ff_fvf_key_hash(const void * key)160 static uint32_t nine_ff_fvf_key_hash(const void *key)
161 {
162 return *(DWORD *)key;
163 }
nine_ff_fvf_key_comp(const void * key1,const void * key2)164 static bool nine_ff_fvf_key_comp(const void *key1, const void *key2)
165 {
166 return *(DWORD *)key1 == *(DWORD *)key2;
167 }
168
169 static void nine_ff_prune_vs(struct NineDevice9 *);
170 static void nine_ff_prune_ps(struct NineDevice9 *);
171
/* Dump the TGSI tokens of a ureg program.
 *
 * The dump is emitted when the NINE_FF_DUMP debug option is set or when
 * 'override' is true; otherwise nothing happens. The tokens are obtained
 * from ureg only for the dump and freed again afterwards.
 */
static void nine_ureg_tgsi_dump(struct ureg_program *ureg, bool override)
{
    const struct tgsi_token *tokens;

    /* The option is queried first, exactly as before. */
    if (!debug_get_bool_option("NINE_FF_DUMP", false) && !override)
        return;

    tokens = ureg_get_tokens(ureg, NULL);
    tgsi_dump(tokens, 0);
    ureg_free_tokens(tokens);
}
180
/* Replicate a single component of a ureg_src across all four channels. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, for symmetry with the macros above. */
#define _XYZW(r) (r)

/* Same as above, but for a ureg_dst, which is first turned into a source. */
#define _X(r) _XXXX(ureg_src(r))
#define _Y(r) _YYYY(ureg_src(r))
#define _Z(r) _ZZZZ(ureg_src(r))
#define _W(r) _WWWW(ureg_src(r))

/* The following macros expect a variable named 'ureg' in scope. */

/* Constant slot n. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* Material constant i; material constants start at constant slot 19. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

/* Light constant i, addressed indirectly through AL.x.
 * AL should contain base address of lights table. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))
201
202 /* VS FF constants layout:
203 *
204 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
205 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
206 * CONST[ 8..11] D3DTS_PROJECTION
207 * CONST[12..15] D3DTS_VIEW^(-1)
208 * CONST[16..18] Normal matrix
209 *
210 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient
211 * CONST[20] MATERIAL.Diffuse
212 * CONST[21] MATERIAL.Ambient
213 * CONST[22] MATERIAL.Specular
214 * CONST[23].x___ MATERIAL.Power
215 * CONST[24] MATERIAL.Emissive
216 * CONST[25] RS.Ambient
217 *
218 * CONST[26].x___ RS.PointSizeMin
219 * CONST[26]._y__ RS.PointSizeMax
220 * CONST[26].__z_ RS.PointSize
221 * CONST[26].___w RS.PointScaleA
222 * CONST[27].x___ RS.PointScaleB
223 * CONST[27]._y__ RS.PointScaleC
224 *
225 * CONST[28].x___ RS.FogEnd
226 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
227 * CONST[28].__z_ RS.FogDensity
 *
229 * CONST[30].x___ TWEENFACTOR
230 *
231 * CONST[32].x___ LIGHT[0].Type
232 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
233 * CONST[33] LIGHT[0].Diffuse
234 * CONST[34] LIGHT[0].Specular
235 * CONST[35] LIGHT[0].Ambient
236 * CONST[36].xyz_ LIGHT[0].Position
237 * CONST[36].___w LIGHT[0].Range
238 * CONST[37].xyz_ LIGHT[0].Direction
239 * CONST[37].___w LIGHT[0].Falloff
240 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
241 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
242 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
243 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
244 * CONST[39].___w 1 if this is the last active light, 0 if not
245 * CONST[40] LIGHT[1]
246 * CONST[48] LIGHT[2]
247 * CONST[56] LIGHT[3]
248 * CONST[64] LIGHT[4]
249 * CONST[72] LIGHT[5]
250 * CONST[80] LIGHT[6]
251 * CONST[88] LIGHT[7]
252 * NOTE: no lighting code is generated if there are no active lights
253 *
254 * CONST[100].x___ Viewport 2/width
255 * CONST[100]._y__ Viewport 2/height
256 * CONST[100].__z_ Viewport 1/(zmax - zmin)
257 * CONST[100].___w Viewport width
258 * CONST[101].x___ Viewport x0
259 * CONST[101]._y__ Viewport y0
260 * CONST[101].__z_ Viewport z0
261 *
262 * CONST[128..131] D3DTS_TEXTURE0
263 * CONST[132..135] D3DTS_TEXTURE1
264 * CONST[136..139] D3DTS_TEXTURE2
265 * CONST[140..143] D3DTS_TEXTURE3
266 * CONST[144..147] D3DTS_TEXTURE4
267 * CONST[148..151] D3DTS_TEXTURE5
268 * CONST[152..155] D3DTS_TEXTURE6
269 * CONST[156..159] D3DTS_TEXTURE7
270 *
271 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
272 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
273 * ...
274 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
275 * CONST[196] UCP0
 * ...
277 * CONST[203] UCP7
278 */
279 struct vs_build_ctx
280 {
281 struct ureg_program *ureg;
282 const struct nine_ff_vs_key *key;
283
284 uint16_t input[PIPE_MAX_ATTRIBS];
285 unsigned num_inputs;
286
287 struct ureg_src aVtx;
288 struct ureg_src aNrm;
289 struct ureg_src aCol[2];
290 struct ureg_src aTex[8];
291 struct ureg_src aPsz;
292 struct ureg_src aInd;
293 struct ureg_src aWgt;
294
295 struct ureg_src aVtx1; /* tweening */
296 struct ureg_src aNrm1;
297
298 struct ureg_src mtlA;
299 struct ureg_src mtlD;
300 struct ureg_src mtlS;
301 struct ureg_src mtlE;
302 };
303
304 static inline unsigned
get_texcoord_sn(struct pipe_screen * screen)305 get_texcoord_sn(struct pipe_screen *screen)
306 {
307 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
308 return TGSI_SEMANTIC_TEXCOORD;
309 return TGSI_SEMANTIC_GENERIC;
310 }
311
312 static inline struct ureg_src
build_vs_add_input(struct vs_build_ctx * vs,uint16_t ndecl)313 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
314 {
315 const unsigned i = vs->num_inputs++;
316 assert(i < PIPE_MAX_ATTRIBS);
317 vs->input[i] = ndecl;
318 return ureg_DECL_vs_input(vs->ureg, i);
319 }
320
321 /* NOTE: dst may alias src */
322 static inline void
ureg_normalize3(struct ureg_program * ureg,struct ureg_dst dst,struct ureg_src src)323 ureg_normalize3(struct ureg_program *ureg,
324 struct ureg_dst dst, struct ureg_src src)
325 {
326 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
327 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
328
329 ureg_DP3(ureg, tmp_x, src, src);
330 ureg_RSQ(ureg, tmp_x, _X(tmp));
331 ureg_MUL(ureg, dst, src, _X(tmp));
332 ureg_release_temporary(ureg, tmp);
333 }
334
335 static void *
nine_ff_build_vs(struct NineDevice9 * device,struct vs_build_ctx * vs)336 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
337 {
338 const struct nine_ff_vs_key *key = vs->key;
339 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
340 struct ureg_dst oPos, oCol[2], oPsz, oFog;
341 struct ureg_dst AR;
342 unsigned i, c;
343 unsigned label[32], l = 0;
344 bool need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
345 bool has_aNrm;
346 bool need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
347 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
348
349 vs->ureg = ureg;
350
351 /* Check which inputs we should transform. */
352 for (i = 0; i < 8 * 3; i += 3) {
353 switch ((key->tc_gen >> i) & 0x7) {
354 case NINED3DTSS_TCI_CAMERASPACENORMAL:
355 need_aNrm = true;
356 break;
357 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
358 need_aVtx = true;
359 break;
360 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
361 need_aVtx = need_aNrm = true;
362 break;
363 case NINED3DTSS_TCI_SPHEREMAP:
364 need_aVtx = need_aNrm = true;
365 break;
366 default:
367 break;
368 }
369 }
370
371 has_aNrm = need_aNrm && key->has_normal;
372
373 /* Declare and record used inputs (needed for linkage with vertex format):
374 * (texture coordinates handled later)
375 */
376 vs->aVtx = build_vs_add_input(vs,
377 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
378
379 vs->aNrm = ureg_imm1f(ureg, 0.0f);
380 if (has_aNrm)
381 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
382
383 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
384 vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
385
386 if (key->lighting || key->darkness) {
387 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
388 key->mtl_ambient | key->mtl_emissive;
389 if ((mask & 0x1) && !key->color0in_one)
390 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
391 if ((mask & 0x2) && !key->color1in_zero)
392 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
393
394 vs->mtlD = MATERIAL_CONST(1);
395 vs->mtlA = MATERIAL_CONST(2);
396 vs->mtlS = MATERIAL_CONST(3);
397 vs->mtlE = MATERIAL_CONST(5);
398 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
399 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
400 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
401 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
402 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
403 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
404 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
405 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
406 } else {
407 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
408 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
409 }
410
411 if (key->vertexpointsize)
412 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
413
414 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
415 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
416 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
417 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
418 if (key->vertextween) {
419 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
420 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
421 }
422
423 /* Declare outputs:
424 */
425 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
426 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
427 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
428 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
429 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16);
430 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
431 }
432
433 if (key->vertexpointsize || key->pointscale || device->driver_caps.always_output_pointsize) {
434 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
435 TGSI_WRITEMASK_X, 0, 1);
436 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
437 }
438
439 if (key->lighting || key->vertexblend)
440 AR = ureg_DECL_address(ureg);
441
442 /* === Vertex transformation / vertex blending:
443 */
444
445 if (key->position_t) {
446 if (device->driver_caps.window_space_position_support) {
447 ureg_MOV(ureg, oPos, vs->aVtx);
448 } else {
449 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
450 /* vs->aVtx contains the coordinates buffer wise.
451 * later in the pipeline, clipping, viewport and division
452 * by w (rhw = 1/w) are going to be applied, so do the reverse
453 * of these transformations (except clipping) to have the good
454 * position at the end.*/
455 ureg_MOV(ureg, tmp, vs->aVtx);
456 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
457 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
458 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
459 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
460 /* Y needs to be reversed */
461 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
462 /* Replace w by 1 if it equals to 0 */
463 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W))),
464 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W), ureg_imm1f(ureg, 1.0f));
465 /* inverse rhw */
466 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
467 /* multiply X, Y, Z by w */
468 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
469 ureg_MOV(ureg, oPos, ureg_src(tmp));
470 ureg_release_temporary(ureg, tmp);
471 }
472 } else if (key->vertexblend) {
473 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
474 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
475 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
476 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
477 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
478 struct ureg_src cWM[4];
479
480 for (i = 160; i <= 195; ++i)
481 ureg_DECL_constant(ureg, i);
482
483 /* translate world matrix index to constant file index */
484 if (key->vertexblend_indexed) {
485 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
486 ureg_ARL(ureg, AR, ureg_src(tmp));
487 }
488
489 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
490 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
491 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
492
493 for (i = 0; i < key->vertexblend; ++i) {
494 for (c = 0; c < 4; ++c) {
495 cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);
496 if (key->vertexblend_indexed)
497 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
498 }
499
500 /* multiply by WORLD(index) */
501 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
502 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
503 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
504 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
505
506 if (has_aNrm) {
507 /* Note: the spec says the transpose of the inverse of the
508 * WorldView matrices should be used, but all tests show
509 * otherwise.
510 * Only case unknown: D3DVBF_0WEIGHTS */
511 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
512 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
513 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
514 }
515
516 if (i < (key->vertexblend - 1)) {
517 /* accumulate weighted position value */
518 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
519 if (has_aNrm)
520 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
521 /* subtract weighted position value for last value */
522 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
523 }
524 }
525
526 /* the last weighted position is always 1 - sum_of_previous_weights */
527 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
528 if (has_aNrm)
529 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
530
531 /* multiply by VIEW_PROJ */
532 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
533 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
534 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
535 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
536
537 if (need_aVtx)
538 vs->aVtx = ureg_src(aVtx_dst);
539
540 ureg_release_temporary(ureg, tmp);
541 ureg_release_temporary(ureg, tmp2);
542 ureg_release_temporary(ureg, sum_blendweights);
543 if (!need_aVtx)
544 ureg_release_temporary(ureg, aVtx_dst);
545
546 if (has_aNrm) {
547 if (key->normalizenormals)
548 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
549 vs->aNrm = ureg_src(aNrm_dst);
550 } else
551 ureg_release_temporary(ureg, aNrm_dst);
552 } else {
553 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
554
555 if (key->vertextween) {
556 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
557 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
558 vs->aVtx = ureg_src(aVtx_dst);
559 if (has_aNrm) {
560 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
561 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
562 vs->aNrm = ureg_src(aNrm_dst);
563 }
564 }
565
566 /* position = vertex * WORLD_VIEW_PROJ */
567 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
568 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
569 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
570 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
571 ureg_release_temporary(ureg, tmp);
572
573 if (need_aVtx) {
574 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
575 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
576 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
577 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
578 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
579 vs->aVtx = ureg_src(aVtx_dst);
580 }
581 if (has_aNrm) {
582 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
583 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
584 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
585 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
586 if (key->normalizenormals)
587 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
588 vs->aNrm = ureg_src(aNrm_dst);
589 }
590 }
591
592 /* === Process point size:
593 */
594 if (key->vertexpointsize || key->pointscale || device->driver_caps.always_output_pointsize) {
595 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
596 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
597 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
598 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
599 if (key->vertexpointsize) {
600 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
601 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
602 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
603 } else {
604 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
605 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
606 }
607
608 if (key->pointscale) {
609 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
610 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
611
612 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
613 ureg_RSQ(ureg, tmp_y, _X(tmp));
614 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
615 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
616 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
617 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
618 ureg_RSQ(ureg, tmp_x, _X(tmp));
619 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
620 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
621 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
622 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
623 }
624
625 ureg_MOV(ureg, oPsz, _Z(tmp));
626 ureg_release_temporary(ureg, tmp);
627 }
628
629 for (i = 0; i < 8; ++i) {
630 struct ureg_dst tmp, tmp_x, tmp2;
631 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
632 unsigned c, writemask;
633 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
634 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
635 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
636 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
637
638 /* No texture output of index s */
639 if (tci == NINED3DTSS_TCI_DISABLE)
640 continue;
641 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
642 tmp = ureg_DECL_temporary(ureg);
643 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
644 input_coord = ureg_DECL_temporary(ureg);
645 transformed = ureg_DECL_temporary(ureg);
646
647 /* Get the coordinate */
648 switch (tci) {
649 case NINED3DTSS_TCI_PASSTHRU:
650 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
651 * Else the idx is used only to determine wrapping mode. */
652 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
653 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
654 break;
655 case NINED3DTSS_TCI_CAMERASPACENORMAL:
656 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
657 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
658 dim_input = 4;
659 break;
660 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
661 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
662 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
663 dim_input = 4;
664 break;
665 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
666 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
667 aVtx_normed = ureg_DECL_temporary(ureg);
668 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
669 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
670 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
671 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
672 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
673 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
674 ureg_release_temporary(ureg, aVtx_normed);
675 dim_input = 4;
676 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
677 break;
678 case NINED3DTSS_TCI_SPHEREMAP:
679 /* Implement the formula of GL_SPHERE_MAP */
680 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
681 aVtx_normed = ureg_DECL_temporary(ureg);
682 tmp2 = ureg_DECL_temporary(ureg);
683 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
684 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
685 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
686 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
687 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
688 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
689 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
690 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
691 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
692 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
693 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
694 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
695 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
696 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
697 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
698 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
699 ureg_release_temporary(ureg, aVtx_normed);
700 ureg_release_temporary(ureg, tmp2);
701 dim_input = 4;
702 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
703 break;
704 default:
705 assert(0);
706 break;
707 }
708
709 /* Apply the transformation */
710 /* dim_output == 0 => do not transform the components.
711 * XYZRHW also disables transformation */
712 if (!dim_output || key->position_t) {
713 ureg_release_temporary(ureg, transformed);
714 transformed = input_coord;
715 writemask = TGSI_WRITEMASK_XYZW;
716 } else {
717 for (c = 0; c < dim_output; c++) {
718 t = ureg_writemask(transformed, 1 << c);
719 switch (dim_input) {
720 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
721 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
722 break;
723 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
724 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
725 break;
726 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
727 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
728 break;
729 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
730 default:
731 assert(0);
732 }
733 }
734 writemask = (1 << dim_output) - 1;
735 ureg_release_temporary(ureg, input_coord);
736 }
737
738 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
739 ureg_release_temporary(ureg, transformed);
740 ureg_release_temporary(ureg, tmp);
741 }
742
743 /* === Lighting:
744 *
745 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
746 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
747 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
748 *
749 * vec3 normal = normalize(in.Normal * NormalMatrix);
750 * vec3 hitDir = light.direction;
751 * float atten = 1.0;
752 *
753 * if (light.type != DIRECTIONAL)
754 * {
755 * vec3 hitVec = light.position - eyeVertex;
756 * float d = length(hitVec);
757 * hitDir = hitVec / d;
758 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
759 * }
760 *
761 * if (light.type == SPOTLIGHT)
762 * {
763 * float rho = dp3(-hitVec, light.direction);
764 * if (rho < cos(light.phi / 2))
765 * atten = 0;
766 * if (rho < cos(light.theta / 2))
767 * atten *= pow(some_func(rho), light.falloff);
768 * }
769 *
770 * float nDotHit = dp3_sat(normal, hitVec);
771 * float powFact = 0.0;
772 *
773 * if (nDotHit > 0.0)
774 * {
775 * vec3 midVec = normalize(hitDir + eye);
776 * float nDotMid = dp3_sat(normal, midVec);
777 * pFact = pow(nDotMid, material.power);
778 * }
779 *
780 * ambient += light.ambient * atten;
781 * diffuse += light.diffuse * atten * nDotHit;
782 * specular += light.specular * atten * powFact;
783 */
784 if (key->lighting) {
785 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
786 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
787 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
788 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
789 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
790 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
791 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
792
793 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
794
795 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
796
797 /* Light.*.Alpha is not used. */
798 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
799 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
800 struct ureg_dst rS = ureg_DECL_temporary(ureg);
801
802 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
803
804 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
805 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
806 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
807 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
808 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
809 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
810 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
811 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
812 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
813 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
814 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
815 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
816 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
817 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
818 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
819
820 const unsigned loop_label = l++;
821
822 /* Declare all light constants to allow indirect addressing */
823 for (i = 32; i < 96; i++)
824 ureg_DECL_constant(ureg, i);
825
826 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
827 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
828 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
829 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
830
831 /* loop management */
832 ureg_BGNLOOP(ureg, &label[loop_label]);
833 ureg_ARL(ureg, AL, _W(rCtr));
834
835 /* if (not DIRECTIONAL light): */
836 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
837 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
838 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
839 ureg_IF(ureg, _X(tmp), &label[l++]);
840 {
841 /* hitDir = light.position - eyeVtx
842 * d = length(hitDir)
843 */
844 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
845 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
846 ureg_RSQ(ureg, tmp_y, _X(tmp));
847 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
848
849 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
850 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
851 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
852 ureg_RCP(ureg, rAtt, _W(rAtt));
853 /* cut-off if distance exceeds Light.Range */
854 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
855 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
856 }
857 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
858 ureg_ENDIF(ureg);
859
860 /* normalize hitDir */
861 ureg_normalize3(ureg, rHit, ureg_src(rHit));
862
863 /* if (SPOT light) */
864 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
865 ureg_IF(ureg, _X(tmp), &label[l++]);
866 {
867 /* rho = dp3(-hitDir, light.spotDir)
868 *
869 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
870 * spotAtt = 1
871 * else
872 * if (rho <= light.cphi2)
873 * spotAtt = 0
874 * else
875 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
876 */
877 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
878 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
879 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
880 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
881 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
882 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
883 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
884 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
885 }
886 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
887 ureg_ENDIF(ureg);
888
889 /* directional factors, let's not use LIT because of clarity */
890
891 if (has_aNrm) {
892 if (key->localviewer) {
893 ureg_normalize3(ureg, rMid, vs->aVtx);
894 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
895 } else {
896 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
897 }
898 ureg_normalize3(ureg, rMid, ureg_src(rMid));
899 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
900 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
901 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
902 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
903 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
904 * No tests were made for backfacing, so add the two conditions */
905 ureg_IF(ureg, _Z(tmp), &label[l++]);
906 {
907 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
908 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
909 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
910 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
911 }
912 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
913 ureg_ENDIF(ureg);
914
915 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
916 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
917 }
918
919 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
920
921 /* break if this was the last light */
922 ureg_IF(ureg, cLLast, &label[l++]);
923 ureg_BRK(ureg);
924 ureg_ENDIF(ureg);
925 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
926
927 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
928 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
929 ureg_ENDLOOP(ureg, &label[loop_label]);
930
931 /* Apply to material:
932 *
933 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
934 * material.ambient * ambient +
935 * material.diffuse * diffuse +
936 * oCol[1] = material.specular * specular;
937 */
938 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
939 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));
940 else {
941 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
942 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
943 }
944
945 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));
946 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
947 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
948 ureg_release_temporary(ureg, rAtt);
949 ureg_release_temporary(ureg, rHit);
950 ureg_release_temporary(ureg, rMid);
951 ureg_release_temporary(ureg, rCtr);
952 ureg_release_temporary(ureg, rD);
953 ureg_release_temporary(ureg, rA);
954 ureg_release_temporary(ureg, rS);
955 ureg_release_temporary(ureg, rAtt);
956 ureg_release_temporary(ureg, tmp);
957 } else
958 /* COLOR */
959 if (key->darkness) {
960 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
961 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));
962 else
963 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
964 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
965 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
966 } else {
967 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
968 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
969 }
970
971 /* === Process fog.
972 *
973 * exp(x) = ex2(log2(e) * x)
974 */
975 if (key->fog_mode) {
976 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
977 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
978 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
979 if (key->fog_range) {
980 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
981 ureg_RSQ(ureg, tmp_z, _X(tmp));
982 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
983 } else {
984 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
985 }
986
987 if (key->fog_mode == D3DFOG_EXP) {
988 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
989 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
990 ureg_EX2(ureg, tmp_x, _X(tmp));
991 } else
992 if (key->fog_mode == D3DFOG_EXP2) {
993 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
994 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
995 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
996 ureg_EX2(ureg, tmp_x, _X(tmp));
997 } else
998 if (key->fog_mode == D3DFOG_LINEAR) {
999 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
1000 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
1001 }
1002 ureg_MOV(ureg, oFog, _X(tmp));
1003 ureg_release_temporary(ureg, tmp);
1004 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
1005 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
1006 }
1007
1008 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
1009 struct ureg_src input;
1010 struct ureg_dst output;
1011 input = vs->aWgt;
1012 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1013 ureg_MOV(ureg, output, input);
1014 }
1015 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1016 struct ureg_src input;
1017 struct ureg_dst output;
1018 input = vs->aInd;
1019 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1020 ureg_MOV(ureg, output, input);
1021 }
1022 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1023 struct ureg_src input;
1024 struct ureg_dst output;
1025 input = vs->aNrm;
1026 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1027 ureg_MOV(ureg, output, input);
1028 }
1029 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1030 struct ureg_src input;
1031 struct ureg_dst output;
1032 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1033 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1034 ureg_MOV(ureg, output, input);
1035 }
1036 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1037 struct ureg_src input;
1038 struct ureg_dst output;
1039 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1040 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23);
1041 ureg_MOV(ureg, output, input);
1042 }
1043 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1044 struct ureg_src input;
1045 struct ureg_dst output;
1046 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1047 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1048 output = oFog;
1049 ureg_MOV(ureg, output, input);
1050 }
1051 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1052 (void) 0; /* TODO: replace z of position output ? */
1053 }
1054
1055 /* ucp for ff applies on world coordinates.
1056 * aVtx is in worldview coordinates. */
1057 if (key->ucp) {
1058 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1059 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1060 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));
1061 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1062 if (!key->clipplane_emulate) {
1063 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1064 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1065 } else {
1066 struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
1067 int num_clipdist = ffs(key->clipplane_emulate);
1068 ureg_ADD(ureg, tmp, _CONST(15), ureg_src(tmp));
1069 clipdist[0] = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_CLIPDIST, 0,
1070 ((1 << num_clipdist) - 1) & 0xf, 0, 1);
1071 if (num_clipdist >= 5)
1072 clipdist[1] = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_CLIPDIST, 1,
1073 ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
1074 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
1075 for (i = 0; i < num_clipdist; i++) {
1076 assert(!ureg_dst_is_undef(clipdist[i>>2]));
1077 if (!(key->clipplane_emulate & (1 << i)))
1078 ureg_MOV(ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(ureg, 0.f));
1079 else
1080 ureg_DP4(ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
1081 ureg_src(tmp), _CONST(196+i));
1082 }
1083 }
1084 ureg_release_temporary(ureg, tmp);
1085 }
1086
1087 if (key->position_t && device->driver_caps.window_space_position_support)
1088 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
1089
1090 ureg_END(ureg);
1091 nine_ureg_tgsi_dump(ureg, false);
1092 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1093 }
1094
1095 /* PS FF constants layout:
1096 *
1097 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1098 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1099 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1100 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1101 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1102 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1104 *
1105 * CONST[20] D3DRS_TEXTUREFACTOR
1106 * CONST[21] D3DRS_FOGCOLOR
1107 * CONST[22].x___ RS.FogEnd
1108 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1109 * CONST[22].__z_ RS.FogDensity
1110 * CONST[22].___w Alpha ref
1111 */
/* State shared by the helpers that emit the fixed-function pixel shader. */
struct ps_build_ctx
{
    struct ureg_program *ureg; /* TGSI program being built */
    unsigned color_interpolate_flag; /* interpolation mode used for the COLOR inputs */

    struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
    struct ureg_src vT[8]; /* TEXCOORD[i] */
    struct ureg_dst rCur; /* D3DTA_CURRENT */
    struct ureg_dst rMod; /* rCur * rTex; read as D3DTA_CURRENT by the stage that follows a PREMODULATE */
    struct ureg_src rCurSrc; /* rCur viewed as a source */
    struct ureg_dst rTmp; /* D3DTA_TEMP */
    struct ureg_src rTmpSrc; /* rTmp viewed as a source */
    struct ureg_dst rTex; /* D3DTA_TEXTURE: result of the stage's texture fetch */
    struct ureg_src rTexSrc; /* rTex viewed as a source */
    struct ureg_src cBEM[8]; /* NOTE(review): not referenced by the code visible here; presumably bump-env matrix constants - confirm */
    struct ureg_src s[8]; /* sampler per stage; undefined source when the stage does not sample */

    struct {
        unsigned index; /* stage currently being emitted */
        unsigned index_pre_mod; /* stage following the last PREMODULATE op */
    } stage;
};
1134
1135 static struct ureg_src
ps_get_ts_arg(struct ps_build_ctx * ps,unsigned ta)1136 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1137 {
1138 struct ureg_src reg;
1139
1140 switch (ta & D3DTA_SELECTMASK) {
1141 case D3DTA_CONSTANT:
1142 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1143 break;
1144 case D3DTA_CURRENT:
1145 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1146 break;
1147 case D3DTA_DIFFUSE:
1148 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, ps->color_interpolate_flag);
1149 break;
1150 case D3DTA_SPECULAR:
1151 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, ps->color_interpolate_flag);
1152 break;
1153 case D3DTA_TEMP:
1154 reg = ps->rTmpSrc;
1155 break;
1156 case D3DTA_TEXTURE:
1157 reg = ps->rTexSrc;
1158 break;
1159 case D3DTA_TFACTOR:
1160 reg = ureg_DECL_constant(ps->ureg, 20);
1161 break;
1162 default:
1163 assert(0);
1164 reg = ureg_src_undef();
1165 break;
1166 }
1167 if (ta & D3DTA_COMPLEMENT) {
1168 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1169 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
1170 reg = ureg_src(dst);
1171 }
1172 if (ta & D3DTA_ALPHAREPLICATE)
1173 reg = _WWWW(reg);
1174 return reg;
1175 }
1176
1177 static struct ureg_dst
ps_get_ts_dst(struct ps_build_ctx * ps,unsigned ta)1178 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1179 {
1180 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1181
1182 switch (ta & D3DTA_SELECTMASK) {
1183 case D3DTA_CURRENT:
1184 return ps->rCur;
1185 case D3DTA_TEMP:
1186 return ps->rTmp;
1187 default:
1188 assert(0);
1189 return ureg_dst_undef();
1190 }
1191 }
1192
/* Report which of the three stage arguments a texture operation reads.
 *
 * Bit n of the result is set when arg[n] is consumed by the operation:
 * 0x1 = arg0, 0x2 = arg1, 0x4 = arg2.
 */
static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
{
    uint8_t used;

    switch (top) {
    case D3DTOP_DISABLE:
        used = 0x0; /* nothing is read */
        break;
    case D3DTOP_SELECTARG1:
    case D3DTOP_PREMODULATE:
        used = 0x2; /* arg1 only */
        break;
    case D3DTOP_SELECTARG2:
        used = 0x4; /* arg2 only */
        break;
    case D3DTOP_MULTIPLYADD:
    case D3DTOP_LERP:
        used = 0x7; /* all three arguments */
        break;
    default:
        used = 0x6; /* every other operation: arg1 and arg2 */
        break;
    }
    return used;
}
1210
1211 static inline bool
is_MOV_no_op(struct ureg_dst dst,struct ureg_src src)1212 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1213 {
1214 return !dst.WriteMask ||
1215 (dst.File == src.File &&
1216 dst.Index == src.Index &&
1217 !dst.Indirect &&
1218 !dst.Saturate &&
1219 !src.Indirect &&
1220 !src.Negate &&
1221 !src.Absolute &&
1222 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1223 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1224 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1225 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1226
1227 }
1228
1229 static void
ps_do_ts_op(struct ps_build_ctx * ps,unsigned top,struct ureg_dst dst,struct ureg_src * arg)1230 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1231 {
1232 struct ureg_program *ureg = ps->ureg;
1233 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1234 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1235 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1236
1237 tmp.WriteMask = dst.WriteMask;
1238
1239 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1240 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1241 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1242 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1243 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1244 top != D3DTOP_LERP)
1245 dst = ureg_saturate(dst);
1246
1247 switch (top) {
1248 case D3DTOP_SELECTARG1:
1249 if (!is_MOV_no_op(dst, arg[1]))
1250 ureg_MOV(ureg, dst, arg[1]);
1251 break;
1252 case D3DTOP_SELECTARG2:
1253 if (!is_MOV_no_op(dst, arg[2]))
1254 ureg_MOV(ureg, dst, arg[2]);
1255 break;
1256 case D3DTOP_MODULATE:
1257 ureg_MUL(ureg, dst, arg[1], arg[2]);
1258 break;
1259 case D3DTOP_MODULATE2X:
1260 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1261 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1262 break;
1263 case D3DTOP_MODULATE4X:
1264 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1265 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1266 break;
1267 case D3DTOP_ADD:
1268 ureg_ADD(ureg, dst, arg[1], arg[2]);
1269 break;
1270 case D3DTOP_ADDSIGNED:
1271 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1272 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
1273 break;
1274 case D3DTOP_ADDSIGNED2X:
1275 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1276 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1277 break;
1278 case D3DTOP_SUBTRACT:
1279 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
1280 break;
1281 case D3DTOP_ADDSMOOTH:
1282 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1283 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1284 break;
1285 case D3DTOP_BLENDDIFFUSEALPHA:
1286 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1287 break;
1288 case D3DTOP_BLENDTEXTUREALPHA:
1289 /* XXX: alpha taken from previous stage, texture or result ? */
1290 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1291 break;
1292 case D3DTOP_BLENDFACTORALPHA:
1293 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1294 break;
1295 case D3DTOP_BLENDTEXTUREALPHAPM:
1296 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
1297 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1298 break;
1299 case D3DTOP_BLENDCURRENTALPHA:
1300 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1301 break;
1302 case D3DTOP_PREMODULATE:
1303 ureg_MOV(ureg, dst, arg[1]);
1304 ps->stage.index_pre_mod = ps->stage.index + 1;
1305 break;
1306 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1307 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1308 break;
1309 case D3DTOP_MODULATECOLOR_ADDALPHA:
1310 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1311 break;
1312 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1313 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
1314 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1315 break;
1316 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1317 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1318 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1319 break;
1320 case D3DTOP_BUMPENVMAP:
1321 break;
1322 case D3DTOP_BUMPENVMAPLUMINANCE:
1323 break;
1324 case D3DTOP_DOTPRODUCT3:
1325 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1326 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1327 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1328 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1329 break;
1330 case D3DTOP_MULTIPLYADD:
1331 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1332 break;
1333 case D3DTOP_LERP:
1334 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1335 break;
1336 case D3DTOP_DISABLE:
1337 /* no-op ? */
1338 break;
1339 default:
1340 assert(!"invalid D3DTOP");
1341 break;
1342 }
1343 ureg_release_temporary(ureg, tmp);
1344 ureg_release_temporary(ureg, tmp2);
1345 }
1346
1347 static void *
nine_ff_build_ps(struct NineDevice9 * device,struct nine_ff_ps_key * key)1348 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1349 {
1350 struct ps_build_ctx ps;
1351 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1352 struct ureg_dst oCol;
1353 unsigned s;
1354 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1355
1356 memset(&ps, 0, sizeof(ps));
1357 ps.ureg = ureg;
1358 ps.color_interpolate_flag = key->flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
1359 ps.stage.index_pre_mod = -1;
1360
1361 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, ps.color_interpolate_flag);
1362
1363 ps.rCur = ureg_DECL_temporary(ureg);
1364 ps.rTmp = ureg_DECL_temporary(ureg);
1365 ps.rTex = ureg_DECL_temporary(ureg);
1366 ps.rCurSrc = ureg_src(ps.rCur);
1367 ps.rTmpSrc = ureg_src(ps.rTmp);
1368 ps.rTexSrc = ureg_src(ps.rTex);
1369
1370 /* Initial values */
1371 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1372 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1373 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1374
1375 for (s = 0; s < 8; ++s) {
1376 ps.s[s] = ureg_src_undef();
1377
1378 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1379 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1380 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1381 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1382 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1383
1384 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1385 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1386 key->ts[s].colorarg2 == D3DTA_TEXTURE ||
1387 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1388 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1389 ps.s[s] = ureg_DECL_sampler(ureg, s);
1390 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1391 }
1392 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1393 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1394 ps.s[s] = ureg_DECL_sampler(ureg, s);
1395 }
1396
1397 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1398 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1399 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1400 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1401 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1402
1403 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1404 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1405 key->ts[s].alphaarg2 == D3DTA_TEXTURE ||
1406 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1407 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1408 ps.s[s] = ureg_DECL_sampler(ureg, s);
1409 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1410 }
1411 }
1412 }
1413 if (key->specular)
1414 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1415
1416 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1417
1418 /* Run stages.
1419 */
1420 for (s = 0; s < 8; ++s) {
1421 unsigned colorarg[3];
1422 unsigned alphaarg[3];
1423 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1424 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1425 struct ureg_dst dst;
1426 struct ureg_src arg[3];
1427
1428 if (key->ts[s].colorop == D3DTOP_DISABLE) {
1429 assert (key->ts[s].alphaop == D3DTOP_DISABLE);
1430 continue;
1431 }
1432 ps.stage.index = s;
1433
1434 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1435 nine_D3DTOP_to_str(key->ts[s].colorop),
1436 nine_D3DTOP_to_str(key->ts[s].alphaop));
1437
1438 if (!ureg_src_is_undef(ps.s[s])) {
1439 unsigned target;
1440 struct ureg_src texture_coord = ps.vT[s];
1441 struct ureg_dst delta;
1442 switch (key->ts[s].textarget) {
1443 case 0: target = TGSI_TEXTURE_1D; break;
1444 case 1: target = TGSI_TEXTURE_2D; break;
1445 case 2: target = TGSI_TEXTURE_3D; break;
1446 case 3: target = TGSI_TEXTURE_CUBE; break;
1447 /* this is a 2 bit bitfield, do I really need a default case ? */
1448 }
1449
1450 /* Modify coordinates */
1451 if (s >= 1 &&
1452 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1453 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1454 delta = ureg_DECL_temporary(ureg);
1455 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1456 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1457 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1458 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1459 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1460 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1461 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1462 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1463 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1464 /* Prepare luminance multiplier
1465 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1466 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1467 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1468 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1469
1470 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1471 }
1472 }
1473 if (key->projected & (3 << (s *2))) {
1474 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1475 if (dim == 4)
1476 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1477 else {
1478 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1479 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1480 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1481 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1482 ureg_release_temporary(ureg, tmp);
1483 }
1484 } else {
1485 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1486 }
1487 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1488 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1489 }
1490
1491 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1492 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1493 continue;
1494
1495 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1496
1497 if (ps.stage.index_pre_mod == ps.stage.index) {
1498 ps.rMod = ureg_DECL_temporary(ureg);
1499 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1500 }
1501
1502 colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1503 colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1504 colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1505 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1506 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1507 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1508
1509 if (key->ts[s].colorop != key->ts[s].alphaop ||
1510 colorarg[0] != alphaarg[0] ||
1511 colorarg[1] != alphaarg[1] ||
1512 colorarg[2] != alphaarg[2])
1513 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1514
1515 /* Special DOTPRODUCT behaviour (see wine tests) */
1516 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1517 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1518
1519 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1520 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1521 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1522 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1523
1524 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1525 dst.WriteMask = TGSI_WRITEMASK_W;
1526
1527 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1528 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1529 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1530 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1531 }
1532 }
1533
1534 if (key->specular)
1535 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);
1536
1537 if (key->alpha_test_emulation == PIPE_FUNC_NEVER) {
1538 ureg_KILL(ureg);
1539 } else if (key->alpha_test_emulation != PIPE_FUNC_ALWAYS) {
1540 unsigned cmp_op;
1541 struct ureg_src src[2];
1542 struct ureg_dst tmp = ps.rTmp;
1543 cmp_op = pipe_comp_to_tgsi_opposite(key->alpha_test_emulation);
1544 src[0] = ureg_scalar(ps.rCurSrc, TGSI_SWIZZLE_W); /* Read color alpha channel */
1545 src[1] = _WWWW(_CONST(22)); /* Read alpha ref */
1546 ureg_insn(ureg, cmp_op, &tmp, 1, src, 2, 0);
1547 ureg_KILL_IF(ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
1548 }
1549
1550 /* Fog.
1551 */
1552 if (key->fog_mode) {
1553 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1554 struct ureg_src vPos;
1555 if (device->screen->get_param(device->screen,
1556 PIPE_CAP_FS_POSITION_IS_SYSVAL)) {
1557 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1558 } else {
1559 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1560 TGSI_INTERPOLATE_LINEAR);
1561 }
1562
1563 /* Source is either W or Z.
1564 * Z is when an orthogonal projection matrix is detected,
1565 * W (WFOG) else.
1566 */
1567 if (!key->fog_source)
1568 ureg_MOV(ureg, rFog, _ZZZZ(vPos));
1569 else
1570 /* Position's w is 1/w */
1571 ureg_RCP(ureg, rFog, _WWWW(vPos));
1572
1573 if (key->fog_mode == D3DFOG_EXP) {
1574 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1575 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1576 ureg_EX2(ureg, rFog, _X(rFog));
1577 } else
1578 if (key->fog_mode == D3DFOG_EXP2) {
1579 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1580 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1581 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1582 ureg_EX2(ureg, rFog, _X(rFog));
1583 } else
1584 if (key->fog_mode == D3DFOG_LINEAR) {
1585 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
1586 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1587 }
1588 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1589 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1590 } else
1591 if (key->fog) {
1592 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE);
1593 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1594 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1595 } else {
1596 ureg_MOV(ureg, oCol, ps.rCurSrc);
1597 }
1598
1599 ureg_END(ureg);
1600 nine_ureg_tgsi_dump(ureg, false);
1601 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1602 }
1603
1604 static struct NineVertexShader9 *
nine_ff_get_vs(struct NineDevice9 * device)1605 nine_ff_get_vs(struct NineDevice9 *device)
1606 {
1607 const struct nine_context *context = &device->context;
1608 struct NineVertexShader9 *vs;
1609 struct vs_build_ctx bld;
1610 struct nine_ff_vs_key key;
1611 unsigned s, i;
1612 bool has_indexes = false;
1613 bool has_weights = false;
1614 int8_t input_texture_coord[8];
1615
1616 assert(sizeof(key) <= sizeof(key.value32));
1617
1618 memset(&key, 0, sizeof(key));
1619 memset(&bld, 0, sizeof(bld));
1620 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1621
1622 bld.key = &key;
1623
1624 /* FIXME: this shouldn't be NULL, but it is on init */
1625 if (context->vdecl) {
1626 key.color0in_one = 1;
1627 key.color1in_zero = 1;
1628 for (i = 0; i < context->vdecl->nelems; i++) {
1629 uint16_t usage = context->vdecl->usage_map[i];
1630 if (usage == NINE_DECLUSAGE_POSITIONT)
1631 key.position_t = 1;
1632 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1633 key.color0in_one = 0;
1634 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1635 key.color1in_zero = 0;
1636 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1637 has_indexes = true;
1638 key.passthrough |= 1 << usage;
1639 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1640 has_weights = true;
1641 key.passthrough |= 1 << usage;
1642 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
1643 key.has_normal = 1;
1644 key.passthrough |= 1 << usage;
1645 } else if (usage == NINE_DECLUSAGE_PSIZE)
1646 key.vertexpointsize = 1;
1647 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1648 s = usage / NINE_DECLUSAGE_COUNT;
1649 if (s < 8)
1650 input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);
1651 else
1652 DBG("FF given texture coordinate >= 8. Ignoring\n");
1653 } else if (usage < NINE_DECLUSAGE_NONE)
1654 key.passthrough |= 1 << usage;
1655 }
1656 }
1657 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1658 * We do restrict to indices 0 */
1659 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1660 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1661 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1662 if (!key.position_t)
1663 key.passthrough = 0;
1664 key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];
1665
1666 key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active;
1667 key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
1668 if (key.position_t) {
1669 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1670 key.lighting = 0;
1671 }
1672 if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {
1673 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1674 key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1675 key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1676 key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1677 key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1678 }
1679 key.fog = !!context->rs[D3DRS_FOGENABLE];
1680 key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;
1681 if (key.fog_mode)
1682 key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];
1683
1684 key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];
1685 key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];
1686 key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];
1687 key.clipplane_emulate = device->driver_caps.emulate_ucp ? (context->rs[D3DRS_CLIPPLANEENABLE] & 0xff) : 0;
1688
1689 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1690 key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1691
1692 switch (context->rs[D3DRS_VERTEXBLEND]) {
1693 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1694 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1695 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1696 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1697 case D3DVBF_TWEENING: key.vertextween = 1; break;
1698 default:
1699 assert(!"invalid D3DVBF");
1700 break;
1701 }
1702 if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1703 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1704 }
1705
1706 for (s = 0; s < 8; ++s) {
1707 unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1708 unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
1709 unsigned dim;
1710
1711 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1712 gen = NINED3DTSS_TCI_PASSTHRU;
1713
1714 if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)
1715 gen = NINED3DTSS_TCI_DISABLE;
1716
1717 key.tc_gen |= gen << (s * 3);
1718 key.tc_idx |= idx << (s * 3);
1719 key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);
1720
1721 dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1722 if (dim > 4)
1723 dim = input_texture_coord[idx];
1724 if (dim == 1) /* NV behaviour */
1725 dim = 0;
1726 key.tc_dim_output |= dim << (s * 3);
1727 }
1728
1729 DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));
1730 vs = util_hash_table_get(device->ff.ht_vs, &key);
1731 if (vs)
1732 return vs;
1733 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1734
1735 nine_ff_prune_vs(device);
1736 if (vs) {
1737 unsigned n;
1738
1739 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1740
1741 _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs);
1742 device->ff.num_vs++;
1743
1744 vs->num_inputs = bld.num_inputs;
1745 for (n = 0; n < bld.num_inputs; ++n)
1746 vs->input_map[n].ndecl = bld.input[n];
1747
1748 vs->position_t = key.position_t;
1749 vs->point_size = key.vertexpointsize | key.pointscale | device->driver_caps.always_output_pointsize;
1750 }
1751 return vs;
1752 }
1753
/* Fetch the matrix of transform state D3DTS_<n> from the fixed-function
 * state of the `context` variable in scope at the point of use. */
#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, false)
/* Non-zero when the dirty bit of transform D3DTS_<n> is set in
 * (s)->ff.changed.transform, which holds one bit per transform state, 32 per
 * array element. The mask is built from an unsigned 1 so that bit 31 can be
 * selected without shifting into the sign bit of an int. */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1u << ((D3DTS_##n) % 32)))
1756
1757 static struct NinePixelShader9 *
nine_ff_get_ps(struct NineDevice9 * device)1758 nine_ff_get_ps(struct NineDevice9 *device)
1759 {
1760 struct nine_context *context = &device->context;
1761 struct NinePixelShader9 *ps;
1762 struct nine_ff_ps_key key;
1763 unsigned s;
1764 uint8_t sampler_mask = 0;
1765
1766 assert(sizeof(key) <= sizeof(key.value32));
1767
1768 memset(&key, 0, sizeof(key));
1769 for (s = 0; s < 8; ++s) {
1770 key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
1771 key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
1772 const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1773 const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1774 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
1775 * ALPHAOP cannot be enabled if COLOROP is disabled.
1776 * Verified on Windows. */
1777 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1778 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1779 break;
1780 }
1781
1782 if (!context->texture[s].enabled &&
1783 ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
1784 used_c & 0x1) ||
1785 (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
1786 used_c & 0x2) ||
1787 (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
1788 used_c & 0x4))) {
1789 /* Tested on Windows: Invalid texture read disables the stage
1790 * and the subsequent ones, but only for colorop. For alpha,
1791 * it's as if the texture had alpha of 1.0, which is what
1792 * has our dummy texture in that case. Invalid color also
1793 * disabled the following alpha stages. */
1794 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1795 break;
1796 }
1797
1798 if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
1799 context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
1800 context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
1801 context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
1802 context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
1803 context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
1804 sampler_mask |= (1 << s);
1805
1806 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1807 if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;
1808 if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;
1809 if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;
1810 if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;
1811 if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;
1812 if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;
1813 if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;
1814 if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;
1815 if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;
1816 }
1817 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1818 if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;
1819 if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;
1820 if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;
1821 if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;
1822 if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;
1823 if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;
1824 }
1825 key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1826
1827 if (context->texture[s].enabled) {
1828 switch (context->texture[s].type) {
1829 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1830 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1831 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1832 default:
1833 assert(!"unexpected texture type");
1834 break;
1835 }
1836 } else {
1837 key.ts[s].textarget = 1;
1838 }
1839 }
1840
1841 /* Note: If colorop is D3DTOP_DISABLE for the first stage
1842 * (which implies alphaop is too), nothing particular happens,
1843 * that is, current is equal to diffuse (which is the case anyway,
1844 * because it is how it is initialized).
1845 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
1846 * because then if the resultarg is TEMP, then diffuse alpha is written
1847 * to it. */
1848 if (key.ts[0].colorop != D3DTOP_DISABLE &&
1849 key.ts[0].alphaop == D3DTOP_DISABLE &&
1850 key.ts[0].resultarg != 0) {
1851 key.ts[0].alphaop = D3DTOP_SELECTARG1;
1852 key.ts[0].alphaarg1 = D3DTA_DIFFUSE;
1853 }
1854 /* When no alpha stage writes to current, diffuse alpha is taken.
1855 * Since we initialize current to diffuse, we have the behaviour. */
1856
1857 /* Last stage always writes to Current */
1858 if (s >= 1)
1859 key.ts[s-1].resultarg = 0;
1860
1861 key.projected = nine_ff_get_projected_key_ff(context);
1862 key.specular = !!context->rs[D3DRS_SPECULARENABLE];
1863 key.flatshade = context->rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT;
1864
1865 for (; s < 8; ++s)
1866 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1867 if (context->rs[D3DRS_FOGENABLE])
1868 key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];
1869 key.fog = !!context->rs[D3DRS_FOGENABLE];
1870 if (key.fog_mode && key.fog)
1871 key.fog_source = !context->zfog;
1872 key.alpha_test_emulation = context->rs[NINED3DRS_EMULATED_ALPHATEST] & 0x7;
1873
1874 DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));
1875 ps = util_hash_table_get(device->ff.ht_ps, &key);
1876 if (ps)
1877 return ps;
1878 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1879
1880 nine_ff_prune_ps(device);
1881 if (ps) {
1882 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1883
1884 _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps);
1885 device->ff.num_ps++;
1886
1887 ps->rt_mask = 0x1;
1888 ps->sampler_mask = sampler_mask;
1889 }
1890 return ps;
1891 }
1892
1893 static void
nine_ff_load_vs_transforms(struct NineDevice9 * device)1894 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1895 {
1896 struct nine_context *context = &device->context;
1897 D3DMATRIX T;
1898 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1899 unsigned i;
1900
1901 /* TODO: make this nicer, and only upload the ones we need */
1902 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1903
1904 if (IS_D3DTS_DIRTY(context, WORLD) ||
1905 IS_D3DTS_DIRTY(context, VIEW) ||
1906 IS_D3DTS_DIRTY(context, PROJECTION)) {
1907 /* WVP, WV matrices */
1908 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1909 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1910
1911 /* normal matrix == transpose(inverse(WV)) */
1912 nine_d3d_matrix_inverse(&T, &M[1]);
1913 nine_d3d_matrix_transpose(&M[4], &T);
1914
1915 /* P matrix */
1916 M[2] = *GET_D3DTS(PROJECTION);
1917
1918 /* V and W matrix */
1919 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1920 M[40] = M[1];
1921 }
1922
1923 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1924 /* load other world matrices */
1925 for (i = 1; i <= 8; ++i) {
1926 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1927 }
1928 }
1929
1930 device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);
1931 }
1932
1933 static void
nine_ff_load_lights(struct NineDevice9 * device)1934 nine_ff_load_lights(struct NineDevice9 *device)
1935 {
1936 struct nine_context *context = &device->context;
1937 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1938 unsigned l;
1939
1940 if (context->changed.group & NINE_STATE_FF_MATERIAL) {
1941 const D3DMATERIAL9 *mtl = &context->ff.material;
1942
1943 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1944 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1945 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1946 dst[23].x = mtl->Power;
1947 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1948 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);
1949 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1950 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1951 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1952 }
1953
1954 if (!(context->changed.group & NINE_STATE_FF_LIGHTING) && !IS_D3DTS_DIRTY(context, VIEW))
1955 return;
1956
1957 for (l = 0; l < context->ff.num_lights_active; ++l) {
1958 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
1959
1960 dst[32 + l * 8].x = light->Type;
1961 dst[32 + l * 8].y = light->Attenuation0;
1962 dst[32 + l * 8].z = light->Attenuation1;
1963 dst[32 + l * 8].w = light->Attenuation2;
1964 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1965 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1966 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1967 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1968 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1969 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1970 dst[37 + l * 8].w = light->Falloff;
1971 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1972 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1973 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1974 dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
1975 }
1976 }
1977
1978 static void
nine_ff_load_point_and_fog_params(struct NineDevice9 * device)1979 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1980 {
1981 struct nine_context *context = &device->context;
1982 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1983
1984 if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))
1985 return;
1986 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
1987 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
1988 dst[26].z = CLAMP(asfloat(context->rs[D3DRS_POINTSIZE]),
1989 asfloat(context->rs[D3DRS_POINTSIZE_MIN]),
1990 asfloat(context->rs[D3DRS_POINTSIZE_MAX]));
1991 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);
1992 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);
1993 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);
1994 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);
1995 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
1996 if (isinf(dst[28].y))
1997 dst[28].y = 0.0f;
1998 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
1999 if (device->driver_caps.emulate_ucp)
2000 memcpy(&dst[196], &context->clip.ucp, sizeof(context->clip));
2001 }
2002
2003 static void
nine_ff_load_tex_matrices(struct NineDevice9 * device)2004 nine_ff_load_tex_matrices(struct NineDevice9 *device)
2005 {
2006 struct nine_context *context = &device->context;
2007 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
2008 unsigned s;
2009
2010 if (!(context->ff.changed.transform[0] & 0xff0000))
2011 return;
2012 for (s = 0; s < 8; ++s) {
2013 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
2014 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, false));
2015 }
2016 }
2017
2018 static void
nine_ff_load_ps_params(struct NineDevice9 * device)2019 nine_ff_load_ps_params(struct NineDevice9 *device)
2020 {
2021 struct nine_context *context = &device->context;
2022 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
2023 unsigned s;
2024
2025 if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))
2026 return;
2027
2028 for (s = 0; s < 8; ++s)
2029 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
2030
2031 for (s = 0; s < 8; ++s) {
2032 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
2033 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
2034 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
2035 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
2036 if (s & 1) {
2037 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2038 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2039 } else {
2040 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2041 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2042 }
2043 }
2044
2045 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);
2046 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);
2047 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);
2048 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
2049 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2050 dst[22].w = (float)context->rs[D3DRS_ALPHAREF] / 255.f;
2051 }
2052
2053 static void
nine_ff_load_viewport_info(struct NineDevice9 * device)2054 nine_ff_load_viewport_info(struct NineDevice9 *device)
2055 {
2056 D3DVIEWPORT9 *viewport = &device->context.viewport;
2057 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
2058 float diffZ = viewport->MaxZ - viewport->MinZ;
2059
2060 /* Note: the other functions avoids to fill the const again if nothing changed.
2061 * But we don't have much to fill, and adding code to allow that may be complex
2062 * so just fill it always */
2063 dst[100].x = 2.0f / (float)(viewport->Width);
2064 dst[100].y = 2.0f / (float)(viewport->Height);
2065 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2066 dst[100].w = (float)(viewport->Width);
2067 dst[101].x = (float)(viewport->X);
2068 dst[101].y = (float)(viewport->Y);
2069 dst[101].z = (float)(viewport->MinZ);
2070 }
2071
2072 void
nine_ff_update(struct NineDevice9 * device)2073 nine_ff_update(struct NineDevice9 *device)
2074 {
2075 struct nine_context *context = &device->context;
2076 struct pipe_constant_buffer cb;
2077
2078 DBG("vs=%p ps=%p\n", context->vs, context->ps);
2079
2080 /* NOTE: the only reference belongs to the hash table */
2081 if (!context->programmable_vs) {
2082 device->ff.vs = nine_ff_get_vs(device);
2083 context->changed.group |= NINE_STATE_VS;
2084 }
2085 if (!context->ps) {
2086 device->ff.ps = nine_ff_get_ps(device);
2087 context->changed.group |= NINE_STATE_PS;
2088 }
2089
2090 if (!context->programmable_vs) {
2091 nine_ff_load_vs_transforms(device);
2092 nine_ff_load_tex_matrices(device);
2093 nine_ff_load_lights(device);
2094 nine_ff_load_point_and_fog_params(device);
2095 nine_ff_load_viewport_info(device);
2096
2097 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
2098
2099 cb.buffer_offset = 0;
2100 cb.buffer = NULL;
2101 cb.user_buffer = device->ff.vs_const;
2102 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2103
2104 context->pipe_data.cb_vs_ff = cb;
2105 context->commit |= NINE_STATE_COMMIT_CONST_VS;
2106
2107 context->changed.group &= ~NINE_STATE_FF_VS;
2108 }
2109
2110 if (!context->ps) {
2111 nine_ff_load_ps_params(device);
2112
2113 cb.buffer_offset = 0;
2114 cb.buffer = NULL;
2115 cb.user_buffer = device->ff.ps_const;
2116 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2117
2118 context->pipe_data.cb_ps_ff = cb;
2119 context->commit |= NINE_STATE_COMMIT_CONST_PS;
2120
2121 context->changed.group &= ~NINE_STATE_FF_PS;
2122 }
2123 }
2124
2125
2126 bool
nine_ff_init(struct NineDevice9 * device)2127 nine_ff_init(struct NineDevice9 *device)
2128 {
2129 device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash,
2130 nine_ff_vs_key_comp);
2131 device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash,
2132 nine_ff_ps_key_comp);
2133
2134 device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash,
2135 nine_ff_fvf_key_comp);
2136
2137 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2138 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2139
2140 return device->ff.ht_vs && device->ff.ht_ps &&
2141 device->ff.ht_fvf &&
2142 device->ff.vs_const && device->ff.ps_const;
2143 }
2144
nine_ff_ht_delete_cb(void * key,void * value,void * data)2145 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2146 {
2147 NineUnknown_Unbind(NineUnknown(value));
2148 return PIPE_OK;
2149 }
2150
2151 void
nine_ff_fini(struct NineDevice9 * device)2152 nine_ff_fini(struct NineDevice9 *device)
2153 {
2154 if (device->ff.ht_vs) {
2155 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2156 _mesa_hash_table_destroy(device->ff.ht_vs, NULL);
2157 }
2158 if (device->ff.ht_ps) {
2159 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2160 _mesa_hash_table_destroy(device->ff.ht_ps, NULL);
2161 }
2162 if (device->ff.ht_fvf) {
2163 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2164 _mesa_hash_table_destroy(device->ff.ht_fvf, NULL);
2165 }
2166 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2167 device->ff.ps = NULL;
2168
2169 FREE(device->ff.vs_const);
2170 FREE(device->ff.ps_const);
2171 }
2172
2173 static void
nine_ff_prune_vs(struct NineDevice9 * device)2174 nine_ff_prune_vs(struct NineDevice9 *device)
2175 {
2176 struct nine_context *context = &device->context;
2177
2178 if (device->ff.num_vs > 1024) {
2179 /* could destroy the bound one here, so unbind */
2180 context->pipe->bind_vs_state(context->pipe, NULL);
2181 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2182 _mesa_hash_table_clear(device->ff.ht_vs, NULL);
2183 device->ff.num_vs = 0;
2184 context->changed.group |= NINE_STATE_VS;
2185 }
2186 }
2187 static void
nine_ff_prune_ps(struct NineDevice9 * device)2188 nine_ff_prune_ps(struct NineDevice9 *device)
2189 {
2190 struct nine_context *context = &device->context;
2191
2192 if (device->ff.num_ps > 1024) {
2193 /* could destroy the bound one here, so unbind */
2194 context->pipe->bind_fs_state(context->pipe, NULL);
2195 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2196 _mesa_hash_table_clear(device->ff.ht_ps, NULL);
2197 device->ff.num_ps = 0;
2198 context->changed.group |= NINE_STATE_PS;
2199 }
2200 }
2201
2202 /* ========================================================================== */
2203
2204 /* Matrix multiplication:
2205 *
2206 * in memory: 0 1 2 3 (row major)
2207 * 4 5 6 7
2208 * 8 9 a b
2209 * c d e f
2210 *
2211 * cA cB cC cD
2212 * r0 = (r0 * cA) (r0 * cB) . .
2213 * r1 = (r1 * cA) (r1 * cB)
2214 * r2 = (r2 * cA) .
2215 * r3 = (r3 * cA) .
2216 *
2217 * r: (11) (12) (13) (14)
2218 * (21) (22) (23) (24)
2219 * (31) (32) (33) (34)
2220 * (41) (42) (43) (44)
2221 * l: (11 12 13 14)
2222 * (21 22 23 24)
2223 * (31 32 33 34)
2224 * (41 42 43 44)
2225 *
2226 * v: (x y z 1 )
2227 *
2228 * t.xyzw = MUL(v.xxxx, r[0]);
2229 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2230 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2231 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2232 *
2233 * v.x = DP4(v, c[0]);
2234 * v.y = DP4(v, c[1]);
2235 * v.z = DP4(v, c[2]);
2236 * v.w = DP4(v, c[3]) = 1
2237 */
2238
2239 /*
2240 static void
2241 nine_D3DMATRIX_print(const D3DMATRIX *M)
2242 {
2243 DBG("\n(%f %f %f %f)\n"
2244 "(%f %f %f %f)\n"
2245 "(%f %f %f %f)\n"
2246 "(%f %f %f %f)\n",
2247 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2248 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2249 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2250 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2251 }
2252 */
2253
2254 static inline float
nine_DP4_row_col(const D3DMATRIX * A,int r,const D3DMATRIX * B,int c)2255 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2256 {
2257 return A->m[r][0] * B->m[0][c] +
2258 A->m[r][1] * B->m[1][c] +
2259 A->m[r][2] * B->m[2][c] +
2260 A->m[r][3] * B->m[3][c];
2261 }
2262
2263 static inline float
nine_DP4_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2264 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2265 {
2266 return v->x * M->m[0][c] +
2267 v->y * M->m[1][c] +
2268 v->z * M->m[2][c] +
2269 1.0f * M->m[3][c];
2270 }
2271
2272 static inline float
nine_DP3_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2273 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2274 {
2275 return v->x * M->m[0][c] +
2276 v->y * M->m[1][c] +
2277 v->z * M->m[2][c];
2278 }
2279
2280 void
nine_d3d_matrix_matrix_mul(D3DMATRIX * D,const D3DMATRIX * L,const D3DMATRIX * R)2281 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2282 {
2283 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2284 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2285 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2286 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2287
2288 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2289 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2290 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2291 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2292
2293 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2294 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2295 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2296 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2297
2298 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2299 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2300 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2301 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2302 }
2303
2304 void
nine_d3d_vector4_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2305 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2306 {
2307 d->x = nine_DP4_vec_col(v, M, 0);
2308 d->y = nine_DP4_vec_col(v, M, 1);
2309 d->z = nine_DP4_vec_col(v, M, 2);
2310 }
2311
2312 void
nine_d3d_vector3_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2313 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2314 {
2315 d->x = nine_DP3_vec_col(v, M, 0);
2316 d->y = nine_DP3_vec_col(v, M, 1);
2317 d->z = nine_DP3_vec_col(v, M, 2);
2318 }
2319
2320 void
nine_d3d_matrix_transpose(D3DMATRIX * D,const D3DMATRIX * M)2321 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2322 {
2323 unsigned i, j;
2324 for (i = 0; i < 4; ++i)
2325 for (j = 0; j < 4; ++j)
2326 D->m[i][j] = M->m[j][i];
2327 }
2328
2329 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2330 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2331 if (t > 0.0f) pos += t; else neg += t; } while(0)
2332
2333 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2334 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2335 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2336 float
nine_d3d_matrix_det(const D3DMATRIX * M)2337 nine_d3d_matrix_det(const D3DMATRIX *M)
2338 {
2339 float pos = 0.0f;
2340 float neg = 0.0f;
2341
2342 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2343 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2344 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2345
2346 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2347 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2348 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2349
2350 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2351 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2352 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2353
2354 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2355 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2356 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2357
2358 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2359 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2360 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2361
2362 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2363 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2364 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2365
2366 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2367 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2368 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2369
2370 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2371 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2372 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2373
2374 return pos + neg;
2375 }
2376
2377 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2378 * I have no idea where this code came from.
2379 */
2380 void
nine_d3d_matrix_inverse(D3DMATRIX * D,const D3DMATRIX * M)2381 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2382 {
2383 int i, k;
2384 float det;
2385
2386 D->m[0][0] =
2387 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2388 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2389 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2390 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2391 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2392 M->m[1][3] * M->m[3][1] * M->m[2][2];
2393
2394 D->m[0][1] =
2395 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2396 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2397 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2398 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2399 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2400 M->m[0][3] * M->m[3][1] * M->m[2][2];
2401
2402 D->m[0][2] =
2403 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2404 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2405 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2406 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2407 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2408 M->m[0][3] * M->m[3][1] * M->m[1][2];
2409
2410 D->m[0][3] =
2411 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2412 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2413 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2414 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2415 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2416 M->m[0][3] * M->m[2][1] * M->m[1][2];
2417
2418 D->m[1][0] =
2419 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2420 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2421 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2422 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2423 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2424 M->m[1][3] * M->m[3][0] * M->m[2][2];
2425
2426 D->m[1][1] =
2427 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2428 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2429 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2430 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2431 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2432 M->m[0][3] * M->m[3][0] * M->m[2][2];
2433
2434 D->m[1][2] =
2435 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2436 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2437 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2438 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2439 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2440 M->m[0][3] * M->m[3][0] * M->m[1][2];
2441
2442 D->m[1][3] =
2443 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2444 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2445 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2446 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2447 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2448 M->m[0][3] * M->m[2][0] * M->m[1][2];
2449
2450 D->m[2][0] =
2451 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2452 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2453 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2454 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2455 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2456 M->m[1][3] * M->m[3][0] * M->m[2][1];
2457
2458 D->m[2][1] =
2459 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2460 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2461 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2462 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2463 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2464 M->m[0][3] * M->m[3][0] * M->m[2][1];
2465
2466 D->m[2][2] =
2467 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2468 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2469 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2470 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2471 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2472 M->m[0][3] * M->m[3][0] * M->m[1][1];
2473
2474 D->m[2][3] =
2475 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2476 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2477 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2478 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2479 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2480 M->m[0][3] * M->m[2][0] * M->m[1][1];
2481
2482 D->m[3][0] =
2483 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2484 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2485 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2486 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2487 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2488 M->m[1][2] * M->m[3][0] * M->m[2][1];
2489
2490 D->m[3][1] =
2491 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2492 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2493 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2494 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2495 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2496 M->m[0][2] * M->m[3][0] * M->m[2][1];
2497
2498 D->m[3][2] =
2499 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2500 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2501 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2502 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2503 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2504 M->m[0][2] * M->m[3][0] * M->m[1][1];
2505
2506 D->m[3][3] =
2507 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2508 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2509 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2510 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2511 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2512 M->m[0][2] * M->m[2][0] * M->m[1][1];
2513
2514 det =
2515 M->m[0][0] * D->m[0][0] +
2516 M->m[1][0] * D->m[0][1] +
2517 M->m[2][0] * D->m[0][2] +
2518 M->m[3][0] * D->m[0][3];
2519
2520 if (fabsf(det) < 1e-30) {/* non inversible */
2521 *D = *M; /* wine tests */
2522 return;
2523 }
2524
2525 det = 1.0 / det;
2526
2527 for (i = 0; i < 4; i++)
2528 for (k = 0; k < 4; k++)
2529 D->m[i][k] *= det;
2530
2531 #if MESA_DEBUG || !defined(NDEBUG)
2532 {
2533 D3DMATRIX I;
2534
2535 nine_d3d_matrix_matrix_mul(&I, D, M);
2536
2537 for (i = 0; i < 4; ++i)
2538 for (k = 0; k < 4; ++k)
2539 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2540 DBG("Matrix inversion check FAILED !\n");
2541 }
2542 #endif
2543 }
2544