1 /*
2  * Copyright 2011 Joakim Sindholt <[email protected]>
3  * Copyright 2013 Christoph Bumiller
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #define NINE_STATE
8 
9 #include "device9.h"
10 #include "swapchain9.h"
11 #include "basetexture9.h"
12 #include "buffer9.h"
13 #include "indexbuffer9.h"
14 #include "surface9.h"
15 #include "vertexbuffer9.h"
16 #include "vertexdeclaration9.h"
17 #include "vertexshader9.h"
18 #include "pixelshader9.h"
19 #include "nine_pipe.h"
20 #include "nine_ff.h"
21 #include "nine_limits.h"
22 #include "pipe/p_context.h"
23 #include "pipe/p_state.h"
24 #include "cso_cache/cso_context.h"
25 #include "util/u_atomic.h"
26 #include "util/u_upload_mgr.h"
27 #include "util/u_math.h"
28 #include "util/box.h"
29 #include "util/u_simple_shaders.h"
30 #include "util/u_gen_mipmap.h"
31 
32 /* CSMT headers */
33 #include "nine_queue.h"
34 #include "nine_csmt_helper.h"
35 #include "util/u_thread.h"
36 
37 #define DBG_CHANNEL DBG_DEVICE
38 
39 /* Nine CSMT */
40 
41 struct csmt_instruction {
42     int (* func)(struct NineDevice9 *This, struct csmt_instruction *instr);
43 };
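/* Every call that goes through CSMT is encoded as a csmt_instruction appended
 * to the queue: a payload is allocated from the queue pool, the call arguments
 * are stored alongside the header, and the worker later "decodes" it by
 * calling instr->func(device, instr). A rough sketch of what the CSMT_ITEM_*
 * wrappers further below boil down to (simplified, hypothetical struct and
 * field names; the real expansion lives in nine_csmt_helper.h):
 *
 *     struct csmt_instruction_foo {
 *         struct csmt_instruction instr;   // must be first
 *         DWORD arg0;
 *     };
 *
 *     args = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction_foo));
 *     args->instr.func = nine_context_foo_apply;
 *     args->arg0 = value;
 *
 *     // worker thread, later:
 *     instr->func(ctx->device, instr);  // nonzero return signals "processed"
 */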
44 
45 struct csmt_context {
46     thrd_t worker;
47     struct nine_queue_pool* pool;
48     BOOL terminate;
49     cnd_t event_processed;
50     mtx_t mutex_processed;
51     struct NineDevice9 *device;
52     BOOL processed;
53     BOOL toPause;
54     BOOL hasPaused;
55     mtx_t thread_running;
56     mtx_t thread_resume;
57 };
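/* Pause/resume protocol (see nine_csmt_pause()/nine_csmt_resume() and the
 * worker loop below): the worker holds thread_running while it executes queued
 * instructions. To pause, the main thread takes thread_resume, raises toPause,
 * and then blocks on thread_running until the worker releases it; the worker
 * then parks itself on thread_resume. To resume, the main thread releases both
 * mutexes and the worker re-acquires thread_running.
 * processed/mutex_processed/event_processed implement the "wait until the
 * worker reached this instruction" handshake used by nine_csmt_process(). */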
58 
59 /* Wait for instruction to be processed.
60  * The caller has to ensure that only one thread waits at a time.
61  */
62 static void
63 nine_csmt_wait_processed(struct csmt_context *ctx)
64 {
65     mtx_lock(&ctx->mutex_processed);
66     while (!p_atomic_read(&ctx->processed)) {
67         cnd_wait(&ctx->event_processed, &ctx->mutex_processed);
68     }
69     mtx_unlock(&ctx->mutex_processed);
70 }
71 
72 /* CSMT worker thread */
73 static
74 int
75 nine_csmt_worker(void *arg)
76 {
77     struct csmt_context *ctx = arg;
78     struct csmt_instruction *instr;
79     DBG("CSMT worker spawned\n");
80 
81     u_thread_setname("CSMT-Worker");
82 
83     while (1) {
84         nine_queue_wait_flush(ctx->pool);
85         mtx_lock(&ctx->thread_running);
86 
87         /* Get instruction. NULL on empty cmdbuf. */
88         while (!p_atomic_read(&ctx->terminate) &&
89                (instr = (struct csmt_instruction *)nine_queue_get(ctx->pool))) {
90 
91             /* decode */
92             if (instr->func(ctx->device, instr)) {
93                 mtx_lock(&ctx->mutex_processed);
94                 p_atomic_set(&ctx->processed, true);
95                 cnd_signal(&ctx->event_processed);
96                 mtx_unlock(&ctx->mutex_processed);
97             }
98             if (p_atomic_read(&ctx->toPause)) {
99                 mtx_unlock(&ctx->thread_running);
100                 /* Wait here until the thread can be resumed. */
101                 mtx_lock(&ctx->thread_resume);
102                 mtx_lock(&ctx->thread_running);
103                 mtx_unlock(&ctx->thread_resume);
104             }
105         }
106 
107         mtx_unlock(&ctx->thread_running);
108         if (p_atomic_read(&ctx->terminate)) {
109             mtx_lock(&ctx->mutex_processed);
110             p_atomic_set(&ctx->processed, true);
111             cnd_signal(&ctx->event_processed);
112             mtx_unlock(&ctx->mutex_processed);
113             break;
114         }
115     }
116 
117     DBG("CSMT worker destroyed\n");
118     return 0;
119 }
120 
121 /* Create a CSMT context.
122  * Spawns a worker thread.
123  */
124 struct csmt_context *
125 nine_csmt_create( struct NineDevice9 *This )
126 {
127     struct csmt_context *ctx;
128 
129     ctx = CALLOC_STRUCT(csmt_context);
130     if (!ctx)
131         return NULL;
132 
133     ctx->pool = nine_queue_create();
134     if (!ctx->pool) {
135         FREE(ctx);
136         return NULL;
137     }
138     cnd_init(&ctx->event_processed);
139     (void) mtx_init(&ctx->mutex_processed, mtx_plain);
140     (void) mtx_init(&ctx->thread_running, mtx_plain);
141     (void) mtx_init(&ctx->thread_resume, mtx_plain);
142 
143 #if MESA_DEBUG || !defined(NDEBUG)
144     u_thread_setname("Main thread");
145 #endif
146 
147     ctx->device = This;
148 
149     if (thrd_success != u_thread_create(&ctx->worker, nine_csmt_worker, ctx)) {
150         nine_queue_delete(ctx->pool);
151         FREE(ctx);
152         return NULL;
153     }
154 
155     DBG("Returning context %p\n", ctx);
156 
157     return ctx;
158 }
159 
160 static int
161 nop_func( struct NineDevice9 *This, struct csmt_instruction *instr )
162 {
163     (void) This;
164     (void) instr;
165 
166     return 1;
167 }
168 
169 /* Push nop instruction and flush the queue.
170  * Waits for the worker to complete. */
171 void
172 nine_csmt_process( struct NineDevice9 *device )
173 {
174     struct csmt_instruction* instr;
175     struct csmt_context *ctx = device->csmt_ctx;
176 
177     if (!device->csmt_active)
178         return;
179 
180     if (nine_queue_isempty(ctx->pool))
181         return;
182 
183     DBG("device=%p\n", device);
184 
185     /* NOP */
186     instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
187     assert(instr);
188     instr->func = nop_func;
189 
190     p_atomic_set(&ctx->processed, false);
191     nine_queue_flush(ctx->pool);
192 
193     nine_csmt_wait_processed(ctx);
194 }
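/* Typical synchronization point: any path that needs all queued work to have
 * been executed (for example nine_context_get_pipe() below) calls
 * nine_csmt_process() before touching context.pipe directly. */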
195 
196 void
197 nine_csmt_flush( struct NineDevice9* device )
198 {
199     if (!device->csmt_active)
200         return;
201 
202     nine_queue_flush(device->csmt_ctx->pool);
203 }
204 
205 
206 /* Destroys a CSMT context.
207  * Waits for the worker thread to terminate.
208  */
209 void
210 nine_csmt_destroy( struct NineDevice9 *device, struct csmt_context *ctx )
211 {
212     struct csmt_instruction* instr;
213     thrd_t render_thread = ctx->worker;
214 
215     DBG("device=%p ctx=%p\n", device, ctx);
216 
217     /* Push nop and flush the queue. */
218     instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
219     assert(instr);
220     instr->func = nop_func;
221 
222     p_atomic_set(&ctx->processed, false);
223     /* Signal worker to terminate. */
224     p_atomic_set(&ctx->terminate, true);
225     nine_queue_flush(ctx->pool);
226 
227     nine_csmt_wait_processed(ctx);
228     nine_queue_delete(ctx->pool);
229 
230     mtx_destroy(&ctx->thread_resume);
231     mtx_destroy(&ctx->thread_running);
232 
233     mtx_destroy(&ctx->mutex_processed);
234     cnd_destroy(&ctx->event_processed);
235 
236     FREE(ctx);
237 
238     thrd_join(render_thread, NULL);
239 }
240 
241 static void
242 nine_csmt_pause( struct NineDevice9 *device )
243 {
244     struct csmt_context *ctx = device->csmt_ctx;
245 
246     if (!device->csmt_active)
247         return;
248 
249     /* No need to pause the thread */
250     if (nine_queue_no_flushed_work(ctx->pool))
251         return;
252 
253     mtx_lock(&ctx->thread_resume);
254     p_atomic_set(&ctx->toPause, true);
255 
256     /* Wait until the thread is paused */
257     mtx_lock(&ctx->thread_running);
258     ctx->hasPaused = true;
259     p_atomic_set(&ctx->toPause, false);
260 }
261 
262 static void
263 nine_csmt_resume( struct NineDevice9 *device )
264 {
265     struct csmt_context *ctx = device->csmt_ctx;
266 
267     if (!device->csmt_active)
268         return;
269 
270     if (!ctx->hasPaused)
271         return;
272 
273     ctx->hasPaused = false;
274     mtx_unlock(&ctx->thread_running);
275     mtx_unlock(&ctx->thread_resume);
276 }
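/* nine_csmt_pause()/nine_csmt_resume() must be used in matching pairs; they
 * are exposed to the rest of nine through nine_context_get_pipe_acquire() and
 * nine_context_get_pipe_release() below. */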
277 
278 struct pipe_context *
279 nine_context_get_pipe( struct NineDevice9 *device )
280 {
281     nine_csmt_process(device);
282     return device->context.pipe;
283 }
284 
285 struct pipe_context *
286 nine_context_get_pipe_multithread( struct NineDevice9 *device )
287 {
288     struct csmt_context *ctx = device->csmt_ctx;
289 
290     if (!device->csmt_active)
291         return device->context.pipe;
292 
293     if (!u_thread_is_self(ctx->worker))
294         nine_csmt_process(device);
295 
296     return device->context.pipe;
297 }
298 
299 struct pipe_context *
300 nine_context_get_pipe_acquire( struct NineDevice9 *device )
301 {
302     nine_csmt_pause(device);
303     return device->context.pipe;
304 }
305 
306 void
307 nine_context_get_pipe_release( struct NineDevice9 *device )
308 {
309     nine_csmt_resume(device);
310 }
311 
312 bool
313 nine_context_is_worker( struct NineDevice9 *device )
314 {
315     struct csmt_context *ctx = device->csmt_ctx;
316 
317     if (!device->csmt_active)
318         return false;
319 
320     return u_thread_is_self(ctx->worker);
321 }
322 
323 /* Nine state functions */
324 
325 /* Check if some states need to be set dirty */
326 
327 static inline DWORD
328 check_multisample(struct NineDevice9 *device)
329 {
330     struct nine_context *context = &device->context;
331     DWORD *rs = context->rs;
332     struct NineSurface9 *rt0 = context->rt[0];
333     bool multisampled_target;
334     DWORD new_value;
335 
336     multisampled_target = rt0 && rt0->desc.MultiSampleType >= 1;
337     if (rt0 && rt0->desc.Format == D3DFMT_NULL && context->ds)
338         multisampled_target = context->ds->desc.MultiSampleType >= 1;
339     new_value = (multisampled_target && rs[D3DRS_MULTISAMPLEANTIALIAS]) ? 1 : 0;
340     if (rs[NINED3DRS_MULTISAMPLE] != new_value) {
341         rs[NINED3DRS_MULTISAMPLE] = new_value;
342         return NINE_STATE_RASTERIZER;
343     }
344     return 0;
345 }
346 
347 /* State preparation only */
348 
349 static inline void
350 prepare_blend(struct NineDevice9 *device)
351 {
352     nine_convert_blend_state(&device->context.pipe_data.blend, device->context.rs);
353     device->context.commit |= NINE_STATE_COMMIT_BLEND;
354 }
355 
356 static inline void
357 prepare_dsa(struct NineDevice9 *device)
358 {
359     nine_convert_dsa_state(&device->context.pipe_data.dsa, device->context.rs);
360     device->context.commit |= NINE_STATE_COMMIT_DSA;
361 }
362 
363 static inline void
364 prepare_rasterizer(struct NineDevice9 *device)
365 {
366     nine_convert_rasterizer_state(device, &device->context.pipe_data.rast, device->context.rs);
367     device->context.commit |= NINE_STATE_COMMIT_RASTERIZER;
368 }
369 
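/* SWVP constant-buffer layout prepared below: the float constants are exposed
 * as two user buffers of 4096 float[4] each (cb0_swvp, and cb1_swvp pointing
 * 4096 float[4] further into the same array), while the integer and boolean
 * constants get their own user buffers (cb2_swvp and cb3_swvp). */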
370 static void
371 prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
372 {
373     struct nine_context *context = &device->context;
374 
375     if (device->driver_caps.emulate_ucp) {
376         /* TODO: Avoid the memcpy every time by storing directly into the array */
377         memcpy(&context->vs_const_f[4 * NINE_MAX_CONST_SWVP_SPE_OFFSET], &context->clip.ucp, sizeof(context->clip));
378         context->changed.vs_const_f = 1; /* TODO optimize */
379     }
380 
381     if (device->driver_caps.always_output_pointsize) {
382         context->vs_const_f[4 * (NINE_MAX_CONST_SWVP_SPE_OFFSET + 8)] =
383             CLAMP(asfloat(context->rs[D3DRS_POINTSIZE]),
384                 asfloat(context->rs[D3DRS_POINTSIZE_MIN]),
385                 asfloat(context->rs[D3DRS_POINTSIZE_MAX]));
386         context->changed.vs_const_f = 1; /* TODO optimize */
387     }
388 
389     if (context->changed.vs_const_f || context->changed.group & NINE_STATE_SWVP) {
390         struct pipe_constant_buffer cb;
391 
392         cb.buffer_offset = 0;
393         cb.buffer_size = 4096 * sizeof(float[4]);
394         cb.user_buffer = context->vs_const_f_swvp;
395 
396         if (context->vs->lconstf.ranges) {
397             const struct nine_lconstf *lconstf = &(context->vs->lconstf);
398             const struct nine_range *r = lconstf->ranges;
399             unsigned n = 0;
400             float *dst = context->vs_lconstf_temp;
401             float *src = (float *)cb.user_buffer;
402             memcpy(dst, src, cb.buffer_size);
403             while (r) {
404                 unsigned p = r->bgn;
405                 unsigned c = r->end - r->bgn;
406                 memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
407                 n += c;
408                 r = r->next;
409             }
410             cb.user_buffer = dst;
411         }
412 
413         context->pipe_data.cb0_swvp.buffer_offset = cb.buffer_offset;
414         context->pipe_data.cb0_swvp.buffer_size = cb.buffer_size;
415         context->pipe_data.cb0_swvp.user_buffer = cb.user_buffer;
416 
417         cb.user_buffer = (int8_t *)cb.user_buffer + 4096 * sizeof(float[4]);
418         context->pipe_data.cb1_swvp.buffer_offset = cb.buffer_offset;
419         context->pipe_data.cb1_swvp.buffer_size = cb.buffer_size;
420         context->pipe_data.cb1_swvp.user_buffer = cb.user_buffer;
421 
422         context->changed.vs_const_f = 0;
423     }
424 
425     if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
426         struct pipe_constant_buffer cb;
427 
428         cb.buffer_offset = 0;
429         cb.buffer_size = 2048 * sizeof(float[4]);
430         cb.user_buffer = context->vs_const_i;
431 
432         context->pipe_data.cb2_swvp.buffer_offset = cb.buffer_offset;
433         context->pipe_data.cb2_swvp.buffer_size = cb.buffer_size;
434         context->pipe_data.cb2_swvp.user_buffer = cb.user_buffer;
435         context->changed.vs_const_i = 0;
436     }
437 
438     if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
439         struct pipe_constant_buffer cb;
440 
441         cb.buffer_offset = 0;
442         cb.buffer_size = 512 * sizeof(float[4]);
443         cb.user_buffer = context->vs_const_b;
444 
445         context->pipe_data.cb3_swvp.buffer_offset = cb.buffer_offset;
446         context->pipe_data.cb3_swvp.buffer_size = cb.buffer_size;
447         context->pipe_data.cb3_swvp.user_buffer = cb.user_buffer;
448         context->changed.vs_const_b = 0;
449     }
450 
451     context->changed.group &= ~NINE_STATE_VS_CONST;
452     context->commit |= NINE_STATE_COMMIT_CONST_VS;
453 }
454 
455 static void
456 prepare_vs_constants_userbuf(struct NineDevice9 *device)
457 {
458     struct nine_context *context = &device->context;
459     uint8_t *upload_ptr = NULL;
460     struct pipe_constant_buffer cb;
461     cb.buffer = NULL;
462     cb.buffer_offset = 0;
463     cb.buffer_size = context->cso_shader.vs_const_used_size;
464     cb.user_buffer = context->vs_const_f;
465 
466     if (context->swvp) {
467         prepare_vs_constants_userbuf_swvp(device);
468         return;
469     }
470 
471     if (device->driver_caps.emulate_ucp) {
472         /* TODO: Avoid the memcpy every time by storing directly into the array */
473         memcpy(&context->vs_const_f[4 * NINE_MAX_CONST_VS_SPE_OFFSET], &context->clip.ucp, sizeof(context->clip));
474     }
475     if (device->driver_caps.always_output_pointsize) {
476         context->vs_const_f[4 * (NINE_MAX_CONST_VS_SPE_OFFSET + 8)] =
477             CLAMP(asfloat(context->rs[D3DRS_POINTSIZE]),
478                 asfloat(context->rs[D3DRS_POINTSIZE_MIN]),
479                 asfloat(context->rs[D3DRS_POINTSIZE_MAX]));
480     }
481 
482     if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
483         int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
484         memcpy(idst, context->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
485         context->changed.vs_const_i = 0;
486     }
487 
488     if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
489         int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
490         uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
491         memcpy(bdst, context->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
492         context->changed.vs_const_b = 0;
493     }
494 
495     if (!cb.buffer_size)
496         return;
497 
498     if (context->vs->lconstf.ranges) {
499         /* TODO: Can we make it so that we don't have to copy everything? */
500         const struct nine_lconstf *lconstf =  &(context->vs->lconstf);
501         const struct nine_range *r = lconstf->ranges;
502         unsigned n = 0;
503         float *dst = context->vs_lconstf_temp;
504         float *src = (float *)cb.user_buffer;
505         memcpy(dst, src, cb.buffer_size);
506         while (r) {
507             unsigned p = r->bgn;
508             unsigned c = r->end - r->bgn;
509             memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
510             n += c;
511             r = r->next;
512         }
513         cb.user_buffer = dst;
514     }
515 
516     /* Note: if we have to copy some constants to scattered
517      * locations (context->vs->lconstf.ranges), doing separate
518      * memcpy calls straight into upload_ptr would not be
519      * write-combining friendly. For that case we really want
520      * this intermediate buffer. */
521 
522     u_upload_alloc(context->pipe->const_uploader,
523                   0,
524                   cb.buffer_size,
525                   256, /* Be conservative about alignment */
526                   &(cb.buffer_offset),
527                   &(cb.buffer),
528                   (void**)&upload_ptr);
529 
530     assert(cb.buffer && upload_ptr);
531 
532     if (!context->cso_shader.vs_const_ranges) {
533         memcpy(upload_ptr, cb.user_buffer, cb.buffer_size);
534     } else {
535         unsigned i = 0;
536         unsigned offset = 0;
537         while (context->cso_shader.vs_const_ranges[i*2+1] != 0) {
538             memcpy(upload_ptr+offset,
539                    &((float*)cb.user_buffer)[4*context->cso_shader.vs_const_ranges[i*2]],
540                    context->cso_shader.vs_const_ranges[i*2+1] * sizeof(float[4]));
541             offset += context->cso_shader.vs_const_ranges[i*2+1] * sizeof(float[4]);
542             i++;
543         }
544     }
545 
546     u_upload_unmap(context->pipe->const_uploader);
547     cb.user_buffer = NULL;
548 
549     /* Free previous resource */
550     pipe_resource_reference(&context->pipe_data.cb_vs.buffer, NULL);
551 
552     context->pipe_data.cb_vs = cb;
553     context->changed.vs_const_f = 0;
554 
555     context->changed.group &= ~NINE_STATE_VS_CONST;
556     context->commit |= NINE_STATE_COMMIT_CONST_VS;
557 }
558 
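/* For pixel shaders, the integer and boolean constants are packed right after
 * the NINE_MAX_CONST_F_PS3 float[4] slots of ps_const_f, and a few "special"
 * slots starting at NINE_MAX_CONST_PS_SPE_OFFSET carry driver-generated data
 * (bump-env matrices, fog parameters, the alpha-test reference value). */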
559 static void
560 prepare_ps_constants_userbuf(struct NineDevice9 *device)
561 {
562     struct nine_context *context = &device->context;
563     uint8_t *upload_ptr = NULL;
564     struct pipe_constant_buffer cb;
565     cb.buffer = NULL;
566     cb.buffer_offset = 0;
567     cb.buffer_size = context->cso_shader.ps_const_used_size;
568     cb.user_buffer = context->ps_const_f;
569 
570     if (context->changed.ps_const_i) {
571         int *idst = (int *)&context->ps_const_f[4 * NINE_MAX_CONST_F_PS3];
572         memcpy(idst, context->ps_const_i, sizeof(context->ps_const_i));
573         context->changed.ps_const_i = 0;
574     }
575     if (context->changed.ps_const_b) {
576         int *idst = (int *)&context->ps_const_f[4 * NINE_MAX_CONST_F_PS3];
577         uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
578         memcpy(bdst, context->ps_const_b, sizeof(context->ps_const_b));
579         context->changed.ps_const_b = 0;
580     }
581 
582     /* Upload special constants needed to implement PS1.x instructions like TEXBEM, TEXBEML and BEM */
583     if (context->ps->bumpenvmat_needed)
584         memcpy(&context->ps_const_f[4 * NINE_MAX_CONST_PS_SPE_OFFSET], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars));
585 
586     if (context->ps->byte_code.version < 0x30 &&
587         context->rs[D3DRS_FOGENABLE]) {
588         float *dst = &context->ps_const_f[4 * (NINE_MAX_CONST_PS_SPE_OFFSET + 12)];
589 
590         d3dcolor_to_rgba(dst, context->rs[D3DRS_FOGCOLOR]);
591         if (context->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) {
592             dst[4] = asfloat(context->rs[D3DRS_FOGEND]);
593             dst[5] = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
594         } else if (context->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) {
595             dst[4] = asfloat(context->rs[D3DRS_FOGDENSITY]);
596         }
597     }
598 
599     context->ps_const_f[4 * (NINE_MAX_CONST_PS_SPE_OFFSET + 14)] = context->rs[D3DRS_ALPHAREF] / 255.f;
600 
601     if (!cb.buffer_size)
602         return;
603 
604     u_upload_alloc(context->pipe->const_uploader,
605                   0,
606                   cb.buffer_size,
607                   256, /* Be conservative about alignment */
608                   &(cb.buffer_offset),
609                   &(cb.buffer),
610                   (void**)&upload_ptr);
611 
612     assert(cb.buffer && upload_ptr);
613 
614     if (!context->cso_shader.ps_const_ranges) {
615         memcpy(upload_ptr, cb.user_buffer, cb.buffer_size);
616     } else {
617         unsigned i = 0;
618         unsigned offset = 0;
619         while (context->cso_shader.ps_const_ranges[i*2+1] != 0) {
620             memcpy(upload_ptr+offset,
621                    &((float*)cb.user_buffer)[4*context->cso_shader.ps_const_ranges[i*2]],
622                    context->cso_shader.ps_const_ranges[i*2+1] * sizeof(float[4]));
623             offset += context->cso_shader.ps_const_ranges[i*2+1] * sizeof(float[4]);
624             i++;
625         }
626     }
627 
628     u_upload_unmap(context->pipe->const_uploader);
629     cb.user_buffer = NULL;
630 
631     /* Free previous resource */
632     pipe_resource_reference(&context->pipe_data.cb_ps.buffer, NULL);
633 
634     context->pipe_data.cb_ps = cb;
635     context->changed.ps_const_f = 0;
636 
637     context->changed.group &= ~NINE_STATE_PS_CONST;
638     context->commit |= NINE_STATE_COMMIT_CONST_PS;
639 }
640 
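/* prepare_vs()/prepare_ps() below pick the shader variant matching the current
 * state key (or fall back to the fixed-function shader) and return any extra
 * state groups that become dirty as a result (rasterizer, samplers). */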
641 static inline uint32_t
642 prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
643 {
644     struct nine_context *context = &device->context;
645     struct NineVertexShader9 *vs = context->vs;
646     uint32_t changed_group = 0;
647     int has_key_changed = 0;
648 
649     if (likely(context->programmable_vs))
650         has_key_changed = NineVertexShader9_UpdateKey(vs, device);
651 
652     if (!shader_changed && !has_key_changed)
653         return 0;
654 
655     /* likely because we dislike FF */
656     if (likely(context->programmable_vs)) {
657         context->cso_shader.vs = NineVertexShader9_GetVariant(vs,
658                                                               &context->cso_shader.vs_const_ranges,
659                                                               &context->cso_shader.vs_const_used_size);
660     } else {
661         vs = device->ff.vs;
662         context->cso_shader.vs = vs->ff_cso;
663     }
664 
665     if (context->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
666         context->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
667         changed_group |= NINE_STATE_RASTERIZER;
668     }
669     if (context->rs[NINED3DRS_POSITIONT] != vs->position_t) {
670         context->rs[NINED3DRS_POSITIONT] = vs->position_t;
671         if (!device->driver_caps.window_space_position_support &&
672             device->driver_caps.disabling_depth_clipping_support)
673             changed_group |= NINE_STATE_RASTERIZER;
674     }
675 
676     if ((context->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
677         /* A dummy sampler will need to be bound. */
678         changed_group |= NINE_STATE_SAMPLER;
679 
680     context->commit |= NINE_STATE_COMMIT_VS;
681     return changed_group;
682 }
683 
684 static inline uint32_t
685 prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
686 {
687     struct nine_context *context = &device->context;
688     struct NinePixelShader9 *ps = context->ps;
689     uint32_t changed_group = 0;
690     int has_key_changed = 0;
691 
692     if (likely(ps))
693         has_key_changed = NinePixelShader9_UpdateKey(ps, context);
694 
695     if (!shader_changed && !has_key_changed)
696         return 0;
697 
698     if (likely(ps)) {
699         context->cso_shader.ps = NinePixelShader9_GetVariant(ps,
700                                                              &context->cso_shader.ps_const_ranges,
701                                                              &context->cso_shader.ps_const_used_size);
702     } else {
703         ps = device->ff.ps;
704         context->cso_shader.ps = ps->ff_cso;
705     }
706 
707     if ((context->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
708         /* A dummy sampler will need to be bound. */
709         changed_group |= NINE_STATE_SAMPLER;
710 
711     context->commit |= NINE_STATE_COMMIT_PS;
712     return changed_group;
713 }
714 
715 /* State preparation incremental */
716 
717 /* State preparation + State commit */
718 
719 static void
720 update_framebuffer(struct NineDevice9 *device, bool is_clear)
721 {
722     struct nine_context *context = &device->context;
723     struct pipe_context *pipe = context->pipe;
724     struct pipe_framebuffer_state *fb = &context->pipe_data.fb;
725     unsigned i;
726     struct NineSurface9 *rt0 = context->rt[0];
727     unsigned w = rt0->desc.Width;
728     unsigned h = rt0->desc.Height;
729     unsigned nr_samples = rt0->base.info.nr_samples;
730     unsigned ps_mask = context->ps ? context->ps->rt_mask : 1;
731     unsigned mask = is_clear ? 0xf : ps_mask;
732     const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
733 
734     DBG("\n");
735 
736     context->rt_mask = 0x0;
737     fb->nr_cbufs = 0;
738 
739     /* All render targets must have the same size and the depth buffer must
740      * be at least as big. Multisampling has to match, according to the spec.
741      * But some apps get this wrong, and no error is returned. The behaviour
742      * they get apparently is that the depth buffer is disabled if it doesn't
743      * match. Presumably the same happens for render targets. */
744 
745     /* Special case: D3DFMT_NULL is used to bind no real render target, but
746      * still render to the depth buffer. We must not take the render target
747      * info into account. TODO: figure out what should happen when there are
748      * several render targets and the first one is D3DFMT_NULL */
749     if (rt0->desc.Format == D3DFMT_NULL && context->ds) {
750         w = context->ds->desc.Width;
751         h = context->ds->desc.Height;
752         nr_samples = context->ds->base.info.nr_samples;
753     }
754 
755     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
756         struct NineSurface9 *rt = context->rt[i];
757 
758         if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
759             rt->desc.Width == w && rt->desc.Height == h &&
760             rt->base.info.nr_samples == nr_samples) {
761             fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
762             context->rt_mask |= 1 << i;
763             fb->nr_cbufs = i + 1;
764         } else {
765             /* Color outputs must match RT slot,
766              * drivers will have to handle NULL entries for GL, too.
767              */
768             fb->cbufs[i] = NULL;
769         }
770     }
771 
772     if (context->ds && context->ds->desc.Width >= w &&
773         context->ds->desc.Height >= h &&
774         context->ds->base.info.nr_samples == nr_samples) {
775         fb->zsbuf = NineSurface9_GetSurface(context->ds, 0);
776     } else {
777         fb->zsbuf = NULL;
778     }
779 
780     fb->width = w;
781     fb->height = h;
782 
783     pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
784 
785     if (is_clear && context->rt_mask == ps_mask)
786         context->changed.group &= ~NINE_STATE_FB;
787 }
788 
789 static void
790 update_viewport(struct NineDevice9 *device)
791 {
792     struct nine_context *context = &device->context;
793     const D3DVIEWPORT9 *vport = &context->viewport;
794     struct pipe_viewport_state pvport;
795 
796     /* D3D coordinates are:
797      * -1 .. +1 for X,Y and
798      *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
799      */
800     pvport.scale[0] = (float)vport->Width * 0.5f;
801     pvport.scale[1] = (float)vport->Height * -0.5f;
802     pvport.scale[2] = vport->MaxZ - vport->MinZ;
803     pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
804     pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
805     pvport.translate[2] = vport->MinZ;
806     pvport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
807     pvport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
808     pvport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
809     pvport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
810 
811     /* We found R600 and SI cards have some imprecision
812      * on the barycentric coordinates used for interpolation.
813      * Some shaders rely on having something precise.
814      * We found that the proprietary driver has the imprecision issue,
815      * except when the render target width and height are powers of two.
816      * It uses some sort of workaround for those cases,
817      * which likely covers all the situations in which applications
818      * rely on something precise.
819      * We haven't found that workaround, but it seems better
820      * for applications if the imprecision is biased towards infinity
821      * instead of -infinity (which is what we measured). So shift the
822      * viewport slightly: not enough to change the rasterization result
823      * (in particular for multisampling), but enough to bias the
824      * imprecision towards infinity. We do this shift only if render
825      * target width and height are powers of two.
826      * Solves 'red shadows' bug on UE3 games.
827      */
828     if (device->driver_bugs.buggy_barycentrics &&
829         ((vport->Width & (vport->Width-1)) == 0) &&
830         ((vport->Height & (vport->Height-1)) == 0)) {
831         pvport.translate[0] -= 1.0f / 128.0f;
832         pvport.translate[1] -= 1.0f / 128.0f;
833     }
834 
835     cso_set_viewport(context->cso, &pvport);
836 }
837 
838 /* Loop through VS inputs and pick the vertex elements with the declared
839  * usage from the vertex declaration, then insert the instance divisor from
840  * the stream source frequency setting.
841  */
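/* If the declaration lacks an element for some shader input, a dummy vertex
 * buffer containing zeros is bound on a free stream slot so that input reads
 * 0 0 0 0. Vertex buffer indices are also remapped through vtxbuf_holes_map so
 * the bound buffers are contiguous, matching what update_vertex_buffers()
 * binds. */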
842 static void
843 update_vertex_elements(struct NineDevice9 *device)
844 {
845     struct nine_context *context = &device->context;
846     const struct NineVertexDeclaration9 *vdecl = device->context.vdecl;
847     const struct NineVertexShader9 *vs;
848     unsigned n, b, i;
849     int index;
850     int8_t vdecl_index_map[16]; /* vs->num_inputs <= 16 */
851     uint16_t used_streams = 0;
852     int dummy_vbo_stream = -1;
853     BOOL need_dummy_vbo = false;
854     struct cso_velems_state ve;
855     unsigned vtxbuf_mask;
856     unsigned vtxbuf_holes_map[PIPE_MAX_ATTRIBS];
857 
858     context->stream_usage_mask = 0;
859     memset(vdecl_index_map, -1, 16);
860     vs = context->programmable_vs ? context->vs : device->ff.vs;
861 
862     if (vdecl) {
863         for (n = 0; n < vs->num_inputs; ++n) {
864             DBG("looking up input %u (usage %u) from vdecl(%p)\n",
865                 n, vs->input_map[n].ndecl, vdecl);
866 
867             for (i = 0; i < vdecl->nelems; i++) {
868                 if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
869                     vdecl_index_map[n] = i;
870                     used_streams |= BITFIELD_BIT(vdecl->elems[i].vertex_buffer_index);
871                     break;
872                 }
873             }
874             if (vdecl_index_map[n] < 0)
875                 need_dummy_vbo = true;
876         }
877     } else {
878         /* No vertex declaration. Likely will never happen in practice,
879          * but we need not crash on this */
880         need_dummy_vbo = true;
881     }
882 
883     if (need_dummy_vbo) {
884         u_foreach_bit(bit, BITFIELD_MASK(device->caps.MaxStreams) & ~used_streams) {
885             dummy_vbo_stream = bit;
886             break;
887         }
888     }
889     /* there are fewer vertex shader inputs than stream slots,
890      * so if we need a slot for the dummy vbo, we should have found one */
891     assert (!need_dummy_vbo || dummy_vbo_stream != -1);
892 
893     /* calculate vtxbuf_holes_map to match for update_vertex_buffers() function */
894     i = 0;
895     vtxbuf_mask = context->vtxbuf_mask |
896         (need_dummy_vbo ? BITFIELD_BIT(dummy_vbo_stream) : 0);
897     u_foreach_bit(bit, vtxbuf_mask)
898         vtxbuf_holes_map[bit] = i++;
899 
900     for (n = 0; n < vs->num_inputs; ++n) {
901         index = vdecl_index_map[n];
902         if (index >= 0) {
903             ve.velems[n] = vdecl->elems[index];
904             b = ve.velems[n].vertex_buffer_index;
905             ve.velems[n].vertex_buffer_index = vtxbuf_holes_map[b];
906             ve.velems[n].src_stride = context->vtxstride[b];
907             context->stream_usage_mask |= 1 << b;
908             /* XXX wine just uses 1 here: */
909             if (context->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
910                 ve.velems[n].instance_divisor = context->stream_freq[b] & 0x7FFFFF;
911         } else {
912             /* if the vertex declaration is incomplete compared to what the
913              * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
914              * This is not specified by the spec, but it is the
915              * behaviour observed on Windows. */
916             ve.velems[n].vertex_buffer_index = vtxbuf_holes_map[dummy_vbo_stream];
917             ve.velems[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
918             ve.velems[n].src_offset = 0;
919             ve.velems[n].src_stride = 0;
920             ve.velems[n].instance_divisor = 0;
921             ve.velems[n].dual_slot = false;
922         }
923     }
924 
925     if (context->dummy_vbo_bound_at != dummy_vbo_stream) {
926         if (context->dummy_vbo_bound_at >= 0)
927             context->changed.vtxbuf |= 1 << context->dummy_vbo_bound_at;
928         if (dummy_vbo_stream >= 0)
929             context->changed.vtxbuf |= 1 << dummy_vbo_stream;
930         context->dummy_vbo_bound_at = dummy_vbo_stream;
931     }
932 
933     ve.count = vs->num_inputs;
934     cso_set_vertex_elements(context->cso, &ve);
935 }
936 
937 static void
938 update_vertex_buffers(struct NineDevice9 *device)
939 {
940     struct nine_context *context = &device->context;
941     struct pipe_context *pipe = context->pipe;
942     struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
943     unsigned vtxbuf_count;
944     unsigned mask, i, vtxbuf_i;
945 
946     mask = context->vtxbuf_mask |
947         ((context->dummy_vbo_bound_at >= 0) ? BITFIELD_BIT(context->dummy_vbo_bound_at) : 0);
948     vtxbuf_count = util_bitcount(mask);
949 
950     DBG("mask=%x\n", mask);
951     for (i = 0; mask; i++) {
952         vtxbuf_i = u_bit_scan(&mask);
953         if (vtxbuf_i == context->dummy_vbo_bound_at) {
954             vbuffer[i].buffer.resource = device->dummy_vbo;
955             vbuffer[i].is_user_buffer = false;
956             vbuffer[i].buffer_offset = 0;
957         } else {
958             memcpy(&vbuffer[i], &context->vtxbuf[vtxbuf_i], sizeof(struct pipe_vertex_buffer));
959         }
960     }
961 
962     if (vtxbuf_count)
963         util_set_vertex_buffers(pipe, vtxbuf_count, false, vbuffer);
964     else
965         pipe->set_vertex_buffers(pipe, 0, NULL);
966 
967     context->last_vtxbuf_count = vtxbuf_count;
968     context->changed.vtxbuf = 0;
969 }
970 
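/* Derived sampler state: a few NINED3DSAMP_* slots (shadow sampling, cube
 * texture, effective min LOD) are computed from the bound texture and the
 * regular D3DSAMP_* values, so the converted pipe sampler state only has to be
 * rebuilt when one of them actually changes. */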
971 static inline bool
972 update_sampler_derived(struct nine_context *context, unsigned s)
973 {
974     bool changed = false;
975 
976     if (context->samp[s][NINED3DSAMP_SHADOW] != context->texture[s].shadow) {
977         changed = true;
978         context->samp[s][NINED3DSAMP_SHADOW] = context->texture[s].shadow;
979     }
980 
981     if (context->samp[s][NINED3DSAMP_CUBETEX] !=
982         (context->texture[s].type == D3DRTYPE_CUBETEXTURE)) {
983         changed = true;
984         context->samp[s][NINED3DSAMP_CUBETEX] =
985                 context->texture[s].type == D3DRTYPE_CUBETEXTURE;
986     }
987 
988     if (context->samp[s][D3DSAMP_MIPFILTER] != D3DTEXF_NONE) {
989         int lod = context->samp[s][D3DSAMP_MAXMIPLEVEL] - context->texture[s].lod;
990         if (lod < 0)
991             lod = 0;
992         if (context->samp[s][NINED3DSAMP_MINLOD] != lod) {
993             changed = true;
994             context->samp[s][NINED3DSAMP_MINLOD] = lod;
995         }
996     } else {
997         context->changed.sampler[s] &= ~0x300; /* lod changes irrelevant */
998     }
999 
1000     return changed;
1001 }
1002 
1003 /* TODO: add sRGB override to pipe_sampler_state ? */
1004 static void
1005 update_textures_and_samplers(struct NineDevice9 *device)
1006 {
1007     struct nine_context *context = &device->context;
1008     struct pipe_context *pipe = context->pipe;
1009     struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
1010     unsigned num_textures = 0;
1011     bool commit_samplers;
1012     uint16_t sampler_mask = context->ps ? context->ps->sampler_mask :
1013                             device->ff.ps->sampler_mask;
1014 
1015     commit_samplers = false;
1016     const uint16_t ps_mask = sampler_mask | context->enabled_samplers_mask_ps;
1017     context->bound_samplers_mask_ps = ps_mask;
1018     num_textures = util_last_bit(ps_mask);
1019     /* iterate over the enabled samplers */
1020     u_foreach_bit(i, context->enabled_samplers_mask_ps) {
1021         const unsigned s = NINE_SAMPLER_PS(i);
1022         int sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
1023 
1024         view[i] = context->texture[s].view[sRGB];
1025 
1026         if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
1027             context->changed.sampler[s] = 0;
1028             commit_samplers = true;
1029             nine_convert_sampler_state(context->cso, s, context->samp[s]);
1030         }
1031     }
1032     /* iterate over the dummy samplers */
1033     u_foreach_bit(i, sampler_mask & ~context->enabled_samplers_mask_ps) {
1034         const unsigned s = NINE_SAMPLER_PS(i);
1035         /* Bind a dummy sampler. We do not bind the dummy sampler when
1036          * it is not needed because it could add overhead. The
1037          * dummy sampler should have r=g=b=0 and a=1. We do not
1038          * unbind dummy samplers directly when they are no longer
1039          * needed; they get removed as soon as texture or sampler
1040          * states change. */
1041         view[i] = device->dummy_sampler_view;
1042 
1043         cso_single_sampler(context->cso, PIPE_SHADER_FRAGMENT,
1044                            s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state);
1045 
1046         commit_samplers = true;
1047         context->changed.sampler[s] = ~0;
1048     }
1049     /* fill in unused samplers */
1050     u_foreach_bit(i, BITFIELD_MASK(num_textures) & ~ps_mask)
1051        view[i] = NULL;
1052 
1053     pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, num_textures,
1054                             num_textures < context->enabled_sampler_count_ps ? context->enabled_sampler_count_ps - num_textures : 0,
1055                             false, view);
1056     context->enabled_sampler_count_ps = num_textures;
1057 
1058     if (commit_samplers)
1059         cso_single_sampler_done(context->cso, PIPE_SHADER_FRAGMENT);
1060 
1061     commit_samplers = false;
1062     sampler_mask = context->programmable_vs ? context->vs->sampler_mask : 0;
1063     const uint16_t vs_mask = sampler_mask | context->enabled_samplers_mask_vs;
1064     context->bound_samplers_mask_vs = vs_mask;
1065     num_textures = util_last_bit(vs_mask);
1066     u_foreach_bit(i, context->enabled_samplers_mask_vs) {
1067         const unsigned s = NINE_SAMPLER_VS(i);
1068         int sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;
1069 
1070         view[i] = context->texture[s].view[sRGB];
1071 
1072         if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
1073             context->changed.sampler[s] = 0;
1074             commit_samplers = true;
1075             nine_convert_sampler_state(context->cso, s, context->samp[s]);
1076         }
1077     }
1078     u_foreach_bit(i, sampler_mask & ~context->enabled_samplers_mask_vs) {
1079         const unsigned s = NINE_SAMPLER_VS(i);
1080         /* Bind a dummy sampler. We do not bind the dummy sampler when
1081          * it is not needed because it could add overhead. The
1082          * dummy sampler should have r=g=b=0 and a=1. We do not
1083          * unbind dummy samplers directly when they are no longer
1084          * needed; they get removed as soon as texture or sampler
1085          * states change. */
1086         view[i] = device->dummy_sampler_view;
1087 
1088         cso_single_sampler(context->cso, PIPE_SHADER_VERTEX,
1089                            s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state);
1090 
1091         commit_samplers = true;
1092         context->changed.sampler[s] = ~0;
1093     }
1094     /* fill in unused samplers */
1095     u_foreach_bit(i, BITFIELD_MASK(num_textures) & ~vs_mask)
1096        view[i] = NULL;
1097 
1098     pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, num_textures,
1099                             num_textures < context->enabled_sampler_count_vs ? context->enabled_sampler_count_vs - num_textures : 0,
1100                             false, view);
1101     context->enabled_sampler_count_vs = num_textures;
1102 
1103     if (commit_samplers)
1104         cso_single_sampler_done(context->cso, PIPE_SHADER_VERTEX);
1105 }
1106 
1107 /* State commit only */
1108 
1109 static inline void
1110 commit_blend(struct NineDevice9 *device)
1111 {
1112     struct nine_context *context = &device->context;
1113 
1114     cso_set_blend(context->cso, &context->pipe_data.blend);
1115 }
1116 
1117 static inline void
1118 commit_dsa(struct NineDevice9 *device)
1119 {
1120     struct nine_context *context = &device->context;
1121 
1122     cso_set_depth_stencil_alpha(context->cso, &context->pipe_data.dsa);
1123 }
1124 
1125 static inline void
1126 commit_scissor(struct NineDevice9 *device)
1127 {
1128     struct nine_context *context = &device->context;
1129     struct pipe_context *pipe = context->pipe;
1130 
1131     pipe->set_scissor_states(pipe, 0, 1, &context->scissor);
1132 }
1133 
1134 static inline void
1135 commit_rasterizer(struct NineDevice9 *device)
1136 {
1137     struct nine_context *context = &device->context;
1138 
1139     cso_set_rasterizer(context->cso, &context->pipe_data.rast);
1140 }
1141 
1142 static inline void
1143 commit_vs_constants(struct NineDevice9 *device)
1144 {
1145     struct nine_context *context = &device->context;
1146     struct pipe_context *pipe = context->pipe;
1147 
1148     if (unlikely(!context->programmable_vs))
1149         pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &context->pipe_data.cb_vs_ff);
1150     else {
1151         if (context->swvp) {
1152             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &context->pipe_data.cb0_swvp);
1153             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, false, &context->pipe_data.cb1_swvp);
1154             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, false, &context->pipe_data.cb2_swvp);
1155             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, false, &context->pipe_data.cb3_swvp);
1156         } else {
1157             pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &context->pipe_data.cb_vs);
1158         }
1159     }
1160 }
1161 
1162 static inline void
1163 commit_ps_constants(struct NineDevice9 *device)
1164 {
1165     struct nine_context *context = &device->context;
1166     struct pipe_context *pipe = context->pipe;
1167 
1168     if (unlikely(!context->ps))
1169         pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &context->pipe_data.cb_ps_ff);
1170     else
1171         pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &context->pipe_data.cb_ps);
1172 }
1173 
1174 static inline void
1175 commit_vs(struct NineDevice9 *device)
1176 {
1177     struct nine_context *context = &device->context;
1178     assert(context->cso_shader.vs);
1179 
1180     context->pipe->bind_vs_state(context->pipe, context->cso_shader.vs);
1181 }
1182 
1183 
1184 static inline void
1185 commit_ps(struct NineDevice9 *device)
1186 {
1187     struct nine_context *context = &device->context;
1188 
1189     context->pipe->bind_fs_state(context->pipe, context->cso_shader.ps);
1190 }
1191 /* State Update */
1192 
1193 #define NINE_STATE_SHADER_CHANGE_VS \
1194    (NINE_STATE_VS |         \
1195     NINE_STATE_TEXTURE |    \
1196     NINE_STATE_VS_PARAMS_MISC | \
1197     NINE_STATE_SWVP)
1198 
1199 #define NINE_STATE_SHADER_CHANGE_PS \
1200    (NINE_STATE_PS |         \
1201     NINE_STATE_TEXTURE |    \
1202     NINE_STATE_PS_PARAMS_MISC)
1203 
1204 #define NINE_STATE_FREQUENT \
1205    (NINE_STATE_RASTERIZER | \
1206     NINE_STATE_TEXTURE |    \
1207     NINE_STATE_SAMPLER |    \
1208     NINE_STATE_VS_CONST |   \
1209     NINE_STATE_PS_CONST |   \
1210     NINE_STATE_MULTISAMPLE)
1211 
1212 #define NINE_STATE_COMMON \
1213    (NINE_STATE_FB |       \
1214     NINE_STATE_BLEND |    \
1215     NINE_STATE_DSA |      \
1216     NINE_STATE_VIEWPORT | \
1217     NINE_STATE_VDECL |    \
1218     NINE_STATE_IDXBUF |   \
1219     NINE_STATE_STREAMFREQ)
1220 
1221 #define NINE_STATE_RARE      \
1222    (NINE_STATE_SCISSOR |     \
1223     NINE_STATE_BLEND_COLOR | \
1224     NINE_STATE_STENCIL_REF | \
1225     NINE_STATE_SAMPLE_MASK)
1226 
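/* nine_update_state() is the single validation entry point: the prepare_*()
 * helpers convert dirty D3D state into context->pipe_data and set bits in
 * context->commit, and the commit_*() helpers then push the converted state to
 * the cso context / pipe_context in one place at the end. */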
1227 static void
1228 nine_update_state(struct NineDevice9 *device)
1229 {
1230     struct nine_context *context = &device->context;
1231     struct pipe_context *pipe = context->pipe;
1232     uint32_t group;
1233 
1234     DBG("changed state groups: %x\n", context->changed.group);
1235 
1236     /* NOTE: We may want to use the cso cache for everything, or let
1237      * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't
1238      * have to care about state being clobbered here and could merge this back
1239      * into update_textures. Except, we also need to re-validate textures that
1240      * may be dirty anyway, even if no texture bindings changed.
1241      */
1242 
1243     /* ff_update may change VS/PS dirty bits */
1244     if (unlikely(!context->programmable_vs || !context->ps))
1245         nine_ff_update(device);
1246     group = context->changed.group;
1247 
1248     if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) {
1249         if (group & NINE_STATE_SHADER_CHANGE_VS)
1250             group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/
1251         if (group & NINE_STATE_SHADER_CHANGE_PS)
1252             group |= prepare_ps(device, (group & NINE_STATE_PS) != 0);
1253     }
1254 
1255     if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
1256         if (group & NINE_STATE_FB)
1257             update_framebuffer(device, false);
1258         if (group & NINE_STATE_BLEND)
1259             prepare_blend(device);
1260         if (group & NINE_STATE_DSA)
1261             prepare_dsa(device);
1262         if (group & NINE_STATE_VIEWPORT)
1263             update_viewport(device);
1264         if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
1265             update_vertex_elements(device);
1266     }
1267 
1268     if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
1269         if (group & NINE_STATE_MULTISAMPLE)
1270             group |= check_multisample(device);
1271         if (group & NINE_STATE_RASTERIZER)
1272             prepare_rasterizer(device);
1273         if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
1274             update_textures_and_samplers(device);
1275         if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && context->programmable_vs)
1276             prepare_vs_constants_userbuf(device);
1277         if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && context->ps)
1278             prepare_ps_constants_userbuf(device);
1279     }
1280 
1281     if (context->changed.vtxbuf)
1282         update_vertex_buffers(device);
1283 
1284     if (context->commit & NINE_STATE_COMMIT_BLEND)
1285         commit_blend(device);
1286     if (context->commit & NINE_STATE_COMMIT_DSA)
1287         commit_dsa(device);
1288     if (context->commit & NINE_STATE_COMMIT_RASTERIZER)
1289         commit_rasterizer(device);
1290     if (context->commit & NINE_STATE_COMMIT_CONST_VS)
1291         commit_vs_constants(device);
1292     if (context->commit & NINE_STATE_COMMIT_CONST_PS)
1293         commit_ps_constants(device);
1294     if (context->commit & NINE_STATE_COMMIT_VS)
1295         commit_vs(device);
1296     if (context->commit & NINE_STATE_COMMIT_PS)
1297         commit_ps(device);
1298 
1299     context->commit = 0;
1300 
1301     if (unlikely(context->changed.ucp)) {
1302         pipe->set_clip_state(pipe, &context->clip);
1303         context->changed.ucp = false;
1304     }
1305 
1306     if (unlikely(group & NINE_STATE_RARE)) {
1307         if (group & NINE_STATE_SCISSOR)
1308             commit_scissor(device);
1309         if (group & NINE_STATE_BLEND_COLOR) {
1310             struct pipe_blend_color color;
1311             d3dcolor_to_rgba(&color.color[0], context->rs[D3DRS_BLENDFACTOR]);
1312             pipe->set_blend_color(pipe, &color);
1313         }
1314         if (group & NINE_STATE_SAMPLE_MASK) {
1315             if (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) {
1316                 pipe->set_sample_mask(pipe, ~0);
1317             } else {
1318                 pipe->set_sample_mask(pipe, context->rs[D3DRS_MULTISAMPLEMASK]);
1319             }
1320         }
1321         if (group & NINE_STATE_STENCIL_REF) {
1322             struct pipe_stencil_ref ref;
1323             ref.ref_value[0] = context->rs[D3DRS_STENCILREF];
1324             ref.ref_value[1] = ref.ref_value[0];
1325             pipe->set_stencil_ref(pipe, ref);
1326         }
1327     }
1328 
1329     context->changed.group &=
1330         (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST);
1331 
1332     DBG("finished\n");
1333 }
1334 
1335 #define RESZ_CODE 0x7fa05000
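/* Magic value an application writes to D3DRS_POINTSIZE to trigger the AMD
 * "RESZ" depth-resolve hack, handled in nine_context_set_render_state(). */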
1336 
1337 static void
1338 NineDevice9_ResolveZ( struct NineDevice9 *device )
1339 {
1340     struct nine_context *context = &device->context;
1341     const struct util_format_description *desc;
1342     struct NineSurface9 *source = context->ds;
1343     struct pipe_resource *src, *dst;
1344     struct pipe_blit_info blit;
1345 
1346     DBG("RESZ resolve\n");
1347 
1348     if (!source || !context->texture[0].enabled ||
1349         context->texture[0].type != D3DRTYPE_TEXTURE)
1350         return;
1351 
1352     src = source->base.resource;
1353     dst = context->texture[0].resource;
1354 
1355     if (!src || !dst)
1356         return;
1357 
1358     /* check dst is depth format. we know already for src */
1359     desc = util_format_description(dst->format);
1360     if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
1361         return;
1362 
1363     memset(&blit, 0, sizeof(blit));
1364     blit.src.resource = src;
1365     blit.src.level = 0;
1366     blit.src.format = src->format;
1367     blit.src.box.z = 0;
1368     blit.src.box.depth = 1;
1369     blit.src.box.x = 0;
1370     blit.src.box.y = 0;
1371     blit.src.box.width = src->width0;
1372     blit.src.box.height = src->height0;
1373 
1374     blit.dst.resource = dst;
1375     blit.dst.level = 0;
1376     blit.dst.format = dst->format;
1377     blit.dst.box.z = 0;
1378     blit.dst.box.depth = 1;
1379     blit.dst.box.x = 0;
1380     blit.dst.box.y = 0;
1381     blit.dst.box.width = dst->width0;
1382     blit.dst.box.height = dst->height0;
1383 
1384     blit.mask = PIPE_MASK_ZS;
1385     blit.filter = PIPE_TEX_FILTER_NEAREST;
1386     blit.scissor_enable = false;
1387 
1388     context->pipe->blit(context->pipe, &blit);
1389 }
1390 
1391 #define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
1392 #define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
1393 #define FETCH4_ENABLE              MAKEFOURCC('G', 'E', 'T', '4')
1394 #define FETCH4_DISABLE             MAKEFOURCC('G', 'E', 'T', '1')
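/* FOURCC codes for vendor-specific state hacks: the A2M pair toggles AMD
 * alpha-to-coverage and the GET4/GET1 pair toggles AMD Fetch4. The
 * alpha-to-coverage codes are handled via D3DRS_POINTSIZE in
 * nine_context_set_render_state() below. */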
1395 
1396 /* Nine_context functions.
1397  * Serialized through CSMT macros.
1398  */
1399 
1400 static void
1401 nine_context_set_texture_apply(struct NineDevice9 *device,
1402                                DWORD stage,
1403                                DWORD fetch4_shadow_enabled,
1404                                DWORD lod,
1405                                D3DRESOURCETYPE type,
1406                                uint8_t pstype,
1407                                struct pipe_resource *res,
1408                                struct pipe_sampler_view *view0,
1409                                struct pipe_sampler_view *view1);
1410 
1411 static void
1412 nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
1413                                                      UINT StartRegister,
1414                                                      const int *pConstantData,
1415                                                      unsigned pConstantData_size,
1416                                                      UINT Vector4iCount);
1417 
1418 CSMT_ITEM_NO_WAIT(nine_context_set_render_state,
1419                   ARG_VAL(D3DRENDERSTATETYPE, State),
1420                   ARG_VAL(DWORD, Value))
1421 {
1422     struct nine_context *context = &device->context;
1423 
1424     /* AMD hacks (equivalent to GL extensions) */
1425     if (unlikely(State == D3DRS_POINTSIZE)) {
1426         if (Value == RESZ_CODE) {
1427             NineDevice9_ResolveZ(device);
1428             return;
1429         }
1430 
1431         /* NINED3DRS_ALPHACOVERAGE:
1432          * bit 0: NVIDIA alpha to coverage
1433          * bit 1: NVIDIA ATOC state active
1434          * bit 2: AMD alpha to coverage
1435          * These need to be separate else the set of states to
1436          * disable NVIDIA alpha to coverage can disable the AMD one */
1437         if (Value == ALPHA_TO_COVERAGE_ENABLE ||
1438             Value == ALPHA_TO_COVERAGE_DISABLE) {
1439             context->rs[NINED3DRS_ALPHACOVERAGE] &= 3;
1440             context->rs[NINED3DRS_ALPHACOVERAGE] |= (Value == ALPHA_TO_COVERAGE_ENABLE) ? 4 : 0;
1441             context->changed.group |= NINE_STATE_BLEND;
1442             return;
1443         }
1444     }
1445 
1446     /* NV hack */
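         /* Writing the ATOC fourcc to D3DRS_ADAPTIVETESS_Y requests NVIDIA
          * alpha-to-coverage: bit 1 records the request, bit 0 is set only while
          * alpha test is enabled (see the bit layout above). D3DFMT_UNKNOWN turns
          * it off again, leaving the AMD bit (2) untouched. */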
1447     if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
1448         if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && context->rs[NINED3DRS_ALPHACOVERAGE] & 3)) {
1449             context->rs[NINED3DRS_ALPHACOVERAGE] &= 4;
1450             context->rs[NINED3DRS_ALPHACOVERAGE] |=
1451                 ((Value == D3DFMT_ATOC) ? 3 : 0) & (context->rs[D3DRS_ALPHATESTENABLE] ? 3 : 2);
1452             context->changed.group |= NINE_STATE_BLEND;
1453             return;
1454         }
1455     }
1456     if (unlikely(State == D3DRS_ALPHATESTENABLE && (context->rs[NINED3DRS_ALPHACOVERAGE] & 2))) {
1457         context->rs[NINED3DRS_ALPHACOVERAGE] &= 6;
1458         context->rs[NINED3DRS_ALPHACOVERAGE] |= (Value ? 1 : 0);
1459     }
1460 
1461     context->rs[State] = nine_fix_render_state_value(State, Value);
1462     context->changed.group |= nine_render_state_group[State];
1463 
1464     if (device->driver_caps.alpha_test_emulation) {
1465         if (State == D3DRS_ALPHATESTENABLE || State == D3DRS_ALPHAFUNC) {
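                 /* 7 is PIPE_FUNC_ALWAYS: with alpha test disabled, the emulated
                  * test in the pixel shader never discards. */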
1466             context->rs[NINED3DRS_EMULATED_ALPHATEST] = context->rs[D3DRS_ALPHATESTENABLE] ?
1467                 d3dcmpfunc_to_pipe_func(context->rs[D3DRS_ALPHAFUNC]) : 7;
1468             context->changed.group |= NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST | NINE_STATE_FF_SHADER;
1469         }
1470         if (State == D3DRS_ALPHAREF)
1471             context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_FF_PS_CONSTS;
1472     }
1473 
1474     if (device->driver_caps.always_output_pointsize) {
1475         if (State == D3DRS_POINTSIZE || State == D3DRS_POINTSIZE_MIN || State == D3DRS_POINTSIZE_MAX)
1476             context->changed.group |= NINE_STATE_VS_CONST;
1477     }
1478 
1479     if (device->driver_caps.emulate_ucp && State == D3DRS_CLIPPLANEENABLE)
1480         context->changed.group |= NINE_STATE_VS_PARAMS_MISC | NINE_STATE_VS_CONST;
1481 }
1482 
1483 CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply,
1484                   ARG_VAL(DWORD, stage),
1485                   ARG_VAL(DWORD, fetch4_shadow_enabled),
1486                   ARG_VAL(DWORD, lod),
1487                   ARG_VAL(D3DRESOURCETYPE, type),
1488                   ARG_VAL(uint8_t, pstype),
1489                   ARG_BIND_RES(struct pipe_resource, res),
1490                   ARG_BIND_VIEW(struct pipe_sampler_view, view0),
1491                   ARG_BIND_VIEW(struct pipe_sampler_view, view1))
1492 {
1493     struct nine_context *context = &device->context;
1494     uint enabled = fetch4_shadow_enabled & 1;
1495     uint shadow = (fetch4_shadow_enabled >> 1) & 1;
1496     uint fetch4_compatible = (fetch4_shadow_enabled >> 2) & 1;
1497 
1498     context->texture[stage].enabled = enabled;
1499     if (enabled) {
1500        if (stage < NINE_MAX_SAMPLERS_PS)
1501           context->enabled_samplers_mask_ps |= BITFIELD_BIT(stage - NINE_SAMPLER_PS(0));
1502        else if (stage >= NINE_SAMPLER_VS(0))
1503           context->enabled_samplers_mask_vs |= BITFIELD_BIT(stage - NINE_SAMPLER_VS(0));
1504     } else {
1505        if (stage < NINE_MAX_SAMPLERS_PS)
1506           context->enabled_samplers_mask_ps &= ~BITFIELD_BIT(stage - NINE_SAMPLER_PS(0));
1507        else if (stage >= NINE_SAMPLER_VS(0))
1508           context->enabled_samplers_mask_vs &= ~BITFIELD_BIT(stage - NINE_SAMPLER_VS(0));
1509     }
1510     context->samplers_shadow &= ~(1 << stage);
1511     context->samplers_shadow |= shadow << stage;
1512     context->samplers_fetch4 &= ~(1 << stage);
1513     context->samplers_fetch4 |= fetch4_compatible << stage;
1514     context->texture[stage].shadow = shadow;
1515     context->texture[stage].lod = lod;
1516     context->texture[stage].type = type;
1517     context->texture[stage].pstype = pstype;
1518     pipe_resource_reference(&context->texture[stage].resource, res);
1519     pipe_sampler_view_reference(&context->texture[stage].view[0], view0);
1520     pipe_sampler_view_reference(&context->texture[stage].view[1], view1);
1521 
1522     context->changed.group |= NINE_STATE_TEXTURE;
1523 }
1524 
1525 void
1526 nine_context_set_texture(struct NineDevice9 *device,
1527                          DWORD Stage,
1528                          struct NineBaseTexture9 *tex)
1529 {
1530     DWORD fetch4_shadow_enabled = 0;
1531     DWORD lod = 0;
1532     D3DRESOURCETYPE type = D3DRTYPE_TEXTURE;
1533     uint8_t pstype = 0;
1534     struct pipe_resource *res = NULL;
1535     struct pipe_sampler_view *view0 = NULL, *view1 = NULL;
1536 
1537     /* For managed pool, the data can be initially incomplete.
1538      * In that case, the texture is rebound later
1539      * (in NineBaseTexture9_Validate/NineBaseTexture9_UploadSelf). */
1540     if (tex && tex->base.resource) {
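             /* Pack bound/shadow/fetch4 into one DWORD so the CSMT call takes a
              * single scalar argument: bit 0 = a resource is bound, bit 1 = shadow
              * (depth) sampling, bit 2 = Fetch4-compatible format.
              * nine_context_set_texture_apply() above unpacks the same layout. */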
1541         fetch4_shadow_enabled = 1;
1542         fetch4_shadow_enabled |= tex->shadow << 1;
1543         fetch4_shadow_enabled |= tex->fetch4_compatible << 2;
1544         lod = tex->managed.lod;
1545         type = tex->base.type;
1546         pstype = tex->pstype;
1547         res = tex->base.resource;
1548         view0 = NineBaseTexture9_GetSamplerView(tex, 0);
1549         view1 = NineBaseTexture9_GetSamplerView(tex, 1);
1550     }
1551 
1552     nine_context_set_texture_apply(device, Stage,
1553                                    fetch4_shadow_enabled,
1554                                    lod, type, pstype,
1555                                    res, view0, view1);
1556 }
1557 
1558 CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state,
1559                   ARG_VAL(DWORD, Sampler),
1560                   ARG_VAL(D3DSAMPLERSTATETYPE, Type),
1561                   ARG_VAL(DWORD, Value))
1562 {
1563     struct nine_context *context = &device->context;
1564 
1565     if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) {
1566         if (Value == FETCH4_ENABLE ||
1567             Value == FETCH4_DISABLE) {
1568             context->rs[NINED3DRS_FETCH4] &= ~(1 << Sampler);
1569             context->rs[NINED3DRS_FETCH4] |= (Value == FETCH4_ENABLE) << Sampler;
1570             context->changed.group |= NINE_STATE_PS_PARAMS_MISC;
1571             if (Value == FETCH4_ENABLE)
1572                 WARN_ONCE("FETCH4 support is incomplete. Please report if buggy shadows.");
1573             return;
1574         }
1575     }
1576 
1577     if (unlikely(!nine_check_sampler_state_value(Type, Value)))
1578         return;
1579 
1580     context->samp[Sampler][Type] = Value;
1581     context->changed.group |= NINE_STATE_SAMPLER;
1582     context->changed.sampler[Sampler] |= 1 << Type;
1583 }
1584 
1585 CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply,
1586                   ARG_VAL(UINT, StreamNumber),
1587                   ARG_BIND_RES(struct pipe_resource, res),
1588                   ARG_VAL(UINT, OffsetInBytes),
1589                   ARG_VAL(UINT, Stride))
1590 {
1591     struct nine_context *context = &device->context;
1592     const unsigned i = StreamNumber;
1593 
1594     /* For normal draws, these tests are useless,
1595      * but not for *Up draws */
1596     if (context->vtxbuf[i].buffer.resource == res &&
1597         context->vtxbuf[i].buffer_offset == OffsetInBytes &&
1598         context->vtxstride[i] == Stride)
1599         return;
1600 
1601     if (context->vtxstride[i] != Stride) {
1602         context->vtxstride[i] = Stride;
1603         context->changed.group |= NINE_STATE_VDECL;
1604     }
1605     context->vtxbuf[i].buffer_offset = OffsetInBytes;
1606     pipe_resource_reference(&context->vtxbuf[i].buffer.resource, res);
1607 
1608     context->changed.vtxbuf |= 1 << StreamNumber;
1609     if (res)
1610         context->vtxbuf_mask |= 1 << StreamNumber;
1611     else
1612         context->vtxbuf_mask &= ~(1 << StreamNumber);
1613 }
1614 
1615 void
1616 nine_context_set_stream_source(struct NineDevice9 *device,
1617                                UINT StreamNumber,
1618                                struct NineVertexBuffer9 *pVBuf9,
1619                                UINT OffsetInBytes,
1620                                UINT Stride)
1621 {
1622     struct pipe_resource *res = NULL;
1623     unsigned offset = 0;
1624 
1625     if (pVBuf9)
1626         res = NineVertexBuffer9_GetResource(pVBuf9, &offset);
1627     /* In the future, if an internal offset is introduced, add it
1628      * to OffsetInBytes here. */
1629 
1630     nine_context_set_stream_source_apply(device, StreamNumber,
1631                                          res, offset + OffsetInBytes,
1632                                          Stride);
1633 }
1634 
1635 CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_freq,
1636                   ARG_VAL(UINT, StreamNumber),
1637                   ARG_VAL(UINT, Setting))
1638 {
1639     struct nine_context *context = &device->context;
1640 
1641     context->stream_freq[StreamNumber] = Setting;
1642 
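         /* The low bits of Setting hold the frequency divider / instance count;
          * D3DSTREAMSOURCE_INSTANCEDATA and D3DSTREAMSOURCE_INDEXEDDATA are flag
          * bits on top of it (stream 0's count is what init_draw_info() reads). */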
1643     if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
1644         context->stream_instancedata_mask |= 1 << StreamNumber;
1645     else
1646         context->stream_instancedata_mask &= ~(1 << StreamNumber);
1647 
1648     if (StreamNumber != 0)
1649         context->changed.group |= NINE_STATE_STREAMFREQ;
1650 }
1651 
1652 CSMT_ITEM_NO_WAIT(nine_context_set_indices_apply,
1653                   ARG_BIND_RES(struct pipe_resource, res),
1654                   ARG_VAL(UINT, IndexSize),
1655                   ARG_VAL(UINT, OffsetInBytes))
1656 {
1657     struct nine_context *context = &device->context;
1658 
1659     context->index_size = IndexSize;
1660     context->index_offset = OffsetInBytes;
1661     pipe_resource_reference(&context->idxbuf, res);
1662 
1663     context->changed.group |= NINE_STATE_IDXBUF;
1664 }
1665 
1666 void
1667 nine_context_set_indices(struct NineDevice9 *device,
1668                          struct NineIndexBuffer9 *idxbuf)
1669 {
1670     struct pipe_resource *res = NULL;
1671     UINT IndexSize = 0;
1672     unsigned OffsetInBytes = 0;
1673 
1674     if (idxbuf) {
1675         res = NineIndexBuffer9_GetBuffer(idxbuf, &OffsetInBytes);
1676         IndexSize = idxbuf->index_size;
1677     }
1678 
1679     nine_context_set_indices_apply(device, res, IndexSize, OffsetInBytes);
1680 }
1681 
1682 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_declaration,
1683                   ARG_BIND_REF(struct NineVertexDeclaration9, vdecl))
1684 {
1685     struct nine_context *context = &device->context;
1686     BOOL was_programmable_vs = context->programmable_vs;
1687 
1688     nine_bind(&context->vdecl, vdecl);
1689 
1690     context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
1691     if (was_programmable_vs != context->programmable_vs) {
1692         context->commit |= NINE_STATE_COMMIT_CONST_VS;
1693         context->changed.group |= NINE_STATE_VS;
1694     }
1695 
1696     context->changed.group |= NINE_STATE_VDECL;
1697 }
1698 
1699 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader,
1700                   ARG_BIND_REF(struct NineVertexShader9, pShader))
1701 {
1702     struct nine_context *context = &device->context;
1703     BOOL was_programmable_vs = context->programmable_vs;
1704 
1705     nine_bind(&context->vs, pShader);
1706 
1707     context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
1708 
1709     /* ff -> non-ff: commit back non-ff constants */
1710     if (!was_programmable_vs && context->programmable_vs)
1711         context->commit |= NINE_STATE_COMMIT_CONST_VS;
1712 
1713     context->changed.group |= NINE_STATE_VS;
1714 }
1715 
1716 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_f,
1717                   ARG_VAL(UINT, StartRegister),
1718                   ARG_MEM(float, pConstantData),
1719                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1720                   ARG_VAL(UINT, Vector4fCount))
1721 {
1722     struct nine_context *context = &device->context;
1723     float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f;
1724 
1725     memcpy(&vs_const_f[StartRegister * 4],
1726            pConstantData,
1727            pConstantData_size);
1728 
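         /* When software vertex processing may be used, the copy above went into
          * the large swvp constant buffer; mirror the registers that also fit in
          * the hardware-sized buffer so both copies stay consistent. */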
1729     if (device->may_swvp) {
1730         Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
1731         if (StartRegister < NINE_MAX_CONST_F)
1732             memcpy(&context->vs_const_f[StartRegister * 4],
1733                    pConstantData,
1734                    Vector4fCount * 4 * sizeof(context->vs_const_f[0]));
1735     }
1736 
1737     context->changed.vs_const_f = true;
1738     context->changed.group |= NINE_STATE_VS_CONST;
1739 }
1740 
1741 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_i,
1742                   ARG_VAL(UINT, StartRegister),
1743                   ARG_MEM(int, pConstantData),
1744                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1745                   ARG_VAL(UINT, Vector4iCount))
1746 {
1747     struct nine_context *context = &device->context;
1748     int i;
1749 
1750     if (device->driver_caps.vs_integer) {
1751         memcpy(&context->vs_const_i[4 * StartRegister],
1752                pConstantData,
1753                pConstantData_size);
1754     } else {
1755         for (i = 0; i < Vector4iCount; i++) {
1756             context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i]));
1757             context->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1]));
1758             context->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2]));
1759             context->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3]));
1760         }
1761     }
1762 
1763     context->changed.vs_const_i = true;
1764     context->changed.group |= NINE_STATE_VS_CONST | NINE_STATE_VS_PARAMS_MISC;
1765 }
1766 
1767 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_b,
1768                   ARG_VAL(UINT, StartRegister),
1769                   ARG_MEM(BOOL, pConstantData),
1770                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1771                   ARG_VAL(UINT, BoolCount))
1772 {
1773     struct nine_context *context = &device->context;
1774     int i;
1775     uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f);
1776 
1777     (void) pConstantData_size;
1778 
1779     for (i = 0; i < BoolCount; i++)
1780         context->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
1781 
1782     context->changed.vs_const_b = true;
1783     context->changed.group |= NINE_STATE_VS_CONST | NINE_STATE_VS_PARAMS_MISC;
1784 }
1785 
1786 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader,
1787                   ARG_BIND_REF(struct NinePixelShader9, ps))
1788 {
1789     struct nine_context *context = &device->context;
1790     unsigned old_mask = context->ps ? context->ps->rt_mask : 1;
1791     unsigned mask;
1792 
1793     /* ff -> non-ff: commit back non-ff constants */
1794     if (!context->ps && ps)
1795         context->commit |= NINE_STATE_COMMIT_CONST_PS;
1796 
1797     nine_bind(&context->ps, ps);
1798 
1799     context->changed.group |= NINE_STATE_PS;
1800 
1801     mask = context->ps ? context->ps->rt_mask : 1;
1802     /* We need to update cbufs if the pixel shader would
1803      * write to different render targets */
1804     if (mask != old_mask)
1805         context->changed.group |= NINE_STATE_FB;
1806 }
1807 
1808 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_f,
1809                   ARG_VAL(UINT, StartRegister),
1810                   ARG_MEM(float, pConstantData),
1811                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1812                   ARG_VAL(UINT, Vector4fCount))
1813 {
1814     struct nine_context *context = &device->context;
1815 
1816     memcpy(&context->ps_const_f[StartRegister * 4],
1817            pConstantData,
1818            pConstantData_size);
1819 
1820     context->changed.ps_const_f = true;
1821     context->changed.group |= NINE_STATE_PS_CONST;
1822 }
1823 
1824 /* For stateblocks */
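     /* (The stateblock appears to store ps_const_i already in the device
      * representation, i.e. float-encoded when the driver lacks native integer
      * support, hence the plain memcpy here without re-converting.) */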
1825 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i_transformed,
1826                   ARG_VAL(UINT, StartRegister),
1827                   ARG_MEM(int, pConstantData),
1828                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1829                   ARG_VAL(UINT, Vector4iCount))
1830 {
1831     struct nine_context *context = &device->context;
1832 
1833     memcpy(&context->ps_const_i[StartRegister][0],
1834            pConstantData,
1835            Vector4iCount * sizeof(context->ps_const_i[0]));
1836 
1837     context->changed.ps_const_i = true;
1838     context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1839 }
1840 
1841 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i,
1842                   ARG_VAL(UINT, StartRegister),
1843                   ARG_MEM(int, pConstantData),
1844                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1845                   ARG_VAL(UINT, Vector4iCount))
1846 {
1847     struct nine_context *context = &device->context;
1848     int i;
1849 
1850     if (device->driver_caps.ps_integer) {
1851         memcpy(&context->ps_const_i[StartRegister][0],
1852                pConstantData,
1853                pConstantData_size);
1854     } else {
1855         for (i = 0; i < Vector4iCount; i++) {
1856             context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
1857             context->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
1858             context->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
1859             context->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
1860         }
1861     }
1862     context->changed.ps_const_i = true;
1863     context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1864 }
1865 
1866 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_b,
1867                   ARG_VAL(UINT, StartRegister),
1868                   ARG_MEM(BOOL, pConstantData),
1869                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1870                   ARG_VAL(UINT, BoolCount))
1871 {
1872     struct nine_context *context = &device->context;
1873     int i;
1874     uint32_t bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f);
1875 
1876     (void) pConstantData_size;
1877 
1878     for (i = 0; i < BoolCount; i++)
1879         context->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
1880 
1881     context->changed.ps_const_b = true;
1882     context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1883 }
1884 
1885 /* XXX: use resource, as resource might change */
1886 CSMT_ITEM_NO_WAIT(nine_context_set_render_target,
1887                   ARG_VAL(DWORD, RenderTargetIndex),
1888                   ARG_BIND_REF(struct NineSurface9, rt))
1889 {
1890     struct nine_context *context = &device->context;
1891     const unsigned i = RenderTargetIndex;
1892 
1893     if (i == 0) {
1894         context->changed.group |= NINE_STATE_MULTISAMPLE;
1895 
1896         if (context->rt[0] &&
1897             (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) !=
1898             (rt->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE))
1899             context->changed.group |= NINE_STATE_SAMPLE_MASK;
1900     }
1901 
1902     if (context->rt[i] != rt) {
1903        nine_bind(&context->rt[i], rt);
1904        context->changed.group |= NINE_STATE_FB;
1905     }
1906 }
1907 
1908 /* XXX: use resource instead of ds, as resource might change */
1909 CSMT_ITEM_NO_WAIT(nine_context_set_depth_stencil,
1910                   ARG_BIND_REF(struct NineSurface9, ds))
1911 {
1912     struct nine_context *context = &device->context;
1913 
1914     nine_bind(&context->ds, ds);
1915     context->changed.group |= NINE_STATE_FB;
1916 }
1917 
1918 CSMT_ITEM_NO_WAIT(nine_context_set_viewport,
1919                   ARG_COPY_REF(D3DVIEWPORT9, viewport))
1920 {
1921     struct nine_context *context = &device->context;
1922 
1923     if (!memcmp(viewport, &context->viewport, sizeof(context->viewport)))
1924         return;
1925 
1926     context->viewport = *viewport;
1927     context->changed.group |= NINE_STATE_VIEWPORT;
1928 }
1929 
1930 CSMT_ITEM_NO_WAIT(nine_context_set_scissor,
1931                   ARG_COPY_REF(struct pipe_scissor_state, scissor))
1932 {
1933     struct nine_context *context = &device->context;
1934 
1935     if (!memcmp(scissor, &context->scissor, sizeof(context->scissor)))
1936         return;
1937 
1938     context->scissor = *scissor;
1939     context->changed.group |= NINE_STATE_SCISSOR;
1940 }
1941 
1942 CSMT_ITEM_NO_WAIT(nine_context_set_transform,
1943                   ARG_VAL(D3DTRANSFORMSTATETYPE, State),
1944                   ARG_COPY_REF(D3DMATRIX, pMatrix))
1945 {
1946     struct nine_context *context = &device->context;
1947     D3DMATRIX *M = nine_state_access_transform(&context->ff, State, true);
1948 
1949     *M = *pMatrix;
1950     if (State == D3DTS_PROJECTION) {
1951         BOOL prev_zfog = context->zfog;
1952         /* Pixel fog (with WFOG advertised): source is either Z or W.
1953          * W is the source if the projection matrix is not orthogonal.
1954          * Tests on Win 10 seem to indicate _34
1955          * and _44 are checked against 0, 1. */
1956         context->zfog = (M->_34 == 0.0f &&
1957                          M->_44 == 1.0f);
1958         if (context->zfog != prev_zfog)
1959             context->changed.group |= NINE_STATE_PS_PARAMS_MISC;
1960     }
1961     context->ff.changed.transform[State / 32] |= 1 << (State % 32);
1962     context->changed.group |= NINE_STATE_FF;
1963 }
1964 
1965 CSMT_ITEM_NO_WAIT(nine_context_set_material,
1966                   ARG_COPY_REF(D3DMATERIAL9, pMaterial))
1967 {
1968     struct nine_context *context = &device->context;
1969 
1970     context->ff.material = *pMaterial;
1971     context->changed.group |= NINE_STATE_FF_MATERIAL;
1972 }
1973 
1974 CSMT_ITEM_NO_WAIT(nine_context_set_light,
1975                   ARG_VAL(DWORD, Index),
1976                   ARG_COPY_REF(D3DLIGHT9, pLight))
1977 {
1978     struct nine_context *context = &device->context;
1979 
1980     (void)nine_state_set_light(&context->ff, Index, pLight);
1981     context->changed.group |= NINE_STATE_FF_LIGHTING;
1982 }
1983 
1984 
1985 /* For stateblocks */
1986 static void
1987 nine_context_light_enable_stateblock(struct NineDevice9 *device,
1988                                      const uint16_t active_light[NINE_MAX_LIGHTS_ACTIVE], /* TODO: use a pointer that conveys size for csmt */
1989                                      unsigned int num_lights_active)
1990 {
1991     struct nine_context *context = &device->context;
1992 
1993     /* TODO: Use CSMT_* to avoid calling nine_csmt_process */
1994     nine_csmt_process(device);
1995     memcpy(context->ff.active_light, active_light, NINE_MAX_LIGHTS_ACTIVE * sizeof(context->ff.active_light[0]));
1996     context->ff.num_lights_active = num_lights_active;
1997     context->changed.group |= NINE_STATE_FF_LIGHTING;
1998 }
1999 
2000 CSMT_ITEM_NO_WAIT(nine_context_light_enable,
2001                   ARG_VAL(DWORD, Index),
2002                   ARG_VAL(BOOL, Enable))
2003 {
2004     struct nine_context *context = &device->context;
2005 
2006     nine_state_light_enable(&context->ff, Index, Enable);
2007     context->changed.group |= NINE_STATE_FF_LIGHTING;
2008 }
2009 
2010 CSMT_ITEM_NO_WAIT(nine_context_set_texture_stage_state,
2011                   ARG_VAL(DWORD, Stage),
2012                   ARG_VAL(D3DTEXTURESTAGESTATETYPE, Type),
2013                   ARG_VAL(DWORD, Value))
2014 {
2015     struct nine_context *context = &device->context;
2016     int bumpmap_index = -1;
2017 
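         /* bumpmap_vars layout: indices 0..31 hold the 2x2 bump-env matrix
          * coefficients (4 per stage, 8 stages); indices 32..47 hold the
          * luminance scale/offset pair of each stage. */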
2018     context->ff.tex_stage[Stage][Type] = Value;
2019     switch (Type) {
2020     case D3DTSS_BUMPENVMAT00:
2021         bumpmap_index = 4 * Stage;
2022         break;
2023     case D3DTSS_BUMPENVMAT01:
2024         bumpmap_index = 4 * Stage + 1;
2025         break;
2026     case D3DTSS_BUMPENVMAT10:
2027         bumpmap_index = 4 * Stage + 2;
2028         break;
2029     case D3DTSS_BUMPENVMAT11:
2030         bumpmap_index = 4 * Stage + 3;
2031         break;
2032     case D3DTSS_BUMPENVLSCALE:
2033         bumpmap_index = 4 * 8 + 2 * Stage;
2034         break;
2035     case D3DTSS_BUMPENVLOFFSET:
2036         bumpmap_index = 4 * 8 + 2 * Stage + 1;
2037         break;
2038     case D3DTSS_TEXTURETRANSFORMFLAGS:
2039         context->changed.group |= NINE_STATE_PS_PARAMS_MISC;
2040         break;
2041     default:
2042         break;
2043     }
2044 
2045     if (bumpmap_index >= 0) {
2046         context->bumpmap_vars[bumpmap_index] = Value;
2047         context->changed.group |= NINE_STATE_PS_CONST;
2048     }
2049 
2050     context->changed.group |= NINE_STATE_FF_PS_CONSTS;
2051     context->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
2052 }
2053 
2054 CSMT_ITEM_NO_WAIT(nine_context_set_clip_plane,
2055                   ARG_VAL(DWORD, Index),
2056                   ARG_COPY_REF(struct nine_clipplane, pPlane))
2057 {
2058     struct nine_context *context = &device->context;
2059 
2060     memcpy(&context->clip.ucp[Index][0], pPlane, sizeof(context->clip.ucp[0]));
2061     if (!device->driver_caps.emulate_ucp)
2062         context->changed.ucp = true;
2063     else
2064         context->changed.group |= NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_CONST;
2065 }
2066 
2067 CSMT_ITEM_NO_WAIT(nine_context_set_swvp,
2068                   ARG_VAL(bool, swvp))
2069 {
2070     struct nine_context *context = &device->context;
2071 
2072     context->swvp = swvp;
2073     context->changed.group |= NINE_STATE_SWVP;
2074 }
2075 
2076 /* Do not write to nine_context directly. Slower,
2077  * but works with csmt. TODO: write a special csmt version that
2078  * would record the list of commands as much as possible,
2079  * and fall back to the version above otherwise.
2080  */
2081 void
2082 nine_context_apply_stateblock(struct NineDevice9 *device,
2083                               const struct nine_state *src)
2084 {
2085     int i;
2086 
2087     /* No need to apply src->changed.group, since every call below
2088      * sets context->changed.group */
2089 
2090     for (i = 0; i < ARRAY_SIZE(src->changed.rs); ++i) {
2091         uint32_t m = src->changed.rs[i];
2092         while (m) {
2093             const int r = ffs(m) - 1;
2094             m &= ~(1 << r);
2095             nine_context_set_render_state(device, i * 32 + r, src->rs_advertised[i * 32 + r]);
2096         }
2097     }
2098 
2099     /* Textures */
2100     if (src->changed.texture) {
2101         uint32_t m = src->changed.texture;
2102         unsigned s;
2103 
2104         for (s = 0; m; ++s, m >>= 1) {
2105             struct NineBaseTexture9 *tex = src->texture[s];
2106             if (!(m & 1))
2107                 continue;
2108             nine_context_set_texture(device, s, tex);
2109         }
2110     }
2111 
2112     /* Sampler state */
2113     if (src->changed.group & NINE_STATE_SAMPLER) {
2114         unsigned s;
2115 
2116         for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
2117             uint32_t m = src->changed.sampler[s];
2118             while (m) {
2119                 const int i = ffs(m) - 1;
2120                 m &= ~(1 << i);
2121                 nine_context_set_sampler_state(device, s, i, src->samp_advertised[s][i]);
2122             }
2123         }
2124     }
2125 
2126     /* Vertex buffers */
2127     if (src->changed.vtxbuf | src->changed.stream_freq) {
2128         uint32_t m = src->changed.vtxbuf | src->changed.stream_freq;
2129         for (i = 0; m; ++i, m >>= 1) {
2130             if (src->changed.vtxbuf & (1 << i))
2131                 nine_context_set_stream_source(device, i, src->stream[i], src->vtxbuf[i].buffer_offset, src->vtxstride[i]);
2132             if (src->changed.stream_freq & (1 << i))
2133                 nine_context_set_stream_source_freq(device, i, src->stream_freq[i]);
2134         }
2135     }
2136 
2137     /* Index buffer */
2138     if (src->changed.group & NINE_STATE_IDXBUF)
2139         nine_context_set_indices(device, src->idxbuf);
2140 
2141     /* Vertex declaration */
2142     if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
2143         nine_context_set_vertex_declaration(device, src->vdecl);
2144 
2145     /* Vertex shader */
2146     if (src->changed.group & NINE_STATE_VS)
2147         nine_context_set_vertex_shader(device, src->vs);
2148 
2149     /* Pixel shader */
2150     if (src->changed.group & NINE_STATE_PS)
2151         nine_context_set_pixel_shader(device, src->ps);
2152 
2153     /* Vertex constants */
2154     if (src->changed.group & NINE_STATE_VS_CONST) {
2155         struct nine_range *r;
2156         for (r = src->changed.vs_const_f; r; r = r->next)
2157             nine_context_set_vertex_shader_constant_f(device, r->bgn,
2158                                                       &src->vs_const_f[r->bgn * 4],
2159                                                       sizeof(float[4]) * (r->end - r->bgn),
2160                                                       r->end - r->bgn);
2161         for (r = src->changed.vs_const_i; r; r = r->next)
2162             nine_context_set_vertex_shader_constant_i(device, r->bgn,
2163                                                       &src->vs_const_i[r->bgn * 4],
2164                                                       sizeof(int[4]) * (r->end - r->bgn),
2165                                                       r->end - r->bgn);
2166         for (r = src->changed.vs_const_b; r; r = r->next)
2167             nine_context_set_vertex_shader_constant_b(device, r->bgn,
2168                                                       &src->vs_const_b[r->bgn * 4],
2169                                                       sizeof(BOOL) * (r->end - r->bgn),
2170                                                       r->end - r->bgn);
2171     }
2172 
2173     /* Pixel constants */
2174     if (src->changed.group & NINE_STATE_PS_CONST) {
2175         struct nine_range *r;
2176         for (r = src->changed.ps_const_f; r; r = r->next)
2177             nine_context_set_pixel_shader_constant_f(device, r->bgn,
2178                                                      &src->ps_const_f[r->bgn * 4],
2179                                                      sizeof(float[4]) * (r->end - r->bgn),
2180                                                      r->end - r->bgn);
2181         if (src->changed.ps_const_i) {
2182             uint16_t m = src->changed.ps_const_i;
2183             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
2184                 if (m & 1)
2185                     nine_context_set_pixel_shader_constant_i_transformed(device, i,
2186                                                                          src->ps_const_i[i], sizeof(int[4]), 1);
2187         }
2188         if (src->changed.ps_const_b) {
2189             uint16_t m = src->changed.ps_const_b;
2190             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
2191                 if (m & 1)
2192                     nine_context_set_pixel_shader_constant_b(device, i,
2193                                                              &src->ps_const_b[i], sizeof(BOOL), 1);
2194         }
2195     }
2196 
2197     /* Viewport */
2198     if (src->changed.group & NINE_STATE_VIEWPORT)
2199         nine_context_set_viewport(device, &src->viewport);
2200 
2201     /* Scissor */
2202     if (src->changed.group & NINE_STATE_SCISSOR)
2203         nine_context_set_scissor(device, &src->scissor);
2204 
2205     /* User Clip Planes */
2206     if (src->changed.ucp)
2207         for (i = 0; i < PIPE_MAX_CLIP_PLANES; ++i)
2208             if (src->changed.ucp & (1 << i))
2209                 nine_context_set_clip_plane(device, i, (struct nine_clipplane*)&src->clip.ucp[i][0]);
2210 
2211     if (!(src->changed.group & NINE_STATE_FF))
2212         return;
2213 
2214     /* Fixed function state. */
2215 
2216     if (src->changed.group & NINE_STATE_FF_MATERIAL)
2217         nine_context_set_material(device, &src->ff.material);
2218 
2219     if (src->changed.group & NINE_STATE_FF_PS_CONSTS) {
2220         unsigned s;
2221         for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
2222             for (i = 0; i < NINED3DTSS_COUNT; ++i)
2223                 if (src->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
2224                    nine_context_set_texture_stage_state(device, s, i, src->ff.tex_stage[s][i]);
2225         }
2226     }
2227     if (src->changed.group & NINE_STATE_FF_LIGHTING) {
2228         for (i = 0; i < src->ff.num_lights; ++i)
2229             if (src->ff.light[i].Type != NINED3DLIGHT_INVALID)
2230                 nine_context_set_light(device, i, &src->ff.light[i]);
2231 
2232         nine_context_light_enable_stateblock(device, src->ff.active_light, src->ff.num_lights_active);
2233     }
2234     if (src->changed.group & NINE_STATE_FF_VSTRANSF) {
2235         for (i = 0; i < ARRAY_SIZE(src->ff.changed.transform); ++i) {
2236             unsigned s;
2237             if (!src->ff.changed.transform[i])
2238                 continue;
2239             for (s = i * 32; s < (i * 32 + 32); ++s) {
2240                 if (!(src->ff.changed.transform[i] & (1 << (s % 32))))
2241                     continue;
2242                 /* MaxVertexBlendMatrixIndex is 8, which means
2243                  * we don't read past index D3DTS_WORLDMATRIX(8).
2244                  * swvp is supposed to allow all 256, but we don't
2245                  * implement it for now. */
2246                 if (s > D3DTS_WORLDMATRIX(8))
2247                     break;
2248                 nine_context_set_transform(device, s,
2249                                            nine_state_access_transform(
2250                                                (struct nine_ff_state *)&src->ff,
2251                                                                        s, false));
2252             }
2253         }
2254     }
2255 }
2256 
2257 static void
2258 nine_update_state_framebuffer_clear(struct NineDevice9 *device)
2259 {
2260     struct nine_context *context = &device->context;
2261 
2262     if (context->changed.group & NINE_STATE_FB)
2263         update_framebuffer(device, true);
2264 }
2265 
2266 CSMT_ITEM_NO_WAIT(nine_context_clear_fb,
2267                   ARG_VAL(DWORD, Count),
2268                   ARG_COPY_REF(D3DRECT, pRects),
2269                   ARG_VAL(DWORD, Flags),
2270                   ARG_VAL(D3DCOLOR, Color),
2271                   ARG_VAL(float, Z),
2272                   ARG_VAL(DWORD, Stencil))
2273 {
2274     struct nine_context *context = &device->context;
2275     const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
2276     struct pipe_surface *cbuf, *zsbuf;
2277     struct pipe_context *pipe = context->pipe;
2278     struct NineSurface9 *zsbuf_surf = context->ds;
2279     struct NineSurface9 *rt;
2280     unsigned bufs = 0;
2281     unsigned r, i;
2282     union pipe_color_union rgba;
2283     unsigned rt_mask = 0;
2284     D3DRECT rect;
2285 
2286     nine_update_state_framebuffer_clear(device);
2287 
2288     if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
2289     /* Ignore Z buffer if not bound */
2290     if (context->pipe_data.fb.zsbuf != NULL) {
2291         if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
2292         if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
2293     }
2294     if (!bufs)
2295         return;
2296     d3dcolor_to_pipe_color_union(&rgba, Color);
2297 
2298     rect.x1 = context->viewport.X;
2299     rect.y1 = context->viewport.Y;
2300     rect.x2 = context->viewport.Width + rect.x1;
2301     rect.y2 = context->viewport.Height + rect.y1;
2302 
2303     /* Both rectangles apply, which is weird, but that's D3D9. */
2304     if (context->rs[D3DRS_SCISSORTESTENABLE]) {
2305         rect.x1 = MAX2(rect.x1, context->scissor.minx);
2306         rect.y1 = MAX2(rect.y1, context->scissor.miny);
2307         rect.x2 = MIN2(rect.x2, context->scissor.maxx);
2308         rect.y2 = MIN2(rect.y2, context->scissor.maxy);
2309     }
2310 
2311     if (Count) {
2312         /* Maybe apps like to specify a large rect ? */
2313         if (pRects[0].x1 <= rect.x1 && pRects[0].x2 >= rect.x2 &&
2314             pRects[0].y1 <= rect.y1 && pRects[0].y2 >= rect.y2) {
2315             DBG("First rect covers viewport.\n");
2316             Count = 0;
2317             pRects = NULL;
2318         }
2319     }
2320 
2321     if (rect.x1 >= context->pipe_data.fb.width || rect.y1 >= context->pipe_data.fb.height)
2322         return;
2323 
2324     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
2325         if (context->rt[i] && context->rt[i]->desc.Format != D3DFMT_NULL)
2326             rt_mask |= 1 << i;
2327     }
2328 
2329     /* fast path, clears everything at once */
2330     if (!Count &&
2331         (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == context->rt_mask)) &&
2332         rect.x1 == 0 && rect.y1 == 0 &&
2333         /* Case where we clear only the render target. Check the clear region vs the rt. */
2334         ((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
2335          rect.x2 >= context->pipe_data.fb.width &&
2336          rect.y2 >= context->pipe_data.fb.height) ||
2337         /* Case where we clear the depth buffer (and possibly the rt too).
2338          * The depth buffer size is always >= the rt size. Compare to the clear region. */
2339         ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
2340          rect.x2 >= zsbuf_surf->desc.Width &&
2341          rect.y2 >= zsbuf_surf->desc.Height))) {
2342         DBG("Clear fast path\n");
2343         pipe->clear(pipe, bufs, NULL, &rgba, Z, Stencil);
2344         return;
2345     }
2346 
2347     if (!Count) {
2348         Count = 1;
2349         pRects = &rect;
2350     }
2351 
2352     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
2353         rt = context->rt[i];
2354         if (!rt || rt->desc.Format == D3DFMT_NULL ||
2355             !(bufs & PIPE_CLEAR_COLOR))
2356             continue; /* save space, compiler should hoist this */
2357         cbuf = NineSurface9_GetSurface(rt, sRGB);
2358         for (r = 0; r < Count; ++r) {
2359             /* Don't trust users to pass these in the right order. */
2360             unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
2361             unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
2362             unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
2363             unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
2364 #ifndef NINE_LAX
2365             /* Drop negative rectangles (like wine expects). */
2366             if (pRects[r].x1 > pRects[r].x2) continue;
2367             if (pRects[r].y1 > pRects[r].y2) continue;
2368 #endif
2369 
2370             x1 = MAX2(x1, rect.x1);
2371             y1 = MAX2(y1, rect.y1);
2372             x2 = MIN3(x2, rect.x2, rt->desc.Width);
2373             y2 = MIN3(y2, rect.y2, rt->desc.Height);
2374 
2375             DBG("Clearing (%u..%u)x(%u..%u)\n", x1, x2, y1, y2);
2376             pipe->clear_render_target(pipe, cbuf, &rgba,
2377                                       x1, y1, x2 - x1, y2 - y1, false);
2378         }
2379     }
2380     if (!(bufs & PIPE_CLEAR_DEPTHSTENCIL))
2381         return;
2382 
2383     bufs &= PIPE_CLEAR_DEPTHSTENCIL;
2384 
2385     for (r = 0; r < Count; ++r) {
2386         unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
2387         unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
2388         unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
2389         unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
2390 #ifndef NINE_LAX
2391         /* Drop negative rectangles. */
2392         if (pRects[r].x1 > pRects[r].x2) continue;
2393         if (pRects[r].y1 > pRects[r].y2) continue;
2394 #endif
2395 
2396         x1 = MIN2(x1, rect.x1);
2397         y1 = MIN2(y1, rect.y1);
2398         x2 = MIN3(x2, rect.x2, zsbuf_surf->desc.Width);
2399         y2 = MIN3(y2, rect.y2, zsbuf_surf->desc.Height);
2400 
2401         zsbuf = NineSurface9_GetSurface(zsbuf_surf, 0);
2402         assert(zsbuf);
2403         pipe->clear_depth_stencil(pipe, zsbuf, bufs, Z, Stencil,
2404                                   x1, y1, x2 - x1, y2 - y1, false);
2405     }
2406     return;
2407 }
2408 
2409 
2410 static inline void
2411 init_draw_info(struct pipe_draw_info *info,
2412                struct pipe_draw_start_count_bias *draw,
2413                struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
2414 {
2415     info->mode = d3dprimitivetype_to_pipe_prim(type);
2416     draw->count = prim_count_to_vertex_count(type, count);
2417     info->start_instance = 0;
2418     info->instance_count = 1;
2419     if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask)
2420         info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1);
2421     info->primitive_restart = false;
2422     info->has_user_indices = false;
2423     info->take_index_buffer_ownership = false;
2424     info->index_bias_varies = false;
2425     info->increment_draw_id = false;
2426     info->was_line_loop = false;
2427     info->restart_index = 0;
2428     info->view_mask = 0;
2429 }
2430 
2431 CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
2432                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2433                   ARG_VAL(UINT, StartVertex),
2434                   ARG_VAL(UINT, PrimitiveCount))
2435 {
2436     struct nine_context *context = &device->context;
2437     struct pipe_draw_info info;
2438     struct pipe_draw_start_count_bias draw;
2439 
2440     if (context->vs && context->vs->swvp_only && !context->swvp)
2441         return;
2442 
2443     nine_update_state(device);
2444 
2445     init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount);
2446     info.index_size = 0;
2447     draw.start = StartVertex;
2448     draw.index_bias = 0;
2449     info.min_index = draw.start;
2450     info.max_index = draw.start + draw.count - 1;
2451     info.index.resource = NULL;
2452 
2453     context->pipe->draw_vbo(context->pipe, &info, 0, NULL, &draw, 1);
2454 }
2455 
2456 CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive,
2457                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2458                   ARG_VAL(INT, BaseVertexIndex),
2459                   ARG_VAL(UINT, MinVertexIndex),
2460                   ARG_VAL(UINT, NumVertices),
2461                   ARG_VAL(UINT, StartIndex),
2462                   ARG_VAL(UINT, PrimitiveCount))
2463 {
2464     struct nine_context *context = &device->context;
2465     struct pipe_draw_info info;
2466     struct pipe_draw_start_count_bias draw;
2467 
2468     if (context->vs && context->vs->swvp_only && !context->swvp)
2469         return;
2470 
2471     nine_update_state(device);
2472 
2473     init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount);
2474     info.index_size = context->index_size;
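         /* index_offset is in bytes; convert it to index units before adding
          * StartIndex. */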
2475     draw.start = context->index_offset / context->index_size + StartIndex;
2476     draw.index_bias = BaseVertexIndex;
2477     info.index_bounds_valid = true;
2478     /* These don't include index bias: */
2479     info.min_index = MinVertexIndex;
2480     info.max_index = MinVertexIndex + NumVertices - 1;
2481     info.index.resource = context->idxbuf;
2482 
2483     context->pipe->draw_vbo(context->pipe, &info, 0, NULL, &draw, 1);
2484 }
2485 
2486 CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf,
2487                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2488                   ARG_VAL(UINT, MinVertexIndex),
2489                   ARG_VAL(UINT, NumVertices),
2490                   ARG_VAL(UINT, PrimitiveCount),
2491                   ARG_VAL(UINT, vbuf_stride),
2492                   ARG_BIND_VBUF(struct pipe_vertex_buffer, vbuf),
2493                   ARG_BIND_RES(struct pipe_resource, ibuf),
2494                   ARG_VAL(void *, user_ibuf),
2495                   ARG_VAL(UINT, index_offset),
2496                   ARG_VAL(UINT, index_size))
2497 {
2498     struct nine_context *context = &device->context;
2499     struct pipe_draw_info info;
2500     struct pipe_draw_start_count_bias draw;
2501 
2502     if (context->vs && context->vs->swvp_only && !context->swvp)
2503         return;
2504 
2505     if (context->vtxstride[0] != vbuf_stride) {
2506         context->vtxstride[0] = vbuf_stride;
2507         /* force elements update for stride.
2508          * We don't need to restore the old value,
2509          * as the caller sets it to 0 after the call */
2510         context->changed.group |= NINE_STATE_VDECL;
2511     }
2512 
2513     nine_update_state(device);
2514 
2515     init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount);
2516     info.index_size = index_size;
2517     draw.start = index_offset / info.index_size;
2518     draw.index_bias = 0;
2519     info.index_bounds_valid = true;
2520     info.min_index = MinVertexIndex;
2521     info.max_index = MinVertexIndex + NumVertices - 1;
2522     info.has_user_indices = ibuf == NULL;
2523     if (ibuf)
2524         info.index.resource = ibuf;
2525     else
2526         info.index.user = user_ibuf;
2527 
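         /* Bind the caller-provided vertex buffer directly and mark stream 0
          * dirty so the regular binding is restored on the next draw. */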
2528     util_set_vertex_buffers(context->pipe, 1, false, vbuf);
2529     context->changed.vtxbuf |= 1;
2530 
2531     context->pipe->draw_vbo(context->pipe, &info, 0, NULL, &draw, 1);
2532 }
2533 
2534 CSMT_ITEM_NO_WAIT(nine_context_resource_copy_region,
2535                   ARG_BIND_REF(struct NineUnknown, dst),
2536                   ARG_BIND_REF(struct NineUnknown, src),
2537                   ARG_BIND_RES(struct pipe_resource, dst_res),
2538                   ARG_VAL(unsigned, dst_level),
2539                   ARG_COPY_REF(struct pipe_box, dst_box),
2540                   ARG_BIND_RES(struct pipe_resource, src_res),
2541                   ARG_VAL(unsigned, src_level),
2542                   ARG_COPY_REF(struct pipe_box, src_box))
2543 {
2544     struct nine_context *context = &device->context;
2545 
2546     (void) dst;
2547     (void) src;
2548 
2549     context->pipe->resource_copy_region(context->pipe,
2550             dst_res, dst_level,
2551             dst_box->x, dst_box->y, dst_box->z,
2552             src_res, src_level,
2553             src_box);
2554 }
2555 
2556 CSMT_ITEM_NO_WAIT(nine_context_blit,
2557                   ARG_BIND_REF(struct NineUnknown, dst),
2558                   ARG_BIND_REF(struct NineUnknown, src),
2559                   ARG_BIND_BLIT(struct pipe_blit_info, blit))
2560 {
2561     struct nine_context *context = &device->context;
2562 
2563     (void) dst;
2564     (void) src;
2565 
2566     context->pipe->blit(context->pipe, blit);
2567 }
2568 
2569 CSMT_ITEM_NO_WAIT(nine_context_clear_render_target,
2570                   ARG_BIND_REF(struct NineSurface9, surface),
2571                   ARG_VAL(D3DCOLOR, color),
2572                   ARG_VAL(UINT, x),
2573                   ARG_VAL(UINT, y),
2574                   ARG_VAL(UINT, width),
2575                   ARG_VAL(UINT, height))
2576 {
2577     struct nine_context *context = &device->context;
2578     struct pipe_surface *surf;
2579     union pipe_color_union rgba;
2580 
2581     d3dcolor_to_pipe_color_union(&rgba, color);
2582     surf = NineSurface9_GetSurface(surface, 0);
2583     context->pipe->clear_render_target(context->pipe, surf, &rgba, x, y, width, height, false);
2584 }
2585 
2586 CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap,
2587                   ARG_BIND_REF(struct NineUnknown, dst),
2588                   ARG_BIND_RES(struct pipe_resource, res),
2589                   ARG_VAL(UINT, base_level),
2590                   ARG_VAL(UINT, last_level),
2591                   ARG_VAL(UINT, first_layer),
2592                   ARG_VAL(UINT, last_layer),
2593                   ARG_VAL(UINT, filter))
2594 {
2595     struct nine_context *context = &device->context;
2596 
2597     /* We just bind dst for the bind count */
2598     (void)dst;
2599 
2600     util_gen_mipmap(context->pipe, res, res->format, base_level,
2601                     last_level, first_layer, last_layer, filter);
2602 }
2603 
2604 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload,
2605                                ARG_BIND_REF(struct NineUnknown, src_ref),
2606                                ARG_BIND_RES(struct pipe_resource, res),
2607                                ARG_VAL(unsigned, offset),
2608                                ARG_VAL(unsigned, size),
2609                                ARG_VAL(unsigned, usage),
2610                                ARG_VAL(const void *, data))
2611 {
2612     struct nine_context *context = &device->context;
2613 
2614     /* Binding src_ref avoids release before upload */
2615     (void)src_ref;
2616 
2617     context->pipe->buffer_subdata(context->pipe, res, usage, offset, size, data);
2618 }
2619 
2620 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload,
2621                                ARG_BIND_REF(struct NineUnknown, src_ref),
2622                                ARG_BIND_RES(struct pipe_resource, res),
2623                                ARG_VAL(unsigned, level),
2624                                ARG_COPY_REF(struct pipe_box, dst_box),
2625                                ARG_VAL(enum pipe_format, src_format),
2626                                ARG_VAL(const void *, src),
2627                                ARG_VAL(unsigned, src_stride),
2628                                ARG_VAL(unsigned, src_layer_stride),
2629                                ARG_COPY_REF(struct pipe_box, src_box))
2630 {
2631     struct nine_context *context = &device->context;
2632     struct pipe_context *pipe = context->pipe;
2633     struct pipe_transfer *transfer = NULL;
2634     uint8_t *map;
2635 
2636     /* Binding src_ref avoids release before upload */
2637     (void)src_ref;
2638 
2639     if (is_ATI1_ATI2(src_format)) {
2640         const unsigned bw = util_format_get_blockwidth(src_format);
2641         const unsigned bh = util_format_get_blockheight(src_format);
2642         /* For these formats, the allocated surface can be too small to
2643          * contain a block, yet we can still be asked to upload such surfaces.
2644          * It is ok for these surfaces to have buggy content,
2645          * but we should avoid crashing:
2646          * calling util_format_translate_3d would read out of bounds. */
2647         if (dst_box->width < bw || dst_box->height < bh)
2648             return;
2649     }
2650 
2651     map = pipe->texture_map(pipe,
2652                              res,
2653                              level,
2654                              PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
2655                              dst_box, &transfer);
2656     if (!map)
2657         return;
2658 
2659     /* Note: if the formats are the same, this reverts
2660      * to a plain memcpy */
2661     (void) util_format_translate_3d(res->format,
2662                                     map, transfer->stride,
2663                                     transfer->layer_stride,
2664                                     0, 0, 0,
2665                                     src_format,
2666                                     src, src_stride,
2667                                     src_layer_stride,
2668                                     src_box->x, src_box->y, src_box->z,
2669                                     dst_box->width, dst_box->height,
2670                                     dst_box->depth);
2671 
2672     pipe_texture_unmap(pipe, transfer);
2673 }
2674 
2675 struct pipe_query *
2676 nine_context_create_query(struct NineDevice9 *device, unsigned query_type)
2677 {
2678     struct pipe_context *pipe;
2679     struct pipe_query *res;
2680 
2681     pipe = nine_context_get_pipe_acquire(device);
2682     res = pipe->create_query(pipe, query_type, 0);
2683     nine_context_get_pipe_release(device);
2684     return res;
2685 }
2686 
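/* Note: destroying the query is a DO_WAIT item; this presumably ensures that
 * no command still queued on the CSMT thread references the query once it is
 * destroyed (assumption based on the synchronous wait). */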
2687 CSMT_ITEM_DO_WAIT(nine_context_destroy_query,
2688                   ARG_REF(struct pipe_query, query))
2689 {
2690     struct nine_context *context = &device->context;
2691 
2692     context->pipe->destroy_query(context->pipe, query);
2693 }
2694 
2695 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_begin_query,
2696                                ARG_REF(struct pipe_query, query))
2697 {
2698     struct nine_context *context = &device->context;
2699 
2700     (void) context->pipe->begin_query(context->pipe, query);
2701 }
2702 
2703 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_end_query,
2704                                ARG_REF(struct pipe_query, query))
2705 {
2706     struct nine_context *context = &device->context;
2707 
2708     (void) context->pipe->end_query(context->pipe, query);
2709 }
2710 
2711 bool
2712 nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *query,
2713                               unsigned *counter, bool flush, bool wait,
2714                               union pipe_query_result *result)
2715 {
2716     struct pipe_context *pipe;
2717     bool ret;
2718 
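    /* If the caller is willing to wait, drain the CSMT queue so the query's
     * begin/end have reached the driver. Otherwise, if begin/end commands are
     * still pending (counter > 0), optionally flush the queue and report the
     * result as not yet available. */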
2719     if (wait)
2720         nine_csmt_process(device);
2721     else if (p_atomic_read(counter) > 0) {
2722         if (flush && device->csmt_active)
2723             nine_queue_flush(device->csmt_ctx->pool);
2724         DBG("Pending begin/end. Returning\n");
2725         return false;
2726     }
2727 
2728     pipe = nine_context_get_pipe_acquire(device);
2729     ret = pipe->get_query_result(pipe, query, wait, result);
2730     nine_context_get_pipe_release(device);
2731 
2732     DBG("Query result %s\n", ret ? "found" : "not yet available");
2733     return ret;
2734 }
2735 
2736 CSMT_ITEM_NO_WAIT(nine_context_pipe_flush)
2737 {
2738     struct nine_context *context = &device->context;
2739 
2740     context->pipe->flush(context->pipe, NULL, PIPE_FLUSH_ASYNC);
2741 }
2742 
2743 /* State defaults */
2744 
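/* The values below follow the documented D3D9 defaults. Float-valued render
 * states are stored as their raw IEEE-754 bit pattern (e.g. 0x3F800000 is
 * 1.0f); the NINED3DRS_* entries are nine-internal states. */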
2745 static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
2746 {
2747  /* [D3DRS_ZENABLE] = D3DZB_TRUE; wine: auto_depth_stencil */
2748     [D3DRS_ZENABLE] = D3DZB_FALSE,
2749     [D3DRS_FILLMODE] = D3DFILL_SOLID,
2750     [D3DRS_SHADEMODE] = D3DSHADE_GOURAUD,
2751 /*  [D3DRS_LINEPATTERN] = 0x00000000, */
2752     [D3DRS_ZWRITEENABLE] = true,
2753     [D3DRS_ALPHATESTENABLE] = false,
2754     [D3DRS_LASTPIXEL] = true,
2755     [D3DRS_SRCBLEND] = D3DBLEND_ONE,
2756     [D3DRS_DESTBLEND] = D3DBLEND_ZERO,
2757     [D3DRS_CULLMODE] = D3DCULL_CCW,
2758     [D3DRS_ZFUNC] = D3DCMP_LESSEQUAL,
2759     [D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS,
2760     [D3DRS_ALPHAREF] = 0,
2761     [D3DRS_DITHERENABLE] = false,
2762     [D3DRS_ALPHABLENDENABLE] = false,
2763     [D3DRS_FOGENABLE] = false,
2764     [D3DRS_SPECULARENABLE] = false,
2765 /*  [D3DRS_ZVISIBLE] = 0, */
2766     [D3DRS_FOGCOLOR] = 0,
2767     [D3DRS_FOGTABLEMODE] = D3DFOG_NONE,
2768     [D3DRS_FOGSTART] = 0x00000000,
2769     [D3DRS_FOGEND] = 0x3F800000,
2770     [D3DRS_FOGDENSITY] = 0x3F800000,
2771 /*  [D3DRS_EDGEANTIALIAS] = FALSE, */
2772     [D3DRS_RANGEFOGENABLE] = false,
2773     [D3DRS_STENCILENABLE] = false,
2774     [D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP,
2775     [D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP,
2776     [D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP,
2777     [D3DRS_STENCILREF] = 0,
2778     [D3DRS_STENCILMASK] = 0xFFFFFFFF,
2779     [D3DRS_STENCILFUNC] = D3DCMP_ALWAYS,
2780     [D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF,
2781     [D3DRS_TEXTUREFACTOR] = 0xFFFFFFFF,
2782     [D3DRS_WRAP0] = 0,
2783     [D3DRS_WRAP1] = 0,
2784     [D3DRS_WRAP2] = 0,
2785     [D3DRS_WRAP3] = 0,
2786     [D3DRS_WRAP4] = 0,
2787     [D3DRS_WRAP5] = 0,
2788     [D3DRS_WRAP6] = 0,
2789     [D3DRS_WRAP7] = 0,
2790     [D3DRS_CLIPPING] = true,
2791     [D3DRS_LIGHTING] = true,
2792     [D3DRS_AMBIENT] = 0,
2793     [D3DRS_FOGVERTEXMODE] = D3DFOG_NONE,
2794     [D3DRS_COLORVERTEX] = true,
2795     [D3DRS_LOCALVIEWER] = true,
2796     [D3DRS_NORMALIZENORMALS] = false,
2797     [D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1,
2798     [D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2,
2799     [D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL,
2800     [D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL,
2801     [D3DRS_VERTEXBLEND] = D3DVBF_DISABLE,
2802     [D3DRS_CLIPPLANEENABLE] = 0,
2803 /*  [D3DRS_SOFTWAREVERTEXPROCESSING] = FALSE, */
2804     [D3DRS_POINTSIZE] = 0x3F800000,
2805     [D3DRS_POINTSIZE_MIN] = 0x3F800000,
2806     [D3DRS_POINTSPRITEENABLE] = false,
2807     [D3DRS_POINTSCALEENABLE] = false,
2808     [D3DRS_POINTSCALE_A] = 0x3F800000,
2809     [D3DRS_POINTSCALE_B] = 0x00000000,
2810     [D3DRS_POINTSCALE_C] = 0x00000000,
2811     [D3DRS_MULTISAMPLEANTIALIAS] = true,
2812     [D3DRS_MULTISAMPLEMASK] = 0xFFFFFFFF,
2813     [D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE,
2814 /*  [D3DRS_PATCHSEGMENTS] = 0x3F800000, */
2815     [D3DRS_DEBUGMONITORTOKEN] = 0xDEADCAFE,
2816     [D3DRS_POINTSIZE_MAX] = 0x3F800000, /* depends on cap */
2817     [D3DRS_INDEXEDVERTEXBLENDENABLE] = false,
2818     [D3DRS_COLORWRITEENABLE] = 0x0000000f,
2819     [D3DRS_TWEENFACTOR] = 0x00000000,
2820     [D3DRS_BLENDOP] = D3DBLENDOP_ADD,
2821     [D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC,
2822     [D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR,
2823     [D3DRS_SCISSORTESTENABLE] = false,
2824     [D3DRS_SLOPESCALEDEPTHBIAS] = 0,
2825     [D3DRS_MINTESSELLATIONLEVEL] = 0x3F800000,
2826     [D3DRS_MAXTESSELLATIONLEVEL] = 0x3F800000,
2827     [D3DRS_ANTIALIASEDLINEENABLE] = false,
2828     [D3DRS_ADAPTIVETESS_X] = 0x00000000,
2829     [D3DRS_ADAPTIVETESS_Y] = 0x00000000,
2830     [D3DRS_ADAPTIVETESS_Z] = 0x3F800000,
2831     [D3DRS_ADAPTIVETESS_W] = 0x00000000,
2832     [D3DRS_ENABLEADAPTIVETESSELLATION] = false,
2833     [D3DRS_TWOSIDEDSTENCILMODE] = false,
2834     [D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP,
2835     [D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP,
2836     [D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP,
2837     [D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS,
2838     [D3DRS_COLORWRITEENABLE1] = 0x0000000F,
2839     [D3DRS_COLORWRITEENABLE2] = 0x0000000F,
2840     [D3DRS_COLORWRITEENABLE3] = 0x0000000F,
2841     [D3DRS_BLENDFACTOR] = 0xFFFFFFFF,
2842     [D3DRS_SRGBWRITEENABLE] = 0,
2843     [D3DRS_DEPTHBIAS] = 0,
2844     [D3DRS_WRAP8] = 0,
2845     [D3DRS_WRAP9] = 0,
2846     [D3DRS_WRAP10] = 0,
2847     [D3DRS_WRAP11] = 0,
2848     [D3DRS_WRAP12] = 0,
2849     [D3DRS_WRAP13] = 0,
2850     [D3DRS_WRAP14] = 0,
2851     [D3DRS_WRAP15] = 0,
2852     [D3DRS_SEPARATEALPHABLENDENABLE] = false,
2853     [D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE,
2854     [D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO,
2855     [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD,
2856     [NINED3DRS_VSPOINTSIZE] = false,
2857     [NINED3DRS_RTMASK] = 0xf,
2858     [NINED3DRS_ALPHACOVERAGE] = false,
2859     [NINED3DRS_MULTISAMPLE] = false,
2860     [NINED3DRS_FETCH4] = 0,
2861     [NINED3DRS_EMULATED_ALPHATEST] = 7 /* ALWAYS pass */
2862 };
2863 static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] =
2864 {
2865     [D3DTSS_COLOROP] = D3DTOP_DISABLE,
2866     [D3DTSS_ALPHAOP] = D3DTOP_DISABLE,
2867     [D3DTSS_COLORARG1] = D3DTA_TEXTURE,
2868     [D3DTSS_COLORARG2] = D3DTA_CURRENT,
2869     [D3DTSS_COLORARG0] = D3DTA_CURRENT,
2870     [D3DTSS_ALPHAARG1] = D3DTA_TEXTURE,
2871     [D3DTSS_ALPHAARG2] = D3DTA_CURRENT,
2872     [D3DTSS_ALPHAARG0] = D3DTA_CURRENT,
2873     [D3DTSS_RESULTARG] = D3DTA_CURRENT,
2874     [D3DTSS_BUMPENVMAT00] = 0,
2875     [D3DTSS_BUMPENVMAT01] = 0,
2876     [D3DTSS_BUMPENVMAT10] = 0,
2877     [D3DTSS_BUMPENVMAT11] = 0,
2878     [D3DTSS_BUMPENVLSCALE] = 0,
2879     [D3DTSS_BUMPENVLOFFSET] = 0,
2880     [D3DTSS_TEXCOORDINDEX] = 0,
2881     [D3DTSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE,
2882 };
2883 static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
2884 {
2885     [D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP,
2886     [D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP,
2887     [D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP,
2888     [D3DSAMP_BORDERCOLOR] = 0,
2889     [D3DSAMP_MAGFILTER] = D3DTEXF_POINT,
2890     [D3DSAMP_MINFILTER] = D3DTEXF_POINT,
2891     [D3DSAMP_MIPFILTER] = D3DTEXF_NONE,
2892     [D3DSAMP_MIPMAPLODBIAS] = 0,
2893     [D3DSAMP_MAXMIPLEVEL] = 0,
2894     [D3DSAMP_MAXANISOTROPY] = 1,
2895     [D3DSAMP_SRGBTEXTURE] = 0,
2896     [D3DSAMP_ELEMENTINDEX] = 0,
2897     [D3DSAMP_DMAPOFFSET] = 0,
2898     [NINED3DSAMP_MINLOD] = 0,
2899     [NINED3DSAMP_SHADOW] = 0,
2900     [NINED3DSAMP_CUBETEX] = 0
2901 };
2902 
2903 /* Note: The following 4 functions assume there are no
2904  * pending commands */
2905 
2906 void nine_state_restore_non_cso(struct NineDevice9 *device)
2907 {
2908     struct nine_context *context = &device->context;
2909 
2910     context->changed.group = NINE_STATE_ALL; /* TODO: we can remove states that have prepared commits */
2911     context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
2912     context->changed.ucp = true;
2913     context->commit |= 0xffffffff; /* re-commit everything */
2914     context->enabled_sampler_count_vs = 0;
2915     context->enabled_sampler_count_ps = 0;
2916 }
2917 
2918 void
2919 nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
2920                         bool is_reset)
2921 {
2922     struct nine_state *state = &device->state;
2923     struct nine_context *context = &device->context;
2924     unsigned s;
2925 
2926     /* Initialize defaults.
2927      */
2928     memcpy(context->rs, nine_render_state_defaults, sizeof(context->rs));
2929 
2930     for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s) {
2931         memcpy(&state->ff.tex_stage[s], nine_tex_stage_state_defaults,
2932                sizeof(state->ff.tex_stage[s]));
2933         state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] = s;
2934     }
2935     state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
2936     state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
2937 
2938     for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s)
2939         memcpy(&context->ff.tex_stage[s], state->ff.tex_stage[s],
2940                sizeof(state->ff.tex_stage[s]));
2941 
2942     memset(&context->bumpmap_vars, 0, sizeof(context->bumpmap_vars));
2943 
2944     for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
2945         memcpy(&context->samp[s], nine_samp_state_defaults,
2946                sizeof(context->samp[s]));
2947         memcpy(&state->samp_advertised[s], nine_samp_state_defaults,
2948                sizeof(state->samp_advertised[s]));
2949     }
2950 
2951     memset(state->vs_const_f, 0, VS_CONST_F_SIZE(device));
2952     memset(context->vs_const_f, 0, device->vs_const_size);
2953     if (context->vs_const_f_swvp)
2954         memset(context->vs_const_f_swvp, 0, NINE_MAX_CONST_F_SWVP * sizeof(float[4]));
2955     memset(state->vs_const_i, 0, VS_CONST_I_SIZE(device));
2956     memset(context->vs_const_i, 0, VS_CONST_I_SIZE(device));
2957     memset(state->vs_const_b, 0, VS_CONST_B_SIZE(device));
2958     memset(context->vs_const_b, 0, VS_CONST_B_SIZE(device));
2959     memset(state->ps_const_f, 0, device->ps_const_size);
2960     memset(context->ps_const_f, 0, device->ps_const_size);
2961     memset(state->ps_const_i, 0, sizeof(state->ps_const_i));
2962     memset(context->ps_const_i, 0, sizeof(context->ps_const_i));
2963     memset(state->ps_const_b, 0, sizeof(state->ps_const_b));
2964     memset(context->ps_const_b, 0, sizeof(context->ps_const_b));
2965     context->zfog = false; /* Guess from wine tests: both true and false are ok */
2966 
2967     /* Cap dependent initial state:
2968      */
2969     context->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
2970 
2971     memcpy(state->rs_advertised, context->rs, sizeof(context->rs));
2972 
2973     /* Set changed flags to initialize driver.
2974      */
2975     context->changed.group = NINE_STATE_ALL;
2976     context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
2977     context->changed.ucp = true;
2978 
2979     context->ff.changed.transform[0] = ~0;
2980     context->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
2981 
2982     if (!is_reset) {
2983         state->viewport.MinZ = context->viewport.MinZ = 0.0f;
2984         state->viewport.MaxZ = context->viewport.MaxZ = 1.0f;
2985     }
2986 
2987     for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
2988         context->changed.sampler[s] = ~0;
2989 
2990     if (!is_reset)
2991         context->dummy_vbo_bound_at = -1;
2992 }
2993 
2994 void
2995 nine_device_state_clear(struct NineDevice9 *device)
2996 {
2997     struct nine_state *state = &device->state;
2998     unsigned i;
2999 
3000     for (i = 0; i < ARRAY_SIZE(state->rt); ++i)
3001        nine_bind(&state->rt[i], NULL);
3002     nine_bind(&state->ds, NULL);
3003     nine_bind(&state->vs, NULL);
3004     nine_bind(&state->ps, NULL);
3005     nine_bind(&state->vdecl, NULL);
3006     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
3007         NineBindBufferToDevice(device,
3008                                (struct NineBuffer9 **)&state->stream[i],
3009                                NULL);
3010     NineBindBufferToDevice(device,
3011                            (struct NineBuffer9 **)&state->idxbuf,
3012                            NULL);
3013 
3014     for (i = 0; i < NINE_MAX_SAMPLERS; ++i)
3015         NineBindTextureToDevice(device, &state->texture[i], NULL);
3016 }
3017 
3018 void
3019 nine_context_clear(struct NineDevice9 *device)
3020 {
3021     struct nine_context *context = &device->context;
3022     struct pipe_context *pipe = context->pipe;
3023     struct cso_context *cso = context->cso;
3024     unsigned i;
3025 
3026     /* Early device ctor failure. Nothing to do */
3027     if (!pipe || !cso)
3028         return;
3029 
3030     context->vtxbuf_mask = 0;
3031 
3032     pipe->bind_vs_state(pipe, NULL);
3033     pipe->bind_fs_state(pipe, NULL);
3034 
3035     /* Don't unbind constant buffers, they're device-private and
3036      * do not change on Reset.
3037      */
3038 
3039     cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL);
3040     cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
3041     context->enabled_sampler_count_vs = 0;
3042     context->enabled_sampler_count_ps = 0;
3043 
3044     pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, 0,
3045                             NINE_MAX_SAMPLERS_VS, false, NULL);
3046     pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0,
3047                             NINE_MAX_SAMPLERS_PS, false, NULL);
3048 
3049     pipe->set_vertex_buffers(pipe, 0, NULL);
3050 
3051     for (i = 0; i < ARRAY_SIZE(context->rt); ++i)
3052        nine_bind(&context->rt[i], NULL);
3053     nine_bind(&context->ds, NULL);
3054     nine_bind(&context->vs, NULL);
3055     nine_bind(&context->ps, NULL);
3056     nine_bind(&context->vdecl, NULL);
3057     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
3058         pipe_vertex_buffer_unreference(&context->vtxbuf[i]);
3059     pipe_resource_reference(&context->idxbuf, NULL);
3060     pipe_resource_reference(&context->pipe_data.cb_vs.buffer, NULL);
3061     pipe_resource_reference(&context->pipe_data.cb_ps.buffer, NULL);
3062 
3063     for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
3064         context->texture[i].enabled = false;
3065         pipe_resource_reference(&context->texture[i].resource,
3066                                 NULL);
3067         pipe_sampler_view_reference(&context->texture[i].view[0],
3068                                     NULL);
3069         pipe_sampler_view_reference(&context->texture[i].view[1],
3070                                     NULL);
3071     }
3072 }
3073 
3074 void
3075 nine_context_update_state(struct NineDevice9 *device)
3076 {
3077     nine_update_state(device);
3078 }
3079 
3080 void
3081 nine_state_init_sw(struct NineDevice9 *device)
3082 {
3083     struct pipe_context *pipe_sw = device->pipe_sw;
3084     struct pipe_rasterizer_state rast;
3085     struct pipe_blend_state blend;
3086     struct pipe_depth_stencil_alpha_state dsa;
3087     struct pipe_framebuffer_state fb;
3088 
3089     /* Only used with Streamout */
3090     memset(&rast, 0, sizeof(rast));
3091     rast.rasterizer_discard = true;
3092     rast.point_quad_rasterization = 1; /* to make llvmpipe happy */
3093     cso_set_rasterizer(device->cso_sw, &rast);
3094 
3095     /* dummy settings */
3096     memset(&blend, 0, sizeof(blend));
3097     memset(&dsa, 0, sizeof(dsa));
3098     memset(&fb, 0, sizeof(fb));
3099     cso_set_blend(device->cso_sw, &blend);
3100     cso_set_depth_stencil_alpha(device->cso_sw, &dsa);
3101     cso_set_framebuffer(device->cso_sw, &fb);
3102     cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false);
3103     cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw));
3104 }
3105 
3106 /* There is duplication with update_vertex_elements.
3107  * TODO: Share the code */
3108 
3109 static int
3110 update_vertex_elements_sw(struct NineDevice9 *device)
3111 {
3112     struct nine_state *state = &device->state;
3113     const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
3114     const struct NineVertexShader9 *vs;
3115     unsigned n, b, i, j;
3116     int index;
3117     int8_t vdecl_index_map[16]; /* vs->num_inputs <= 16 */
3118     int8_t used_streams[device->caps.MaxStreams];
3119     BOOL need_dummy_vbo = false;
3120     int dummy_vbo_stream = -1;
3121     struct cso_velems_state ve;
3122     bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
3123     unsigned vtxbuf_holes_map[PIPE_MAX_ATTRIBS];
3124 
3125     memset(vdecl_index_map, -1, 16);
3126     memset(used_streams, 0, device->caps.MaxStreams);
3127     vs = programmable_vs ? device->state.vs : device->ff.vs;
3128 
3129     if (vdecl) {
3130         for (n = 0; n < vs->num_inputs; ++n) {
3131             DBG("looking up input %u (usage %u) from vdecl(%p)\n",
3132                 n, vs->input_map[n].ndecl, vdecl);
3133 
3134             for (i = 0; i < vdecl->nelems; i++) {
3135                 if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
3136                     vdecl_index_map[n] = i;
3137                     used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
3138                     break;
3139                 }
3140             }
3141             if (vdecl_index_map[n] < 0)
3142                 need_dummy_vbo = true;
3143         }
3144     } else {
3145         /* No vertex declaration. This will likely never happen in practice,
3146          * but we should not crash on it */
3147         need_dummy_vbo = true;
3148     }
3149 
3150     if (need_dummy_vbo) {
3151         for (i = 0; i < device->caps.MaxStreams; i++) {
3152             if (!used_streams[i]) {
3153                 dummy_vbo_stream = i;
3154                 break;
3155             }
3156         }
3157     }
3158     /* there are fewer vertex shader inputs than stream slots,
3159      * so if we need a slot for the dummy vbo, we should have found one */
3160     assert (!need_dummy_vbo || dummy_vbo_stream != -1);
3161 
3162     /* compute vtxbuf_holes_map so vertex_buffer_index matches the packed slots set up by update_vertex_buffers_sw() */
3163     j = 0;
3164     for (i = 0; i < device->caps.MaxStreams; i++) {
3165         if (state->stream[i] || (need_dummy_vbo && dummy_vbo_stream == i)) {
3166             vtxbuf_holes_map[i] = j;
3167             j++;
3168         }
3169     }
3170 
3171     for (n = 0; n < vs->num_inputs; ++n) {
3172         index = vdecl_index_map[n];
3173         if (index >= 0) {
3174             ve.velems[n] = vdecl->elems[index];
3175             b = ve.velems[n].vertex_buffer_index;
3176             ve.velems[n].vertex_buffer_index = vtxbuf_holes_map[b];
3177             /* XXX wine just uses 1 here: */
3178             if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
3179                 ve.velems[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
3180         } else {
3181             /* if the vertex declaration is incomplete compared to what the
3182              * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
3183              * This is not specified by the spec, but it is the behaviour
3184              * observed on Windows */
3185             ve.velems[n].vertex_buffer_index = vtxbuf_holes_map[dummy_vbo_stream];
3186             ve.velems[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
3187             ve.velems[n].src_offset = 0;
3188             ve.velems[n].instance_divisor = 0;
3189             ve.velems[n].dual_slot = false;
3190         }
3191     }
3192 
3193     ve.count = vs->num_inputs;
3194     cso_set_vertex_elements(device->cso_sw, &ve);
3195     return dummy_vbo_stream;
3196 }
3197 
3198 static void
3199 update_vertex_buffers_sw(struct NineDevice9 *device, int dummy_vbo_stream,
3200                          int start_vertice, int num_vertices)
3201 {
3202     struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
3203     struct pipe_context *pipe_sw = device->pipe_sw;
3204     struct nine_state *state = &device->state;
3205     struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
3206     struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
3207     unsigned vtxbuf_count = 0;
3208     unsigned i, j = 0;
3209 
3210     for (i = 0; i < device->caps.MaxStreams; i++) {
3211         if (dummy_vbo_stream == i) {
3212             vbuffer[j].buffer.resource = NULL;
3213             pipe_resource_reference(&vbuffer[j].buffer.resource, device->dummy_vbo_sw);
3214             vbuffer[j].is_user_buffer = false;
3215             vbuffer[j].buffer_offset = 0;
3216             j++;
3217         } else if (state->stream[i]) {
3218             unsigned offset;
3219             struct pipe_resource *buf;
3220             struct pipe_box box;
3221             void *userbuf;
3222 
3223             vbuffer[j] = state->vtxbuf[i];
3224             buf = NineVertexBuffer9_GetResource(state->stream[i], &offset);
3225 
3226             DBG("Locking %p (offset %d, length %d)\n", buf,
3227                 vbuffer[j].buffer_offset, num_vertices * state->vtxstride[i]);
3228 
3229             u_box_1d(vbuffer[j].buffer_offset + offset + start_vertice *
3230                      state->vtxstride[i], num_vertices * state->vtxstride[i], &box);
3231 
3232             userbuf = pipe->buffer_map(pipe, buf, 0, PIPE_MAP_READ, &box,
3233                                        &(sw_internal->transfers_so[i]));
3234             vbuffer[j].is_user_buffer = true;
3235             vbuffer[j].buffer.user = userbuf;
3236 
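            /* If the software pipe cannot consume user pointers directly,
             * copy the mapped range into an uploader-managed buffer instead. */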
3237             if (!device->driver_caps.user_sw_vbufs) {
3238                 vbuffer[j].buffer.resource = NULL;
3239                 vbuffer[j].is_user_buffer = false;
3240                 u_upload_data(device->pipe_sw->stream_uploader,
3241                               0,
3242                               box.width,
3243                               16,
3244                               userbuf,
3245                               &(vbuffer[j].buffer_offset),
3246                               &(vbuffer[j].buffer.resource));
3247                 u_upload_unmap(device->pipe_sw->stream_uploader);
3248             }
3249             j++;
3250         }
3251     }
3252 
3253     vtxbuf_count = j;
3254     pipe_sw->set_vertex_buffers(pipe_sw, vtxbuf_count, vbuffer);
3255 
3256     nine_context_get_pipe_release(device);
3257 }
3258 
3259 static void
3260 update_vs_constants_sw(struct NineDevice9 *device)
3261 {
3262     struct nine_state *state = &device->state;
3263     struct pipe_context *pipe_sw = device->pipe_sw;
3264 
3265     DBG("updating\n");
3266 
3267     {
3268         struct pipe_constant_buffer cb;
3269         const void *buf;
3270 
3271         cb.buffer = NULL;
3272         cb.buffer_offset = 0;
3273         cb.buffer_size = 4096 * sizeof(float[4]);
3274         cb.user_buffer = state->vs_const_f;
3275 
3276         if (state->vs->lconstf.ranges) {
3277             const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
3278             const struct nine_range *r = lconstf->ranges;
3279             unsigned n = 0;
3280             float *dst = device->state.vs_lconstf_temp;
3281             float *src = (float *)cb.user_buffer;
3282             memcpy(dst, src, 8192 * sizeof(float[4]));
3283             while (r) {
3284                 unsigned p = r->bgn;
3285                 unsigned c = r->end - r->bgn;
3286                 memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
3287                 n += c;
3288                 r = r->next;
3289             }
3290             cb.user_buffer = dst;
3291         }
3292 
3293         buf = cb.user_buffer;
3294 
3295         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, false, &cb);
3296         if (cb.buffer)
3297             pipe_resource_reference(&cb.buffer, NULL);
3298 
3299         cb.user_buffer = (int8_t *)buf + 4096 * sizeof(float[4]);
3300 
3301         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, false, &cb);
3302         if (cb.buffer)
3303             pipe_resource_reference(&cb.buffer, NULL);
3304     }
3305 
3306     {
3307         struct pipe_constant_buffer cb;
3308 
3309         cb.buffer = NULL;
3310         cb.buffer_offset = 0;
3311         cb.buffer_size = 2048 * sizeof(float[4]);
3312         cb.user_buffer = state->vs_const_i;
3313 
3314         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, false, &cb);
3315         if (cb.buffer)
3316             pipe_resource_reference(&cb.buffer, NULL);
3317     }
3318 
3319     {
3320         struct pipe_constant_buffer cb;
3321 
3322         cb.buffer = NULL;
3323         cb.buffer_offset = 0;
3324         cb.buffer_size = 512 * sizeof(float[4]);
3325         cb.user_buffer = state->vs_const_b;
3326 
3327         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, false, &cb);
3328         if (cb.buffer)
3329             pipe_resource_reference(&cb.buffer, NULL);
3330     }
3331 
3332     {
3333         struct pipe_constant_buffer cb;
3334         const D3DVIEWPORT9 *vport = &device->state.viewport;
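        /* Two vec4 constants describing the viewport transform: scale
         * (half width, negative half height, depth range) followed by offset
         * (center x, center y, MinZ), presumably consumed by the shader
         * variant bound for the software draw. */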
3335         float viewport_data[8] = {(float)vport->Width * 0.5f,
3336             (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f,
3337             (float)vport->Width * 0.5f + (float)vport->X,
3338             (float)vport->Height * 0.5f + (float)vport->Y,
3339             vport->MinZ, 0.f};
3340 
3341         cb.buffer = NULL;
3342         cb.buffer_offset = 0;
3343         cb.buffer_size = 2 * sizeof(float[4]);
3344         cb.user_buffer = viewport_data;
3345 
3346         {
3347             u_upload_data(device->pipe_sw->const_uploader,
3348                           0,
3349                           cb.buffer_size,
3350                           16,
3351                           cb.user_buffer,
3352                           &(cb.buffer_offset),
3353                           &(cb.buffer));
3354             u_upload_unmap(device->pipe_sw->const_uploader);
3355             cb.user_buffer = NULL;
3356         }
3357 
3358         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, false, &cb);
3359         if (cb.buffer)
3360             pipe_resource_reference(&cb.buffer, NULL);
3361     }
3362 
3363 }
3364 
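/* Set up the software pipe for a ProcessVertices-style draw captured through
 * stream output: bind the matching vertex shader variant, translate the vertex
 * elements and buffers, and upload the vertex shader constants. */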
3365 void
3366 nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out,
3367                            int start_vertice, int num_vertices, struct pipe_stream_output_info *so)
3368 {
3369     struct nine_state *state = &device->state;
3370     bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
3371     struct NineVertexShader9 *vs = programmable_vs ? device->state.vs : device->ff.vs;
3372     int dummy_vbo_stream;
3373 
3374     assert(programmable_vs);
3375 
3376     DBG("Preparing draw\n");
3377     cso_set_vertex_shader_handle(device->cso_sw,
3378                                  NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so));
3379     dummy_vbo_stream = update_vertex_elements_sw(device);
3380     update_vertex_buffers_sw(device, dummy_vbo_stream, start_vertice, num_vertices);
3381     update_vs_constants_sw(device);
3382     DBG("Preparation succeeded\n");
3383 }
3384 
3385 void
3386 nine_state_after_draw_sw(struct NineDevice9 *device)
3387 {
3388     struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
3389     struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
3390     struct pipe_context *pipe_sw = device->pipe_sw;
3391     int i;
3392 
3393     pipe_sw->set_vertex_buffers(pipe_sw, 0, NULL);
3394     for (i = 0; i < 4; i++) {
3395         if (sw_internal->transfers_so[i])
3396             pipe->buffer_unmap(pipe, sw_internal->transfers_so[i]);
3397         sw_internal->transfers_so[i] = NULL;
3398     }
3399     nine_context_get_pipe_release(device);
3400 }
3401 
3402 void
3403 nine_state_destroy_sw(struct NineDevice9 *device)
3404 {
3405     (void) device;
3406     /* Everything is destroyed along with the cso context */
3407 }
3408 
3409 /*
3410 static const DWORD nine_render_states_pixel[] =
3411 {
3412     D3DRS_ALPHABLENDENABLE,
3413     D3DRS_ALPHAFUNC,
3414     D3DRS_ALPHAREF,
3415     D3DRS_ALPHATESTENABLE,
3416     D3DRS_ANTIALIASEDLINEENABLE,
3417     D3DRS_BLENDFACTOR,
3418     D3DRS_BLENDOP,
3419     D3DRS_BLENDOPALPHA,
3420     D3DRS_CCW_STENCILFAIL,
3421     D3DRS_CCW_STENCILPASS,
3422     D3DRS_CCW_STENCILZFAIL,
3423     D3DRS_COLORWRITEENABLE,
3424     D3DRS_COLORWRITEENABLE1,
3425     D3DRS_COLORWRITEENABLE2,
3426     D3DRS_COLORWRITEENABLE3,
3427     D3DRS_DEPTHBIAS,
3428     D3DRS_DESTBLEND,
3429     D3DRS_DESTBLENDALPHA,
3430     D3DRS_DITHERENABLE,
3431     D3DRS_FILLMODE,
3432     D3DRS_FOGDENSITY,
3433     D3DRS_FOGEND,
3434     D3DRS_FOGSTART,
3435     D3DRS_LASTPIXEL,
3436     D3DRS_SCISSORTESTENABLE,
3437     D3DRS_SEPARATEALPHABLENDENABLE,
3438     D3DRS_SHADEMODE,
3439     D3DRS_SLOPESCALEDEPTHBIAS,
3440     D3DRS_SRCBLEND,
3441     D3DRS_SRCBLENDALPHA,
3442     D3DRS_SRGBWRITEENABLE,
3443     D3DRS_STENCILENABLE,
3444     D3DRS_STENCILFAIL,
3445     D3DRS_STENCILFUNC,
3446     D3DRS_STENCILMASK,
3447     D3DRS_STENCILPASS,
3448     D3DRS_STENCILREF,
3449     D3DRS_STENCILWRITEMASK,
3450     D3DRS_STENCILZFAIL,
3451     D3DRS_TEXTUREFACTOR,
3452     D3DRS_TWOSIDEDSTENCILMODE,
3453     D3DRS_WRAP0,
3454     D3DRS_WRAP1,
3455     D3DRS_WRAP10,
3456     D3DRS_WRAP11,
3457     D3DRS_WRAP12,
3458     D3DRS_WRAP13,
3459     D3DRS_WRAP14,
3460     D3DRS_WRAP15,
3461     D3DRS_WRAP2,
3462     D3DRS_WRAP3,
3463     D3DRS_WRAP4,
3464     D3DRS_WRAP5,
3465     D3DRS_WRAP6,
3466     D3DRS_WRAP7,
3467     D3DRS_WRAP8,
3468     D3DRS_WRAP9,
3469     D3DRS_ZENABLE,
3470     D3DRS_ZFUNC,
3471     D3DRS_ZWRITEENABLE
3472 };
3473 */
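/* Bitmask form of the list above: each D3DRS value rs sets bit (rs % 32) of
 * word (rs / 32).  An illustrative membership test (helper name hypothetical):
 *
 *     static inline bool nine_is_pixel_rs(DWORD rs)
 *     {
 *         return nine_render_states_pixel[rs / 32] & (1u << (rs % 32));
 *     }
 */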
3474 const uint32_t nine_render_states_pixel[(NINED3DRS_LAST + 31) / 32] =
3475 {
3476     0x0f99c380, 0x1ff00070, 0x00000000, 0x00000000,
3477     0x000000ff, 0xde01c900, 0x0003ffcf
3478 };
3479 
3480 /*
3481 static const DWORD nine_render_states_vertex[] =
3482 {
3483     D3DRS_ADAPTIVETESS_W,
3484     D3DRS_ADAPTIVETESS_X,
3485     D3DRS_ADAPTIVETESS_Y,
3486     D3DRS_ADAPTIVETESS_Z,
3487     D3DRS_AMBIENT,
3488     D3DRS_AMBIENTMATERIALSOURCE,
3489     D3DRS_CLIPPING,
3490     D3DRS_CLIPPLANEENABLE,
3491     D3DRS_COLORVERTEX,
3492     D3DRS_CULLMODE,
3493     D3DRS_DIFFUSEMATERIALSOURCE,
3494     D3DRS_EMISSIVEMATERIALSOURCE,
3495     D3DRS_ENABLEADAPTIVETESSELLATION,
3496     D3DRS_FOGCOLOR,
3497     D3DRS_FOGDENSITY,
3498     D3DRS_FOGENABLE,
3499     D3DRS_FOGEND,
3500     D3DRS_FOGSTART,
3501     D3DRS_FOGTABLEMODE,
3502     D3DRS_FOGVERTEXMODE,
3503     D3DRS_INDEXEDVERTEXBLENDENABLE,
3504     D3DRS_LIGHTING,
3505     D3DRS_LOCALVIEWER,
3506     D3DRS_MAXTESSELLATIONLEVEL,
3507     D3DRS_MINTESSELLATIONLEVEL,
3508     D3DRS_MULTISAMPLEANTIALIAS,
3509     D3DRS_MULTISAMPLEMASK,
3510     D3DRS_NORMALDEGREE,
3511     D3DRS_NORMALIZENORMALS,
3512     D3DRS_PATCHEDGESTYLE,
3513     D3DRS_POINTSCALE_A,
3514     D3DRS_POINTSCALE_B,
3515     D3DRS_POINTSCALE_C,
3516     D3DRS_POINTSCALEENABLE,
3517     D3DRS_POINTSIZE,
3518     D3DRS_POINTSIZE_MAX,
3519     D3DRS_POINTSIZE_MIN,
3520     D3DRS_POINTSPRITEENABLE,
3521     D3DRS_POSITIONDEGREE,
3522     D3DRS_RANGEFOGENABLE,
3523     D3DRS_SHADEMODE,
3524     D3DRS_SPECULARENABLE,
3525     D3DRS_SPECULARMATERIALSOURCE,
3526     D3DRS_TWEENFACTOR,
3527     D3DRS_VERTEXBLEND
3528 };
3529 */
3530 const uint32_t nine_render_states_vertex[(NINED3DRS_LAST + 31) / 32] =
3531 {
3532     0x30400200, 0x0001007c, 0x00000000, 0x00000000,
3533     0xfd9efb00, 0x01fc34cf, 0x00000000
3534 };
3535 
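/* Maps each D3DRS value to the NINE_STATE_* dirty-group bits that need to be
 * revalidated when that render state changes; NINE_STATE_UNHANDLED marks
 * states nine does not implement. */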
3536 /* TODO: put in the right values */
3537 const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
3538 {
3539     [D3DRS_ZENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
3540     [D3DRS_FILLMODE] = NINE_STATE_RASTERIZER,
3541     [D3DRS_SHADEMODE] = NINE_STATE_RASTERIZER | NINE_STATE_PS_PARAMS_MISC,
3542     [D3DRS_ZWRITEENABLE] = NINE_STATE_DSA,
3543     [D3DRS_ALPHATESTENABLE] = NINE_STATE_DSA,
3544     [D3DRS_LASTPIXEL] = NINE_STATE_RASTERIZER,
3545     [D3DRS_SRCBLEND] = NINE_STATE_BLEND,
3546     [D3DRS_DESTBLEND] = NINE_STATE_BLEND,
3547     [D3DRS_CULLMODE] = NINE_STATE_RASTERIZER,
3548     [D3DRS_ZFUNC] = NINE_STATE_DSA,
3549     [D3DRS_ALPHAREF] = NINE_STATE_DSA,
3550     [D3DRS_ALPHAFUNC] = NINE_STATE_DSA,
3551     [D3DRS_DITHERENABLE] = NINE_STATE_BLEND,
3552     [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND,
3553     [D3DRS_FOGENABLE] = NINE_STATE_FF_SHADER | NINE_STATE_VS_PARAMS_MISC | NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST,
3554     [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING,
3555     [D3DRS_FOGCOLOR] = NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3556     [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_SHADER | NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST,
3557     [D3DRS_FOGSTART] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3558     [D3DRS_FOGEND] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3559     [D3DRS_FOGDENSITY] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3560     [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_SHADER,
3561     [D3DRS_STENCILENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
3562     [D3DRS_STENCILFAIL] = NINE_STATE_DSA,
3563     [D3DRS_STENCILZFAIL] = NINE_STATE_DSA,
3564     [D3DRS_STENCILPASS] = NINE_STATE_DSA,
3565     [D3DRS_STENCILFUNC] = NINE_STATE_DSA,
3566     [D3DRS_STENCILREF] = NINE_STATE_STENCIL_REF,
3567     [D3DRS_STENCILMASK] = NINE_STATE_DSA,
3568     [D3DRS_STENCILWRITEMASK] = NINE_STATE_DSA,
3569     [D3DRS_TEXTUREFACTOR] = NINE_STATE_FF_PS_CONSTS,
3570     [D3DRS_WRAP0] = NINE_STATE_UNHANDLED, /* cylindrical wrap is crazy */
3571     [D3DRS_WRAP1] = NINE_STATE_UNHANDLED,
3572     [D3DRS_WRAP2] = NINE_STATE_UNHANDLED,
3573     [D3DRS_WRAP3] = NINE_STATE_UNHANDLED,
3574     [D3DRS_WRAP4] = NINE_STATE_UNHANDLED,
3575     [D3DRS_WRAP5] = NINE_STATE_UNHANDLED,
3576     [D3DRS_WRAP6] = NINE_STATE_UNHANDLED,
3577     [D3DRS_WRAP7] = NINE_STATE_UNHANDLED,
3578     [D3DRS_CLIPPING] = 0, /* software vertex processing only */
3579     [D3DRS_LIGHTING] = NINE_STATE_FF_LIGHTING,
3580     [D3DRS_AMBIENT] = NINE_STATE_FF_LIGHTING | NINE_STATE_FF_MATERIAL,
3581     [D3DRS_FOGVERTEXMODE] = NINE_STATE_FF_SHADER,
3582     [D3DRS_COLORVERTEX] = NINE_STATE_FF_LIGHTING,
3583     [D3DRS_LOCALVIEWER] = NINE_STATE_FF_LIGHTING,
3584     [D3DRS_NORMALIZENORMALS] = NINE_STATE_FF_SHADER,
3585     [D3DRS_DIFFUSEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3586     [D3DRS_SPECULARMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3587     [D3DRS_AMBIENTMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3588     [D3DRS_EMISSIVEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3589     [D3DRS_VERTEXBLEND] = NINE_STATE_FF_SHADER,
3590     [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
3591     [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER,
3592     [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_PARAMS_MISC,
3593     [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
3594     [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_SHADER,
3595     [D3DRS_POINTSCALE_A] = NINE_STATE_FF_VS_OTHER,
3596     [D3DRS_POINTSCALE_B] = NINE_STATE_FF_VS_OTHER,
3597     [D3DRS_POINTSCALE_C] = NINE_STATE_FF_VS_OTHER,
3598     [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_MULTISAMPLE,
3599     [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
3600     [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
3601     [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
3602     [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_PARAMS_MISC,
3603     [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_SHADER,
3604     [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
3605     [D3DRS_TWEENFACTOR] = NINE_STATE_FF_VS_OTHER,
3606     [D3DRS_BLENDOP] = NINE_STATE_BLEND,
3607     [D3DRS_POSITIONDEGREE] = NINE_STATE_UNHANDLED,
3608     [D3DRS_NORMALDEGREE] = NINE_STATE_UNHANDLED,
3609     [D3DRS_SCISSORTESTENABLE] = NINE_STATE_RASTERIZER,
3610     [D3DRS_SLOPESCALEDEPTHBIAS] = NINE_STATE_RASTERIZER,
3611     [D3DRS_ANTIALIASEDLINEENABLE] = NINE_STATE_RASTERIZER,
3612     [D3DRS_MINTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
3613     [D3DRS_MAXTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
3614     [D3DRS_ADAPTIVETESS_X] = NINE_STATE_UNHANDLED,
3615     [D3DRS_ADAPTIVETESS_Y] = NINE_STATE_UNHANDLED,
3616     [D3DRS_ADAPTIVETESS_Z] = NINE_STATE_UNHANDLED,
3617     [D3DRS_ADAPTIVETESS_W] = NINE_STATE_UNHANDLED,
3618     [D3DRS_ENABLEADAPTIVETESSELLATION] = NINE_STATE_UNHANDLED,
3619     [D3DRS_TWOSIDEDSTENCILMODE] = NINE_STATE_DSA,
3620     [D3DRS_CCW_STENCILFAIL] = NINE_STATE_DSA,
3621     [D3DRS_CCW_STENCILZFAIL] = NINE_STATE_DSA,
3622     [D3DRS_CCW_STENCILPASS] = NINE_STATE_DSA,
3623     [D3DRS_CCW_STENCILFUNC] = NINE_STATE_DSA,
3624     [D3DRS_COLORWRITEENABLE1] = NINE_STATE_BLEND,
3625     [D3DRS_COLORWRITEENABLE2] = NINE_STATE_BLEND,
3626     [D3DRS_COLORWRITEENABLE3] = NINE_STATE_BLEND,
3627     [D3DRS_BLENDFACTOR] = NINE_STATE_BLEND_COLOR,
3628     [D3DRS_SRGBWRITEENABLE] = NINE_STATE_FB,
3629     [D3DRS_DEPTHBIAS] = NINE_STATE_RASTERIZER,
3630     [D3DRS_WRAP8] = NINE_STATE_UNHANDLED, /* cylwrap has to be done via GP */
3631     [D3DRS_WRAP9] = NINE_STATE_UNHANDLED,
3632     [D3DRS_WRAP10] = NINE_STATE_UNHANDLED,
3633     [D3DRS_WRAP11] = NINE_STATE_UNHANDLED,
3634     [D3DRS_WRAP12] = NINE_STATE_UNHANDLED,
3635     [D3DRS_WRAP13] = NINE_STATE_UNHANDLED,
3636     [D3DRS_WRAP14] = NINE_STATE_UNHANDLED,
3637     [D3DRS_WRAP15] = NINE_STATE_UNHANDLED,
3638     [D3DRS_SEPARATEALPHABLENDENABLE] = NINE_STATE_BLEND,
3639     [D3DRS_SRCBLENDALPHA] = NINE_STATE_BLEND,
3640     [D3DRS_DESTBLENDALPHA] = NINE_STATE_BLEND,
3641     [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND
3642 };
3643 
3644 /* Misc */
3645 
3646 static D3DMATRIX nine_state_identity = { .m[0] = { 1, 0, 0, 0 },
3647                                          .m[1] = { 0, 1, 0, 0 },
3648                                          .m[2] = { 0, 0, 1, 0 },
3649                                          .m[3] = { 0, 0, 0, 1 } };
3650 
3651 void
3652 nine_state_resize_transform(struct nine_ff_state *ff_state, unsigned N)
3653 {
3654     unsigned n = ff_state->num_transforms;
3655 
3656     if (N <= n)
3657         return;
3658 
3659     ff_state->transform = REALLOC(ff_state->transform,
3660                                   n * sizeof(D3DMATRIX),
3661                                   N * sizeof(D3DMATRIX));
3662     for (; n < N; ++n)
3663         ff_state->transform[n] = nine_state_identity;
3664     ff_state->num_transforms = N;
3665 }
3666 
3667 D3DMATRIX *
3668 nine_state_access_transform(struct nine_ff_state *ff_state, D3DTRANSFORMSTATETYPE t,
3669                             bool alloc)
3670 {
3671     unsigned index;
3672 
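    /* Dense index mapping: 0 = VIEW, 1 = PROJECTION, 2..9 = TEXTURE0..7,
     * 10 onwards = WORLDMATRIX(0..255). */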
3673     switch (t) {
3674     case D3DTS_VIEW: index = 0; break;
3675     case D3DTS_PROJECTION: index = 1; break;
3676     case D3DTS_TEXTURE0: index = 2; break;
3677     case D3DTS_TEXTURE1: index = 3; break;
3678     case D3DTS_TEXTURE2: index = 4; break;
3679     case D3DTS_TEXTURE3: index = 5; break;
3680     case D3DTS_TEXTURE4: index = 6; break;
3681     case D3DTS_TEXTURE5: index = 7; break;
3682     case D3DTS_TEXTURE6: index = 8; break;
3683     case D3DTS_TEXTURE7: index = 9; break;
3684     default:
3685         if (!(t >= D3DTS_WORLDMATRIX(0) && t <= D3DTS_WORLDMATRIX(255)))
3686             return NULL;
3687         index = 10 + (t - D3DTS_WORLDMATRIX(0));
3688         break;
3689     }
3690 
3691     if (index >= ff_state->num_transforms) {
3692         if (!alloc)
3693             return &nine_state_identity;
3694         nine_state_resize_transform(ff_state, index + 1);
3695     }
3696     return &ff_state->transform[index];
3697 }
3698 
3699 HRESULT
3700 nine_state_set_light(struct nine_ff_state *ff_state, DWORD Index,
3701                      const D3DLIGHT9 *pLight)
3702 {
3703     if (Index >= ff_state->num_lights) {
3704         unsigned n = ff_state->num_lights;
3705         unsigned N = Index + 1;
3706 
3707         ff_state->light = REALLOC(ff_state->light, n * sizeof(D3DLIGHT9),
3708                                                    N * sizeof(D3DLIGHT9));
3709         if (!ff_state->light)
3710             return E_OUTOFMEMORY;
3711         ff_state->num_lights = N;
3712 
3713         for (; n < Index; ++n) {
3714             memset(&ff_state->light[n], 0, sizeof(D3DLIGHT9));
3715             ff_state->light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
3716         }
3717     }
3718     ff_state->light[Index] = *pLight;
3719 
3720     if (pLight->Type == D3DLIGHT_SPOT && pLight->Theta >= pLight->Phi) {
3721         DBG("Warning: clamping D3DLIGHT9.Theta\n");
3722         ff_state->light[Index].Theta = ff_state->light[Index].Phi;
3723     }
3724     return D3D_OK;
3725 }
3726 
3727 HRESULT
3728 nine_state_light_enable(struct nine_ff_state *ff_state,
3729                         DWORD Index, BOOL Enable)
3730 {
3731     unsigned i;
3732 
3733     user_assert(Index < ff_state->num_lights, D3DERR_INVALIDCALL);
3734 
3735     for (i = 0; i < ff_state->num_lights_active; ++i) {
3736         if (ff_state->active_light[i] == Index)
3737             break;
3738     }
3739 
3740     if (Enable) {
3741         if (i < ff_state->num_lights_active)
3742             return D3D_OK;
3743         /* XXX wine thinks this should still succeed:
3744          */
3745         user_assert(i < NINE_MAX_LIGHTS_ACTIVE, D3DERR_INVALIDCALL);
3746 
3747         ff_state->active_light[i] = Index;
3748         ff_state->num_lights_active++;
3749     } else {
3750         if (i == ff_state->num_lights_active)
3751             return D3D_OK;
3752         --ff_state->num_lights_active;
3753         for (; i < ff_state->num_lights_active; ++i)
3754             ff_state->active_light[i] = ff_state->active_light[i + 1];
3755     }
3756 
3757     return D3D_OK;
3758 }
3759 
3760 #define D3DRS_TO_STRING_CASE(n) case D3DRS_##n: return "D3DRS_"#n
3761 const char *nine_d3drs_to_string(DWORD State)
3762 {
3763     switch (State) {
3764     D3DRS_TO_STRING_CASE(ZENABLE);
3765     D3DRS_TO_STRING_CASE(FILLMODE);
3766     D3DRS_TO_STRING_CASE(SHADEMODE);
3767     D3DRS_TO_STRING_CASE(ZWRITEENABLE);
3768     D3DRS_TO_STRING_CASE(ALPHATESTENABLE);
3769     D3DRS_TO_STRING_CASE(LASTPIXEL);
3770     D3DRS_TO_STRING_CASE(SRCBLEND);
3771     D3DRS_TO_STRING_CASE(DESTBLEND);
3772     D3DRS_TO_STRING_CASE(CULLMODE);
3773     D3DRS_TO_STRING_CASE(ZFUNC);
3774     D3DRS_TO_STRING_CASE(ALPHAREF);
3775     D3DRS_TO_STRING_CASE(ALPHAFUNC);
3776     D3DRS_TO_STRING_CASE(DITHERENABLE);
3777     D3DRS_TO_STRING_CASE(ALPHABLENDENABLE);
3778     D3DRS_TO_STRING_CASE(FOGENABLE);
3779     D3DRS_TO_STRING_CASE(SPECULARENABLE);
3780     D3DRS_TO_STRING_CASE(FOGCOLOR);
3781     D3DRS_TO_STRING_CASE(FOGTABLEMODE);
3782     D3DRS_TO_STRING_CASE(FOGSTART);
3783     D3DRS_TO_STRING_CASE(FOGEND);
3784     D3DRS_TO_STRING_CASE(FOGDENSITY);
3785     D3DRS_TO_STRING_CASE(RANGEFOGENABLE);
3786     D3DRS_TO_STRING_CASE(STENCILENABLE);
3787     D3DRS_TO_STRING_CASE(STENCILFAIL);
3788     D3DRS_TO_STRING_CASE(STENCILZFAIL);
3789     D3DRS_TO_STRING_CASE(STENCILPASS);
3790     D3DRS_TO_STRING_CASE(STENCILFUNC);
3791     D3DRS_TO_STRING_CASE(STENCILREF);
3792     D3DRS_TO_STRING_CASE(STENCILMASK);
3793     D3DRS_TO_STRING_CASE(STENCILWRITEMASK);
3794     D3DRS_TO_STRING_CASE(TEXTUREFACTOR);
3795     D3DRS_TO_STRING_CASE(WRAP0);
3796     D3DRS_TO_STRING_CASE(WRAP1);
3797     D3DRS_TO_STRING_CASE(WRAP2);
3798     D3DRS_TO_STRING_CASE(WRAP3);
3799     D3DRS_TO_STRING_CASE(WRAP4);
3800     D3DRS_TO_STRING_CASE(WRAP5);
3801     D3DRS_TO_STRING_CASE(WRAP6);
3802     D3DRS_TO_STRING_CASE(WRAP7);
3803     D3DRS_TO_STRING_CASE(CLIPPING);
3804     D3DRS_TO_STRING_CASE(LIGHTING);
3805     D3DRS_TO_STRING_CASE(AMBIENT);
3806     D3DRS_TO_STRING_CASE(FOGVERTEXMODE);
3807     D3DRS_TO_STRING_CASE(COLORVERTEX);
3808     D3DRS_TO_STRING_CASE(LOCALVIEWER);
3809     D3DRS_TO_STRING_CASE(NORMALIZENORMALS);
3810     D3DRS_TO_STRING_CASE(DIFFUSEMATERIALSOURCE);
3811     D3DRS_TO_STRING_CASE(SPECULARMATERIALSOURCE);
3812     D3DRS_TO_STRING_CASE(AMBIENTMATERIALSOURCE);
3813     D3DRS_TO_STRING_CASE(EMISSIVEMATERIALSOURCE);
3814     D3DRS_TO_STRING_CASE(VERTEXBLEND);
3815     D3DRS_TO_STRING_CASE(CLIPPLANEENABLE);
3816     D3DRS_TO_STRING_CASE(POINTSIZE);
3817     D3DRS_TO_STRING_CASE(POINTSIZE_MIN);
3818     D3DRS_TO_STRING_CASE(POINTSPRITEENABLE);
3819     D3DRS_TO_STRING_CASE(POINTSCALEENABLE);
3820     D3DRS_TO_STRING_CASE(POINTSCALE_A);
3821     D3DRS_TO_STRING_CASE(POINTSCALE_B);
3822     D3DRS_TO_STRING_CASE(POINTSCALE_C);
3823     D3DRS_TO_STRING_CASE(MULTISAMPLEANTIALIAS);
3824     D3DRS_TO_STRING_CASE(MULTISAMPLEMASK);
3825     D3DRS_TO_STRING_CASE(PATCHEDGESTYLE);
3826     D3DRS_TO_STRING_CASE(DEBUGMONITORTOKEN);
3827     D3DRS_TO_STRING_CASE(POINTSIZE_MAX);
3828     D3DRS_TO_STRING_CASE(INDEXEDVERTEXBLENDENABLE);
3829     D3DRS_TO_STRING_CASE(COLORWRITEENABLE);
3830     D3DRS_TO_STRING_CASE(TWEENFACTOR);
3831     D3DRS_TO_STRING_CASE(BLENDOP);
3832     D3DRS_TO_STRING_CASE(POSITIONDEGREE);
3833     D3DRS_TO_STRING_CASE(NORMALDEGREE);
3834     D3DRS_TO_STRING_CASE(SCISSORTESTENABLE);
3835     D3DRS_TO_STRING_CASE(SLOPESCALEDEPTHBIAS);
3836     D3DRS_TO_STRING_CASE(ANTIALIASEDLINEENABLE);
3837     D3DRS_TO_STRING_CASE(MINTESSELLATIONLEVEL);
3838     D3DRS_TO_STRING_CASE(MAXTESSELLATIONLEVEL);
3839     D3DRS_TO_STRING_CASE(ADAPTIVETESS_X);
3840     D3DRS_TO_STRING_CASE(ADAPTIVETESS_Y);
3841     D3DRS_TO_STRING_CASE(ADAPTIVETESS_Z);
3842     D3DRS_TO_STRING_CASE(ADAPTIVETESS_W);
3843     D3DRS_TO_STRING_CASE(ENABLEADAPTIVETESSELLATION);
3844     D3DRS_TO_STRING_CASE(TWOSIDEDSTENCILMODE);
3845     D3DRS_TO_STRING_CASE(CCW_STENCILFAIL);
3846     D3DRS_TO_STRING_CASE(CCW_STENCILZFAIL);
3847     D3DRS_TO_STRING_CASE(CCW_STENCILPASS);
3848     D3DRS_TO_STRING_CASE(CCW_STENCILFUNC);
3849     D3DRS_TO_STRING_CASE(COLORWRITEENABLE1);
3850     D3DRS_TO_STRING_CASE(COLORWRITEENABLE2);
3851     D3DRS_TO_STRING_CASE(COLORWRITEENABLE3);
3852     D3DRS_TO_STRING_CASE(BLENDFACTOR);
3853     D3DRS_TO_STRING_CASE(SRGBWRITEENABLE);
3854     D3DRS_TO_STRING_CASE(DEPTHBIAS);
3855     D3DRS_TO_STRING_CASE(WRAP8);
3856     D3DRS_TO_STRING_CASE(WRAP9);
3857     D3DRS_TO_STRING_CASE(WRAP10);
3858     D3DRS_TO_STRING_CASE(WRAP11);
3859     D3DRS_TO_STRING_CASE(WRAP12);
3860     D3DRS_TO_STRING_CASE(WRAP13);
3861     D3DRS_TO_STRING_CASE(WRAP14);
3862     D3DRS_TO_STRING_CASE(WRAP15);
3863     D3DRS_TO_STRING_CASE(SEPARATEALPHABLENDENABLE);
3864     D3DRS_TO_STRING_CASE(SRCBLENDALPHA);
3865     D3DRS_TO_STRING_CASE(DESTBLENDALPHA);
3866     D3DRS_TO_STRING_CASE(BLENDOPALPHA);
3867     default:
3868         return "(invalid)";
3869     }
3870 }
3871