xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 #include "util/format/u_format.h"
2 #include "util/u_framebuffer.h"
3 #include "util/u_math.h"
4 #include "util/u_viewport.h"
5 
6 #include "nvc0/nvc0_context.h"
7 
/* Program color target <i> as a "null" render target: a 64x0 surface with
 * no backing memory, zero format/tiling and the requested layer count. */
static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
{
   const uint32_t words[9] = {
      0,      /* address high */
      0,      /* address low */
      64,     /* width */
      0,      /* height */
      0,      /* format */
      0,      /* tile mode */
      layers, /* layers */
      0,      /* layer stride */
      0,      /* base layer */
   };
   unsigned w;

   BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
   for (w = 0; w < 9; w++)
      PUSH_DATA(push, words[w]);
}
22 
/* Pack one programmable sample location into the 32-bit word layout that
 * both the shader (INTERP_SAMPLE) and SV_SAMPLE_POS lowering expect.
 *
 * x, y are 4-bit sub-pixel positions (sixteenths of a pixel, 0..15).
 * Bits  8..11: lut[x]   Bits 12..15: x
 * Bits 24..27: lut[y]   Bits 28..31: y
 */
static uint32_t
gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
{
   /* lut[v] == (v + 8) & 0xf: recenters the 4-bit position around zero. */
   static const uint8_t lut[] = {
      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
   uint32_t result = 0;
   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
   result |= (uint32_t)lut[x] << 8 | (uint32_t)lut[y] << 24;
   /* fill in gaps with data in a representation for SV_SAMPLE_POS.
    * Cast before shifting: "y << 28" on the int-promoted operand is
    * signed-overflow undefined behavior for y >= 8 (C11 6.5.7). */
   result |= (uint32_t)x << 12 | (uint32_t)y << 28;
   return result;
}
36 
/* Upload programmable (or default) sample locations for GM200+ chips.
 *
 * The locations are emitted twice: once into the fragment-stage AUX
 * constant buffer in the packed per-pixel/per-sample form produced by
 * gm200_encode_cb_sample_location(), and once into the hardware's packed
 * sample-position registers at method 0x11e0.
 */
static void
gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned grid_width, grid_height, hw_grid_width;
   uint8_t sample_locations[16][2];
   unsigned cb[64];
   unsigned i, pixel, pixel_y, pixel_x, sample;
   uint32_t packed_locations[4] = {};

   /* Size of the pixel grid over which sample locations may vary. */
   screen->base.base.get_sample_pixel_grid(
      &screen->base.base, ms, &grid_width, &grid_height);

   hw_grid_width = grid_width;
   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
      hw_grid_width = 4;

   if (nvc0->sample_locations_enabled) {
      /* User-specified locations: flip to the hardware's Y orientation,
       * then repack from the grid_width layout into the hw_grid_width
       * layout, splitting each byte into 4-bit x / y fixed-point parts. */
      uint8_t locations[2 * 4 * 8];
      memcpy(locations, nvc0->sample_locations, sizeof(locations));
      util_sample_locations_flip_y(
         &screen->base.base, nvc0->framebuffer.height, ms, locations);

      for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
         for (sample = 0; sample < ms; sample++) {
            /* NOTE(review): these intentionally shadow the outer
             * pixel_x/pixel_y declared at function scope. */
            unsigned pixel_x = pixel % hw_grid_width;
            unsigned pixel_y = pixel / hw_grid_width;
            unsigned wi = pixel * ms + sample;
            unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
            ri = ri * ms + sample;
            sample_locations[wi][0] = locations[ri] & 0xf;
            /* The Y nibble is stored flipped (16 - y). */
            sample_locations[wi][1] = 16 - (locations[ri] >> 4);
         }
      }
   } else {
      /* Default locations: replicate the standard ms-sample pattern
       * across all 16 table slots. */
      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
      for (i = 0; i < 16; i++) {
         sample_locations[i][0] = ptr[i % ms][0];
         sample_locations[i][1] = ptr[i % ms][1];
      }
   }

   /* Select the fragment-stage (slot 4) AUX constbuf and upload the
    * encoded table: 8 pixels x 8 samples = 64 words. */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
         for (sample = 0; sample < ms; sample++) {
            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
            unsigned read_index = pixel_y % grid_height * hw_grid_width;
            uint8_t x, y;
            read_index += pixel_x % grid_width;
            read_index = read_index * ms + sample;
            x = sample_locations[read_index][0];
            y = sample_locations[read_index][1];
            cb[write_index] = gm200_encode_cb_sample_location(x, y);
         }
      }
   }
   PUSH_DATAp(push, cb, 64);

   /* Pack the 16 (x, y) nibble pairs into four 32-bit registers. */
   for (i = 0; i < 16; i++) {
      packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
      packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
   }

   BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
   PUSH_DATAp(push, packed_locations, 4);
}
110 
111 static void
nvc0_validate_sample_locations(struct nvc0_context * nvc0,unsigned ms)112 nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
113 {
114    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
115    struct nvc0_screen *screen = nvc0->screen;
116    unsigned i;
117 
118    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
119    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
120    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
121    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
122    BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
123    PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
124    for (i = 0; i < ms; i++) {
125       float xy[2];
126       nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
127       PUSH_DATAf(push, xy[0]);
128       PUSH_DATAf(push, xy[1]);
129    }
130 }
131 
132 static void
validate_sample_locations(struct nvc0_context * nvc0)133 validate_sample_locations(struct nvc0_context *nvc0)
134 {
135    unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);
136 
137    if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
138       gm200_validate_sample_locations(nvc0, ms);
139    else
140       nvc0_validate_sample_locations(nvc0, ms);
141 }
142 
/* Validate framebuffer state: program every color render target and the
 * zeta (depth/stencil) surface, derive the multisample mode, emit the RT
 * control word, and track GPU read->write transitions for serialization. */
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
   unsigned i;
   unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
   unsigned nr_cbufs = fb->nr_cbufs;
   bool serialize = false;

   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);

   /* Clamp all rendering to the framebuffer dimensions. */
   BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
   PUSH_DATA (push, fb->width << 16);
   PUSH_DATA (push, fb->height << 16);

   for (i = 0; i < fb->nr_cbufs; ++i) {
      struct nv50_surface *sf;
      struct nv04_resource *res;
      struct nouveau_bo *bo;

      /* Unbound attachment: program a null RT so the slot stays valid. */
      if (!fb->cbufs[i]) {
         nvc0_fb_set_null_rt(push, i, 0);
         continue;
      }

      sf = nv50_surface(fb->cbufs[i]);
      res = nv04_resource(sf->base.texture);
      bo = res->bo;

      BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
      PUSH_DATAh(push, res->address + sf->offset);
      PUSH_DATA (push, res->address + sf->offset);
      if (likely(nouveau_bo_memtype(bo))) {
         /* Tiled surface: full miptree layout information. */
         struct nv50_miptree *mt = nv50_miptree(sf->base.texture);

         assert(sf->base.texture->target != PIPE_BUFFER);

         PUSH_DATA(push, sf->width);
         PUSH_DATA(push, sf->height);
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, (mt->layout_3d << 16) |
                          mt->level[sf->base.u.tex.level].tile_mode);
         PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
         PUSH_DATA(push, mt->layer_stride >> 2);
         PUSH_DATA(push, sf->base.u.tex.first_layer);

         ms_mode = mt->ms_mode;
      } else {
         /* Linear surface or buffer: pitch-mode RT, single layer. */
         if (res->base.target == PIPE_BUFFER) {
            PUSH_DATA(push, 262144); /* fixed pitch used for buffer RTs */
            PUSH_DATA(push, 1);
         } else {
            PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
            PUSH_DATA(push, sf->height);
         }
         PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
         PUSH_DATA(push, 1 << 12); /* linear layout flag in tile-mode word */
         PUSH_DATA(push, 1);       /* one layer */
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);

         nvc0_resource_fence(nvc0, res, NOUVEAU_BO_WR);

         assert(!fb->zsbuf);
      }

      /* A resource transitioning from GPU-read to GPU-write requires a
       * serialization barrier. */
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* only register for writing, otherwise we'd always serialize here */
      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR);
   }

   if (fb->zsbuf) {
      struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
      struct nv50_surface *sf = nv50_surface(fb->zsbuf);
      /* NOTE(review): unknown bit, set only for plain 2D targets. */
      int unk = mt->base.base.target == PIPE_TEXTURE_2D;

      BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
      PUSH_DATAh(push, mt->base.address + sf->offset);
      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
      PUSH_DATA (push, mt->layer_stride >> 2);
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 1);
      BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
      PUSH_DATA (push, sf->width);
      PUSH_DATA (push, sf->height);
      PUSH_DATA (push, (unk << 16) |
                (sf->base.u.tex.first_layer + sf->depth));
      BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
      PUSH_DATA (push, sf->base.u.tex.first_layer);

      ms_mode = mt->ms_mode;

      if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
         serialize = true;
      mt->base.status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;

      BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR);
   } else {
      BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
      PUSH_DATA (push, 0);
   }

   /* No attachments at all: bind a null RT that still carries the layer
    * and sample counts, so layered/multisample rendering keeps working. */
   if (nr_cbufs == 0 && !fb->zsbuf) {
      assert(util_is_power_of_two_or_zero(fb->samples));
      assert(fb->samples <= 8);

      nvc0_fb_set_null_rt(push, 0, fb->layers);

      if (fb->samples > 1)
         ms_mode = ffs(fb->samples) - 1;
      nr_cbufs = 1;
   }

   /* Identity RT map (octal 76543210) in the high bits, count in the low. */
   BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
   PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);

   if (serialize)
      IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);

   NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
}
273 
274 static void
nvc0_validate_blend_colour(struct nvc0_context * nvc0)275 nvc0_validate_blend_colour(struct nvc0_context *nvc0)
276 {
277    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
278 
279    BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
280    PUSH_DATAf(push, nvc0->blend_colour.color[0]);
281    PUSH_DATAf(push, nvc0->blend_colour.color[1]);
282    PUSH_DATAf(push, nvc0->blend_colour.color[2]);
283    PUSH_DATAf(push, nvc0->blend_colour.color[3]);
284 }
285 
286 static void
nvc0_validate_stencil_ref(struct nvc0_context * nvc0)287 nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
288 {
289     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
290     const uint8_t *ref = &nvc0->stencil_ref.ref_value[0];
291 
292     IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
293     IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
294 }
295 
296 static void
nvc0_validate_stipple(struct nvc0_context * nvc0)297 nvc0_validate_stipple(struct nvc0_context *nvc0)
298 {
299     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
300     unsigned i;
301 
302     BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
303     for (i = 0; i < 32; ++i)
304         PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
305 }
306 
307 static void
nvc0_validate_scissor(struct nvc0_context * nvc0)308 nvc0_validate_scissor(struct nvc0_context *nvc0)
309 {
310    int i;
311    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
312 
313    if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) &&
314       nvc0->rast->pipe.scissor == nvc0->state.scissor)
315       return;
316 
317    if (nvc0->state.scissor != nvc0->rast->pipe.scissor)
318       nvc0->scissors_dirty = (1 << NVC0_MAX_VIEWPORTS) - 1;
319 
320    nvc0->state.scissor = nvc0->rast->pipe.scissor;
321 
322    for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
323       struct pipe_scissor_state *s = &nvc0->scissors[i];
324       if (!(nvc0->scissors_dirty & (1 << i)))
325          continue;
326 
327       BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(i)), 2);
328       if (nvc0->rast->pipe.scissor) {
329          PUSH_DATA(push, (s->maxx << 16) | s->minx);
330          PUSH_DATA(push, (s->maxy << 16) | s->miny);
331       } else {
332          PUSH_DATA(push, (0xffff << 16) | 0);
333          PUSH_DATA(push, (0xffff << 16) | 0);
334       }
335    }
336    nvc0->scissors_dirty = 0;
337 }
338 
/* Program viewport transforms, clip rectangles, depth ranges and (GM200+)
 * viewport swizzles for every viewport marked dirty. */
static void
nvc0_validate_viewport(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint16_t class_3d = nvc0->screen->base.class_3d;
   int x, y, w, h, i;
   float zmin, zmax;

   for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
      struct pipe_viewport_state *vp = &nvc0->viewports[i];

      if (!(nvc0->viewports_dirty & (1 << i)))
         continue;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(i)), 3);
      PUSH_DATAf(push, vp->translate[0]);
      PUSH_DATAf(push, vp->translate[1]);
      PUSH_DATAf(push, vp->translate[2]);

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(i)), 3);
      PUSH_DATAf(push, vp->scale[0]);
      PUSH_DATAf(push, vp->scale[1]);
      PUSH_DATAf(push, vp->scale[2]);

      /* now set the viewport rectangle to viewport dimensions for clipping */

      /* Integer rect derived from translate +/- |scale|, clamped at 0. */
      x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
      y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
      w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
      h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;

      BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, (w << 16) | x);
      PUSH_DATA (push, (h << 16) | y);

      /* If the halfz setting ever changes, the viewports will also get
       * updated. The rast will get updated before the validate function has a
       * chance to hit, so we can just use it directly without an atom
       * dependency.
       */
      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);

      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, zmin);
      PUSH_DATAf(push, zmax);

      /* Viewport swizzles only exist on GM200 and newer. */
      if (class_3d >= GM200_3D_CLASS) {
         BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SWIZZLE(i)), 1);
         PUSH_DATA (push, vp->swizzle_x << 0 |
                          vp->swizzle_y << 4 |
                          vp->swizzle_z << 8 |
                          vp->swizzle_w << 12);
      }
   }
   nvc0->viewports_dirty = 0;
}
395 
396 static void
nvc0_validate_window_rects(struct nvc0_context * nvc0)397 nvc0_validate_window_rects(struct nvc0_context *nvc0)
398 {
399    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
400    bool enable = nvc0->window_rect.rects > 0 || nvc0->window_rect.inclusive;
401    int i;
402 
403    IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_EN), enable);
404    if (!enable)
405       return;
406 
407    IMMED_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), !nvc0->window_rect.inclusive);
408    BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES * 2);
409    for (i = 0; i < nvc0->window_rect.rects; i++) {
410       struct pipe_scissor_state *s = &nvc0->window_rect.rect[i];
411       PUSH_DATA(push, (s->maxx << 16) | s->minx);
412       PUSH_DATA(push, (s->maxy << 16) | s->miny);
413    }
414    for (; i < NVC0_MAX_WINDOW_RECTANGLES; i++) {
415       PUSH_DATA(push, 0);
416       PUSH_DATA(push, 0);
417    }
418 }
419 
420 static inline void
nvc0_upload_uclip_planes(struct nvc0_context * nvc0,unsigned s)421 nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
422 {
423    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
424    struct nvc0_screen *screen = nvc0->screen;
425 
426    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
427    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
428    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
429    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
430    BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
431    PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
432    PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
433 }
434 
435 static inline void
nvc0_check_program_ucps(struct nvc0_context * nvc0,struct nvc0_program * vp,uint8_t mask)436 nvc0_check_program_ucps(struct nvc0_context *nvc0,
437                         struct nvc0_program *vp, uint8_t mask)
438 {
439    const unsigned n = util_logbase2(mask) + 1;
440 
441    if (vp->vp.num_ucps >= n)
442       return;
443    nvc0_program_destroy(nvc0, vp);
444 
445    vp->vp.num_ucps = n;
446    if (likely(vp == nvc0->vertprog))
447       nvc0_vertprog_validate(nvc0);
448    else
449    if (likely(vp == nvc0->gmtyprog))
450       nvc0_gmtyprog_validate(nvc0);
451    else
452       nvc0_tevlprog_validate(nvc0);
453 }
454 
/* Validate user clip planes: determine the last enabled vertex-processing
 * stage, recompile its program if it supports too few UCPs, upload the
 * plane equations, and update the hardware clip enable/mode words. */
static void
nvc0_validate_clip(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp;
   unsigned stage;
   uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;

   /* The stage that feeds the rasterizer owns clip-distance output. */
   if (nvc0->gmtyprog) {
      stage = 3;
      vp = nvc0->gmtyprog;
   } else
   if (nvc0->tevlprog) {
      stage = 2;
      vp = nvc0->tevlprog;
   } else {
      stage = 0;
      vp = nvc0->vertprog;
   }

   if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
      nvc0_check_program_ucps(nvc0, vp, clip_enable);

   /* Re-upload plane equations when either the clip state or this
    * stage's shader changed (stage shifts the VERTPROG dirty bit). */
   if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage)))
      if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
         nvc0_upload_uclip_planes(nvc0, stage);

   /* Restrict to planes the shader actually emits; culled distances are
    * always enabled. */
   clip_enable &= vp->vp.clip_enable;
   clip_enable |= vp->vp.cull_enable;

   if (nvc0->state.clip_enable != clip_enable) {
      nvc0->state.clip_enable = clip_enable;
      IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
   }
   if (nvc0->state.clip_mode != vp->vp.clip_mode) {
      nvc0->state.clip_mode = vp->vp.clip_mode;
      BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
      PUSH_DATA (push, vp->vp.clip_mode);
   }
}
495 
496 static void
nvc0_validate_blend(struct nvc0_context * nvc0)497 nvc0_validate_blend(struct nvc0_context *nvc0)
498 {
499    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
500 
501    PUSH_SPACE(push, nvc0->blend->size);
502    PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
503 }
504 
505 static void
nvc0_validate_zsa(struct nvc0_context * nvc0)506 nvc0_validate_zsa(struct nvc0_context *nvc0)
507 {
508    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
509 
510    PUSH_SPACE(push, nvc0->zsa->size);
511    PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
512 }
513 
514 static void
nvc0_validate_rasterizer(struct nvc0_context * nvc0)515 nvc0_validate_rasterizer(struct nvc0_context *nvc0)
516 {
517    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
518 
519    PUSH_SPACE(push, nvc0->rast->size);
520    PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
521 }
522 
/* (Re)bind all dirty constant buffers of the five 3D shader stages.
 * User constbufs (GL uniforms, always slot 0) are streamed into the
 * screen's uniform_bo; regular UBOs are bound at their GPU address. */
static void
nvc0_constbufs_validate(struct nvc0_context *nvc0)
{
   unsigned s;

   bool can_serialize = true;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   for (s = 0; s < 5; ++s) {
      /* Process one dirty slot at a time, lowest bit first. */
      while (nvc0->constbuf_dirty[s]) {
         int i = ffs(nvc0->constbuf_dirty[s]) - 1;
         nvc0->constbuf_dirty[s] &= ~(1 << i);

         if (nvc0->constbuf[s][i].user) {
            struct nouveau_bo *bo = nvc0->screen->uniform_bo;
            const unsigned base = NVC0_CB_USR_INFO(s);
            const unsigned size = nvc0->constbuf[s][0].size;
            assert(i == 0); /* we really only want OpenGL uniforms here */
            assert(nvc0->constbuf[s][0].u.data);

            /* Bind the shared user-uniform area once per stage. */
            if (!nvc0->state.uniform_buffer_bound[s]) {
               nvc0->state.uniform_buffer_bound[s] = true;

               nvc0_screen_bind_cb_3d(nvc0->screen, push, &can_serialize, s, i,
                                      NVC0_MAX_CONSTBUF_SIZE, bo->offset + base);
            }
            /* Stream the user data into the bound area (count in words). */
            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, NVC0_MAX_CONSTBUF_SIZE,
                         0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
         } else {
            struct nv04_resource *res =
               nv04_resource(nvc0->constbuf[s][i].u.buf);
            if (res) {
               nvc0_screen_bind_cb_3d(nvc0->screen, push, &can_serialize, s, i,
                                      nvc0->constbuf[s][i].size,
                                      res->address + nvc0->constbuf[s][i].offset);

               BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
               res->cb_bindings[s] |= 1 << i;

               /* Slot 0 no longer holds the user-uniform binding. */
               if (i == 0)
                  nvc0->state.uniform_buffer_bound[s] = false;
            } else if (i != 0) {
               /* Explicitly unbind the slot (size -1, address 0). */
               nvc0_screen_bind_cb_3d(nvc0->screen, push, &can_serialize, s, i, -1, 0);
            }
         }
      }
   }

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
      /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
      nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
      nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5];
      nvc0->state.uniform_buffer_bound[5] = false;
   }
}
582 
583 static void
nvc0_validate_buffers(struct nvc0_context * nvc0)584 nvc0_validate_buffers(struct nvc0_context *nvc0)
585 {
586    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
587    struct nvc0_screen *screen = nvc0->screen;
588    int i, s;
589 
590    for (s = 0; s < 5; s++) {
591       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
592       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
593       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
594       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
595       BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
596       PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
597       for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
598          if (nvc0->buffers[s][i].buffer) {
599             struct nv04_resource *res =
600                nv04_resource(nvc0->buffers[s][i].buffer);
601             PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
602             PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
603             PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
604             PUSH_DATA (push, 0);
605             BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR);
606             util_range_add(&res->base, &res->valid_buffer_range,
607                            nvc0->buffers[s][i].buffer_offset,
608                            nvc0->buffers[s][i].buffer_offset +
609                            nvc0->buffers[s][i].buffer_size);
610          } else {
611             PUSH_DATA (push, 0);
612             PUSH_DATA (push, 0);
613             PUSH_DATA (push, 0);
614             PUSH_DATA (push, 0);
615          }
616       }
617    }
618 
619 }
620 
621 static void
nvc0_validate_sample_mask(struct nvc0_context * nvc0)622 nvc0_validate_sample_mask(struct nvc0_context *nvc0)
623 {
624    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
625 
626    unsigned mask[4] =
627    {
628       nvc0->sample_mask & 0xffff,
629       nvc0->sample_mask & 0xffff,
630       nvc0->sample_mask & 0xffff,
631       nvc0->sample_mask & 0xffff
632    };
633 
634    BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
635    PUSH_DATA (push, mask[0]);
636    PUSH_DATA (push, mask[1]);
637    PUSH_DATA (push, mask[2]);
638    PUSH_DATA (push, mask[3]);
639 }
640 
641 static void
nvc0_validate_min_samples(struct nvc0_context * nvc0)642 nvc0_validate_min_samples(struct nvc0_context *nvc0)
643 {
644    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
645    int samples;
646 
647    samples = util_next_power_of_two(nvc0->min_samples);
648    if (samples > 1) {
649       // If we're using the incoming sample mask and doing sample shading, we
650       // have to do sample shading "to the max", otherwise there's no way to
651       // tell which sets of samples are covered by the current invocation.
652       // Similarly for reading the framebuffer.
653       if (nvc0->fragprog && (
654                 nvc0->fragprog->fp.sample_mask_in ||
655                 nvc0->fragprog->fp.reads_framebuffer))
656          samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
657       samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
658    }
659 
660    IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
661 }
662 
663 static void
nvc0_validate_driverconst(struct nvc0_context * nvc0)664 nvc0_validate_driverconst(struct nvc0_context *nvc0)
665 {
666    struct nvc0_screen *screen = nvc0->screen;
667    int i;
668 
669    for (i = 0; i < 5; ++i)
670       nvc0_screen_bind_cb_3d(screen, nvc0->base.pushbuf, NULL, i, 15, NVC0_CB_AUX_SIZE,
671                              screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
672 
673    nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
674 }
675 
676 static void
nvc0_validate_fp_zsa_rast(struct nvc0_context * nvc0)677 nvc0_validate_fp_zsa_rast(struct nvc0_context *nvc0)
678 {
679    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
680    bool rasterizer_discard;
681 
682    if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
683       rasterizer_discard = true;
684    } else {
685       bool zs = nvc0->zsa &&
686          (nvc0->zsa->pipe.depth_enabled || nvc0->zsa->pipe.stencil[0].enabled);
687       rasterizer_discard = !zs &&
688          (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
689    }
690 
691    if (rasterizer_discard != nvc0->state.rasterizer_discard) {
692       nvc0->state.rasterizer_discard = rasterizer_discard;
693       IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
694    }
695 }
696 
697 /* alpha test is disabled if there are no color RTs, so make sure we have at
698  * least one if alpha test is enabled. Note that this must run after
699  * nvc0_validate_fb, otherwise that will override the RT count setting.
700  */
701 static void
nvc0_validate_zsa_fb(struct nvc0_context * nvc0)702 nvc0_validate_zsa_fb(struct nvc0_context *nvc0)
703 {
704    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
705 
706    if (nvc0->zsa && nvc0->zsa->pipe.alpha_enabled &&
707        nvc0->framebuffer.zsbuf &&
708        nvc0->framebuffer.nr_cbufs == 0) {
709       nvc0_fb_set_null_rt(push, 0, 0);
710       BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
711       PUSH_DATA (push, (076543210 << 4) | 1);
712    }
713 }
714 
715 static void
nvc0_validate_rast_fb(struct nvc0_context * nvc0)716 nvc0_validate_rast_fb(struct nvc0_context *nvc0)
717 {
718    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
719    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
720    struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
721 
722    if (!rast)
723       return;
724 
725    if (rast->offset_units_unscaled) {
726       BEGIN_NVC0(push, NVC0_3D(POLYGON_OFFSET_UNITS), 1);
727       if (fb->zsbuf && fb->zsbuf->format == PIPE_FORMAT_Z16_UNORM)
728          PUSH_DATAf(push, rast->offset_units * (1 << 16));
729       else
730          PUSH_DATAf(push, rast->offset_units * (1 << 24));
731    }
732 }
733 
734 
/* Upload the default tessellation levels used when no TCS provides them:
 * four outer levels immediately followed by two inner levels. */
static void
nvc0_validate_tess_state(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
   PUSH_DATAp(push, nvc0->default_tess_outer, 4);
   PUSH_DATAp(push, nvc0->default_tess_inner, 2);
}
744 
745 /* If we have a frag shader bound which tries to read from the framebuffer, we
746  * have to make sure that the fb is bound as a texture in the expected
747  * location. For Fermi, that's in the special driver slot 16, while for Kepler
748  * it's a regular binding stored in the driver constbuf.
749  */
static void
nvc0_validate_fbread(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct pipe_sampler_view *old_view = nvc0->fbtexture;
   struct pipe_sampler_view *new_view = NULL;

   if (nvc0->fragprog &&
       nvc0->fragprog->fp.reads_framebuffer &&
       nvc0->framebuffer.nr_cbufs &&
       nvc0->framebuffer.cbufs[0]) {
      /* Build a 2D-array sampler view matching color attachment 0. */
      struct pipe_sampler_view tmpl = {0};
      struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];

      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
      tmpl.format = sf->format;
      tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
      tmpl.u.tex.first_layer = sf->u.tex.first_layer;
      tmpl.u.tex.last_layer = sf->u.tex.last_layer;
      tmpl.swizzle_r = PIPE_SWIZZLE_X;
      tmpl.swizzle_g = PIPE_SWIZZLE_Y;
      tmpl.swizzle_b = PIPE_SWIZZLE_Z;
      tmpl.swizzle_a = PIPE_SWIZZLE_W;

      /* Bail if it's the same parameters */
      if (old_view && old_view->texture == sf->texture &&
          old_view->format == sf->format &&
          old_view->u.tex.first_level == sf->u.tex.level &&
          old_view->u.tex.first_layer == sf->u.tex.first_layer &&
          old_view->u.tex.last_layer == sf->u.tex.last_layer)
         return;

      new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
   } else if (old_view == NULL) {
      /* No fb-read in the FP and no stale view: nothing to do. */
      return;
   }

   /* Drop the old view; new_view may be NULL if fb-read went away. */
   if (old_view)
      pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
   nvc0->fbtexture = new_view;

   if (new_view) {
      /* Allocate a TIC slot, upload the descriptor and lock the entry. */
      struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
      assert(tic->id < 0);
      tic->id = nvc0_screen_tic_alloc(screen, tic);
      nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
                           NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
      screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      if (screen->base.class_3d >= NVE4_3D_CLASS) {
         /* Kepler+: publish the handle through the driver constbuf. */
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
         PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
         PUSH_DATA (push, (0 << 20) | tic->id);
      } else {
         /* Fermi: bind to the special driver texture slot. */
         BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
         PUSH_DATA (push, (tic->id << 9) | 1);
      }

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }
}
817 
818 static void
nvc0_switch_pipe_context(struct nvc0_context * ctx_to)819 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
820 {
821    struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
822    unsigned s;
823 
824    simple_mtx_assert_locked(&ctx_to->screen->state_lock);
825    if (ctx_from)
826       ctx_to->state = ctx_from->state;
827    else
828       ctx_to->state = ctx_to->screen->save_state;
829 
830    ctx_to->dirty_3d = ~0;
831    ctx_to->dirty_cp = ~0;
832    ctx_to->viewports_dirty = ~0;
833    ctx_to->scissors_dirty = ~0;
834 
835    for (s = 0; s < 6; ++s) {
836       ctx_to->samplers_dirty[s] = ~0;
837       ctx_to->textures_dirty[s] = ~0;
838       ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
839       ctx_to->buffers_dirty[s]  = ~0;
840       ctx_to->images_dirty[s]   = ~0;
841    }
842 
843    /* Reset tfb as the shader that owns it may have been deleted. */
844    ctx_to->state.tfb = NULL;
845 
846    if (!ctx_to->vertex)
847       ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS);
848 
849    if (!ctx_to->vertprog)
850       ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG;
851    if (!ctx_to->fragprog)
852       ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG;
853 
854    if (!ctx_to->blend)
855       ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND;
856    if (!ctx_to->rast)
857       ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR);
858    if (!ctx_to->zsa)
859       ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA;
860 
861    ctx_to->screen->cur_ctx = ctx_to;
862 }
863 
/* Ordered table of 3D state validators: each entry's func runs when any of
 * its NVC0_NEW_3D_* dirty bits is set in the current validation mask.
 * NOTE: entries are executed in table order and several validators depend
 * on earlier ones having run (e.g. framebuffer before zsa/rast combos, and
 * shader stages before nvc0_validate_clip) — do not reorder casually. */
static struct nvc0_state_validate
validate_list_3d[] = {
    { nvc0_validate_fb,            NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_blend,         NVC0_NEW_3D_BLEND },
    { nvc0_validate_zsa,           NVC0_NEW_3D_ZSA },
    { nvc0_validate_sample_mask,   NVC0_NEW_3D_SAMPLE_MASK },
    { nvc0_validate_rasterizer,    NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_blend_colour,  NVC0_NEW_3D_BLEND_COLOUR },
    { nvc0_validate_stencil_ref,   NVC0_NEW_3D_STENCIL_REF },
    { nvc0_validate_stipple,       NVC0_NEW_3D_STIPPLE },
    { nvc0_validate_scissor,       NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_viewport,      NVC0_NEW_3D_VIEWPORT },
    { nvc0_validate_window_rects,  NVC0_NEW_3D_WINDOW_RECTS },
    { nvc0_vertprog_validate,      NVC0_NEW_3D_VERTPROG },
    { nvc0_tctlprog_validate,      NVC0_NEW_3D_TCTLPROG },
    { nvc0_tevlprog_validate,      NVC0_NEW_3D_TEVLPROG },
    { nvc0_validate_tess_state,    NVC0_NEW_3D_TESSFACTOR },
    { nvc0_gmtyprog_validate,      NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES |
                                   NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_fragprog_validate,      NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_fp_zsa_rast,   NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
                                   NVC0_NEW_3D_RASTERIZER },
    { nvc0_validate_zsa_fb,        NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_rast_fb,       NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_validate_clip,          NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER |
                                   NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_constbufs_validate,     NVC0_NEW_3D_CONSTBUF },
    { nvc0_validate_textures,      NVC0_NEW_3D_TEXTURES },
    { nvc0_validate_samplers,      NVC0_NEW_3D_SAMPLERS },
    { nve4_set_tex_handles,        NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
    { nvc0_validate_fbread,        NVC0_NEW_3D_FRAGPROG |
                                   NVC0_NEW_3D_FRAMEBUFFER },
    { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
    { nvc0_validate_surfaces,      NVC0_NEW_3D_SURFACES },
    { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
    { nvc0_tfb_validate,           NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
    { nvc0_layer_validate,         NVC0_NEW_3D_VERTPROG |
                                   NVC0_NEW_3D_TEVLPROG |
                                   NVC0_NEW_3D_GMTYPROG },
    { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
    { validate_sample_locations,   NVC0_NEW_3D_SAMPLE_LOCATIONS |
                                   NVC0_NEW_3D_FRAMEBUFFER},
};
911 
912 bool
nvc0_state_validate(struct nvc0_context * nvc0,uint32_t mask,struct nvc0_state_validate * validate_list,int size,uint32_t * dirty,struct nouveau_bufctx * bufctx)913 nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask,
914                     struct nvc0_state_validate *validate_list, int size,
915                     uint32_t *dirty, struct nouveau_bufctx *bufctx)
916 {
917    uint32_t state_mask;
918    int ret;
919    unsigned i;
920 
921    simple_mtx_assert_locked(&nvc0->screen->state_lock);
922 
923    if (nvc0->screen->cur_ctx != nvc0)
924       nvc0_switch_pipe_context(nvc0);
925 
926    state_mask = *dirty & mask;
927 
928    if (state_mask) {
929       for (i = 0; i < size; ++i) {
930          struct nvc0_state_validate *validate = &validate_list[i];
931 
932          if (state_mask & validate->states)
933             validate->func(nvc0);
934       }
935       *dirty &= ~state_mask;
936 
937       nvc0_bufctx_fence(nvc0, bufctx, false);
938    }
939 
940    nouveau_pushbuf_bufctx(nvc0->base.pushbuf, bufctx);
941    ret = PUSH_VAL(nvc0->base.pushbuf);
942 
943    return !ret;
944 }
945 
946 bool
nvc0_state_validate_3d(struct nvc0_context * nvc0,uint32_t mask)947 nvc0_state_validate_3d(struct nvc0_context *nvc0, uint32_t mask)
948 {
949    bool ret;
950 
951    ret = nvc0_state_validate(nvc0, mask, validate_list_3d,
952                              ARRAY_SIZE(validate_list_3d), &nvc0->dirty_3d,
953                              nvc0->bufctx_3d);
954 
955    if (unlikely(nvc0->state.flushed)) {
956       nvc0->state.flushed = false;
957       nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
958    }
959    return ret;
960 }
961