xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/llvmpipe/lp_setup_rect.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * Setup/binning code for screen-aligned quads.
30  */
31 
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_rast.h"
37 #include "lp_state_fs.h"
38 #include "lp_state_setup.h"
39 
40 
/* Number of float channels stored per shader-input slot. */
#define NUM_CHANNELS 4

/* Presumably a sentinel for a blit state that has not been decided yet
 * (not referenced in this file -- confirm against other users).
 * Parenthesized so the negative literal cannot combine with neighbouring
 * tokens at the point of expansion.
 */
#define UNDETERMINED_BLIT  (-1)
44 
45 
46 static inline int
subpixel_snap(float a)47 subpixel_snap(float a)
48 {
49    return util_iround(FIXED_ONE * a);
50 }
51 
52 
53 /**
54  * Alloc space for a new rectangle plus the input.a0/dadx/dady arrays
55  * immediately after it.
56  * The memory is allocated from the per-scene pool, not per-tile.
57  * \param size  returns number of bytes allocated
58  * \param nr_inputs  number of fragment shader inputs
59  * \return pointer to rectangle space
60  */
61 struct lp_rast_rectangle *
lp_setup_alloc_rectangle(struct lp_scene * scene,unsigned nr_inputs)62 lp_setup_alloc_rectangle(struct lp_scene *scene, unsigned nr_inputs)
63 {
64    unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
65    struct lp_rast_rectangle *rect;
66    unsigned bytes = sizeof(*rect) + (3 * input_array_sz);
67    rect = lp_scene_alloc_aligned(scene, bytes, 16);
68    if (rect == NULL)
69       return NULL;
70 
71    rect->inputs.stride = input_array_sz;
72 
73    return rect;
74 }
75 
76 
77 /**
78  * The rectangle covers the whole tile- shade whole tile.
79  * XXX no rectangle/triangle dependencies in this file - share it with
80  * the same code in lp_setup_tri.c
81  * \param tx, ty  the tile position in tiles, not pixels
82  */
83 bool
lp_setup_whole_tile(struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs,int tx,int ty,bool opaque)84 lp_setup_whole_tile(struct lp_setup_context *setup,
85                     const struct lp_rast_shader_inputs *inputs,
86                     int tx, int ty, bool opaque)
87 {
88    struct lp_scene *scene = setup->scene;
89 
90    LP_COUNT(nr_fully_covered_64);
91 
92    /* if variant is opaque and scissor doesn't effect the tile */
93    if (opaque) {
94       /* Several things prevent this optimization from working:
95        * - For layered rendering we can't determine if this covers the same
96        * layer as previous rendering (or in case of clears those actually
97        * always cover all layers so optimization is impossible). Need to use
98        * fb_max_layer and not setup->layer_slot to determine this since even
99        * if there's currently no slot assigned previous rendering could have
100        * used one.
101        * - If there were any Begin/End query commands in the scene then those
102        * would get removed which would be very wrong. Furthermore, if queries
103        * were just active we also can't do the optimization since to get
104        * accurate query results we unfortunately need to execute the rendering
105        * commands.
106        */
107       if (!scene->fb.zsbuf && scene->fb_max_layer == 0 &&
108           !scene->had_queries) {
109          /*
110           * All previous rendering will be overwritten so reset the bin.
111           */
112          lp_scene_bin_reset(scene, tx, ty);
113       }
114 
115       if (inputs->is_blit) {
116          LP_COUNT(nr_blit_64);
117          return lp_scene_bin_cmd_with_state(scene, tx, ty,
118                                             setup->fs.stored,
119                                             LP_RAST_OP_BLIT,
120                                             lp_rast_arg_inputs(inputs));
121       } else {
122          LP_COUNT(nr_shade_opaque_64);
123          return lp_scene_bin_cmd_with_state(scene, tx, ty,
124                                             setup->fs.stored,
125                                             LP_RAST_OP_SHADE_TILE_OPAQUE,
126                                             lp_rast_arg_inputs(inputs));
127       }
128    } else {
129       LP_COUNT(nr_shade_64);
130       return lp_scene_bin_cmd_with_state(scene, tx, ty,
131                                          setup->fs.stored,
132                                          LP_RAST_OP_SHADE_TILE,
133                                          lp_rast_arg_inputs(inputs));
134    }
135 }
136 
137 
138 bool
lp_setup_is_blit(const struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs)139 lp_setup_is_blit(const struct lp_setup_context *setup,
140                  const struct lp_rast_shader_inputs *inputs)
141 {
142    const struct lp_fragment_shader_variant *variant =
143       setup->fs.current.variant;
144 
145    if (variant->blit) {
146       /*
147        * Detect blits.
148        */
149       const struct lp_jit_texture *texture =
150          &setup->fs.current.jit_resources.textures[0];
151 
152       /* XXX: dadx vs dady confusion below?
153        */
154       const float dsdx = GET_DADX(inputs)[1][0] * texture->width;
155       const float dsdy = GET_DADX(inputs)[1][1] * texture->width;
156       const float dtdx = GET_DADY(inputs)[1][0] * texture->height;
157       const float dtdy = GET_DADY(inputs)[1][1] * texture->height;
158 
159       /*
160        * We don't need to check s0/t0 tolerances
161        * as we establish as pre-condition that there is no
162        * texture filtering.
163        */
164 
165       ASSERTED struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);
166       assert(samp0);
167       assert(samp0->sampler_state.min_img_filter == PIPE_TEX_FILTER_NEAREST);
168       assert(samp0->sampler_state.mag_img_filter == PIPE_TEX_FILTER_NEAREST);
169 
170       /*
171        * Check for 1:1 match of texels to dest pixels
172        */
173 
174       if (util_is_approx(dsdx, 1.0f, 1.0f/LP_MAX_WIDTH) &&
175           util_is_approx(dsdy, 0.0f, 1.0f/LP_MAX_HEIGHT) &&
176           util_is_approx(dtdx, 0.0f, 1.0f/LP_MAX_WIDTH) &&
177           util_is_approx(dtdy, 1.0f, 1.0f/LP_MAX_HEIGHT)) {
178          return true;
179       } else {
180 #if 0
181          debug_printf("dsdx = %f\n", dsdx);
182          debug_printf("dsdy = %f\n", dsdy);
183          debug_printf("dtdx = %f\n", dtdx);
184          debug_printf("dtdy = %f\n", dtdy);
185          debug_printf("\n");
186 #endif
187          return false;
188       }
189    }
190 
191    return false;
192 }
193 
194 
195 static inline void
partial(struct lp_setup_context * setup,const struct lp_rast_rectangle * rect,bool opaque,unsigned ix,unsigned iy,unsigned mask)196 partial(struct lp_setup_context *setup,
197         const struct lp_rast_rectangle *rect,
198         bool opaque,
199         unsigned ix, unsigned iy,
200         unsigned mask) // RECT_PLANE_x bits
201 {
202    if (mask == 0) {
203       assert(rect->box.x0 <= ix * TILE_SIZE);
204       assert(rect->box.y0 <= iy * TILE_SIZE);
205       assert(rect->box.x1 >= (ix+1) * TILE_SIZE - 1);
206       assert(rect->box.y1 >= (iy+1) * TILE_SIZE - 1);
207 
208       lp_setup_whole_tile(setup, &rect->inputs, ix, iy, opaque);
209    } else {
210       LP_COUNT(nr_partially_covered_64);
211       lp_scene_bin_cmd_with_state(setup->scene,
212                                   ix, iy,
213                                   setup->fs.stored,
214                                   LP_RAST_OP_RECTANGLE,
215                                   lp_rast_arg_rectangle(rect));
216    }
217 }
218 
219 
220 /**
221  * Setup/bin a screen-aligned rect.
222  * We need three corner vertices in order to correctly setup
223  * interpolated parameters.  We *could* get away with just the
224  * diagonal vertices but it'd cause ugliness elsewhere.
225  *
226  *   + -------v0
227  *   |        |
228  *  v2 ------ v1
229  *
230  * By an unfortunate mixup between GL and D3D coordinate spaces, half
231  * of this file talks about clockwise rectangles (which were CCW in GL
232  * coordinate space), while the other half prefers to work with D3D
233  * CCW rectangles.
234  */
235 static bool
try_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],bool frontfacing)236 try_rect_cw(struct lp_setup_context *setup,
237             const float (*v0)[4],
238             const float (*v1)[4],
239             const float (*v2)[4],
240             bool frontfacing)
241 {
242    const struct lp_fragment_shader_variant *variant =
243       setup->fs.current.variant;
244    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
245    struct lp_scene *scene = setup->scene;
246 
247    /* x/y positions in fixed point */
248    int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset);
249    int x1 = subpixel_snap(v1[0][0] - setup->pixel_offset);
250    int x2 = subpixel_snap(v2[0][0] - setup->pixel_offset);
251    int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset);
252    int y1 = subpixel_snap(v1[0][1] - setup->pixel_offset);
253    int y2 = subpixel_snap(v2[0][1] - setup->pixel_offset);
254 
255    LP_COUNT(nr_rects);
256 
257    /* Cull clockwise rects without overflowing.
258     */
259    const bool cw = (x2 < x1) ^ (y0 < y2);
260    if (cw) {
261       LP_COUNT(nr_culled_rects);
262       return true;
263    }
264 
265    const float (*pv)[4];
266    if (setup->flatshade_first) {
267       pv = v0;
268    } else {
269       pv = v2;
270    }
271 
272    unsigned viewport_index = 0;
273    if (setup->viewport_index_slot > 0) {
274       unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
275       viewport_index = lp_clamp_viewport_idx(*udata);
276    }
277 
278    unsigned layer = 0;
279    if (setup->layer_slot > 0) {
280       layer = *(unsigned*)pv[setup->layer_slot];
281       layer = MIN2(layer, scene->fb_max_layer);
282    }
283 
284    /* Bounding rectangle (in pixels) */
285    struct u_rect bbox;
286    {
287       /* Yes this is necessary to accurately calculate bounding boxes
288        * with the two fill-conventions we support.  GL (normally) ends
289        * up needing a bottom-left fill convention, which requires
290        * slightly different rounding.
291        */
292       int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
293 
294       bbox.x0 = (MIN3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
295       bbox.x1 = (MAX3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
296       bbox.y0 = (MIN3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
297       bbox.y1 = (MAX3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
298 
299       /* Inclusive coordinates:
300        */
301       bbox.x1--;
302       bbox.y1--;
303    }
304 
305    if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
306       if (0) debug_printf("no intersection\n");
307       LP_COUNT(nr_culled_rects);
308       return true;
309    }
310 
311    u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox);
312 
313    struct lp_rast_rectangle *rect =
314       lp_setup_alloc_rectangle(scene, key->num_inputs);
315    if (!rect)
316       return false;
317 
318 #if MESA_DEBUG
319    rect->v[0][0] = v0[0][0];
320    rect->v[0][1] = v0[0][1];
321    rect->v[1][0] = v1[0][0];
322    rect->v[1][1] = v1[0][1];
323 #endif
324 
325    rect->box.x0 = bbox.x0;
326    rect->box.x1 = bbox.x1;
327    rect->box.y0 = bbox.y0;
328    rect->box.y1 = bbox.y1;
329 
330    /* Setup parameter interpolants:
331     */
332    setup->setup.variant->jit_function(v0,
333                                       v1,
334                                       v2,
335                                       frontfacing,
336                                       GET_A0(&rect->inputs),
337                                       GET_DADX(&rect->inputs),
338                                       GET_DADY(&rect->inputs),
339                                       &setup->setup.variant->key);
340 
341    rect->inputs.frontfacing = frontfacing;
342    rect->inputs.disable = false;
343    rect->inputs.is_blit = lp_setup_is_blit(setup, &rect->inputs);
344    rect->inputs.layer = layer;
345    rect->inputs.viewport_index = viewport_index;
346    rect->inputs.view_index = setup->view_index;
347 
348    return lp_setup_bin_rectangle(setup, rect, variant->opaque);
349 }
350 
351 
352 bool
lp_setup_bin_rectangle(struct lp_setup_context * setup,struct lp_rast_rectangle * rect,bool opaque)353 lp_setup_bin_rectangle(struct lp_setup_context *setup,
354                        struct lp_rast_rectangle *rect,
355                        bool opaque)
356 {
357    struct lp_scene *scene = setup->scene;
358    unsigned left_mask = 0;
359    unsigned right_mask = 0;
360    unsigned top_mask = 0;
361    unsigned bottom_mask = 0;
362 
363    /*
364     * All fields of 'rect' are now set.  The remaining code here is
365     * concerned with binning.
366     */
367 
368    /* Convert to inclusive tile coordinates:
369     */
370    const unsigned ix0 = rect->box.x0 / TILE_SIZE;
371    const unsigned iy0 = rect->box.y0 / TILE_SIZE;
372    const unsigned ix1 = rect->box.x1 / TILE_SIZE;
373    const unsigned iy1 = rect->box.y1 / TILE_SIZE;
374 
375    /*
376     * Clamp to framebuffer size
377     */
378    assert(ix0 == MAX2(ix0, 0));
379    assert(iy0 == MAX2(iy0, 0));
380    assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
381    assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
382 
383    if (ix0 * TILE_SIZE != rect->box.x0)
384       left_mask = RECT_PLANE_LEFT;
385 
386    if (ix1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.x1)
387       right_mask  = RECT_PLANE_RIGHT;
388 
389    if (iy0 * TILE_SIZE != rect->box.y0)
390       top_mask    = RECT_PLANE_TOP;
391 
392    if (iy1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.y1)
393       bottom_mask = RECT_PLANE_BOTTOM;
394 
395    /* Determine which tile(s) intersect the rectangle's bounding box
396     */
397    if (iy0 == iy1 && ix0 == ix1) {
398       partial(setup, rect, opaque, ix0, iy0,
399               (left_mask | right_mask | top_mask | bottom_mask));
400    } else if (ix0 == ix1) {
401       unsigned mask = left_mask | right_mask;
402       partial(setup, rect, opaque, ix0, iy0, mask | top_mask);
403       for (unsigned i = iy0 + 1; i < iy1; i++)
404          partial(setup, rect, opaque, ix0, i, mask);
405       partial(setup, rect, opaque, ix0, iy1, mask | bottom_mask);
406    } else if (iy0 == iy1) {
407       unsigned mask = top_mask | bottom_mask;
408       partial(setup, rect, opaque, ix0, iy0, mask | left_mask);
409       for (unsigned i = ix0 + 1; i < ix1; i++)
410          partial(setup, rect, opaque, i, iy0, mask);
411       partial(setup, rect, opaque, ix1, iy0, mask | right_mask);
412    } else {
413       partial(setup, rect, opaque, ix0, iy0, left_mask  | top_mask);
414       partial(setup, rect, opaque, ix0, iy1, left_mask  | bottom_mask);
415       partial(setup, rect, opaque, ix1, iy0, right_mask | top_mask);
416       partial(setup, rect, opaque, ix1, iy1, right_mask | bottom_mask);
417 
418       /* Top/Bottom fringes
419        */
420       for (unsigned i = ix0 + 1; i < ix1; i++) {
421          partial(setup, rect, opaque, i, iy0, top_mask);
422          partial(setup, rect, opaque, i, iy1, bottom_mask);
423       }
424 
425       /* Left/Right fringes
426        */
427       for (unsigned i = iy0 + 1; i < iy1; i++) {
428          partial(setup, rect, opaque, ix0, i, left_mask);
429          partial(setup, rect, opaque, ix1, i, right_mask);
430       }
431 
432       /* Full interior tiles
433        */
434       for (unsigned j = iy0 + 1; j < iy1; j++) {
435          for (unsigned i = ix0 + 1; i < ix1; i++) {
436             lp_setup_whole_tile(setup, &rect->inputs, i, j, opaque);
437          }
438       }
439    }
440 
441    /* Catch any out-of-memory which occurred during binning.  Do this
442     * once here rather than checking all the return values throughout.
443     */
444    if (lp_scene_is_oom(scene)) {
445       /* Disable rasterization of this partially-binned rectangle.
446        * We'll flush this scene and re-bin the entire rectangle:
447        */
448       rect->inputs.disable = true;
449       return false;
450    }
451 
452    return true;
453 }
454 
455 
456 void
lp_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],bool frontfacing)457 lp_rect_cw(struct lp_setup_context *setup,
458            const float (*v0)[4],
459            const float (*v1)[4],
460            const float (*v2)[4],
461            bool frontfacing)
462 {
463    if (lp_setup_zero_sample_mask(setup)) {
464       if (0) debug_printf("zero sample mask\n");
465       LP_COUNT(nr_culled_rects);
466       return;
467    }
468 
469    if (!try_rect_cw(setup, v0, v1, v2, frontfacing)) {
470       if (!lp_setup_flush_and_restart(setup))
471          return;
472 
473       if (!try_rect_cw(setup, v0, v1, v2, frontfacing))
474          return;
475    }
476 }
477 
478 
479 /**
480  * Take the six vertices for two triangles and try to determine if they
481  * form a screen-aligned quad/rectangle.  If so, draw the rect directly
482  * and return true.  Else, return false.
483  */
484 static bool
do_rect_ccw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4],bool front)485 do_rect_ccw(struct lp_setup_context *setup,
486             const float (*v0)[4],
487             const float (*v1)[4],
488             const float (*v2)[4],
489             const float (*v3)[4],
490             const float (*v4)[4],
491             const float (*v5)[4],
492             bool front)
493 {
494    const float (*rv0)[4], (*rv1)[4], (*rv2)[4], (*rv3)[4];  /* rect verts */
495 
496 #define SAME_POS(A, B)   (A[0][0] == B[0][0] && \
497                           A[0][1] == B[0][1] && \
498                           A[0][2] == B[0][2] && \
499                           A[0][3] == B[0][3])
500 
501    /* Only need to consider CCW orientations.  There are nine ways
502     * that two counter-clockwise triangles can join up:
503     */
504    if (SAME_POS(v0, v3)) {
505       if (SAME_POS(v2, v4)) {
506          /*
507           *    v5   v4/v2
508           *     +-----+
509           *     |   / |
510           *     |  /  |
511           *     | /   |
512           *     +-----+
513           *   v3/v0   v1
514           */
515          rv0 = v5;
516          rv1 = v0;
517          rv2 = v1;
518          rv3 = v2;
519       } else if (SAME_POS(v1, v5)) {
520          /*
521           *    v4   v3/v0
522           *     +-----+
523           *     |   / |
524           *     |  /  |
525           *     | /   |
526           *     +-----+
527           *   v5/v1   v2
528           */
529          rv0 = v4;
530          rv1 = v1;
531          rv2 = v2;
532          rv3 = v0;
533       } else {
534          return false;
535       }
536    } else if (SAME_POS(v0, v5)) {
537       if (SAME_POS(v2, v3)) {
538          /*
539           *    v4   v3/v2
540           *     +-----+
541           *     |   / |
542           *     |  /  |
543           *     | /   |
544           *     +-----+
545           *   v5/v0   v1
546           */
547          rv0 = v4;
548          rv1 = v0;
549          rv2 = v1;
550          rv3 = v2;
551       } else if (SAME_POS(v1, v4)) {
552          /*
553           *    v3   v5/v0
554           *     +-----+
555           *     |   / |
556           *     |  /  |
557           *     | /   |
558           *     +-----+
559           *   v4/v1   v2
560           */
561          rv0 = v3;
562          rv1 = v1;
563          rv2 = v2;
564          rv3 = v0;
565       } else {
566          return false;
567       }
568    } else if (SAME_POS(v0, v4)) {
569       if (SAME_POS(v2, v5)) {
570          /*
571           *    v3   v5/v2
572           *     +-----+
573           *     |   / |
574           *     |  /  |
575           *     | /   |
576           *     +-----+
577           *   v4/v0   v1
578           */
579          rv0 = v3;
580          rv1 = v0;
581          rv2 = v1;
582          rv3 = v2;
583       } else if (SAME_POS(v1, v3)) {
584          /*
585           *    v5   v4/v0
586           *     +-----+
587           *     |   / |
588           *     |  /  |
589           *     | /   |
590           *     +-----+
591           *   v3/v1   v2
592           */
593          rv0 = v5;
594          rv1 = v1;
595          rv2 = v2;
596          rv3 = v0;
597       } else {
598          return false;
599       }
600    } else if (SAME_POS(v2, v3)) {
601       if (SAME_POS(v1, v4)) {
602          /*
603           *    v5   v4/v1
604           *     +-----+
605           *     |   / |
606           *     |  /  |
607           *     | /   |
608           *     +-----+
609           *   v3/v2   v0
610           */
611          rv0 = v5;
612          rv1 = v2;
613          rv2 = v0;
614          rv3 = v1;
615       } else {
616          return false;
617       }
618    } else if (SAME_POS(v2, v5)) {
619       if (SAME_POS(v1, v3)) {
620          /*
621           *    v4   v3/v1
622           *     +-----+
623           *     |   / |
624           *     |  /  |
625           *     | /   |
626           *     +-----+
627           *   v5/v2   v0
628           */
629          rv0 = v4;
630          rv1 = v2;
631          rv2 = v0;
632          rv3 = v1;
633       } else {
634          return false;
635       }
636    } else if (SAME_POS(v2, v4)) {
637       if (SAME_POS(v1, v5)) {
638          /*
639           *    v3   v5/v1
640           *     +-----+
641           *     |   / |
642           *     |  /  |
643           *     | /   |
644           *     +-----+
645           *   v4/v2   v0
646           */
647          rv0 = v3;
648          rv1 = v2;
649          rv2 = v0;
650          rv3 = v1;
651       } else {
652          return false;
653       }
654    } else {
655       return false;
656    }
657 
658 #define SAME_X(A, B)   (A[0][0] == B[0][0])
659 #define SAME_Y(A, B)   (A[0][1] == B[0][1])
660 
661    /* The vertices are now counter clockwise, as such:
662     *
663     *  rv0 -------rv3
664     *    |        |
665     *  rv1 ------ rv2
666     *
667     * To render as a rectangle,
668     *   * The X values should be the same at v0, v1 and v2, v3.
669     *   * The Y values should be the same at v0, v3 and v1, v2.
670     */
671    if (SAME_Y(rv0, rv1)) {
672       const float (*tmp)[4];
673       tmp = rv0;
674       rv0 = rv1;
675       rv1 = rv2;
676       rv2 = rv3;
677       rv3 = tmp;
678    }
679 
680    if (SAME_X(rv0, rv1) && SAME_X(rv2, rv3) &&
681        SAME_Y(rv0, rv3) && SAME_Y(rv1, rv2)) {
682       /* We have a rectangle */
683 
684       /* Check that all vertex W components are equal.  When we divide by W in
685        * lp_linear_init_interp() we assume all vertices have the same W value.
686        */
687       const float v0_w = rv0[0][3];
688       if (rv1[0][3] != v0_w ||
689           rv2[0][3] != v0_w ||
690           rv3[0][3] != v0_w) {
691          return false;
692       }
693 
694       const struct lp_setup_variant_key *key = &setup->setup.variant->key;
695       const unsigned n = key->num_inputs;
696 
697       /* Check that the other attributes are coplanar */
698       for (unsigned i = 0; i < n; i++) {
699          for (unsigned j = 0; j < 4; j++) {
700             if (key->inputs[i].usage_mask & (1<<j)) {
701                unsigned k = key->inputs[i].src_index;
702                float dxdx1, dxdx2, dxdy1, dxdy2;
703                dxdx1 = rv0[k][j] - rv3[k][j];
704                dxdx2 = rv1[k][j] - rv2[k][j];
705                dxdy1 = rv0[k][j] - rv1[k][j];
706                dxdy2 = rv3[k][j] - rv2[k][j];
707                if (dxdx1 != dxdx2 ||
708                    dxdy1 != dxdy2) {
709                   return false;
710                }
711             }
712          }
713       }
714 
715       /* Note we're changing to clockwise here.  Fix this by reworking
716        * lp_rect_cw to expect/operate on ccw rects.  Note that
717        * function was previously misnamed.
718        */
719       lp_rect_cw(setup, rv0, rv2, rv1, front);
720       return true;
721    } else {
722       /* setup->quad(setup, rv0, rv1, rv2, rv3); */
723    }
724 
725    return false;
726 }
727 
728 
/* Orientation of a triangle as classified by winding(). */
enum winding {
   WINDING_NONE = 0,
   WINDING_CCW,
   WINDING_CW
};


/**
 * Classify a triangle from the sign of the z component of the cross
 * product of its edge vectors (v0 - v2) and (v1 - v2), using only the
 * x/y position components.
 * \return WINDING_CCW for a negative determinant, WINDING_CW for a
 *         positive one and WINDING_NONE otherwise
 */
static inline enum winding
winding(const float (*v0)[4],
        const float (*v1)[4],
        const float (*v2)[4])
{
   /* Edges from v2 to v0 and from v2 to v1 */
   const float edge0_x = v0[0][0] - v2[0][0];
   const float edge0_y = v0[0][1] - v2[0][1];
   const float edge1_x = v1[0][0] - v2[0][0];
   const float edge1_y = v1[0][1] - v2[0][1];

   /* z component of cross(edge0, edge1) */
   const float det = edge0_x * edge1_y - edge0_y * edge1_x;

   /* Neither comparison holds for a zero (or NaN) determinant. */
   return (det < 0.0f) ? WINDING_CCW
        : (det > 0.0f) ? WINDING_CW
        :                WINDING_NONE;
}
757 
758 
759 static bool
setup_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])760 setup_rect_cw(struct lp_setup_context *setup,
761               const float (*v0)[4],
762               const float (*v1)[4],
763               const float (*v2)[4],
764               const float (*v3)[4],
765               const float (*v4)[4],
766               const float (*v5)[4])
767 {
768    enum winding winding0 = winding(v0, v1, v2);
769    enum winding winding1 = winding(v3, v4, v5);
770 
771    if (winding0 == WINDING_CW &&
772        winding1 == WINDING_CW) {
773       return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
774    } else if (winding0 == WINDING_CW) {
775       setup->triangle(setup, v0, v1, v2);
776       return true;
777    } else if (winding1 == WINDING_CW) {
778       setup->triangle(setup, v3, v4, v5);
779       return true;
780    } else {
781       return true;
782    }
783 }
784 
785 
786 static bool
setup_rect_ccw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])787 setup_rect_ccw(struct lp_setup_context *setup,
788                const float (*v0)[4],
789                const float (*v1)[4],
790                const float (*v2)[4],
791                const float (*v3)[4],
792                const float (*v4)[4],
793                const float (*v5)[4])
794 {
795    enum winding winding0 = winding(v0, v1, v2);
796    enum winding winding1 = winding(v3, v4, v5);
797 
798    if (winding0 == WINDING_CCW &&
799        winding1 == WINDING_CCW) {
800       return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
801    } else if (winding0 == WINDING_CCW) {
802       setup->triangle(setup, v0, v1, v2);
803       return true;
804    } else if (winding1 == WINDING_CCW) {
805       return false;
806       setup->triangle(setup, v3, v4, v5);
807       return true;
808    } else {
809       return true;
810    }
811 }
812 
813 
/**
 * Rect entry point that draws nothing and always reports the rect as
 * handled.
 */
static bool
setup_rect_noop(struct lp_setup_context *setup,
                const float (*v0)[4],
                const float (*v1)[4],
                const float (*v2)[4],
                const float (*v3)[4],
                const float (*v4)[4],
                const float (*v5)[4])
{
   /* None of the arguments are needed. */
   (void) setup;
   (void) v0;
   (void) v1;
   (void) v2;
   (void) v3;
   (void) v4;
   (void) v5;

   return true;
}
825 
826 
827 /*
828  * Return true if the rect is handled here, else return false indicating
829  * the caller should render with triangles instead.
830  */
831 static bool
setup_rect_both(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])832 setup_rect_both(struct lp_setup_context *setup,
833                 const float (*v0)[4],
834                 const float (*v1)[4],
835                 const float (*v2)[4],
836                 const float (*v3)[4],
837                 const float (*v4)[4],
838                 const float (*v5)[4])
839 {
840    enum winding winding0 = winding(v0, v1, v2);
841    enum winding winding1 = winding(v3, v4, v5);
842 
843    if (winding0 != winding1) {
844       /* If we knew that the "front" parameter wasn't going to be
845        * referenced, could rearrange one of the two triangles such
846        * that they were both CCW.  Aero actually does send mixed
847        * CW/CCW rectangles under some circumstances, but we catch them
848        * explicitly.
849        */
850       return false;
851    } else if (winding0 == WINDING_CCW) {
852       return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
853    } else if (winding0 == WINDING_CW) {
854       return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
855    } else {
856       return true;
857    }
858 }
859 
860 
861 void
lp_setup_choose_rect(struct lp_setup_context * setup)862 lp_setup_choose_rect(struct lp_setup_context *setup)
863 {
864    if (setup->rasterizer_discard) {
865       setup->rect = setup_rect_noop;
866       return;
867    }
868 
869    switch (setup->cullmode) {
870    case PIPE_FACE_NONE:
871       setup->rect = setup_rect_both;
872       break;
873    case PIPE_FACE_BACK:
874       setup->rect = setup->ccw_is_frontface ? setup_rect_ccw : setup_rect_cw;
875       break;
876    case PIPE_FACE_FRONT:
877       setup->rect = setup->ccw_is_frontface ? setup_rect_cw : setup_rect_ccw;
878       break;
879    default:
880       setup->rect = setup_rect_noop;
881       break;
882    }
883 }
884