/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * Copyright 2012 Marek Olšák <[email protected]>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * This converts the VBO's vertex attribute/array information into
 * Gallium vertex state and binds it.
 *
 * Authors:
 *   Keith Whitwell <[email protected]>
 *   Marek Olšák <[email protected]>
 */

#include "st_context.h"
#include "st_atom.h"
#include "st_draw.h"
#include "st_program.h"

#include "cso_cache/cso_context.h"
#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include "util/u_upload_mgr.h"
#include "util/u_threaded_context.h"
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
#include "main/arrayobj.h"
enum st_fill_tc_set_vb {
   FILL_TC_SET_VB_OFF,        /* always works */
   FILL_TC_SET_VB_ON,         /* specialized version (faster) */
};

enum st_use_vao_fast_path {
   VAO_FAST_PATH_OFF,         /* more complicated version (slower) */
   VAO_FAST_PATH_ON,          /* always works (faster) */
};

enum st_allow_zero_stride_attribs {
   ZERO_STRIDE_ATTRIBS_OFF,   /* specialized version (faster) */
   ZERO_STRIDE_ATTRIBS_ON,    /* always works */
};

/* Whether vertex attrib indices are equal to their vertex buffer indices. */
enum st_identity_attrib_mapping {
   IDENTITY_ATTRIB_MAPPING_OFF,  /* always works */
   IDENTITY_ATTRIB_MAPPING_ON,   /* specialized version (faster) */
};

enum st_allow_user_buffers {
   USER_BUFFERS_OFF,          /* specialized version (faster) */
   USER_BUFFERS_ON,           /* always works */
};

enum st_update_velems {
   UPDATE_VELEMS_OFF,         /* specialized version (faster) */
   UPDATE_VELEMS_ON,          /* always works */
};
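
/* Each enum above is a two-valued template parameter: the compiler emits a
 * specialized variant of the array-update code for every combination, so
 * branches on these values are resolved at compile time. A dispatch table
 * further below selects the matching variant at state-update time.
 */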

/* Always inline the non-64bit element code, so that the compiler can see
 * that velements is on the stack.
 */
static void ALWAYS_INLINE
init_velement(struct pipe_vertex_element *velements,
              const struct gl_vertex_format *vformat,
              int src_offset, unsigned src_stride,
              unsigned instance_divisor,
              int vbo_index, bool dual_slot, int idx)
{
   velements[idx].src_offset = src_offset;
   velements[idx].src_stride = src_stride;
   velements[idx].src_format = vformat->_PipeFormat;
   velements[idx].instance_divisor = instance_divisor;
   velements[idx].vertex_buffer_index = vbo_index;
   velements[idx].dual_slot = dual_slot;
   assert(velements[idx].src_format);
}

/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
 * on the stack.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_use_vao_fast_path USE_VAO_FAST_PATH,
         st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
         st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
         st_allow_user_buffers ALLOW_USER_BUFFERS,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
setup_arrays(struct gl_context *ctx,
             const struct gl_vertex_array_object *vao,
             const GLbitfield dual_slot_inputs,
             const GLbitfield inputs_read,
             GLbitfield mask,
             struct cso_velems_state *velements,
             struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   /* Set up enabled vertex arrays. */
   if (USE_VAO_FAST_PATH) {
      const GLubyte *attribute_map =
         !HAS_IDENTITY_ATTRIB_MAPPING ?
               _mesa_vao_attribute_map[vao->_AttributeMapMode] : NULL;
      struct pipe_context *pipe = ctx->pipe;
      struct tc_buffer_list *next_buffer_list = NULL;

      if (FILL_TC_SET_VB)
         next_buffer_list = tc_get_next_buffer_list(pipe);

      /* Note: I did try to unroll this loop by passing the number of
       * iterations as a template parameter, but it resulted in more overhead.
       */
      while (mask) {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&mask);
         const struct gl_array_attributes *attrib;
         const struct gl_vertex_buffer_binding *binding;

         if (HAS_IDENTITY_ATTRIB_MAPPING) {
            attrib = &vao->VertexAttrib[attr];
            binding = &vao->BufferBinding[attr];
         } else {
            attrib = &vao->VertexAttrib[attribute_map[attr]];
            binding = &vao->BufferBinding[attrib->BufferBindingIndex];
         }
         const unsigned bufidx = (*num_vbuffers)++;

         /* Set the vertex buffer. */
         if (!ALLOW_USER_BUFFERS || binding->BufferObj) {
            assert(binding->BufferObj);
            struct pipe_resource *buf =
               _mesa_get_bufferobj_reference(ctx, binding->BufferObj);
            vbuffer[bufidx].buffer.resource = buf;
            vbuffer[bufidx].is_user_buffer = false;
            vbuffer[bufidx].buffer_offset = binding->Offset +
                                            attrib->RelativeOffset;
            if (FILL_TC_SET_VB)
               tc_track_vertex_buffer(pipe, bufidx, buf, next_buffer_list);
         } else {
            vbuffer[bufidx].buffer.user = attrib->Ptr;
            vbuffer[bufidx].is_user_buffer = true;
            vbuffer[bufidx].buffer_offset = 0;
            assert(!FILL_TC_SET_VB);
         }

         if (!UPDATE_VELEMS)
            continue;

         /* Determine the vertex element index. If !ALLOW_ZERO_STRIDE_ATTRIBS,
          * we don't need to leave any holes for zero-stride attribs, so the
          * mapping from vertex elements to vertex buffers is the identity
          * and popcnt isn't needed.
          */
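         /* Worked example: if inputs_read = POS|NORMAL|GENERIC0 and
          * attr == GENERIC0, then popcount(inputs_read &
          * BITFIELD_MASK(GENERIC0)) == 2, i.e. GENERIC0 becomes the third
          * vertex element, following its two lower-numbered inputs.
          */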
         unsigned index;

         if (ALLOW_ZERO_STRIDE_ATTRIBS) {
            assert(POPCNT != POPCNT_INVALID);
            index = util_bitcount_fast<POPCNT>(inputs_read &
                                               BITFIELD_MASK(attr));
         } else {
            index = bufidx;
            assert(index == util_bitcount(inputs_read &
                                          BITFIELD_MASK(attr)));
         }

         /* Set the vertex element. */
         init_velement(velements->velems, &attrib->Format, 0, binding->Stride,
                       binding->InstanceDivisor, bufidx,
                       dual_slot_inputs & BITFIELD_BIT(attr), index);
      }
      return;
   }

   /* The slow path needs more VAO fields initialized, and that
    * initialization is skipped when the slow path is disabled.
    */
   assert(!ctx->Const.UseVAOFastPath || vao->SharedAndImmutable);

   /* Require these because we don't use them here and we don't want to
    * generate identical template variants.
    */
   assert(!FILL_TC_SET_VB);
   assert(ALLOW_ZERO_STRIDE_ATTRIBS);
   assert(!HAS_IDENTITY_ATTRIB_MAPPING);
   assert(ALLOW_USER_BUFFERS);
   assert(UPDATE_VELEMS);

   while (mask) {
      /* The attribute index to start pulling a binding */
      const gl_vert_attrib i = (gl_vert_attrib)(ffs(mask) - 1);
      const struct gl_vertex_buffer_binding *const binding
         = _mesa_draw_buffer_binding(vao, i);
      const unsigned bufidx = (*num_vbuffers)++;

      if (binding->BufferObj) {
         /* Set the binding */
         vbuffer[bufidx].buffer.resource =
            _mesa_get_bufferobj_reference(ctx, binding->BufferObj);
         vbuffer[bufidx].is_user_buffer = false;
         vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
      } else {
         /* Set the binding */
         const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
         vbuffer[bufidx].buffer.user = ptr;
         vbuffer[bufidx].is_user_buffer = true;
         vbuffer[bufidx].buffer_offset = 0;
      }

      const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
      GLbitfield attrmask = mask & boundmask;
      /* Mark those attributes as processed */
      mask &= ~boundmask;
      /* We can assume that we have an array for the binding */
      assert(attrmask);

      /* Walk attributes belonging to the binding */
      do {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&attrmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_array_attrib(vao, attr);
         const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
         assert(POPCNT != POPCNT_INVALID);

         init_velement(velements->velems, &attrib->Format, off,
                       binding->Stride, binding->InstanceDivisor, bufidx,
                       dual_slot_inputs & BITFIELD_BIT(attr),
                       util_bitcount_fast<POPCNT>(inputs_read &
                                                  BITFIELD_MASK(attr)));
      } while (attrmask);
   }
}

/* Only used by the select/feedback mode. */
void
st_setup_arrays(struct st_context *st,
                const struct gl_vertex_program *vp,
                const struct st_common_variant *vp_variant,
                struct cso_velems_state *velements,
                struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);

   setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_ON,
                ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                USER_BUFFERS_ON, UPDATE_VELEMS_ON>
      (ctx, ctx->Array._DrawVAO, vp->Base.DualSlotInputs,
       vp_variant->vert_attrib_mask,
       vp_variant->vert_attrib_mask & enabled_arrays,
       velements, vbuffer, num_vbuffers);
}

/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
 * on the stack.
 *
 * This uploads the current (zero-stride) attribs into a single new vertex
 * buffer.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_setup_current(struct st_context *st,
                 const GLbitfield dual_slot_inputs,
                 const GLbitfield inputs_read,
                 GLbitfield curmask,
                 struct cso_velems_state *velements,
                 struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   /* Process values that would have been better as uniforms in the application */
   if (curmask) {
      struct gl_context *ctx = st->ctx;
      assert(POPCNT != POPCNT_INVALID);
      unsigned num_attribs = util_bitcount_fast<POPCNT>(curmask);
      unsigned num_dual_attribs = util_bitcount_fast<POPCNT>(curmask &
                                                             dual_slot_inputs);
      /* num_attribs includes num_dual_attribs, so adding num_dual_attribs
       * doubles the size of those attribs.
       */
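      /* 16 bytes is the size of one vec4 slot of 32-bit components, the
       * largest single-slot attrib; a dual-slot (64-bit) attrib occupies two
       * such slots.
       */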
      unsigned max_size = (num_attribs + num_dual_attribs) * 16;

      const unsigned bufidx = (*num_vbuffers)++;
      vbuffer[bufidx].is_user_buffer = false;
      vbuffer[bufidx].buffer.resource = NULL;
      /* vbuffer[bufidx].buffer_offset is set below */

      /* Use const_uploader for zero-stride vertex attributes, because
       * it may use a better memory placement than stream_uploader.
       * The reason is that zero-stride attributes can be fetched many
       * times (thousands of times), so a better placement is going to
       * perform better.
       */
      struct u_upload_mgr *uploader = st->can_bind_const_buffer_as_vertex ?
                                      st->pipe->const_uploader :
                                      st->pipe->stream_uploader;
      uint8_t *ptr = NULL;

      u_upload_alloc(uploader, 0, max_size, 16,
                     &vbuffer[bufidx].buffer_offset,
                     &vbuffer[bufidx].buffer.resource, (void**)&ptr);
      uint8_t *cursor = ptr;

      if (FILL_TC_SET_VB) {
         struct pipe_context *pipe = ctx->pipe;
         tc_track_vertex_buffer(pipe, bufidx, vbuffer[bufidx].buffer.resource,
                                tc_get_next_buffer_list(pipe));
      }

      do {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_current_attrib(ctx, attr);
         const unsigned size = attrib->Format._ElementSize;

         /* When the current attribs are set (e.g. via glColor3ub or
          * glVertexAttrib2s), they are always converted to float32 or int32,
          * or to dual slots of 2x int32, so they are always dword-aligned.
          * glBegin/End behaves in the same way. It's really an internal Mesa
          * inefficiency that happens to be convenient here, which is why
          * this assertion always holds.
          */
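         /* e.g. the current color set by glColor3ub() is stored as four
          * float32s, so size == 16 here.
          */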
         assert(size % 4 == 0); /* assume a hw-friendly alignment */
         memcpy(cursor, attrib->Ptr, size);

         if (UPDATE_VELEMS) {
            init_velement(velements->velems, &attrib->Format, cursor - ptr,
                          0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
                          util_bitcount_fast<POPCNT>(inputs_read &
                                                     BITFIELD_MASK(attr)));
         }

         cursor += size;
      } while (curmask);

      /* Always unmap. The uploader might use explicit flushes. */
      u_upload_unmap(uploader);
   }
}

/* Only used by the select/feedback mode. */
void
st_setup_current_user(struct st_context *st,
                      const struct gl_vertex_program *vp,
                      const struct st_common_variant *vp_variant,
                      struct cso_velems_state *velements,
                      struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   const GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
   const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;

   /* Process values that would have been better as uniforms in the application */
   GLbitfield curmask = inputs_read & ~enabled_arrays;
   /* For each attribute, create its own user buffer binding. */
   while (curmask) {
      const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
      const struct gl_array_attributes *const attrib
         = _mesa_draw_current_attrib(ctx, attr);
      const unsigned bufidx = (*num_vbuffers)++;

      init_velement(velements->velems, &attrib->Format, 0, 0, 0,
                    bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
                    util_bitcount(inputs_read & BITFIELD_MASK(attr)));

      vbuffer[bufidx].is_user_buffer = true;
      vbuffer[bufidx].buffer.user = attrib->Ptr;
      vbuffer[bufidx].buffer_offset = 0;
   }
}

template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_use_vao_fast_path USE_VAO_FAST_PATH,
         st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
         st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
         st_allow_user_buffers ALLOW_USER_BUFFERS,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_update_array_templ(struct st_context *st,
                      const GLbitfield enabled_arrays,
                      const GLbitfield enabled_user_arrays,
                      const GLbitfield nonzero_divisor_arrays)
{
   struct gl_context *ctx = st->ctx;

   /* vertex program validation must be done before this */
   /* _NEW_PROGRAM, ST_NEW_VS_STATE */
   const struct gl_vertex_program *vp =
      (struct gl_vertex_program *)ctx->VertexProgram._Current;
   const struct st_common_variant *vp_variant = st->vp_variant;
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
   const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;
   const GLbitfield userbuf_arrays =
      ALLOW_USER_BUFFERS ? inputs_read & enabled_user_arrays : 0;
   bool uses_user_vertex_buffers = userbuf_arrays != 0;

   st->draw_needs_minmax_index =
      (userbuf_arrays & ~nonzero_divisor_arrays) != 0;

   struct pipe_vertex_buffer vbuffer_local[PIPE_MAX_ATTRIBS];
   struct pipe_vertex_buffer *vbuffer;
   unsigned num_vbuffers = 0, num_vbuffers_tc;
   struct cso_velems_state velements;

   if (FILL_TC_SET_VB) {
      assert(!uses_user_vertex_buffers);
      assert(POPCNT != POPCNT_INVALID);
      num_vbuffers_tc = util_bitcount_fast<POPCNT>(inputs_read &
                                                   enabled_arrays);

      /* Add up to 1 vertex buffer for zero-stride vertex attribs. */
      num_vbuffers_tc += ALLOW_ZERO_STRIDE_ATTRIBS &&
                         inputs_read & ~enabled_arrays;
      vbuffer = tc_add_set_vertex_buffers_call(st->pipe, num_vbuffers_tc);
   } else {
      vbuffer = vbuffer_local;
   }

   /* ST_NEW_VERTEX_ARRAYS */
   /* Setup arrays */
   setup_arrays<POPCNT, FILL_TC_SET_VB, USE_VAO_FAST_PATH,
                ALLOW_ZERO_STRIDE_ATTRIBS, HAS_IDENTITY_ATTRIB_MAPPING,
                ALLOW_USER_BUFFERS, UPDATE_VELEMS>
      (ctx, ctx->Array._DrawVAO, dual_slot_inputs, inputs_read,
       inputs_read & enabled_arrays, &velements, vbuffer, &num_vbuffers);

   /* _NEW_CURRENT_ATTRIB */
   /* Setup zero-stride attribs. */
   if (ALLOW_ZERO_STRIDE_ATTRIBS) {
      st_setup_current<POPCNT, FILL_TC_SET_VB, UPDATE_VELEMS>
         (st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
          &velements, vbuffer, &num_vbuffers);
   } else {
      assert(!(inputs_read & ~enabled_arrays));
   }

   if (FILL_TC_SET_VB)
      assert(num_vbuffers == num_vbuffers_tc);

   if (UPDATE_VELEMS) {
      struct cso_context *cso = st->cso_context;
      velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;

      /* Set vertex buffers and elements. */
      if (FILL_TC_SET_VB) {
         cso_set_vertex_elements(cso, &velements);
      } else {
         cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
                                             uses_user_vertex_buffers, vbuffer);
      }
      /* The driver should clear this after it has processed the update. */
      ctx->Array.NewVertexElements = false;
      st->uses_user_vertex_buffers = uses_user_vertex_buffers;
   } else {
      /* Only vertex buffers. */
      if (!FILL_TC_SET_VB)
         cso_set_vertex_buffers(st->cso_context, num_vbuffers, true, vbuffer);

      /* This can change only when we update vertex elements. */
      assert(st->uses_user_vertex_buffers == uses_user_vertex_buffers);
   }
}

typedef void (*update_array_func)(struct st_context *st,
                                  const GLbitfield enabled_arrays,
                                  const GLbitfield enabled_user_attribs,
                                  const GLbitfield nonzero_divisor_attribs);

/* This just initializes the table of all st_update_array variants. */
struct st_update_array_table {
   update_array_func funcs[2][2][2][2][2][2];
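   /* Index order matches init_one():
    * [POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
    * [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS].
    * VAO_FAST_PATH is always ON here; the slow path is dispatched separately
    * in st_update_array_impl().
    */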

   template<util_popcnt POPCNT,
            st_fill_tc_set_vb FILL_TC_SET_VB,
            st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
            st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
            st_allow_user_buffers ALLOW_USER_BUFFERS,
            st_update_velems UPDATE_VELEMS>
   void init_one()
   {
      /* These conditions reduce the number of compiled variants. */
      /* The TC path is only valid without user buffers. */
      constexpr st_fill_tc_set_vb fill_tc_set_vb =
         !ALLOW_USER_BUFFERS ? FILL_TC_SET_VB : FILL_TC_SET_VB_OFF;

      /* POPCNT is unused without zero-stride attribs and without TC. */
      constexpr util_popcnt popcnt =
         !ALLOW_ZERO_STRIDE_ATTRIBS && !fill_tc_set_vb ?
            POPCNT_INVALID : POPCNT;

      funcs[POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
           [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS] =
         st_update_array_templ<
            popcnt,
            fill_tc_set_vb,
            VAO_FAST_PATH_ON,
            ALLOW_ZERO_STRIDE_ATTRIBS,
            HAS_IDENTITY_ATTRIB_MAPPING,
            ALLOW_USER_BUFFERS,
            UPDATE_VELEMS>;
   }

   /* We have to do this in stages because of the combinatorial explosion of
    * variants.
    */
   template<util_popcnt POPCNT,
            st_fill_tc_set_vb FILL_TC_SET_VB,
            st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS>
   void init_last_3_args()
   {
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF, USER_BUFFERS_OFF,
               UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_ON,  UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_ON,  UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_OFF, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_ON,  UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_ON,  UPDATE_VELEMS_ON>();
   }

   st_update_array_table()
   {
      init_last_3_args<POPCNT_NO,  FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_NO,  FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_NO,  FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_NO,  FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_ON>();
   }
};

static st_update_array_table update_array_table;

template<util_popcnt POPCNT,
         st_use_vao_fast_path USE_VAO_FAST_PATH> void ALWAYS_INLINE
st_update_array_impl(struct st_context *st)
{
   struct gl_context *ctx = st->ctx;
   struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
   const GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
   GLbitfield enabled_user_arrays;
   GLbitfield nonzero_divisor_arrays;

   assert(vao->_EnabledWithMapMode ==
          _mesa_vao_enable_to_vp_inputs(vao->_AttributeMapMode, vao->Enabled));

   if (!USE_VAO_FAST_PATH && !vao->SharedAndImmutable)
      _mesa_update_vao_derived_arrays(ctx, vao, false);

   _mesa_get_derived_vao_masks(ctx, enabled_arrays, &enabled_user_arrays,
                               &nonzero_divisor_arrays);

   /* Execute the slow path without using multiple C++ template variants. */
   if (!USE_VAO_FAST_PATH) {
      st_update_array_templ<POPCNT, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
                            ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                            USER_BUFFERS_ON, UPDATE_VELEMS_ON>
         (st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
      return;
   }

   /* The fast path that selects from multiple C++ template variants. */
   const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
   const GLbitfield enabled_arrays_read = inputs_read & enabled_arrays;

   /* Check whether the cso_context draw entrypoint goes directly to TC. */
   bool fill_tc_set_vbs = st->cso_context->draw_vbo == tc_draw_vbo;
   bool has_zero_stride_attribs = inputs_read & ~enabled_arrays;
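   /* Compat-profile attribute aliasing: in POSITION mode, GENERIC0 aliases
    * POS, so the GENERIC0 bit can't use the identity mapping; in GENERIC0
    * mode, it's the POS bit that is remapped.
    */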
   uint32_t non_identity_attrib_mapping =
      vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_IDENTITY ? 0 :
      vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_POSITION ? VERT_BIT_GENERIC0
                                                            : VERT_BIT_POS;
   bool has_identity_mapping = !(enabled_arrays_read &
                                 (vao->NonIdentityBufferAttribMapping |
                                  non_identity_attrib_mapping));
   /* has_user_buffers is always false with glthread. */
   bool has_user_buffers = inputs_read & enabled_user_arrays;
   /* Changing from user to non-user buffers and vice versa can switch between
    * cso and u_vbuf, which means that we need to update vertex elements even
    * when they have not changed.
    */
   bool update_velems = ctx->Array.NewVertexElements ||
                        st->uses_user_vertex_buffers != has_user_buffers;

   update_array_table.funcs[POPCNT][fill_tc_set_vbs][has_zero_stride_attribs]
                           [has_identity_mapping][has_user_buffers]
                           [update_velems]
      (st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
}

/* The default callback that must be present before st_init_update_array
 * selects the driver-dependent variant.
 */
void
st_update_array(struct st_context *st)
{
   unreachable("st_init_update_array not called");
}

void
st_init_update_array(struct st_context *st)
{
   st_update_func_t *func = &st->update_functions[ST_NEW_VERTEX_ARRAYS_INDEX];

   if (util_get_cpu_caps()->has_popcnt) {
      if (st->ctx->Const.UseVAOFastPath)
         *func = st_update_array_impl<POPCNT_YES, VAO_FAST_PATH_ON>;
      else
         *func = st_update_array_impl<POPCNT_YES, VAO_FAST_PATH_OFF>;
   } else {
      if (st->ctx->Const.UseVAOFastPath)
         *func = st_update_array_impl<POPCNT_NO, VAO_FAST_PATH_ON>;
      else
         *func = st_update_array_impl<POPCNT_NO, VAO_FAST_PATH_OFF>;
   }
}

struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
                               const struct gl_vertex_array_object *vao,
                               struct gl_buffer_object *indexbuf,
                               uint32_t enabled_arrays)
{
   struct st_context *st = st_context(ctx);
   const GLbitfield inputs_read = enabled_arrays;
   const GLbitfield dual_slot_inputs = 0; /* always zero */
   struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
   unsigned num_vbuffers = 0;
   struct cso_velems_state velements;

   /* This should use the slow path because there is only one interleaved
    * vertex buffer.
    */
   setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
                ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                USER_BUFFERS_ON, UPDATE_VELEMS_ON>
      (ctx, vao, dual_slot_inputs, inputs_read, inputs_read, &velements,
       vbuffer, &num_vbuffers);

   if (num_vbuffers != 1) {
      assert(!"this should never happen with display lists");
      return NULL;
   }

   velements.count = util_bitcount(inputs_read);

   struct pipe_screen *screen = st->screen;
   struct pipe_vertex_state *state =
      screen->create_vertex_state(screen, &vbuffer[0], velements.velems,
                                  velements.count,
                                  indexbuf ?
                                  indexbuf->buffer : NULL,
                                  enabled_arrays);

   for (unsigned i = 0; i < num_vbuffers; i++)
      pipe_vertex_buffer_unreference(&vbuffer[i]);
   return state;
}