/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * Copyright 2012 Marek Olšák <[email protected]>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * This converts the VBO's vertex attribute/array information into
 * Gallium vertex state and binds it.
 *
 * Authors:
 *   Keith Whitwell <[email protected]>
 *   Marek Olšák <[email protected]>
 */

#include "st_context.h"
#include "st_atom.h"
#include "st_draw.h"
#include "st_program.h"

#include "cso_cache/cso_context.h"
#include "util/u_cpu_detect.h"
#include "util/u_math.h"
#include "util/u_upload_mgr.h"
#include "util/u_threaded_context.h"
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/varray.h"
#include "main/arrayobj.h"
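
/* Each of the following enums acts as a boolean template parameter for the
 * array-update code below, selecting between a specialized (faster) variant
 * and a generic variant that always works.
 */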
enum st_fill_tc_set_vb {
   FILL_TC_SET_VB_OFF, /* always works */
   FILL_TC_SET_VB_ON,  /* specialized version (faster) */
};

enum st_use_vao_fast_path {
   VAO_FAST_PATH_OFF, /* more complicated version (slower) */
   VAO_FAST_PATH_ON,  /* always works (faster) */
};

enum st_allow_zero_stride_attribs {
   ZERO_STRIDE_ATTRIBS_OFF, /* specialized version (faster) */
   ZERO_STRIDE_ATTRIBS_ON,  /* always works */
};

/* Whether vertex attrib indices are equal to their vertex buffer indices. */
enum st_identity_attrib_mapping {
   IDENTITY_ATTRIB_MAPPING_OFF, /* always works */
   IDENTITY_ATTRIB_MAPPING_ON,  /* specialized version (faster) */
};

enum st_allow_user_buffers {
   USER_BUFFERS_OFF, /* specialized version (faster) */
   USER_BUFFERS_ON,  /* always works */
};

enum st_update_velems {
   UPDATE_VELEMS_OFF, /* specialized version (faster) */
   UPDATE_VELEMS_ON,  /* always works */
};

/* Always inline the non-64bit element code, so that the compiler can see
 * that velements is on the stack.
 */
static void ALWAYS_INLINE
init_velement(struct pipe_vertex_element *velements,
              const struct gl_vertex_format *vformat,
              int src_offset, unsigned src_stride,
              unsigned instance_divisor,
              int vbo_index, bool dual_slot, int idx)
{
   velements[idx].src_offset = src_offset;
   velements[idx].src_stride = src_stride;
   velements[idx].src_format = vformat->_PipeFormat;
   velements[idx].instance_divisor = instance_divisor;
   velements[idx].vertex_buffer_index = vbo_index;
   velements[idx].dual_slot = dual_slot;
   assert(velements[idx].src_format);
}

/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
 * on the stack.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_use_vao_fast_path USE_VAO_FAST_PATH,
         st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
         st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
         st_allow_user_buffers ALLOW_USER_BUFFERS,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
setup_arrays(struct gl_context *ctx,
             const struct gl_vertex_array_object *vao,
             const GLbitfield dual_slot_inputs,
             const GLbitfield inputs_read,
             GLbitfield mask,
             struct cso_velems_state *velements,
             struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   /* Set up enabled vertex arrays. */
   if (USE_VAO_FAST_PATH) {
      const GLubyte *attribute_map =
         !HAS_IDENTITY_ATTRIB_MAPPING ?
            _mesa_vao_attribute_map[vao->_AttributeMapMode] : NULL;
      struct pipe_context *pipe = ctx->pipe;
      struct tc_buffer_list *next_buffer_list = NULL;

      if (FILL_TC_SET_VB)
         next_buffer_list = tc_get_next_buffer_list(pipe);

      /* Note: I did try to unroll this loop by passing the number of
       * iterations as a template parameter, but it resulted in more overhead.
       */
      while (mask) {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&mask);
         const struct gl_array_attributes *attrib;
         const struct gl_vertex_buffer_binding *binding;

         if (HAS_IDENTITY_ATTRIB_MAPPING) {
            attrib = &vao->VertexAttrib[attr];
            binding = &vao->BufferBinding[attr];
         } else {
            attrib = &vao->VertexAttrib[attribute_map[attr]];
            binding = &vao->BufferBinding[attrib->BufferBindingIndex];
         }
         const unsigned bufidx = (*num_vbuffers)++;

         /* Set the vertex buffer. */
         if (!ALLOW_USER_BUFFERS || binding->BufferObj) {
            assert(binding->BufferObj);
            struct pipe_resource *buf =
               _mesa_get_bufferobj_reference(ctx, binding->BufferObj);
            vbuffer[bufidx].buffer.resource = buf;
            vbuffer[bufidx].is_user_buffer = false;
            vbuffer[bufidx].buffer_offset = binding->Offset +
                                            attrib->RelativeOffset;
            if (FILL_TC_SET_VB)
               tc_track_vertex_buffer(pipe, bufidx, buf, next_buffer_list);
         } else {
            vbuffer[bufidx].buffer.user = attrib->Ptr;
            vbuffer[bufidx].is_user_buffer = true;
            vbuffer[bufidx].buffer_offset = 0;
            assert(!FILL_TC_SET_VB);
         }

         if (!UPDATE_VELEMS)
            continue;

         /* Determine the vertex element index without popcnt
          * if !ALLOW_ZERO_STRIDE_ATTRIBS, which means that we don't need
          * to leave any holes for zero-stride attribs, thus the mapping from
          * vertex elements to vertex buffers is identity.
          */
         unsigned index;

         if (ALLOW_ZERO_STRIDE_ATTRIBS) {
            assert(POPCNT != POPCNT_INVALID);
            index = util_bitcount_fast<POPCNT>(inputs_read &
                                               BITFIELD_MASK(attr));
         } else {
            index = bufidx;
            assert(index == util_bitcount(inputs_read &
                                          BITFIELD_MASK(attr)));
         }

         /* Set the vertex element. */
         init_velement(velements->velems, &attrib->Format, 0, binding->Stride,
                       binding->InstanceDivisor, bufidx,
                       dual_slot_inputs & BITFIELD_BIT(attr), index);
      }
      return;
   }

   /* The slow path needs more fields initialized, which is not done if it's
    * disabled.
    */
   assert(!ctx->Const.UseVAOFastPath || vao->SharedAndImmutable);

   /* Require these template parameter values because the slow path doesn't
    * use the specializations, and we don't want to generate identical
    * template variants.
    */
   assert(!FILL_TC_SET_VB);
   assert(ALLOW_ZERO_STRIDE_ATTRIBS);
   assert(!HAS_IDENTITY_ATTRIB_MAPPING);
   assert(ALLOW_USER_BUFFERS);
   assert(UPDATE_VELEMS);

   while (mask) {
      /* The attribute index to start pulling a binding */
      const gl_vert_attrib i = (gl_vert_attrib)(ffs(mask) - 1);
      const struct gl_vertex_buffer_binding *const binding
         = _mesa_draw_buffer_binding(vao, i);
      const unsigned bufidx = (*num_vbuffers)++;

      if (binding->BufferObj) {
         /* Set the binding */
         vbuffer[bufidx].buffer.resource =
            _mesa_get_bufferobj_reference(ctx, binding->BufferObj);
         vbuffer[bufidx].is_user_buffer = false;
         vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
      } else {
         /* Set the binding */
         const void *ptr = (const void *)_mesa_draw_binding_offset(binding);
         vbuffer[bufidx].buffer.user = ptr;
         vbuffer[bufidx].is_user_buffer = true;
         vbuffer[bufidx].buffer_offset = 0;
      }

      const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding);
      GLbitfield attrmask = mask & boundmask;
      /* Mark those attributes as processed */
      mask &= ~boundmask;
      /* We can assume that we have an array for the binding */
      assert(attrmask);

      /* Walk attributes belonging to the binding */
      do {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&attrmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_array_attrib(vao, attr);
         const GLuint off = _mesa_draw_attributes_relative_offset(attrib);
         assert(POPCNT != POPCNT_INVALID);

         init_velement(velements->velems, &attrib->Format, off,
                       binding->Stride, binding->InstanceDivisor, bufidx,
                       dual_slot_inputs & BITFIELD_BIT(attr),
                       util_bitcount_fast<POPCNT>(inputs_read &
                                                  BITFIELD_MASK(attr)));
      } while (attrmask);
   }
}

/* Only used by the select/feedback mode. */
void
st_setup_arrays(struct st_context *st,
                const struct gl_vertex_program *vp,
                const struct st_common_variant *vp_variant,
                struct cso_velems_state *velements,
                struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);

   setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_ON,
                ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                USER_BUFFERS_ON, UPDATE_VELEMS_ON>
      (ctx, ctx->Array._DrawVAO, vp->Base.DualSlotInputs,
       vp_variant->vert_attrib_mask,
       vp_variant->vert_attrib_mask & enabled_arrays,
       velements, vbuffer, num_vbuffers);
}

/* ALWAYS_INLINE helps the compiler realize that most of the parameters are
 * on the stack.
 *
 * The current (zero-stride) attribs are uploaded to a single vertex buffer,
 * which is appended to vbuffer and counted in num_vbuffers.
 */
template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_setup_current(struct st_context *st,
                 const GLbitfield dual_slot_inputs,
                 const GLbitfield inputs_read,
                 GLbitfield curmask,
                 struct cso_velems_state *velements,
                 struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   /* Process values that would have been better as uniforms in the
    * application.
    */
   if (curmask) {
      struct gl_context *ctx = st->ctx;
      assert(POPCNT != POPCNT_INVALID);
      unsigned num_attribs = util_bitcount_fast<POPCNT>(curmask);
      unsigned num_dual_attribs = util_bitcount_fast<POPCNT>(curmask &
                                                             dual_slot_inputs);
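      /* Each attrib slot holds at most a vec4 of 32-bit components, i.e.
       * 16 bytes; dual-slot (64-bit) attribs occupy two such slots.
       */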
      /* num_attribs includes num_dual_attribs, so adding num_dual_attribs
       * doubles the size of those attribs.
       */
      unsigned max_size = (num_attribs + num_dual_attribs) * 16;

      const unsigned bufidx = (*num_vbuffers)++;
      vbuffer[bufidx].is_user_buffer = false;
      vbuffer[bufidx].buffer.resource = NULL;
      /* vbuffer[bufidx].buffer_offset is set below */

      /* Use const_uploader for zero-stride vertex attributes, because
       * it may use a better memory placement than stream_uploader.
       * The reason is that zero-stride attributes can be fetched many
       * times (thousands of times), so a better placement is going to
       * perform better.
       */
      struct u_upload_mgr *uploader = st->can_bind_const_buffer_as_vertex ?
                                      st->pipe->const_uploader :
                                      st->pipe->stream_uploader;
      uint8_t *ptr = NULL;

      u_upload_alloc(uploader, 0, max_size, 16,
                     &vbuffer[bufidx].buffer_offset,
                     &vbuffer[bufidx].buffer.resource, (void**)&ptr);
      uint8_t *cursor = ptr;

      if (FILL_TC_SET_VB) {
         struct pipe_context *pipe = ctx->pipe;
         tc_track_vertex_buffer(pipe, bufidx, vbuffer[bufidx].buffer.resource,
                                tc_get_next_buffer_list(pipe));
      }

      do {
         const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_current_attrib(ctx, attr);
         const unsigned size = attrib->Format._ElementSize;

         /* When the current attribs are set (e.g. via glColor3ub or
          * glVertexAttrib2s), they are always converted to float32 or int32
          * or dual slots being 2x int32, so they are always dword-aligned.
          * glBegin/End behaves in the same way. It's really an internal Mesa
          * inefficiency that is convenient here, which is why this assertion
          * is always true.
          */
         assert(size % 4 == 0); /* assume a hw-friendly alignment */
         memcpy(cursor, attrib->Ptr, size);

         if (UPDATE_VELEMS) {
            init_velement(velements->velems, &attrib->Format, cursor - ptr,
                          0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
                          util_bitcount_fast<POPCNT>(inputs_read &
                                                     BITFIELD_MASK(attr)));
         }

         cursor += size;
      } while (curmask);

      /* Always unmap. The uploader might use explicit flushes. */
      u_upload_unmap(uploader);
   }
}

/* Only used by the select/feedback mode. */
void
st_setup_current_user(struct st_context *st,
                      const struct gl_vertex_program *vp,
                      const struct st_common_variant *vp_variant,
                      struct cso_velems_state *velements,
                      struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   const GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
   const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;

   /* Process values that would have been better as uniforms in the
    * application.
    */
   GLbitfield curmask = inputs_read & ~enabled_arrays;
   /* For each attribute, make its own user buffer binding. */
   while (curmask) {
      const gl_vert_attrib attr = (gl_vert_attrib)u_bit_scan(&curmask);
      const struct gl_array_attributes *const attrib
         = _mesa_draw_current_attrib(ctx, attr);
      const unsigned bufidx = (*num_vbuffers)++;

      init_velement(velements->velems, &attrib->Format, 0, 0, 0,
                    bufidx, dual_slot_inputs & BITFIELD_BIT(attr),
                    util_bitcount(inputs_read & BITFIELD_MASK(attr)));

      vbuffer[bufidx].is_user_buffer = true;
      vbuffer[bufidx].buffer.user = attrib->Ptr;
      vbuffer[bufidx].buffer_offset = 0;
   }
}

template<util_popcnt POPCNT,
         st_fill_tc_set_vb FILL_TC_SET_VB,
         st_use_vao_fast_path USE_VAO_FAST_PATH,
         st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
         st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
         st_allow_user_buffers ALLOW_USER_BUFFERS,
         st_update_velems UPDATE_VELEMS> void ALWAYS_INLINE
st_update_array_templ(struct st_context *st,
                      const GLbitfield enabled_arrays,
                      const GLbitfield enabled_user_arrays,
                      const GLbitfield nonzero_divisor_arrays)
{
   struct gl_context *ctx = st->ctx;

   /* vertex program validation must be done before this */
   /* _NEW_PROGRAM, ST_NEW_VS_STATE */
   const struct gl_vertex_program *vp =
      (struct gl_vertex_program *)ctx->VertexProgram._Current;
   const struct st_common_variant *vp_variant = st->vp_variant;
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;
   const GLbitfield dual_slot_inputs = vp->Base.DualSlotInputs;
   const GLbitfield userbuf_arrays =
      ALLOW_USER_BUFFERS ? inputs_read & enabled_user_arrays : 0;
   bool uses_user_vertex_buffers = userbuf_arrays != 0;

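   /* Draws that source user (client-memory) vertex buffers need the min/max
    * index range so that only the referenced range of those arrays is
    * uploaded. Attribs with a nonzero instance divisor are not indexed per
    * vertex and therefore don't require it.
    */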
   st->draw_needs_minmax_index =
      (userbuf_arrays & ~nonzero_divisor_arrays) != 0;

   struct pipe_vertex_buffer vbuffer_local[PIPE_MAX_ATTRIBS];
   struct pipe_vertex_buffer *vbuffer;
   unsigned num_vbuffers = 0, num_vbuffers_tc;
   struct cso_velems_state velements;

   if (FILL_TC_SET_VB) {
      assert(!uses_user_vertex_buffers);
      assert(POPCNT != POPCNT_INVALID);
      num_vbuffers_tc = util_bitcount_fast<POPCNT>(inputs_read &
                                                   enabled_arrays);

      /* Add up to 1 vertex buffer for zero-stride vertex attribs. */
      num_vbuffers_tc += ALLOW_ZERO_STRIDE_ATTRIBS &&
                         inputs_read & ~enabled_arrays;
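      /* Write the vertex buffers directly into the batched threaded-context
       * call instead of a local array, bypassing cso_context.
       */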
      vbuffer = tc_add_set_vertex_buffers_call(st->pipe, num_vbuffers_tc);
   } else {
      vbuffer = vbuffer_local;
   }

   /* ST_NEW_VERTEX_ARRAYS */
   /* Setup arrays */
   setup_arrays<POPCNT, FILL_TC_SET_VB, USE_VAO_FAST_PATH,
                ALLOW_ZERO_STRIDE_ATTRIBS, HAS_IDENTITY_ATTRIB_MAPPING,
                ALLOW_USER_BUFFERS, UPDATE_VELEMS>
      (ctx, ctx->Array._DrawVAO, dual_slot_inputs, inputs_read,
       inputs_read & enabled_arrays, &velements, vbuffer, &num_vbuffers);

   /* _NEW_CURRENT_ATTRIB */
   /* Setup zero-stride attribs. */
   if (ALLOW_ZERO_STRIDE_ATTRIBS) {
      st_setup_current<POPCNT, FILL_TC_SET_VB, UPDATE_VELEMS>
         (st, dual_slot_inputs, inputs_read, inputs_read & ~enabled_arrays,
          &velements, vbuffer, &num_vbuffers);
   } else {
      assert(!(inputs_read & ~enabled_arrays));
   }

   if (FILL_TC_SET_VB)
      assert(num_vbuffers == num_vbuffers_tc);

   if (UPDATE_VELEMS) {
      struct cso_context *cso = st->cso_context;
      velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;

      /* Set vertex buffers and elements. */
      if (FILL_TC_SET_VB) {
         cso_set_vertex_elements(cso, &velements);
      } else {
         cso_set_vertex_buffers_and_elements(cso, &velements, num_vbuffers,
                                             uses_user_vertex_buffers, vbuffer);
      }
      /* The driver should clear this after it has processed the update. */
      ctx->Array.NewVertexElements = false;
      st->uses_user_vertex_buffers = uses_user_vertex_buffers;
   } else {
      /* Only vertex buffers. */
      if (!FILL_TC_SET_VB)
         cso_set_vertex_buffers(st->cso_context, num_vbuffers, true, vbuffer);

      /* This can change only when we update vertex elements. */
      assert(st->uses_user_vertex_buffers == uses_user_vertex_buffers);
   }
}

typedef void (*update_array_func)(struct st_context *st,
                                  const GLbitfield enabled_arrays,
                                  const GLbitfield enabled_user_attribs,
                                  const GLbitfield nonzero_divisor_attribs);

/* This just initializes the table of all st_update_array variants. */
struct st_update_array_table {
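   /* Indexed as [POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
    * [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS],
    * matching the assignment in init_one().
    */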
   update_array_func funcs[2][2][2][2][2][2];

   template<util_popcnt POPCNT,
            st_fill_tc_set_vb FILL_TC_SET_VB,
            st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS,
            st_identity_attrib_mapping HAS_IDENTITY_ATTRIB_MAPPING,
            st_allow_user_buffers ALLOW_USER_BUFFERS,
            st_update_velems UPDATE_VELEMS>
   void init_one()
   {
      /* These conditions reduce the number of compiled variants. */
      /* The TC path is only valid without user buffers. */
      constexpr st_fill_tc_set_vb fill_tc_set_vb =
         !ALLOW_USER_BUFFERS ? FILL_TC_SET_VB : FILL_TC_SET_VB_OFF;

      /* POPCNT is unused without zero-stride attribs and without TC. */
      constexpr util_popcnt popcnt =
         !ALLOW_ZERO_STRIDE_ATTRIBS && !fill_tc_set_vb ?
            POPCNT_INVALID : POPCNT;

      funcs[POPCNT][FILL_TC_SET_VB][ALLOW_ZERO_STRIDE_ATTRIBS]
           [HAS_IDENTITY_ATTRIB_MAPPING][ALLOW_USER_BUFFERS][UPDATE_VELEMS] =
         st_update_array_templ<
            popcnt,
            fill_tc_set_vb,
            VAO_FAST_PATH_ON,
            ALLOW_ZERO_STRIDE_ATTRIBS,
            HAS_IDENTITY_ATTRIB_MAPPING,
            ALLOW_USER_BUFFERS,
            UPDATE_VELEMS>;
   }

   /* We have to do this in stages because of the combinatorial explosion of
    * variants.
    */
   template<util_popcnt POPCNT,
            st_fill_tc_set_vb FILL_TC_SET_VB,
            st_allow_zero_stride_attribs ALLOW_ZERO_STRIDE_ATTRIBS>
   void init_last_3_args()
   {
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF, USER_BUFFERS_OFF,
               UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_OFF,
               USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_OFF, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_OFF, UPDATE_VELEMS_ON>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_ON, UPDATE_VELEMS_OFF>();
      init_one<POPCNT, FILL_TC_SET_VB, ALLOW_ZERO_STRIDE_ATTRIBS,
               IDENTITY_ATTRIB_MAPPING_ON,
               USER_BUFFERS_ON, UPDATE_VELEMS_ON>();
   }

   st_update_array_table()
   {
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_NO, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_OFF,
                       ZERO_STRIDE_ATTRIBS_ON>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_OFF>();
      init_last_3_args<POPCNT_YES, FILL_TC_SET_VB_ON,
                       ZERO_STRIDE_ATTRIBS_ON>();
   }
};

static st_update_array_table update_array_table;

template<util_popcnt POPCNT,
         st_use_vao_fast_path USE_VAO_FAST_PATH> void ALWAYS_INLINE
st_update_array_impl(struct st_context *st)
{
   struct gl_context *ctx = st->ctx;
   struct gl_vertex_array_object *vao = ctx->Array._DrawVAO;
   const GLbitfield enabled_arrays = _mesa_get_enabled_vertex_arrays(ctx);
   GLbitfield enabled_user_arrays;
   GLbitfield nonzero_divisor_arrays;

   assert(vao->_EnabledWithMapMode ==
          _mesa_vao_enable_to_vp_inputs(vao->_AttributeMapMode, vao->Enabled));

   if (!USE_VAO_FAST_PATH && !vao->SharedAndImmutable)
      _mesa_update_vao_derived_arrays(ctx, vao, false);

   _mesa_get_derived_vao_masks(ctx, enabled_arrays, &enabled_user_arrays,
                               &nonzero_divisor_arrays);

   /* Execute the slow path without using multiple C++ template variants. */
   if (!USE_VAO_FAST_PATH) {
      st_update_array_templ<POPCNT, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
                            ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                            USER_BUFFERS_ON, UPDATE_VELEMS_ON>
         (st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
      return;
   }

   /* The fast path that selects from multiple C++ template variants. */
   const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask;
   const GLbitfield enabled_arrays_read = inputs_read & enabled_arrays;

   /* Check whether cso_context goes directly to TC. */
   bool fill_tc_set_vbs = st->cso_context->draw_vbo == tc_draw_vbo;
   bool has_zero_stride_attribs = inputs_read & ~enabled_arrays;
   uint32_t non_identity_attrib_mapping =
      vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_IDENTITY ? 0 :
      vao->_AttributeMapMode == ATTRIBUTE_MAP_MODE_POSITION ? VERT_BIT_GENERIC0
                                                            : VERT_BIT_POS;
   bool has_identity_mapping = !(enabled_arrays_read &
                                 (vao->NonIdentityBufferAttribMapping |
                                  non_identity_attrib_mapping));
   /* has_user_buffers is always false with glthread. */
   bool has_user_buffers = inputs_read & enabled_user_arrays;
   /* Changing from user to non-user buffers and vice versa can switch between
    * cso and u_vbuf, which means that we need to update vertex elements even
    * when they have not changed.
    */
   bool update_velems = ctx->Array.NewVertexElements ||
                        st->uses_user_vertex_buffers != has_user_buffers;

   update_array_table.funcs[POPCNT][fill_tc_set_vbs][has_zero_stride_attribs]
                           [has_identity_mapping][has_user_buffers]
                           [update_velems]
      (st, enabled_arrays, enabled_user_arrays, nonzero_divisor_arrays);
}

/* The default callback that must be present before st_init_update_array
 * selects the driver-dependent variant.
 */
void
st_update_array(struct st_context *st)
{
   unreachable("st_init_update_array not called");
}

void
st_init_update_array(struct st_context *st)
{
   st_update_func_t *func = &st->update_functions[ST_NEW_VERTEX_ARRAYS_INDEX];

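   /* Select the variant at context creation based on whether the CPU has
    * popcnt and whether the VAO fast path is enabled.
    */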
   if (util_get_cpu_caps()->has_popcnt) {
      if (st->ctx->Const.UseVAOFastPath)
         *func = st_update_array_impl<POPCNT_YES, VAO_FAST_PATH_ON>;
      else
         *func = st_update_array_impl<POPCNT_YES, VAO_FAST_PATH_OFF>;
   } else {
      if (st->ctx->Const.UseVAOFastPath)
         *func = st_update_array_impl<POPCNT_NO, VAO_FAST_PATH_ON>;
      else
         *func = st_update_array_impl<POPCNT_NO, VAO_FAST_PATH_OFF>;
   }
}

struct pipe_vertex_state *
st_create_gallium_vertex_state(struct gl_context *ctx,
                               const struct gl_vertex_array_object *vao,
                               struct gl_buffer_object *indexbuf,
                               uint32_t enabled_arrays)
{
   struct st_context *st = st_context(ctx);
   const GLbitfield inputs_read = enabled_arrays;
   const GLbitfield dual_slot_inputs = 0; /* always zero */
   struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
   unsigned num_vbuffers = 0;
   struct cso_velems_state velements;

   /* This should use the slow path because there is only 1 interleaved
    * vertex buffer.
    */
   setup_arrays<POPCNT_NO, FILL_TC_SET_VB_OFF, VAO_FAST_PATH_OFF,
                ZERO_STRIDE_ATTRIBS_ON, IDENTITY_ATTRIB_MAPPING_OFF,
                USER_BUFFERS_ON, UPDATE_VELEMS_ON>
      (ctx, vao, dual_slot_inputs, inputs_read, inputs_read, &velements,
       vbuffer, &num_vbuffers);

   if (num_vbuffers != 1) {
      assert(!"this should never happen with display lists");
      return NULL;
   }

   velements.count = util_bitcount(inputs_read);

   struct pipe_screen *screen = st->screen;
   struct pipe_vertex_state *state =
      screen->create_vertex_state(screen, &vbuffer[0], velements.velems,
                                  velements.count,
                                  indexbuf ?
                                  indexbuf->buffer : NULL,
                                  enabled_arrays);

   for (unsigned i = 0; i < num_vbuffers; i++)
      pipe_vertex_buffer_unreference(&vbuffer[i]);
   return state;
}