1 /*
2 * Copyright (c) 2014-2024 Broadcom. All Rights Reserved.
3 * The term “Broadcom” refers to Broadcom Inc.
4 * and/or its subsidiaries.
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include "util/u_memory.h"
9 #include "util/u_bitmask.h"
10
11 #include "svga_cmd.h"
12 #include "svga_context.h"
13 #include "svga_resource_buffer.h"
14 #include "svga_shader.h"
15 #include "svga_debug.h"
16 #include "svga_streamout.h"
17
18 struct svga_stream_output_target {
19 struct pipe_stream_output_target base;
20 };
21
/**
 * Cast wrapper: reinterpret a gallium stream output target pointer as the
 * svga wrapper type.  This is a plain pointer cast; no checking is done.
 */
static inline struct svga_stream_output_target *
svga_stream_output_target(struct pipe_stream_output_target *s)
{
   struct svga_stream_output_target *sot =
      (struct svga_stream_output_target *) s;

   return sot;
}
28
29
30 /**
31 * A helper function to send different version of the DefineStreamOutput command
32 * depending on if device is SM5 capable or not.
33 */
34 static enum pipe_error
svga_define_stream_output(struct svga_context * svga,SVGA3dStreamOutputId soid,uint32 numOutputStreamEntries,uint32 numOutputStreamStrides,uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],uint32 rasterizedStream,struct svga_stream_output * streamout)35 svga_define_stream_output(struct svga_context *svga,
36 SVGA3dStreamOutputId soid,
37 uint32 numOutputStreamEntries,
38 uint32 numOutputStreamStrides,
39 uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],
40 const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],
41 uint32 rasterizedStream,
42 struct svga_stream_output *streamout)
43 {
44 unsigned i;
45
46 SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __func__, soid);
47 SVGA_DBG(DEBUG_STREAMOUT,
48 "numOutputStreamEntires=%d\n", numOutputStreamEntries);
49
50 for (i = 0; i < numOutputStreamEntries; i++) {
51 SVGA_DBG(DEBUG_STREAMOUT,
52 " %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",
53 i, decls[i].outputSlot, decls[i].registerIndex,
54 decls[i].registerMask, decls[i].stream);
55 }
56
57 SVGA_DBG(DEBUG_STREAMOUT,
58 "numOutputStreamStrides=%d\n", numOutputStreamStrides);
59 for (i = 0; i < numOutputStreamStrides; i++) {
60 SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]);
61 }
62 SVGA_DBG(DEBUG_STREAMOUT, "\n");
63
64 if (svga_have_sm5(svga) &&
65 (numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS ||
66 numOutputStreamStrides > 1)) {
67 unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry)
68 * numOutputStreamEntries;
69 struct svga_winsys_buffer *declBuf;
70 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
71 void *map;
72
73 declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED,
74 bufSize);
75 if (!declBuf)
76 return PIPE_ERROR;
77 map = sws->buffer_map(sws, declBuf, PIPE_MAP_WRITE);
78 if (!map) {
79 sws->buffer_destroy(sws, declBuf);
80 return PIPE_ERROR;
81 }
82
83 /* copy decls to buffer */
84 memcpy(map, decls, bufSize);
85
86 /* unmap buffer */
87 sws->buffer_unmap(sws, declBuf);
88 streamout->declBuf = declBuf;
89
90 SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput
91 (svga->swc, soid,
92 numOutputStreamEntries,
93 numOutputStreamStrides,
94 streamStrides,
95 streamout->declBuf,
96 rasterizedStream,
97 bufSize));
98 } else {
99 SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid,
100 numOutputStreamEntries,
101 streamStrides,
102 decls));
103 }
104
105 return PIPE_OK;
106 }
107
108
109 /**
110 * Creates stream output from the stream output info.
111 */
112 struct svga_stream_output *
svga_create_stream_output(struct svga_context * svga,struct svga_shader * shader,const struct pipe_stream_output_info * info)113 svga_create_stream_output(struct svga_context *svga,
114 struct svga_shader *shader,
115 const struct pipe_stream_output_info *info)
116 {
117 struct svga_stream_output *streamout;
118 SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
119 unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
120 unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS];
121 unsigned numStreamStrides = 0;
122 unsigned numDecls;
123 unsigned i;
124 enum pipe_error ret;
125 unsigned id;
126 ASSERTED unsigned maxDecls = 0;
127
128 assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
129
130 /* Gallium utility creates shaders with stream output.
131 * For non-DX10, just return NULL.
132 */
133 if (!svga_have_vgpu10(svga))
134 return NULL;
135
136 if (svga_have_sm5(svga))
137 maxDecls = SVGA3D_MAX_STREAMOUT_DECLS;
138 else if (svga_have_vgpu10(svga))
139 maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS;
140
141 assert(info->num_outputs <= maxDecls);
142
143 /* Allocate an integer ID for the stream output */
144 id = util_bitmask_add(svga->stream_output_id_bm);
145 if (id == UTIL_BITMASK_INVALID_INDEX) {
146 return NULL;
147 }
148
149 /* Allocate the streamout data structure */
150 streamout = CALLOC_STRUCT(svga_stream_output);
151
152 if (!streamout)
153 return NULL;
154
155 streamout->info = *info;
156 streamout->id = id;
157 streamout->pos_out_index = -1;
158 streamout->streammask = 0;
159
160 /* Init whole decls and stride arrays to zero to avoid garbage values */
161 memset(decls, 0, sizeof(decls));
162 memset(strides, 0, sizeof(strides));
163 memset(dstOffset, 0, sizeof(dstOffset));
164
165 SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n",
166 __func__, info->num_outputs);
167
168 for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) {
169 unsigned reg_idx = info->output[i].register_index;
170 unsigned buf_idx = info->output[i].output_buffer;
171 const enum tgsi_semantic sem_name =
172 shader->tgsi_info.output_semantic_name[reg_idx];
173
174 assert(buf_idx <= PIPE_MAX_SO_BUFFERS);
175
176 numStreamStrides = MAX2(numStreamStrides, buf_idx);
177
178 SVGA_DBG(DEBUG_STREAMOUT,
179 " %d: register_index=%d output_buffer=%d stream=%d\n",
180 i, reg_idx, buf_idx, info->output[i].stream);
181
182 SVGA_DBG(DEBUG_STREAMOUT,
183 " dst_offset=%d start_component=%d num_components=%d\n",
184 info->output[i].dst_offset,
185 info->output[i].start_component,
186 info->output[i].num_components);
187
188 streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4);
189
190 /**
191 * Check if the destination offset of the current output
192 * is at the expected offset. If it is greater, then that means
193 * there is a gap in the stream output. We need to insert
194 * extra declaration entries with an invalid register index
195 * to specify a gap.
196 */
197 while (info->output[i].dst_offset > dstOffset[buf_idx]) {
198
199 unsigned numComponents = info->output[i].dst_offset -
200 dstOffset[buf_idx];;
201
202 assert(svga_have_sm5(svga));
203
204 /* We can only specify at most 4 components to skip in each
205 * declaration entry.
206 */
207 numComponents = numComponents > 4 ? 4 : numComponents;
208
209 decls[numDecls].outputSlot = buf_idx,
210 decls[numDecls].stream = info->output[i].stream;
211 decls[numDecls].registerIndex = SVGA3D_INVALID_ID;
212 decls[numDecls].registerMask = (1 << numComponents) - 1;
213
214 dstOffset[buf_idx] += numComponents;
215 numDecls++;
216 }
217
218 if (sem_name == TGSI_SEMANTIC_POSITION) {
219 /**
220 * Check if streaming out POSITION. If so, replace the
221 * register index with the index for NON_ADJUSTED POSITION.
222 */
223 decls[numDecls].registerIndex = shader->tgsi_info.num_outputs;
224
225 /* Save this output index, so we can tell later if this stream output
226 * includes an output of a vertex position
227 */
228 streamout->pos_out_index = numDecls;
229 }
230 else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
231 /**
232 * Use the shadow copy for clip distance because
233 * CLIPDIST instruction is only emitted for enabled clip planes.
234 * It's valid to write to ClipDistance variable for non-enabled
235 * clip planes.
236 */
237 decls[numDecls].registerIndex =
238 shader->tgsi_info.num_outputs + 1 +
239 shader->tgsi_info.output_semantic_index[reg_idx];
240 }
241 else {
242 decls[numDecls].registerIndex = reg_idx;
243 }
244
245 decls[numDecls].outputSlot = buf_idx;
246 decls[numDecls].registerMask =
247 ((1 << info->output[i].num_components) - 1)
248 << info->output[i].start_component;
249
250 decls[numDecls].stream = info->output[i].stream;
251 assert(decls[numDecls].stream == 0 || svga_have_sm5(svga));
252
253 /* Set the bit in streammask for the enabled stream */
254 streamout->streammask |= 1 << info->output[i].stream;
255
256 /* Update the expected offset for the next output */
257 dstOffset[buf_idx] += info->output[i].num_components;
258
259 strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
260 }
261
262 assert(numDecls <= maxDecls);
263
264 /* Send the DefineStreamOutput command.
265 * Note, rasterizedStream is always 0.
266 */
267 ret = svga_define_stream_output(svga, id,
268 numDecls, numStreamStrides+1,
269 strides, decls, 0, streamout);
270
271 if (ret != PIPE_OK) {
272 util_bitmask_clear(svga->stream_output_id_bm, id);
273 FREE(streamout);
274 streamout = NULL;
275 }
276 return streamout;
277 }
278
279
280 enum pipe_error
svga_set_stream_output(struct svga_context * svga,struct svga_stream_output * streamout)281 svga_set_stream_output(struct svga_context *svga,
282 struct svga_stream_output *streamout)
283 {
284 unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;
285
286 if (!svga_have_vgpu10(svga)) {
287 return PIPE_OK;
288 }
289
290 SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __func__,
291 streamout, id);
292
293 if (svga->current_so != streamout) {
294
295 /* Before unbinding the current stream output, stop the stream output
296 * statistics queries for the active streams.
297 */
298 if (svga_have_sm5(svga) && svga->current_so) {
299 svga->vcount_buffer_stream = svga->current_so->buffer_stream;
300 svga_end_stream_output_queries(svga, svga->current_so->streammask);
301 }
302
303 enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
304 if (ret != PIPE_OK) {
305 return ret;
306 }
307
308 svga->current_so = streamout;
309
310 /* After binding the new stream output, start the stream output
311 * statistics queries for the active streams.
312 */
313 if (svga_have_sm5(svga) && svga->current_so) {
314 svga_begin_stream_output_queries(svga, svga->current_so->streammask);
315 }
316 }
317
318 return PIPE_OK;
319 }
320
321 void
svga_delete_stream_output(struct svga_context * svga,struct svga_stream_output * streamout)322 svga_delete_stream_output(struct svga_context *svga,
323 struct svga_stream_output *streamout)
324 {
325 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
326
327 SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __func__, streamout);
328
329 assert(svga_have_vgpu10(svga));
330 assert(streamout != NULL);
331
332 SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc,
333 streamout->id));
334
335 if (svga_have_sm5(svga) && streamout->declBuf) {
336 sws->buffer_destroy(sws, streamout->declBuf);
337 }
338
339 /* Before deleting the current streamout, make sure to stop any pending
340 * SO queries.
341 */
342 if (svga->current_so == streamout) {
343 if (svga->in_streamout)
344 svga_end_stream_output_queries(svga, svga->current_so->streammask);
345 svga->current_so = NULL;
346 }
347
348 /* Release the ID */
349 util_bitmask_clear(svga->stream_output_id_bm, streamout->id);
350
351 /* Free streamout structure */
352 FREE(streamout);
353 }
354
355
356 static struct pipe_stream_output_target *
svga_create_stream_output_target(struct pipe_context * pipe,struct pipe_resource * buffer,unsigned buffer_offset,unsigned buffer_size)357 svga_create_stream_output_target(struct pipe_context *pipe,
358 struct pipe_resource *buffer,
359 unsigned buffer_offset,
360 unsigned buffer_size)
361 {
362 struct svga_context *svga = svga_context(pipe);
363 struct svga_stream_output_target *sot;
364
365 SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __func__,
366 buffer_offset, buffer_size);
367
368 assert(svga_have_vgpu10(svga));
369 (void) svga;
370
371 sot = CALLOC_STRUCT(svga_stream_output_target);
372 if (!sot)
373 return NULL;
374
375 pipe_reference_init(&sot->base.reference, 1);
376 pipe_resource_reference(&sot->base.buffer, buffer);
377 sot->base.context = pipe;
378 sot->base.buffer = buffer;
379 sot->base.buffer_offset = buffer_offset;
380 sot->base.buffer_size = buffer_size;
381
382 return &sot->base;
383 }
384
385 static void
svga_destroy_stream_output_target(struct pipe_context * pipe,struct pipe_stream_output_target * target)386 svga_destroy_stream_output_target(struct pipe_context *pipe,
387 struct pipe_stream_output_target *target)
388 {
389 struct svga_stream_output_target *sot = svga_stream_output_target(target);
390
391 SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __func__);
392
393 pipe_resource_reference(&sot->base.buffer, NULL);
394 FREE(sot);
395 }
396
397 static void
svga_set_stream_output_targets(struct pipe_context * pipe,unsigned num_targets,struct pipe_stream_output_target ** targets,const unsigned * offsets)398 svga_set_stream_output_targets(struct pipe_context *pipe,
399 unsigned num_targets,
400 struct pipe_stream_output_target **targets,
401 const unsigned *offsets)
402 {
403 struct svga_context *svga = svga_context(pipe);
404 struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
405 unsigned i;
406 unsigned num_so_targets;
407 bool begin_so_queries = num_targets > 0;
408
409 SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __func__,
410 num_targets);
411
412 assert(svga_have_vgpu10(svga));
413
414 /* Mark the streamout buffers as dirty so that we'll issue readbacks
415 * before mapping.
416 */
417 for (i = 0; i < svga->num_so_targets; i++) {
418 struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
419 sbuf->dirty = true;
420 }
421
422 /* Before the currently bound streamout targets are unbound,
423 * save them in case they need to be referenced to retrieve the
424 * number of vertices being streamed out.
425 */
426 for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {
427 svga->vcount_so_targets[i] = svga->so_targets[i];
428 }
429
430 assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
431
432 for (i = 0; i < num_targets; i++) {
433 struct svga_stream_output_target *sot
434 = svga_stream_output_target(targets[i]);
435 struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
436 unsigned size;
437
438 svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer,
439 PIPE_BIND_STREAM_OUTPUT);
440
441 assert(svga_buffer(sot->base.buffer)->key.flags
442 & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
443
444 /* Mark the buffer surface as RENDERED */
445 assert(sbuf->bufsurf);
446 sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
447
448 svga->so_targets[i] = &sot->base;
449 if (offsets[i] == -1) {
450 soBindings[i].offset = -1;
451
452 /* The streamout is being resumed. There is no need to restart streamout statistics
453 * queries for the draw-auto fallback since those queries are still active.
454 */
455 begin_so_queries = false;
456 }
457 else
458 soBindings[i].offset = sot->base.buffer_offset + offsets[i];
459
460 /* The size cannot extend beyond the end of the buffer. Clamp it. */
461 size = MIN2(sot->base.buffer_size,
462 sot->base.buffer->width0 - sot->base.buffer_offset);
463
464 soBindings[i].sizeInBytes = size;
465 }
466
467 /* unbind any previously bound stream output buffers */
468 for (; i < svga->num_so_targets; i++) {
469 svga->so_surfaces[i] = NULL;
470 svga->so_targets[i] = NULL;
471 }
472
473 num_so_targets = MAX2(svga->num_so_targets, num_targets);
474 SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
475 soBindings, svga->so_surfaces));
476 svga->num_so_targets = num_targets;
477
478 if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) {
479
480 /* If there are already active queries and we need to start a new streamout,
481 * we need to stop the current active queries first.
482 */
483 if (svga->in_streamout) {
484 svga_end_stream_output_queries(svga, svga->current_so->streammask);
485 }
486
487 /* Start stream out statistics queries for the new streamout */
488 svga_begin_stream_output_queries(svga, svga->current_so->streammask);
489 }
490 }
491
492 /**
493 * Rebind stream output target surfaces
494 */
495 enum pipe_error
svga_rebind_stream_output_targets(struct svga_context * svga)496 svga_rebind_stream_output_targets(struct svga_context *svga)
497 {
498 struct svga_winsys_context *swc = svga->swc;
499 enum pipe_error ret;
500 unsigned i;
501
502 for (i = 0; i < svga->num_so_targets; i++) {
503 ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE);
504 if (ret != PIPE_OK)
505 return ret;
506 }
507
508 return PIPE_OK;
509 }
510
511
512 void
svga_init_stream_output_functions(struct svga_context * svga)513 svga_init_stream_output_functions(struct svga_context *svga)
514 {
515 svga->pipe.create_stream_output_target = svga_create_stream_output_target;
516 svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;
517 svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;
518 }
519
520
521 /**
522 * A helper function to create stream output statistics queries for each stream.
523 * These queries are created as a workaround for DrawTransformFeedbackInstanced or
524 * DrawTransformFeedbackStreamInstanced when auto draw doesn't support
525 * instancing or non-0 stream. In this case, the vertex count will
526 * be retrieved from the stream output statistics query.
527 */
528 void
svga_create_stream_output_queries(struct svga_context * svga)529 svga_create_stream_output_queries(struct svga_context *svga)
530 {
531 unsigned i;
532
533 if (!svga_have_sm5(svga))
534 return;
535
536 for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
537 svga->so_queries[i] = svga->pipe.create_query(&svga->pipe,
538 PIPE_QUERY_SO_STATISTICS, i);
539 assert(svga->so_queries[i] != NULL);
540 }
541 }
542
543
544 /**
545 * Destroy the stream output statistics queries for the draw-auto workaround.
546 */
547 void
svga_destroy_stream_output_queries(struct svga_context * svga)548 svga_destroy_stream_output_queries(struct svga_context *svga)
549 {
550 unsigned i;
551
552 if (!svga_have_sm5(svga))
553 return;
554
555 for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
556 svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]);
557 }
558 }
559
560
561 /**
562 * Start stream output statistics queries for the active streams.
563 */
564 void
svga_begin_stream_output_queries(struct svga_context * svga,unsigned streammask)565 svga_begin_stream_output_queries(struct svga_context *svga,
566 unsigned streammask)
567 {
568 assert(svga_have_sm5(svga));
569 assert(!svga->in_streamout);
570
571 for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
572 bool ret;
573 if (streammask & (1 << i)) {
574 ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]);
575 }
576 (void) ret;
577 }
578 svga->in_streamout = true;
579
580 return;
581 }
582
583
584 /**
585 * Stop stream output statistics queries for the active streams.
586 */
587 void
svga_end_stream_output_queries(struct svga_context * svga,unsigned streammask)588 svga_end_stream_output_queries(struct svga_context *svga,
589 unsigned streammask)
590 {
591 assert(svga_have_sm5(svga));
592
593 if (!svga->in_streamout)
594 return;
595
596 for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
597 bool ret;
598 if (streammask & (1 << i)) {
599 ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]);
600 }
601 (void) ret;
602 }
603 svga->in_streamout = false;
604
605 return;
606 }
607
608
609 /**
610 * Return the primitive count returned from the stream output statistics query
611 * for the specified stream.
612 */
613 unsigned
svga_get_primcount_from_stream_output(struct svga_context * svga,unsigned stream)614 svga_get_primcount_from_stream_output(struct svga_context *svga,
615 unsigned stream)
616 {
617 unsigned primcount = 0;
618 union pipe_query_result result;
619 bool ret;
620
621 if (svga->current_so) {
622 svga_end_stream_output_queries(svga, svga->current_so->streammask);
623 }
624
625 ret = svga->pipe.get_query_result(&svga->pipe,
626 svga->so_queries[stream],
627 true, &result);
628 if (ret)
629 primcount = result.so_statistics.num_primitives_written;
630
631 return primcount;
632 }
633