xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/svga/svga_pipe_streamout.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2014-2024 Broadcom. All Rights Reserved.
3  * The term “Broadcom” refers to Broadcom Inc.
4  * and/or its subsidiaries.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include "util/u_memory.h"
9 #include "util/u_bitmask.h"
10 
11 #include "svga_cmd.h"
12 #include "svga_context.h"
13 #include "svga_resource_buffer.h"
14 #include "svga_shader.h"
15 #include "svga_debug.h"
16 #include "svga_streamout.h"
17 
18 struct svga_stream_output_target {
19    struct pipe_stream_output_target base;
20 };
21 
/**
 * Cast wrapper: view a gallium stream output target as the svga type.
 */
static inline struct svga_stream_output_target *
svga_stream_output_target(struct pipe_stream_output_target *s)
{
   struct svga_stream_output_target *sot =
      (struct svga_stream_output_target *)s;

   return sot;
}
28 
29 
30 /**
31  * A helper function to send different version of the DefineStreamOutput command
32  * depending on if device is SM5 capable or not.
33  */
34 static enum pipe_error
svga_define_stream_output(struct svga_context * svga,SVGA3dStreamOutputId soid,uint32 numOutputStreamEntries,uint32 numOutputStreamStrides,uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],uint32 rasterizedStream,struct svga_stream_output * streamout)35 svga_define_stream_output(struct svga_context *svga,
36        SVGA3dStreamOutputId soid,
37        uint32 numOutputStreamEntries,
38        uint32 numOutputStreamStrides,
39        uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],
40        const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],
41        uint32 rasterizedStream,
42        struct svga_stream_output *streamout)
43 {
44    unsigned i;
45 
46    SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __func__, soid);
47    SVGA_DBG(DEBUG_STREAMOUT,
48             "numOutputStreamEntires=%d\n", numOutputStreamEntries);
49 
50    for (i = 0; i < numOutputStreamEntries; i++) {
51       SVGA_DBG(DEBUG_STREAMOUT,
52                "  %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",
53                i, decls[i].outputSlot, decls[i].registerIndex,
54                decls[i].registerMask, decls[i].stream);
55    }
56 
57    SVGA_DBG(DEBUG_STREAMOUT,
58             "numOutputStreamStrides=%d\n", numOutputStreamStrides);
59    for (i = 0; i < numOutputStreamStrides; i++) {
60       SVGA_DBG(DEBUG_STREAMOUT, "  %d ", streamStrides[i]);
61    }
62    SVGA_DBG(DEBUG_STREAMOUT, "\n");
63 
64    if (svga_have_sm5(svga) &&
65        (numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS ||
66         numOutputStreamStrides > 1)) {
67       unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry)
68          * numOutputStreamEntries;
69       struct svga_winsys_buffer *declBuf;
70       struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
71       void *map;
72 
73       declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED,
74                                           bufSize);
75       if (!declBuf)
76          return PIPE_ERROR;
77       map = sws->buffer_map(sws, declBuf, PIPE_MAP_WRITE);
78       if (!map) {
79          sws->buffer_destroy(sws, declBuf);
80          return PIPE_ERROR;
81       }
82 
83       /* copy decls to buffer */
84       memcpy(map, decls, bufSize);
85 
86       /* unmap buffer */
87       sws->buffer_unmap(sws, declBuf);
88       streamout->declBuf = declBuf;
89 
90       SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput
91                  (svga->swc, soid,
92                   numOutputStreamEntries,
93                   numOutputStreamStrides,
94                   streamStrides,
95                   streamout->declBuf,
96                   rasterizedStream,
97                   bufSize));
98    } else {
99       SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid,
100                                                         numOutputStreamEntries,
101                                                         streamStrides,
102                                                         decls));
103    }
104 
105    return PIPE_OK;
106 }
107 
108 
109 /**
110  * Creates stream output from the stream output info.
111  */
112 struct svga_stream_output *
svga_create_stream_output(struct svga_context * svga,struct svga_shader * shader,const struct pipe_stream_output_info * info)113 svga_create_stream_output(struct svga_context *svga,
114                           struct svga_shader *shader,
115                           const struct pipe_stream_output_info *info)
116 {
117    struct svga_stream_output *streamout;
118    SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
119    unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
120    unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS];
121    unsigned numStreamStrides = 0;
122    unsigned numDecls;
123    unsigned i;
124    enum pipe_error ret;
125    unsigned id;
126    ASSERTED unsigned maxDecls = 0;
127 
128    assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
129 
130    /* Gallium utility creates shaders with stream output.
131     * For non-DX10, just return NULL.
132     */
133    if (!svga_have_vgpu10(svga))
134       return NULL;
135 
136    if (svga_have_sm5(svga))
137       maxDecls = SVGA3D_MAX_STREAMOUT_DECLS;
138    else if (svga_have_vgpu10(svga))
139       maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS;
140 
141    assert(info->num_outputs <= maxDecls);
142 
143    /* Allocate an integer ID for the stream output */
144    id = util_bitmask_add(svga->stream_output_id_bm);
145    if (id == UTIL_BITMASK_INVALID_INDEX) {
146       return NULL;
147    }
148 
149    /* Allocate the streamout data structure */
150    streamout = CALLOC_STRUCT(svga_stream_output);
151 
152    if (!streamout)
153       return NULL;
154 
155    streamout->info = *info;
156    streamout->id = id;
157    streamout->pos_out_index = -1;
158    streamout->streammask = 0;
159 
160    /* Init whole decls and stride arrays to zero to avoid garbage values */
161    memset(decls, 0, sizeof(decls));
162    memset(strides, 0, sizeof(strides));
163    memset(dstOffset, 0, sizeof(dstOffset));
164 
165    SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n",
166             __func__, info->num_outputs);
167 
168    for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) {
169       unsigned reg_idx = info->output[i].register_index;
170       unsigned buf_idx = info->output[i].output_buffer;
171       const enum tgsi_semantic sem_name =
172          shader->tgsi_info.output_semantic_name[reg_idx];
173 
174       assert(buf_idx <= PIPE_MAX_SO_BUFFERS);
175 
176       numStreamStrides = MAX2(numStreamStrides, buf_idx);
177 
178       SVGA_DBG(DEBUG_STREAMOUT,
179                "  %d: register_index=%d output_buffer=%d stream=%d\n",
180                i, reg_idx, buf_idx, info->output[i].stream);
181 
182       SVGA_DBG(DEBUG_STREAMOUT,
183                "     dst_offset=%d start_component=%d num_components=%d\n",
184                info->output[i].dst_offset,
185                info->output[i].start_component,
186                info->output[i].num_components);
187 
188       streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4);
189 
190       /**
191        * Check if the destination offset of the current output
192        * is at the expected offset. If it is greater, then that means
193        * there is a gap in the stream output. We need to insert
194        * extra declaration entries with an invalid register index
195        * to specify a gap.
196        */
197       while (info->output[i].dst_offset > dstOffset[buf_idx]) {
198 
199          unsigned numComponents = info->output[i].dst_offset -
200                                   dstOffset[buf_idx];;
201 
202          assert(svga_have_sm5(svga));
203 
204          /* We can only specify at most 4 components to skip in each
205           * declaration entry.
206           */
207          numComponents = numComponents > 4 ? 4 : numComponents;
208 
209          decls[numDecls].outputSlot = buf_idx,
210          decls[numDecls].stream = info->output[i].stream;
211          decls[numDecls].registerIndex = SVGA3D_INVALID_ID;
212          decls[numDecls].registerMask = (1 << numComponents) - 1;
213 
214          dstOffset[buf_idx] += numComponents;
215          numDecls++;
216       }
217 
218       if (sem_name == TGSI_SEMANTIC_POSITION) {
219          /**
220           * Check if streaming out POSITION. If so, replace the
221           * register index with the index for NON_ADJUSTED POSITION.
222           */
223          decls[numDecls].registerIndex = shader->tgsi_info.num_outputs;
224 
225          /* Save this output index, so we can tell later if this stream output
226           * includes an output of a vertex position
227           */
228          streamout->pos_out_index = numDecls;
229       }
230       else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
231          /**
232           * Use the shadow copy for clip distance because
233           * CLIPDIST instruction is only emitted for enabled clip planes.
234           * It's valid to write to ClipDistance variable for non-enabled
235           * clip planes.
236           */
237          decls[numDecls].registerIndex =
238             shader->tgsi_info.num_outputs + 1 +
239             shader->tgsi_info.output_semantic_index[reg_idx];
240       }
241       else {
242          decls[numDecls].registerIndex = reg_idx;
243       }
244 
245       decls[numDecls].outputSlot = buf_idx;
246       decls[numDecls].registerMask =
247          ((1 << info->output[i].num_components) - 1)
248             << info->output[i].start_component;
249 
250       decls[numDecls].stream = info->output[i].stream;
251       assert(decls[numDecls].stream == 0 || svga_have_sm5(svga));
252 
253       /* Set the bit in streammask for the enabled stream */
254       streamout->streammask |= 1 << info->output[i].stream;
255 
256       /* Update the expected offset for the next output */
257       dstOffset[buf_idx] += info->output[i].num_components;
258 
259       strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
260    }
261 
262    assert(numDecls <= maxDecls);
263 
264    /* Send the DefineStreamOutput command.
265     * Note, rasterizedStream is always 0.
266     */
267    ret = svga_define_stream_output(svga, id,
268                                    numDecls, numStreamStrides+1,
269                                    strides, decls, 0, streamout);
270 
271    if (ret != PIPE_OK) {
272       util_bitmask_clear(svga->stream_output_id_bm, id);
273       FREE(streamout);
274       streamout = NULL;
275    }
276    return streamout;
277 }
278 
279 
280 enum pipe_error
svga_set_stream_output(struct svga_context * svga,struct svga_stream_output * streamout)281 svga_set_stream_output(struct svga_context *svga,
282                        struct svga_stream_output *streamout)
283 {
284    unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;
285 
286    if (!svga_have_vgpu10(svga)) {
287       return PIPE_OK;
288    }
289 
290    SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __func__,
291             streamout, id);
292 
293    if (svga->current_so != streamout) {
294 
295       /* Before unbinding the current stream output, stop the stream output
296        * statistics queries for the active streams.
297        */
298       if (svga_have_sm5(svga) && svga->current_so) {
299          svga->vcount_buffer_stream = svga->current_so->buffer_stream;
300          svga_end_stream_output_queries(svga, svga->current_so->streammask);
301       }
302 
303       enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
304       if (ret != PIPE_OK) {
305          return ret;
306       }
307 
308       svga->current_so = streamout;
309 
310       /* After binding the new stream output, start the stream output
311        * statistics queries for the active streams.
312        */
313       if (svga_have_sm5(svga) && svga->current_so) {
314          svga_begin_stream_output_queries(svga, svga->current_so->streammask);
315       }
316    }
317 
318    return PIPE_OK;
319 }
320 
321 void
svga_delete_stream_output(struct svga_context * svga,struct svga_stream_output * streamout)322 svga_delete_stream_output(struct svga_context *svga,
323                           struct svga_stream_output *streamout)
324 {
325    struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
326 
327    SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __func__, streamout);
328 
329    assert(svga_have_vgpu10(svga));
330    assert(streamout != NULL);
331 
332    SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc,
333                                                       streamout->id));
334 
335    if (svga_have_sm5(svga) && streamout->declBuf) {
336       sws->buffer_destroy(sws, streamout->declBuf);
337    }
338 
339    /* Before deleting the current streamout, make sure to stop any pending
340     * SO queries.
341     */
342    if (svga->current_so == streamout) {
343       if (svga->in_streamout)
344          svga_end_stream_output_queries(svga, svga->current_so->streammask);
345       svga->current_so = NULL;
346    }
347 
348    /* Release the ID */
349    util_bitmask_clear(svga->stream_output_id_bm, streamout->id);
350 
351    /* Free streamout structure */
352    FREE(streamout);
353 }
354 
355 
356 static struct pipe_stream_output_target *
svga_create_stream_output_target(struct pipe_context * pipe,struct pipe_resource * buffer,unsigned buffer_offset,unsigned buffer_size)357 svga_create_stream_output_target(struct pipe_context *pipe,
358                                  struct pipe_resource *buffer,
359                                  unsigned buffer_offset,
360                                  unsigned buffer_size)
361 {
362    struct svga_context *svga = svga_context(pipe);
363    struct svga_stream_output_target *sot;
364 
365    SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __func__,
366             buffer_offset, buffer_size);
367 
368    assert(svga_have_vgpu10(svga));
369    (void) svga;
370 
371    sot = CALLOC_STRUCT(svga_stream_output_target);
372    if (!sot)
373       return NULL;
374 
375    pipe_reference_init(&sot->base.reference, 1);
376    pipe_resource_reference(&sot->base.buffer, buffer);
377    sot->base.context = pipe;
378    sot->base.buffer = buffer;
379    sot->base.buffer_offset = buffer_offset;
380    sot->base.buffer_size = buffer_size;
381 
382    return &sot->base;
383 }
384 
385 static void
svga_destroy_stream_output_target(struct pipe_context * pipe,struct pipe_stream_output_target * target)386 svga_destroy_stream_output_target(struct pipe_context *pipe,
387                                   struct pipe_stream_output_target *target)
388 {
389    struct svga_stream_output_target *sot = svga_stream_output_target(target);
390 
391    SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __func__);
392 
393    pipe_resource_reference(&sot->base.buffer, NULL);
394    FREE(sot);
395 }
396 
397 static void
svga_set_stream_output_targets(struct pipe_context * pipe,unsigned num_targets,struct pipe_stream_output_target ** targets,const unsigned * offsets)398 svga_set_stream_output_targets(struct pipe_context *pipe,
399                                unsigned num_targets,
400                                struct pipe_stream_output_target **targets,
401                                const unsigned *offsets)
402 {
403    struct svga_context *svga = svga_context(pipe);
404    struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
405    unsigned i;
406    unsigned num_so_targets;
407    bool begin_so_queries = num_targets > 0;
408 
409    SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __func__,
410             num_targets);
411 
412    assert(svga_have_vgpu10(svga));
413 
414    /* Mark the streamout buffers as dirty so that we'll issue readbacks
415     * before mapping.
416     */
417    for (i = 0; i < svga->num_so_targets; i++) {
418       struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
419       sbuf->dirty = true;
420    }
421 
422    /* Before the currently bound streamout targets are unbound,
423     * save them in case they need to be referenced to retrieve the
424     * number of vertices being streamed out.
425     */
426    for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {
427       svga->vcount_so_targets[i] = svga->so_targets[i];
428    }
429 
430    assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
431 
432    for (i = 0; i < num_targets; i++) {
433       struct svga_stream_output_target *sot
434          = svga_stream_output_target(targets[i]);
435       struct svga_buffer *sbuf = svga_buffer(sot->base.buffer);
436       unsigned size;
437 
438       svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer,
439                                                 PIPE_BIND_STREAM_OUTPUT);
440 
441       assert(svga_buffer(sot->base.buffer)->key.flags
442              & SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
443 
444       /* Mark the buffer surface as RENDERED */
445       assert(sbuf->bufsurf);
446       sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED;
447 
448       svga->so_targets[i] = &sot->base;
449       if (offsets[i] == -1) {
450          soBindings[i].offset = -1;
451 
452          /* The streamout is being resumed. There is no need to restart streamout statistics
453           * queries for the draw-auto fallback since those queries are still active.
454           */
455          begin_so_queries = false;
456       }
457       else
458          soBindings[i].offset = sot->base.buffer_offset + offsets[i];
459 
460       /* The size cannot extend beyond the end of the buffer.  Clamp it. */
461       size = MIN2(sot->base.buffer_size,
462                   sot->base.buffer->width0 - sot->base.buffer_offset);
463 
464       soBindings[i].sizeInBytes = size;
465    }
466 
467    /* unbind any previously bound stream output buffers */
468    for (; i < svga->num_so_targets; i++) {
469       svga->so_surfaces[i] = NULL;
470       svga->so_targets[i] = NULL;
471    }
472 
473    num_so_targets = MAX2(svga->num_so_targets, num_targets);
474    SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
475                                                soBindings, svga->so_surfaces));
476    svga->num_so_targets = num_targets;
477 
478    if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) {
479 
480       /* If there are already active queries and we need to start a new streamout,
481        * we need to stop the current active queries first.
482        */
483       if (svga->in_streamout) {
484          svga_end_stream_output_queries(svga, svga->current_so->streammask);
485       }
486 
487       /* Start stream out statistics queries for the new streamout */
488       svga_begin_stream_output_queries(svga, svga->current_so->streammask);
489    }
490 }
491 
492 /**
493  * Rebind stream output target surfaces
494  */
495 enum pipe_error
svga_rebind_stream_output_targets(struct svga_context * svga)496 svga_rebind_stream_output_targets(struct svga_context *svga)
497 {
498    struct svga_winsys_context *swc = svga->swc;
499    enum pipe_error ret;
500    unsigned i;
501 
502    for (i = 0; i < svga->num_so_targets; i++) {
503       ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE);
504       if (ret != PIPE_OK)
505          return ret;
506    }
507 
508    return PIPE_OK;
509 }
510 
511 
512 void
svga_init_stream_output_functions(struct svga_context * svga)513 svga_init_stream_output_functions(struct svga_context *svga)
514 {
515    svga->pipe.create_stream_output_target = svga_create_stream_output_target;
516    svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;
517    svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;
518 }
519 
520 
521 /**
522  * A helper function to create stream output statistics queries for each stream.
523  * These queries are created as a workaround for DrawTransformFeedbackInstanced or
524  * DrawTransformFeedbackStreamInstanced when auto draw doesn't support
525  * instancing or non-0 stream. In this case, the vertex count will
526  * be retrieved from the stream output statistics query.
527  */
528 void
svga_create_stream_output_queries(struct svga_context * svga)529 svga_create_stream_output_queries(struct svga_context *svga)
530 {
531    unsigned i;
532 
533    if (!svga_have_sm5(svga))
534       return;
535 
536    for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
537       svga->so_queries[i] = svga->pipe.create_query(&svga->pipe,
538                                PIPE_QUERY_SO_STATISTICS, i);
539       assert(svga->so_queries[i] != NULL);
540    }
541 }
542 
543 
544 /**
545  * Destroy the stream output statistics queries for the draw-auto workaround.
546  */
547 void
svga_destroy_stream_output_queries(struct svga_context * svga)548 svga_destroy_stream_output_queries(struct svga_context *svga)
549 {
550    unsigned i;
551 
552    if (!svga_have_sm5(svga))
553       return;
554 
555    for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
556       svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]);
557    }
558 }
559 
560 
561 /**
562  * Start stream output statistics queries for the active streams.
563  */
564 void
svga_begin_stream_output_queries(struct svga_context * svga,unsigned streammask)565 svga_begin_stream_output_queries(struct svga_context *svga,
566                                  unsigned streammask)
567 {
568    assert(svga_have_sm5(svga));
569    assert(!svga->in_streamout);
570 
571    for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
572       bool ret;
573       if (streammask & (1 << i)) {
574          ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]);
575       }
576       (void) ret;
577    }
578    svga->in_streamout = true;
579 
580    return;
581 }
582 
583 
584 /**
585  * Stop stream output statistics queries for the active streams.
586  */
587 void
svga_end_stream_output_queries(struct svga_context * svga,unsigned streammask)588 svga_end_stream_output_queries(struct svga_context *svga,
589                                unsigned streammask)
590 {
591    assert(svga_have_sm5(svga));
592 
593    if (!svga->in_streamout)
594       return;
595 
596    for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
597       bool ret;
598       if (streammask & (1 << i)) {
599          ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]);
600       }
601       (void) ret;
602    }
603    svga->in_streamout = false;
604 
605    return;
606 }
607 
608 
609 /**
610  * Return the primitive count returned from the stream output statistics query
611  * for the specified stream.
612  */
613 unsigned
svga_get_primcount_from_stream_output(struct svga_context * svga,unsigned stream)614 svga_get_primcount_from_stream_output(struct svga_context *svga,
615                                       unsigned stream)
616 {
617    unsigned primcount = 0;
618    union pipe_query_result result;
619    bool ret;
620 
621    if (svga->current_so) {
622       svga_end_stream_output_queries(svga, svga->current_so->streammask);
623    }
624 
625    ret = svga->pipe.get_query_result(&svga->pipe,
626                                      svga->so_queries[stream],
627                                      true, &result);
628    if (ret)
629       primcount = result.so_statistics.num_primitives_written;
630 
631    return primcount;
632 }
633