xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/asahi/agx_streamout.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Alyssa Rosenzweig
3  * Copyright 2022 Collabora Ltd.
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "compiler/nir/nir_builder.h"
8 #include "compiler/nir/nir_xfb_info.h"
9 #include "pipe/p_defines.h"
10 #include "util/u_draw.h"
11 #include "util/u_dump.h"
12 #include "util/u_inlines.h"
13 #include "util/u_prim.h"
14 #include "agx_state.h"
15 
16 static struct pipe_stream_output_target *
agx_create_stream_output_target(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned buffer_offset,unsigned buffer_size)17 agx_create_stream_output_target(struct pipe_context *pctx,
18                                 struct pipe_resource *prsc,
19                                 unsigned buffer_offset, unsigned buffer_size)
20 {
21    struct agx_streamout_target *target =
22       rzalloc(pctx, struct agx_streamout_target);
23 
24    if (!target)
25       return NULL;
26 
27    pipe_reference_init(&target->base.reference, 1);
28    pipe_resource_reference(&target->base.buffer, prsc);
29 
30    target->base.context = pctx;
31    target->base.buffer_offset = buffer_offset;
32    target->base.buffer_size = buffer_size;
33 
34    uint32_t zero = 0;
35    target->offset = pipe_buffer_create_with_data(pctx, PIPE_BIND_GLOBAL,
36                                                  PIPE_USAGE_DEFAULT, 4, &zero);
37 
38    return &target->base;
39 }
40 
41 static void
agx_stream_output_target_destroy(struct pipe_context * pctx,struct pipe_stream_output_target * target)42 agx_stream_output_target_destroy(struct pipe_context *pctx,
43                                  struct pipe_stream_output_target *target)
44 {
45    struct agx_streamout_target *tgt = agx_so_target(target);
46 
47    pipe_resource_reference(&tgt->base.buffer, NULL);
48    pipe_resource_reference(&tgt->offset, NULL);
49    ralloc_free(target);
50 }
51 
52 static void
agx_set_stream_output_targets(struct pipe_context * pctx,unsigned num_targets,struct pipe_stream_output_target ** targets,const unsigned * offsets)53 agx_set_stream_output_targets(struct pipe_context *pctx, unsigned num_targets,
54                               struct pipe_stream_output_target **targets,
55                               const unsigned *offsets)
56 {
57    struct agx_context *ctx = agx_context(pctx);
58    struct agx_streamout *so = &ctx->streamout;
59 
60    assert(num_targets <= ARRAY_SIZE(so->targets));
61 
62    for (unsigned i = 0; i < num_targets; i++) {
63       /* From the Gallium documentation:
64        *
65        *    -1 means the buffer should be appended to, and everything else sets
66        *    the internal offset.
67        *
68        * We append regardless, so just check for != -1. Yes, using a negative
69        * sentinel value with an unsigned type is bananas. But it's in the
70        * Gallium contract and it will work out fine. Probably should be
71        * redefined to be ~0 instead of -1 but it doesn't really matter.
72        */
73       if (offsets[i] != -1 && targets[i] != NULL) {
74          pipe_buffer_write(pctx, agx_so_target(targets[i])->offset, 0, 4,
75                            &offsets[i]);
76       }
77 
78       pipe_so_target_reference(&so->targets[i], targets[i]);
79    }
80 
81    for (unsigned i = num_targets; i < so->num_targets; i++)
82       pipe_so_target_reference(&so->targets[i], NULL);
83 
84    so->num_targets = num_targets;
85 }
86 
87 static struct pipe_stream_output_target *
get_target(struct agx_context * ctx,unsigned buffer)88 get_target(struct agx_context *ctx, unsigned buffer)
89 {
90    if (buffer < ctx->streamout.num_targets)
91       return ctx->streamout.targets[buffer];
92    else
93       return NULL;
94 }
95 
96 /*
97  * Return the address of the indexed streamout buffer. This will be
98  * pushed into the streamout shader.
99  */
100 uint64_t
agx_batch_get_so_address(struct agx_batch * batch,unsigned buffer,uint32_t * size)101 agx_batch_get_so_address(struct agx_batch *batch, unsigned buffer,
102                          uint32_t *size)
103 {
104    struct pipe_stream_output_target *target = get_target(batch->ctx, buffer);
105 
106    /* If there's no target, don't write anything */
107    if (!target) {
108       *size = 0;
109       return 0;
110    }
111 
112    /* Otherwise, write the target */
113    struct agx_resource *rsrc = agx_resource(target->buffer);
114    agx_batch_writes_range(batch, rsrc, target->buffer_offset,
115                           target->buffer_size);
116 
117    *size = target->buffer_size;
118    return rsrc->bo->va->addr + target->buffer_offset;
119 }
120 
121 void
agx_draw_vbo_from_xfb(struct pipe_context * pctx,const struct pipe_draw_info * info,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect)122 agx_draw_vbo_from_xfb(struct pipe_context *pctx,
123                       const struct pipe_draw_info *info, unsigned drawid_offset,
124                       const struct pipe_draw_indirect_info *indirect)
125 {
126    perf_debug_ctx(agx_context(pctx), "draw auto");
127 
128    struct agx_streamout_target *so =
129       agx_so_target(indirect->count_from_stream_output);
130 
131    unsigned offset_B = 0;
132    pipe_buffer_read(pctx, so->offset, 0, 4, &offset_B);
133 
134    unsigned count = offset_B / so->stride;
135 
136    struct pipe_draw_start_count_bias draw = {
137       .start = 0,
138       .count = count,
139    };
140 
141    pctx->draw_vbo(pctx, info, drawid_offset, NULL, &draw, 1);
142 }
143 
144 static uint32_t
xfb_prims_for_vertices(enum mesa_prim mode,unsigned verts)145 xfb_prims_for_vertices(enum mesa_prim mode, unsigned verts)
146 {
147    uint32_t prims = u_decomposed_prims_for_vertices(mode, verts);
148 
149    /* The GL spec isn't super clear about this, but it implies that quads are
150     * supposed to be tessellated into primitives and piglit
151     * (ext_transform_feedback-tessellation quads) checks this.
152     */
153    if (u_decomposed_prim(mode) == MESA_PRIM_QUADS)
154       prims *= 2;
155 
156    return prims;
157 }
158 
159 /*
160  * Count generated primitives on the CPU for transform feedback. This only works
161  * in the absence of indirect draws, geometry shaders, or tessellation.
162  */
163 void
agx_primitives_update_direct(struct agx_context * ctx,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw)164 agx_primitives_update_direct(struct agx_context *ctx,
165                              const struct pipe_draw_info *info,
166                              const struct pipe_draw_start_count_bias *draw)
167 {
168    assert(ctx->active_queries && ctx->prims_generated[0] && "precondition");
169    assert(!ctx->stage[PIPE_SHADER_GEOMETRY].shader &&
170           "Geometry shaders use their own counting");
171 
172    agx_query_increment_cpu(ctx, ctx->prims_generated[0],
173                            xfb_prims_for_vertices(info->mode, draw->count));
174 }
175 
176 void
agx_init_streamout_functions(struct pipe_context * ctx)177 agx_init_streamout_functions(struct pipe_context *ctx)
178 {
179    ctx->create_stream_output_target = agx_create_stream_output_target;
180    ctx->stream_output_target_destroy = agx_stream_output_target_destroy;
181    ctx->set_stream_output_targets = agx_set_stream_output_targets;
182 }
183