xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/panfrost/pan_helpers.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "util/u_vbuf.h"
25 #include "pan_context.h"
26 
27 void
panfrost_analyze_sysvals(struct panfrost_compiled_shader * ss)28 panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
29 {
30    unsigned dirty = 0;
31    unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
32 
33    for (unsigned i = 0; i < ss->sysvals.sysval_count; ++i) {
34       switch (PAN_SYSVAL_TYPE(ss->sysvals.sysvals[i])) {
35       case PAN_SYSVAL_VIEWPORT_SCALE:
36       case PAN_SYSVAL_VIEWPORT_OFFSET:
37          dirty |= PAN_DIRTY_VIEWPORT;
38          break;
39 
40       case PAN_SYSVAL_TEXTURE_SIZE:
41          dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
42          break;
43 
44       case PAN_SYSVAL_SSBO:
45          dirty_shader |= PAN_DIRTY_STAGE_SSBO;
46          break;
47 
48       case PAN_SYSVAL_XFB:
49          dirty |= PAN_DIRTY_SO;
50          break;
51 
52       case PAN_SYSVAL_SAMPLER:
53          dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
54          break;
55 
56       case PAN_SYSVAL_IMAGE_SIZE:
57          dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
58          break;
59 
60       case PAN_SYSVAL_NUM_WORK_GROUPS:
61       case PAN_SYSVAL_LOCAL_GROUP_SIZE:
62       case PAN_SYSVAL_WORK_DIM:
63       case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
64       case PAN_SYSVAL_NUM_VERTICES:
65          dirty |= PAN_DIRTY_PARAMS;
66          break;
67 
68       case PAN_SYSVAL_DRAWID:
69          dirty |= PAN_DIRTY_DRAWID;
70          break;
71 
72       case PAN_SYSVAL_SAMPLE_POSITIONS:
73       case PAN_SYSVAL_MULTISAMPLED:
74       case PAN_SYSVAL_RT_CONVERSION:
75          /* Nothing beyond the batch itself */
76          break;
77       default:
78          unreachable("Invalid sysval");
79       }
80    }
81 
82    ss->dirty_3d = dirty;
83    ss->dirty_shader = dirty_shader;
84 }
85 
86 /*
87  * Gets a GPU address for the associated index buffer. Only gauranteed to be
88  * good for the duration of the draw (transient), could last longer. Bounds are
89  * not calculated.
90  */
91 mali_ptr
panfrost_get_index_buffer(struct panfrost_batch * batch,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw)92 panfrost_get_index_buffer(struct panfrost_batch *batch,
93                           const struct pipe_draw_info *info,
94                           const struct pipe_draw_start_count_bias *draw)
95 {
96    struct panfrost_resource *rsrc = pan_resource(info->index.resource);
97    off_t offset = draw->start * info->index_size;
98 
99    if (!info->has_user_indices) {
100       /* Only resources can be directly mapped */
101       panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
102       return rsrc->image.data.base + offset;
103    } else {
104       /* Otherwise, we need to upload to transient memory */
105       const uint8_t *ibuf8 = (const uint8_t *)info->index.user;
106       struct panfrost_ptr T = pan_pool_alloc_aligned(
107          &batch->pool.base, draw->count * info->index_size, info->index_size);
108 
109       memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
110       return T.gpu;
111    }
112 }
113 
114 /* Gets a GPU address for the associated index buffer. Only gauranteed to be
115  * good for the duration of the draw (transient), could last longer. Also get
116  * the bounds on the index buffer for the range accessed by the draw. We do
117  * these operations together because there are natural optimizations which
118  * require them to be together. */
119 
120 mali_ptr
panfrost_get_index_buffer_bounded(struct panfrost_batch * batch,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw,unsigned * min_index,unsigned * max_index)121 panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
122                                   const struct pipe_draw_info *info,
123                                   const struct pipe_draw_start_count_bias *draw,
124                                   unsigned *min_index, unsigned *max_index)
125 {
126    struct panfrost_resource *rsrc = pan_resource(info->index.resource);
127    struct panfrost_context *ctx = batch->ctx;
128    bool needs_indices = true;
129 
130    /* Note: if index_bounds_valid is set but the bounds are wrong, page faults
131     * (at least on Mali-G52) can be triggered an underflow reading varyings.
132     * Providing invalid index bounds in GLES is implementation-defined
133     * behaviour. This should be fine for now but this needs to be revisited when
134     * wiring up robustness later.
135     */
136    if (info->index_bounds_valid) {
137       *min_index = info->min_index;
138       *max_index = info->max_index;
139       needs_indices = false;
140    } else if (!info->has_user_indices) {
141       /* Check the cache */
142       needs_indices = !panfrost_minmax_cache_get(
143          rsrc->index_cache, draw->start, draw->count, min_index, max_index);
144    }
145 
146    if (needs_indices) {
147       /* Fallback */
148       u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
149 
150       if (!info->has_user_indices)
151          panfrost_minmax_cache_add(rsrc->index_cache, draw->start, draw->count,
152                                    *min_index, *max_index);
153    }
154 
155    return panfrost_get_index_buffer(batch, info, draw);
156 }
157 
158 /**
159  * Given an (index, divisor) tuple, assign a vertex buffer. Midgard and
160  * Bifrost put divisor information on the attribute buffer descriptor, so this
161  * is the most we can compact in general. Crucially, this runs at vertex
162  * elements CSO create time, not at draw time.
163  */
164 unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer * buffers,unsigned * nr_bufs,unsigned vbi,unsigned divisor)165 pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
166                          unsigned vbi, unsigned divisor)
167 {
168    /* Look up the buffer */
169    for (unsigned i = 0; i < (*nr_bufs); ++i) {
170       if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
171          return i;
172    }
173 
174    /* Else, create a new buffer */
175    unsigned idx = (*nr_bufs)++;
176 
177    buffers[idx] = (struct pan_vertex_buffer){
178       .vbi = vbi,
179       .divisor = divisor,
180    };
181 
182    return idx;
183 }
184 
185 /*
186  * Helper to add a PIPE_CLEAR_* to batch->draws and batch->resolve together,
187  * meaning that we draw to a given target. Adding to only one mask does not
188  * generally make sense, except for clears which add to batch->clear and
189  * batch->resolve together.
190  */
191 static void
panfrost_draw_target(struct panfrost_batch * batch,unsigned target)192 panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
193 {
194    batch->draws |= target;
195    batch->resolve |= target;
196 }
197 
198 /*
199  * Draw time helper to set batch->{read, draws, resolve} based on current blend
200  * and depth-stencil state. To be called when blend or depth/stencil dirty state
201  * respectively changes.
202  */
203 void
panfrost_set_batch_masks_blend(struct panfrost_batch * batch)204 panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
205 {
206    struct panfrost_context *ctx = batch->ctx;
207    struct panfrost_blend_state *blend = ctx->blend;
208 
209    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
210       if (blend->info[i].enabled && batch->key.cbufs[i])
211          panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
212    }
213 }
214 
215 void
panfrost_set_batch_masks_zs(struct panfrost_batch * batch)216 panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
217 {
218    struct panfrost_context *ctx = batch->ctx;
219    struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil;
220 
221    /* Assume depth is read (TODO: perf) */
222    if (zsa->depth_enabled)
223       batch->read |= PIPE_CLEAR_DEPTH;
224 
225    if (zsa->depth_writemask)
226       panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
227 
228    if (zsa->stencil[0].enabled) {
229       panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
230 
231       /* Assume stencil is read (TODO: perf) */
232       batch->read |= PIPE_CLEAR_STENCIL;
233    }
234 }
235 
236 void
panfrost_track_image_access(struct panfrost_batch * batch,enum pipe_shader_type stage,struct pipe_image_view * image)237 panfrost_track_image_access(struct panfrost_batch *batch,
238                             enum pipe_shader_type stage,
239                             struct pipe_image_view *image)
240 {
241    struct panfrost_resource *rsrc = pan_resource(image->resource);
242 
243    if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
244       panfrost_batch_write_rsrc(batch, rsrc, stage);
245 
246       bool is_buffer = rsrc->base.target == PIPE_BUFFER;
247       unsigned level = is_buffer ? 0 : image->u.tex.level;
248       BITSET_SET(rsrc->valid.data, level);
249 
250       if (is_buffer) {
251          util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
252                         rsrc->base.width0);
253       }
254    } else {
255       panfrost_batch_read_rsrc(batch, rsrc, stage);
256    }
257 }
258