/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/u_vbuf.h"
#include "pan_context.h"

/*
 * Scan the sysvals used by a compiled shader and compute which dirty state
 * they depend on, split into global 3D state (dirty_3d) and per-stage state
 * (dirty_shader).
 */
void
panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
{
   unsigned dirty = 0;
   unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;

   for (unsigned i = 0; i < ss->sysvals.sysval_count; ++i) {
      switch (PAN_SYSVAL_TYPE(ss->sysvals.sysvals[i])) {
      case PAN_SYSVAL_VIEWPORT_SCALE:
      case PAN_SYSVAL_VIEWPORT_OFFSET:
         dirty |= PAN_DIRTY_VIEWPORT;
         break;

      case PAN_SYSVAL_TEXTURE_SIZE:
         dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
         break;

      case PAN_SYSVAL_SSBO:
         dirty_shader |= PAN_DIRTY_STAGE_SSBO;
         break;

      case PAN_SYSVAL_XFB:
         dirty |= PAN_DIRTY_SO;
         break;

      case PAN_SYSVAL_SAMPLER:
         dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
         break;

      case PAN_SYSVAL_IMAGE_SIZE:
         dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
         break;

      case PAN_SYSVAL_NUM_WORK_GROUPS:
      case PAN_SYSVAL_LOCAL_GROUP_SIZE:
      case PAN_SYSVAL_WORK_DIM:
      case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
      case PAN_SYSVAL_NUM_VERTICES:
         dirty |= PAN_DIRTY_PARAMS;
         break;

      case PAN_SYSVAL_DRAWID:
         dirty |= PAN_DIRTY_DRAWID;
         break;

      case PAN_SYSVAL_SAMPLE_POSITIONS:
      case PAN_SYSVAL_MULTISAMPLED:
      case PAN_SYSVAL_RT_CONVERSION:
         /* Nothing beyond the batch itself */
         break;

      default:
         unreachable("Invalid sysval");
      }
   }

   ss->dirty_3d = dirty;
   ss->dirty_shader = dirty_shader;
}
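
/*
 * Illustrative sketch only: how the masks computed above might be consumed
 * at draw time. The ctx->dirty / ctx->dirty_shader fields and the
 * upload_sysvals() helper named here are assumptions for illustration, not
 * definitions from this file.
 *
 *    if ((ctx->dirty & ss->dirty_3d) ||
 *        (ctx->dirty_shader[stage] & ss->dirty_shader))
 *       upload_sysvals(batch, ss, stage);   // hypothetical re-upload
 */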

/*
 * Gets a GPU address for the associated index buffer. Only guaranteed to be
 * good for the duration of the draw (transient), although it could last
 * longer. Bounds are not calculated.
 */
mali_ptr
panfrost_get_index_buffer(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          const struct pipe_draw_start_count_bias *draw)
{
   struct panfrost_resource *rsrc = pan_resource(info->index.resource);
   off_t offset = draw->start * info->index_size;

   if (!info->has_user_indices) {
      /* Only resources can be directly mapped */
      panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
      return rsrc->image.data.base + offset;
   } else {
      /* Otherwise, we need to upload to transient memory */
      const uint8_t *ibuf8 = (const uint8_t *)info->index.user;
      struct panfrost_ptr T = pan_pool_alloc_aligned(
         &batch->pool.base, draw->count * info->index_size, info->index_size);

      memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
      return T.gpu;
   }
}
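
/*
 * Usage sketch (illustration only; emit_indexed_draw() is hypothetical).
 * Because the returned address is transient, it should be consumed while
 * emitting the current draw and never cached across draws.
 *
 *    if (info->index_size) {
 *       mali_ptr indices = panfrost_get_index_buffer(batch, info, draw);
 *       emit_indexed_draw(batch, indices, draw->count);
 *    }
 */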

/*
 * Gets a GPU address for the associated index buffer. Only guaranteed to be
 * good for the duration of the draw (transient), although it could last
 * longer. Also computes the bounds of the index buffer for the range accessed
 * by the draw. We do these operations together because there are natural
 * optimizations which require them to be together.
 */
mali_ptr
panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
                                  const struct pipe_draw_info *info,
                                  const struct pipe_draw_start_count_bias *draw,
                                  unsigned *min_index, unsigned *max_index)
{
   struct panfrost_resource *rsrc = pan_resource(info->index.resource);
   struct panfrost_context *ctx = batch->ctx;
   bool needs_indices = true;

   /* Note: if index_bounds_valid is set but the bounds are wrong, page faults
    * (at least on Mali-G52) can be triggered by an underflow when reading
    * varyings. Providing invalid index bounds in GLES is
    * implementation-defined behaviour. This should be fine for now, but it
    * needs to be revisited when wiring up robustness later.
    */
   if (info->index_bounds_valid) {
      *min_index = info->min_index;
      *max_index = info->max_index;
      needs_indices = false;
   } else if (!info->has_user_indices) {
      /* Check the cache */
      needs_indices = !panfrost_minmax_cache_get(
         rsrc->index_cache, draw->start, draw->count, min_index, max_index);
   }

   if (needs_indices) {
      /* Fallback */
      u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);

      if (!info->has_user_indices)
         panfrost_minmax_cache_add(rsrc->index_cache, draw->start, draw->count,
                                   *min_index, *max_index);
   }

   return panfrost_get_index_buffer(batch, info, draw);
}
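
/*
 * Usage sketch (illustration only; emit_draw() is hypothetical). The bounds
 * written to min_index/max_index describe the vertex range referenced by the
 * draw, which lets the caller size vertex buffers accordingly.
 *
 *    unsigned min_index = 0, max_index = 0;
 *    mali_ptr indices = panfrost_get_index_buffer_bounded(
 *       batch, info, draw, &min_index, &max_index);
 *    emit_draw(batch, indices, min_index, max_index);
 */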

/**
 * Given an (index, divisor) tuple, assign a vertex buffer. Midgard and
 * Bifrost put divisor information on the attribute buffer descriptor, so this
 * is the most we can compact in general. Crucially, this runs at vertex
 * elements CSO create time, not at draw time.
 */
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
                         unsigned vbi, unsigned divisor)
{
   /* Look up the buffer */
   for (unsigned i = 0; i < (*nr_bufs); ++i) {
      if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
         return i;
   }

   /* Else, create a new buffer */
   unsigned idx = (*nr_bufs)++;

   buffers[idx] = (struct pan_vertex_buffer){
      .vbi = vbi,
      .divisor = divisor,
   };

   return idx;
}
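
/*
 * Behavioural sketch of the deduplication above (illustration only):
 * repeated (vbi, divisor) tuples map to the same slot, so nr_bufs counts
 * unique tuples rather than vertex elements.
 *
 *    struct pan_vertex_buffer bufs[PIPE_MAX_ATTRIBS] = {0};
 *    unsigned nr = 0;
 *
 *    unsigned a = pan_assign_vertex_buffer(bufs, &nr, 0, 0);
 *    unsigned b = pan_assign_vertex_buffer(bufs, &nr, 0, 0); // same tuple
 *    unsigned c = pan_assign_vertex_buffer(bufs, &nr, 0, 4); // new divisor
 *
 *    assert(a == b && a != c && nr == 2);
 */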

/*
 * Helper to add a PIPE_CLEAR_* bit to batch->draws and batch->resolve
 * together, meaning that we draw to a given target. Adding to only one mask
 * does not generally make sense, except for clears which add to batch->clear
 * and batch->resolve together.
 */
static void
panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
{
   batch->draws |= target;
   batch->resolve |= target;
}

/*
 * Draw-time helpers to set batch->{read, draws, resolve} based on the current
 * blend and depth-stencil state, to be called when the blend or depth/stencil
 * state respectively is dirtied.
 */
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct panfrost_blend_state *blend = ctx->blend;

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      if (blend->info[i].enabled && batch->key.cbufs[i])
         panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
   }
}

void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil;

   /* Assume depth is read (TODO: perf) */
   if (zsa->depth_enabled)
      batch->read |= PIPE_CLEAR_DEPTH;

   if (zsa->depth_writemask)
      panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);

   if (zsa->stencil[0].enabled) {
      panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);

      /* Assume stencil is read (TODO: perf) */
      batch->read |= PIPE_CLEAR_STENCIL;
   }
}
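
/*
 * Worked example of the logic above: with the depth test enabled but depth
 * writes masked off and stencil disabled, only a read is recorded, so the
 * depth buffer is loaded for the batch but never stored back:
 *
 *    zsa->depth_enabled = true, zsa->depth_writemask = false
 *       => batch->read |= PIPE_CLEAR_DEPTH
 *          batch->draws and batch->resolve are left untouched
 */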

void
panfrost_track_image_access(struct panfrost_batch *batch,
                            enum pipe_shader_type stage,
                            struct pipe_image_view *image)
{
   struct panfrost_resource *rsrc = pan_resource(image->resource);

   if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
      panfrost_batch_write_rsrc(batch, rsrc, stage);

      /* The written level becomes valid; for buffers, the whole range does */
      bool is_buffer = rsrc->base.target == PIPE_BUFFER;
      unsigned level = is_buffer ? 0 : image->u.tex.level;
      BITSET_SET(rsrc->valid.data, level);

      if (is_buffer) {
         util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
                        rsrc->base.width0);
      }
   } else {
      panfrost_batch_read_rsrc(batch, rsrc, stage);
   }
}
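
/*
 * Illustrative call (hypothetical view, for exposition): a writable buffer
 * image marks the whole buffer valid and records a write dependency, while
 * a read-only image only records a read.
 *
 *    struct pipe_image_view view = {
 *       .resource = buffer_resource,              // hypothetical resource
 *       .shader_access = PIPE_IMAGE_ACCESS_WRITE,
 *    };
 *    panfrost_track_image_access(batch, PIPE_SHADER_COMPUTE, &view);
 */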