/*
 * Copyright © 2014 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "ir3/ir3_nir.h"

/* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  And the logic is identical between
 * ir3 generations; the only difference is small details in the actual
 * CP_LOAD_STATE packets (which are handled inside the generation
 * specific ctx->emit_const(_bo)() fxns)
 *
 * This file should be included in only a single .c file per gen, which
 * defines the following functions:
 */

static bool is_stateobj(struct fd_ringbuffer *ring);

static void emit_const_user(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v, uint32_t regid,
                            uint32_t size, const uint32_t *user_buffer);

static void emit_const_bo(struct fd_ringbuffer *ring,
                          const struct ir3_shader_variant *v, uint32_t regid,
                          uint32_t offset, uint32_t size, struct fd_bo *bo);

static void
emit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t regid, uint32_t offset, uint32_t size,
                struct pipe_resource *buffer)
{
   struct fd_resource *rsc = fd_resource(buffer);
   emit_const_bo(ring, v, regid, offset, size, rsc->bo);
}

static void emit_const_ptrs(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v,
                            uint32_t dst_offset, uint32_t num,
                            struct fd_bo **bos, uint32_t *offsets);
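
/* Purely as an illustrative sketch (not code taken from any particular gen):
 * a generation's .c file is expected to provide the hooks declared above and
 * then include this header, roughly along these lines:
 *
 *    static bool
 *    is_stateobj(struct fd_ringbuffer *ring)
 *    {
 *       ... gen-specific check ...
 *    }
 *
 *    static void
 *    emit_const_user(struct fd_ringbuffer *ring,
 *                    const struct ir3_shader_variant *v, uint32_t regid,
 *                    uint32_t size, const uint32_t *user_buffer)
 *    {
 *       ... build the gen's CP_LOAD_STATE packet ...
 *    }
 *
 *    ... likewise emit_const_bo() and emit_const_ptrs() ...
 *
 *    #include "ir3_const.h"   // ie. this header (assumed name)
 */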

static void
emit_const_asserts(struct fd_ringbuffer *ring,
                   const struct ir3_shader_variant *v, uint32_t regid,
                   uint32_t sizedwords)
{
   assert((regid % 4) == 0);
   assert((sizedwords % 4) == 0);
   assert(regid + sizedwords <= v->constlen * 4);
}

static void
ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   /* when we emit const state via ring (IB2) we need a WFI, but when
    * it is emit'd via stateobj, we don't
    */
   if (is_stateobj(ring))
      return;

   fd_wfi(batch, ring);
}

/**
 * Calculates the size of cmdstream needed for ir3_emit_user_consts(),
 * returning (through the out parameters) the number of packets and the
 * total payload size in dwords.
 *
 * The value can be a worst-case, ie. some shader variants may not read all
 * consts, etc.
 */
static inline void
ir3_user_consts_size(const struct ir3_ubo_analysis_state *state,
                     unsigned *packets, unsigned *size)
{
   *packets = *size = 0;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
      if (state->range[i].start < state->range[i].end) {
         *size += state->range[i].end - state->range[i].start;
         (*packets)++;
      }
   }
}

/**
 * Uploads the referenced subranges of the nir constant_data to the hardware's
 * constant buffer.
 */
static inline void
ir3_emit_constant_data(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      unsigned ubo = state->range[i].ubo.block;
      if (ubo != const_state->consts_ubo.idx)
         continue;

      uint32_t size = state->range[i].end - state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      emit_const_bo(ring, v, state->range[i].offset / 4,
                    v->info.constant_data_offset + state->range[i].start,
                    size / 4, v->bo);
   }
}

/**
 * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
 * outside of these ranges will be done using full UBO accesses in the
 * shader).
 */
static inline void
ir3_emit_user_consts(const struct ir3_shader_variant *v,
                     struct fd_ringbuffer *ring,
                     struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      assert(!state->range[i].ubo.bindless);
      unsigned ubo = state->range[i].ubo.block;
      if (!(constbuf->enabled_mask & (1 << ubo)) ||
          ubo == const_state->consts_ubo.idx) {
         continue;
      }
      struct pipe_constant_buffer *cb = &constbuf->cb[ubo];

      uint32_t size = state->range[i].end - state->range[i].start;
      uint32_t offset = cb->buffer_offset + state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      /* things should be aligned to vec4: */
      assert((state->range[i].offset % 16) == 0);
      assert((size % 16) == 0);
      assert((offset % 16) == 0);

      if (cb->user_buffer) {
         uint8_t *p = (uint8_t *)cb->user_buffer;
         p += state->range[i].start;
         emit_const_user(ring, v, state->range[i].offset / 4, size / 4,
                         (uint32_t *)p);
      } else {
         emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
                         cb->buffer);
      }
   }
}

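/* Emit the table of UBO base-address pointers into const registers, for
 * pre-a6xx gens which address UBOs via pointers in const state (ldg).
 * a6xx+ returns early here, since it uses UBO descriptors and ldc instead.
 * User-pointer constbufs (constbuf 0, aka GL uniforms) are uploaded to a
 * buffer first, so there is a GPU address to point at.
 */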
static inline void
ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.ubo;

   /* a6xx+ uses UBO state and ldc instead of pointers emitted in
    * const state and ldg:
    */
   if (ctx->screen->gen >= 6)
      return;

   if (v->constlen > offset) {
      uint32_t params = const_state->num_ubos;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         if (i == const_state->consts_ubo.idx) {
            bos[i] = v->bo;
            offsets[i] = v->info.constant_data_offset;
            continue;
         }

         struct pipe_constant_buffer *cb = &constbuf->cb[i];

         /* If we have user pointers (constbuf 0, aka GL uniforms), upload
          * them to a buffer now, and save it in the constbuf so that we
          * don't have to reupload until they get changed.
          */
         if (cb->user_buffer) {
            struct pipe_context *pctx = &ctx->base;
            u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                          cb->user_buffer, &cb->buffer_offset, &cb->buffer);
            cb->user_buffer = NULL;
         }

         if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
            offsets[i] = cb->buffer_offset;
            bos[i] = fd_resource(cb->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

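/* Emit the per-image "dims" metadata consts used by image intrinsics:
 * bytes-per-pixel, row pitch, and layer/slice size for textures, or
 * bytes-per-pixel and log2(bpp) for buffer-backed images.
 */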
static inline void
ir3_emit_image_dims(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring,
                    struct fd_shaderimg_stateobj *si)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.image_dims;
   if (v->constlen > offset) {
      uint32_t dims[align(const_state->image_dims.count, 4)];
      unsigned mask = const_state->image_dims.mask;

      while (mask) {
         struct pipe_image_view *img;
         struct fd_resource *rsc;
         unsigned index = u_bit_scan(&mask);
         unsigned off = const_state->image_dims.off[index];

         img = &si->si[index];
         rsc = fd_resource(img->resource);

         dims[off + 0] = util_format_get_blocksize(img->format);
         if (img->resource->target != PIPE_BUFFER) {
            struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
            /* note for 2d/cube/etc images, even if re-interpreted
             * as a different color format, the pixel size should
             * be the same, so use original dimensions for y and z
             * stride:
             */
            dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
            /* see corresponding logic in fd_resource_offset(): */
            if (rsc->layout.layer_first) {
               dims[off + 2] = rsc->layout.layer_size;
            } else {
               dims[off + 2] = slice->size0;
            }
         } else {
            /* For buffer-backed images, the log2 of the format's
             * bytes-per-pixel is placed in the 2nd slot.  This is useful
             * when emitting image_size instructions, for which we need
             * to divide by bpp for image buffers.  Since the bpp
             * can only be power-of-two, the division is implemented
             * as a SHR, and for that it is handy to have the log2 of
             * bpp as a constant.  (log2 = first-set-bit - 1)
             */
            dims[off + 1] = ffs(dims[off + 0]) - 1;
         }
      }
      uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);

      emit_const_user(ring, v, offset * 4, size, dims);
   }
}

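/* Upload the shader's immediate constants (and the NIR constant_data, which
 * has the same lifetime).
 */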
static inline void
ir3_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t base = const_state->offsets.immediate;
   int size = DIV_ROUND_UP(const_state->immediates_count, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, v->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, v, base, size, const_state->immediates);

   /* NIR constant data has the same lifetime as immediates, so upload it
    * now, too.
    */
   ir3_emit_constant_data(v, ring);
}

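/* Upload the producer stage's output_loc[] table (the primitive map) into the
 * consumer's const space, so the consumer can locate the producer's
 * per-vertex outputs.
 */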
static inline void
ir3_emit_link_map(const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *consumer,
                  struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(consumer);
   uint32_t base = const_state->offsets.primitive_map;
   int size = DIV_ROUND_UP(consumer->input_size, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, consumer->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, consumer, base, size, producer->output_loc);
}

/* emit stream-out buffers: */
static inline void
emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
           struct fd_ringbuffer *ring)
{
   /* streamout addresses after driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.tfbo;
   if (v->constlen > offset) {
      struct fd_streamout_stateobj *so = &ctx->streamout;
      const struct ir3_stream_output_info *info = &v->stream_output;
      uint32_t params = 4;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         struct pipe_stream_output_target *target = so->targets[i];

         if (target) {
            offsets[i] =
               (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
            bos[i] = fd_resource(target->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

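/* Emit the const state that is common to all shader stages: UBO sub-ranges
 * (user consts), UBO pointers (pre-a6xx), immediates and image dims,
 * according to what is dirty.
 */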
static inline void
emit_common_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   enum pipe_shader_type t) assert_dt
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];

   /* When we use CP_SET_DRAW_STATE objects to emit constant state,
    * if we emit any of it we need to emit all.  This is because
    * we are using the same state-group-id each time for uniform
    * state, and if previous update is never evaluated (due to no
    * visible primitives in the current tile) then the new stateobj
    * completely replaces the old one.
    *
    * Possibly if we split up different parts of the const state to
    * different state-objects we could avoid this.
    */
   if (dirty && is_stateobj(ring))
      dirty = (enum fd_dirty_shader_state)~0;

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
      struct fd_constbuf_stateobj *constbuf;
      bool shader_dirty;

      constbuf = &ctx->constbuf[t];
      shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);

      ring_wfi(ctx->batch, ring);

      ir3_emit_user_consts(v, ring, constbuf);
      ir3_emit_ubos(ctx, v, ring, constbuf);
      if (shader_dirty)
         ir3_emit_immediates(v, ring);
   }

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
      struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
      ring_wfi(ctx->batch, ring);
      ir3_emit_image_dims(ctx->screen, v, ring, si);
   }
}

/* emit kernel params */
static inline void
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
   assert_dt
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.kernel_params;
   if (v->constlen > offset) {
      ring_wfi(ctx->batch, ring);
      emit_const_user(ring, v, offset * 4,
                      align(v->cs.req_input_mem, 4),
                      (uint32_t *)info->input);
   }
}

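/* Emit the vertex-shader driver-params: draw id, vertex/instance base, max
 * stream-out vertex count and user clip planes.  For indirect draws that need
 * VTXID_BASE, the params are staged in a temporary bo and the base vertex
 * (or start) is copied from the indirect-draw buffer with a mem_to_mem before
 * being sourced as const state.
 */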
static inline void
ir3_emit_driver_params(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring, struct fd_context *ctx,
                       const struct pipe_draw_info *info,
                       const struct pipe_draw_indirect_info *indirect,
                       const struct pipe_draw_start_count_bias *draw,
                       const uint32_t draw_id) assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   uint32_t vertex_params[IR3_DP_VS_COUNT] = {
      [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
      [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
      [IR3_DP_INSTID_BASE] = info->start_instance,
      [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
      [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0,
   };
   if (v->key.ucp_enables) {
      struct pipe_clip_state *ucp = &ctx->ucp;
      unsigned pos = IR3_DP_UCP0_X;
      for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
         for (unsigned j = 0; j < 4; j++) {
            vertex_params[pos] = fui(ucp->ucp[i][j]);
            pos++;
         }
      }
   }

   /* Only emit as many params as needed, i.e. up to the highest enabled UCP
    * plane.  However a binning pass may drop even some of these, so limit to
    * program max.
    */
   const uint32_t vertex_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(vertex_params_size <= IR3_DP_VS_COUNT);

   bool needs_vtxid_base =
      ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) !=
      regid(63, 0);

   /* for indirect draw, we need to copy VTXID_BASE from
    * indirect-draw parameters buffer.. which is annoying
    * and means we can't easily emit these consts in cmd
    * stream, so we need to copy them to a bo.
    */
   if (indirect && needs_vtxid_base) {
      uint32_t vertex_params_area = align(vertex_params_size, 16);
      struct pipe_resource *vertex_params_rsc =
         pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
                            PIPE_USAGE_STREAM, vertex_params_area * 4);
      unsigned src_off = indirect->offset;
      void *ptr;

      ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
      memcpy(ptr, vertex_params, vertex_params_size * 4);

      if (info->index_size) {
         /* indexed draw, index_bias is 4th field: */
         src_off += 3 * 4;
      } else {
         /* non-indexed draw, start is 3rd field: */
         src_off += 2 * 4;
      }

      /* copy index_bias or start from draw params: */
      ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
                              src_off, 1);

      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
                      vertex_params_rsc);

      pipe_resource_reference(&vertex_params_rsc, NULL);
   } else {
      emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params);
   }

   /* if needed, emit stream-out buffer addresses: */
   if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
      emit_tfbos(ctx, v, ring);
   }
}

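/* Emit the tess-ctrl driver-params: the default outer/inner tessellation
 * levels from the context's tess state.
 */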
static inline void
ir3_emit_hs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring,
                          struct fd_context *ctx)
   assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   uint32_t hs_params[IR3_DP_HS_COUNT] = {
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_X] = fui(ctx->default_outer_level[0]),
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y] = fui(ctx->default_outer_level[1]),
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z] = fui(ctx->default_outer_level[2]),
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_W] = fui(ctx->default_outer_level[3]),
      [IR3_DP_HS_DEFAULT_INNER_LEVEL_X] = fui(ctx->default_inner_level[0]),
      [IR3_DP_HS_DEFAULT_INNER_LEVEL_Y] = fui(ctx->default_inner_level[1]),
   };

   const uint32_t hs_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(hs_params_size <= IR3_DP_HS_COUNT);

   emit_const_user(ring, v, offset * 4, hs_params_size, hs_params);
}

static inline void
ir3_emit_vs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct pipe_draw_start_count_bias *draw) assert_dt
{
   assert(v->type == MESA_SHADER_VERTEX);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);

   /* emit driver params every time: */
   if (info && v->need_driver_params) {
      ring_wfi(ctx->batch, ring);
      ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0);
   }
}

static inline void
ir3_emit_fs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
{
   assert(v->type == MESA_SHADER_FRAGMENT);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
}

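/* Emit the compute-shader driver-params: grid/workgroup counts, work dim,
 * base group, local group size and subgroup size (plus any kernel params).
 * For indirect dispatch the grid size is sourced from the indirect buffer,
 * copying to a temporary bo first if the offset is not 16-byte aligned.
 */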
static inline void
ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring, struct fd_context *ctx,
                          const struct pipe_grid_info *info)
   assert_dt
{
   emit_kernel_params(ctx, v, ring, info);

   /* a3xx/a4xx can inject these directly */
   if (ctx->screen->gen <= 4)
      return;

   /* emit compute-shader driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   if (v->constlen > offset) {
      ring_wfi(ctx->batch, ring);

      if (info->indirect) {
         struct pipe_resource *indirect = NULL;
         unsigned indirect_offset;

         /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
          * to be aligned more strongly than 4 bytes.  So in this case
          * we need a temporary buffer to copy NumWorkGroups.xyz to.
          *
          * TODO if previous compute job is writing to info->indirect,
          * we might need a WFI.. but since we currently flush for each
          * compute job, we are probably ok for now.
          */
         if (info->indirect_offset & 0xf) {
            indirect = pipe_buffer_create(&ctx->screen->base,
                                          PIPE_BIND_COMMAND_ARGS_BUFFER,
                                          PIPE_USAGE_STREAM, 0x1000);
            indirect_offset = 0;

            ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect,
                                    info->indirect_offset, 3);
         } else {
            pipe_resource_reference(&indirect, info->indirect);
            indirect_offset = info->indirect_offset;
         }

         emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect);

         pipe_resource_reference(&indirect, NULL);
      } else {
         // TODO some of these are not part of the indirect state.. so we
         // need to emit some of this directly in both cases.
         uint32_t compute_params[IR3_DP_CS_COUNT] = {
            [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
            [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
            [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
            [IR3_DP_WORK_DIM] = info->work_dim,
            [IR3_DP_BASE_GROUP_X] = info->grid_base[0],
            [IR3_DP_BASE_GROUP_Y] = info->grid_base[1],
            [IR3_DP_BASE_GROUP_Z] = info->grid_base[2],
            [IR3_DP_CS_SUBGROUP_SIZE] = v->info.subgroup_size,
            [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
            [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
            [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
            [IR3_DP_SUBGROUP_ID_SHIFT] = util_logbase2(v->info.subgroup_size),
            [IR3_DP_WORKGROUP_ID_X] = 0, // TODO
            [IR3_DP_WORKGROUP_ID_Y] = 0, // TODO
            [IR3_DP_WORKGROUP_ID_Z] = 0, // TODO
         };
         uint32_t size =
            MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4);

         emit_const_user(ring, v, offset * 4, size, compute_params);
      }
   }
}

/* emit compute-shader consts: */
static inline void
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info) assert_dt
{
   assert(gl_shader_stage_is_compute(v->type));

   emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);

   ir3_emit_cs_driver_params(v, ring, ctx, info);
}