/*
 * Copyright © 2014 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "ir3/ir3_nir.h"

/* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  And the logic is identical between
 * ir3 generations, the only difference is small details in the actual
 * CP_LOAD_STATE packets (which is handled inside the generation
 * specific ctx->emit_const(_bo)() fxns)
 *
 * This file should be included in only a single .c file per gen, which
 * defines the following functions (an example include pattern is sketched
 * below the declarations):
 */

static bool is_stateobj(struct fd_ringbuffer *ring);

static void emit_const_user(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v, uint32_t regid,
                            uint32_t size, const uint32_t *user_buffer);

static void emit_const_bo(struct fd_ringbuffer *ring,
                          const struct ir3_shader_variant *v, uint32_t regid,
                          uint32_t offset, uint32_t size, struct fd_bo *bo);

static void
emit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t regid, uint32_t offset, uint32_t size,
                struct pipe_resource *buffer)
{
   struct fd_resource *rsc = fd_resource(buffer);
   emit_const_bo(ring, v, regid, offset, size, rsc->bo);
}

static void emit_const_ptrs(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v,
                            uint32_t dst_offset, uint32_t num,
                            struct fd_bo **bos, uint32_t *offsets);
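
/* For example, a gen-specific file might do (a hypothetical sketch, not the
 * literal code of any particular gen):
 *
 *    static bool
 *    is_stateobj(struct fd_ringbuffer *ring)
 *    {
 *       return false;   // no CP_SET_DRAW_STATE stateobjs on this gen
 *    }
 *
 *    static void
 *    emit_const_user(struct fd_ringbuffer *ring,
 *                    const struct ir3_shader_variant *v, uint32_t regid,
 *                    uint32_t size, const uint32_t *user_buffer)
 *    {
 *       // ... gen-specific CP_LOAD_STATE packet w/ inline payload ...
 *    }
 *
 *    // ... emit_const_bo() and emit_const_ptrs() similarly ...
 *
 *    #include "ir3_const.h"
 */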

static void
emit_const_asserts(struct fd_ringbuffer *ring,
                   const struct ir3_shader_variant *v, uint32_t regid,
                   uint32_t sizedwords)
{
   assert((regid % 4) == 0);
   assert((sizedwords % 4) == 0);
   assert(regid + sizedwords <= v->constlen * 4);
}

static void
ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   /* when we emit const state via ring (IB2) we need a WFI, but when
    * it is emit'd via stateobj, we don't
    */
   if (is_stateobj(ring))
      return;

   fd_wfi(batch, ring);
}

/**
 * Indirectly calculates size of cmdstream needed for ir3_emit_user_consts().
 * Returns (via out-params) the number of packets, and the total size of
 * their payload, in dwords.
 *
 * The values can be a worst-case, ie. some shader variants may not read all
 * consts, etc.
 */
static inline void
ir3_user_consts_size(const struct ir3_ubo_analysis_state *state, unsigned *packets,
                     unsigned *size)
{
   *packets = *size = 0;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
      if (state->range[i].start < state->range[i].end) {
         *size += state->range[i].end - state->range[i].start;
         (*packets)++;
      }
   }
}
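
/* A caller can use this to pre-size the ring/stateobj that the consts will
 * be emitted to, e.g. (a hypothetical sketch; the 4-dword per-packet header
 * overhead is an assumption, not something defined in this file):
 *
 *    unsigned packets, size;
 *    ir3_user_consts_size(&ir3_const_state(v)->ubo_state, &packets, &size);
 *    struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
 *       submit, (size + 4 * packets) * 4, FD_RINGBUFFER_STREAMING);
 */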

/**
 * Uploads the referenced subranges of the nir constant_data to the hardware's
 * constant buffer.
 */
static inline void
ir3_emit_constant_data(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      unsigned ubo = state->range[i].ubo.block;
      if (ubo != const_state->consts_ubo.idx)
         continue;

      uint32_t size = state->range[i].end - state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
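      /* (Note: v->constlen is in units of vec4, ie. 16 bytes, while the
       * range offsets here are in bytes.)
       */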
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      emit_const_bo(ring, v, state->range[i].offset / 4,
                    v->info.constant_data_offset + state->range[i].start,
                    size / 4, v->bo);
   }
}

/**
 * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
 * outside of these ranges will be done using full UBO accesses in the
 * shader).
 */
static inline void
ir3_emit_user_consts(const struct ir3_shader_variant *v,
                     struct fd_ringbuffer *ring,
                     struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      assert(!state->range[i].ubo.bindless);
      unsigned ubo = state->range[i].ubo.block;
      if (!(constbuf->enabled_mask & (1 << ubo)) ||
          ubo == const_state->consts_ubo.idx) {
         continue;
      }
      struct pipe_constant_buffer *cb = &constbuf->cb[ubo];

      uint32_t size = state->range[i].end - state->range[i].start;
      uint32_t offset = cb->buffer_offset + state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      /* things should be aligned to vec4: */
      assert((state->range[i].offset % 16) == 0);
      assert((size % 16) == 0);
      assert((offset % 16) == 0);

      if (cb->user_buffer) {
         uint8_t *p = (uint8_t *)cb->user_buffer;
         p += state->range[i].start;
         emit_const_user(ring, v, state->range[i].offset / 4, size / 4,
                         (uint32_t *)p);
      } else {
         emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
                         cb->buffer);
      }
   }
}

static inline void
ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.ubo;

   /* a6xx+ uses UBO state and ldc instead of pointers emitted in
    * const state and ldg:
    */
   if (ctx->screen->gen >= 6)
      return;

   if (v->constlen > offset) {
      uint32_t params = const_state->num_ubos;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         if (i == const_state->consts_ubo.idx) {
            bos[i] = v->bo;
            offsets[i] = v->info.constant_data_offset;
            continue;
         }

         struct pipe_constant_buffer *cb = &constbuf->cb[i];

         /* If we have user pointers (constbuf 0, aka GL uniforms), upload
          * them to a buffer now, and save it in the constbuf so that we
          * don't have to reupload until they get changed.
          */
         if (cb->user_buffer) {
            struct pipe_context *pctx = &ctx->base;
            u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                          cb->user_buffer, &cb->buffer_offset, &cb->buffer);
            cb->user_buffer = NULL;
         }

         if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
            offsets[i] = cb->buffer_offset;
            bos[i] = fd_resource(cb->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

static inline void
ir3_emit_image_dims(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring,
                    struct fd_shaderimg_stateobj *si)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.image_dims;
   if (v->constlen > offset) {
      uint32_t dims[align(const_state->image_dims.count, 4)];
      unsigned mask = const_state->image_dims.mask;

      while (mask) {
         struct pipe_image_view *img;
         struct fd_resource *rsc;
         unsigned index = u_bit_scan(&mask);
         unsigned off = const_state->image_dims.off[index];

         img = &si->si[index];
         rsc = fd_resource(img->resource);

         dims[off + 0] = util_format_get_blocksize(img->format);
         if (img->resource->target != PIPE_BUFFER) {
            struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
            /* note for 2d/cube/etc images, even if re-interpreted
             * as a different color format, the pixel size should
             * be the same, so use original dimensions for y and z
             * stride:
             */
            dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
            /* see corresponding logic in fd_resource_offset(): */
            if (rsc->layout.layer_first) {
               dims[off + 2] = rsc->layout.layer_size;
            } else {
               dims[off + 2] = slice->size0;
            }
         } else {
            /* For buffer-backed images, the log2 of the format's
             * bytes-per-pixel is placed in the 2nd slot. This is useful
             * when emitting image_size instructions, for which we need
             * to divide by bpp for image buffers. Since the bpp
             * can only be power-of-two, the division is implemented
             * as a SHR, and for that it is handy to have the log2 of
             * bpp as a constant. (log2 = first-set-bit - 1)
             */
            dims[off + 1] = ffs(dims[off + 0]) - 1;
         }
      }
      uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);

      emit_const_user(ring, v, offset * 4, size, dims);
   }
}

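/**
 * Uploads the shader's immediate constants, plus (since it shares their
 * lifetime) any NIR constant_data.
 */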
static inline void
ir3_emit_immediates(const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t base = const_state->offsets.immediate;
   int size = DIV_ROUND_UP(const_state->immediates_count, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, v->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, v, base, size, const_state->immediates);

   /* NIR constant data has the same lifetime as immediates, so upload it
    * now, too.
    */
   ir3_emit_constant_data(v, ring);
}

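/**
 * Uploads the "primitive map" consts, which tell the consumer stage (e.g.
 * HS/DS/GS) at which location each of the producer stage's outputs can be
 * found.
 */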
static inline void
ir3_emit_link_map(const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *consumer,
                  struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(consumer);
   uint32_t base = const_state->offsets.primitive_map;
   int size = DIV_ROUND_UP(consumer->input_size, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, consumer->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, consumer, base, size, producer->output_loc);
}

/* emit stream-out buffers: */
static inline void
emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
           struct fd_ringbuffer *ring)
{
   /* streamout addresses after driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.tfbo;
   if (v->constlen > offset) {
      struct fd_streamout_stateobj *so = &ctx->streamout;
      const struct ir3_stream_output_info *info = &v->stream_output;
      uint32_t params = 4;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         struct pipe_stream_output_target *target = so->targets[i];

         if (target) {
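            /* so->offsets[] counts vertices and info->stride[] is in
             * dwords, hence the *4 to get a byte offset:
             */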
            offsets[i] =
               (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
            bos[i] = fd_resource(target->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

static inline void
emit_common_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   enum pipe_shader_type t) assert_dt
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];

   /* When we use CP_SET_DRAW_STATE objects to emit constant state,
    * if we emit any of it we need to emit all.  This is because
    * we are using the same state-group-id each time for uniform
    * state, and if previous update is never evaluated (due to no
    * visible primitives in the current tile) then the new stateobj
    * completely replaces the old one.
    *
    * Possibly if we split up different parts of the const state to
    * different state-objects we could avoid this.
    */
   if (dirty && is_stateobj(ring))
      dirty = (enum fd_dirty_shader_state)~0;

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
      struct fd_constbuf_stateobj *constbuf;
      bool shader_dirty;

      constbuf = &ctx->constbuf[t];
      shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);

      ring_wfi(ctx->batch, ring);

      ir3_emit_user_consts(v, ring, constbuf);
      ir3_emit_ubos(ctx, v, ring, constbuf);
      if (shader_dirty)
         ir3_emit_immediates(v, ring);
   }

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
      struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
      ring_wfi(ctx->batch, ring);
      ir3_emit_image_dims(ctx->screen, v, ring, si);
   }
}

/* emit kernel params */
static inline void
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
   assert_dt
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.kernel_params;
   if (v->constlen > offset) {
      ring_wfi(ctx->batch, ring);
      emit_const_user(ring, v, offset * 4,
                      align(v->cs.req_input_mem, 4),
                      (uint32_t *)info->input);
   }
}

static inline void
ir3_emit_driver_params(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring, struct fd_context *ctx,
                       const struct pipe_draw_info *info,
                       const struct pipe_draw_indirect_info *indirect,
                       const struct pipe_draw_start_count_bias *draw,
                       const uint32_t draw_id) assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   uint32_t vertex_params[IR3_DP_VS_COUNT] = {
      [IR3_DP_DRAWID] = draw_id, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
      [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
      [IR3_DP_INSTID_BASE] = info->start_instance,
      [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
      [IR3_DP_IS_INDEXED_DRAW] = info->index_size != 0 ? ~0 : 0,
   };
   if (v->key.ucp_enables) {
      struct pipe_clip_state *ucp = &ctx->ucp;
      unsigned pos = IR3_DP_UCP0_X;
      for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
         for (unsigned j = 0; j < 4; j++) {
            vertex_params[pos] = fui(ucp->ucp[i][j]);
            pos++;
         }
      }
   }

   /* Only emit as many params as needed, i.e. up to the highest enabled UCP
    * plane. However a binning pass may drop even some of these, so limit to
    * program max.
    */
   const uint32_t vertex_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(vertex_params_size <= IR3_DP_VS_COUNT);

   bool needs_vtxid_base =
      ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) !=
      regid(63, 0);

   /* for indirect draw, we need to copy VTXID_BASE from
    * indirect-draw parameters buffer.. which is annoying
    * and means we can't easily emit these consts in cmd
    * stream so need to copy them to bo.
    */
   if (indirect && needs_vtxid_base) {
      uint32_t vertex_params_area = align(vertex_params_size, 16);
      struct pipe_resource *vertex_params_rsc =
         pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
                            PIPE_USAGE_STREAM, vertex_params_area * 4);
      unsigned src_off = indirect->offset;
      void *ptr;

      ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
      memcpy(ptr, vertex_params, vertex_params_size * 4);

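      /* The indirect-draw parameters buffer follows the standard GL
       * layouts:
       *
       *    indexed:      { count, instance_count, first_index,
       *                    index_bias, start_instance }
       *    non-indexed:  { count, instance_count, start, start_instance }
       */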
      if (info->index_size) {
         /* indexed draw, index_bias is 4th field: */
         src_off += 3 * 4;
      } else {
         /* non-indexed draw, start is 3rd field: */
         src_off += 2 * 4;
      }

      /* copy index_bias or start from draw params: */
      ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
                              src_off, 1);

      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
                      vertex_params_rsc);

      pipe_resource_reference(&vertex_params_rsc, NULL);
   } else {
      emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params);
   }

   /* if needed, emit stream-out buffer addresses: */
   if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
      emit_tfbos(ctx, v, ring);
   }
}

static inline void
ir3_emit_hs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring,
                          struct fd_context *ctx)
   assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   uint32_t hs_params[IR3_DP_HS_COUNT] = {
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_X] = fui(ctx->default_outer_level[0]),
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Y] = fui(ctx->default_outer_level[1]),
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_Z] = fui(ctx->default_outer_level[2]),
      [IR3_DP_HS_DEFAULT_OUTER_LEVEL_W] = fui(ctx->default_outer_level[3]),
      [IR3_DP_HS_DEFAULT_INNER_LEVEL_X] = fui(ctx->default_inner_level[0]),
      [IR3_DP_HS_DEFAULT_INNER_LEVEL_Y] = fui(ctx->default_inner_level[1]),
   };

   const uint32_t hs_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(hs_params_size <= IR3_DP_HS_COUNT);

   emit_const_user(ring, v, offset * 4, hs_params_size, hs_params);
}

static inline void
ir3_emit_vs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct pipe_draw_start_count_bias *draw) assert_dt
{
   assert(v->type == MESA_SHADER_VERTEX);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);

   /* emit driver params every time: */
   if (info && v->need_driver_params) {
      ring_wfi(ctx->batch, ring);
      ir3_emit_driver_params(v, ring, ctx, info, indirect, draw, 0);
   }
}

static inline void
ir3_emit_fs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
{
   assert(v->type == MESA_SHADER_FRAGMENT);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
}

static inline void
ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
                          struct fd_ringbuffer *ring, struct fd_context *ctx,
                          const struct pipe_grid_info *info)
   assert_dt
{
   emit_kernel_params(ctx, v, ring, info);

   /* a3xx/a4xx can inject these directly */
   if (ctx->screen->gen <= 4)
      return;

   /* emit compute-shader driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   if (v->constlen > offset) {
      ring_wfi(ctx->batch, ring);

      if (info->indirect) {
         struct pipe_resource *indirect = NULL;
         unsigned indirect_offset;

         /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
          * to be aligned more strongly than 4 bytes.  So in this case
          * we need a temporary buffer to copy NumWorkGroups.xyz to.
          *
          * TODO if previous compute job is writing to info->indirect,
          * we might need a WFI.. but since we currently flush for each
          * compute job, we are probably ok for now.
          */
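         /* (The gallium dispatch-indirect buffer holds three dwords,
          * num_groups x/y/z, hence the 3-dword copy below.)
          */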
         if (info->indirect_offset & 0xf) {
            indirect = pipe_buffer_create(&ctx->screen->base,
                                          PIPE_BIND_COMMAND_ARGS_BUFFER,
                                          PIPE_USAGE_STREAM, 0x1000);
            indirect_offset = 0;

            ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect,
                                    info->indirect_offset, 3);
         } else {
            pipe_resource_reference(&indirect, info->indirect);
            indirect_offset = info->indirect_offset;
         }

         emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect);

         pipe_resource_reference(&indirect, NULL);
      } else {
         // TODO some of these are not part of the indirect state.. so we
         // need to emit some of this directly in both cases.
         uint32_t compute_params[IR3_DP_CS_COUNT] = {
            [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
            [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
            [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
            [IR3_DP_WORK_DIM]          = info->work_dim,
            [IR3_DP_BASE_GROUP_X]      = info->grid_base[0],
            [IR3_DP_BASE_GROUP_Y]      = info->grid_base[1],
            [IR3_DP_BASE_GROUP_Z]      = info->grid_base[2],
            [IR3_DP_CS_SUBGROUP_SIZE]  = v->info.subgroup_size,
            [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
            [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
            [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
            [IR3_DP_SUBGROUP_ID_SHIFT] = util_logbase2(v->info.subgroup_size),
            [IR3_DP_WORKGROUP_ID_X]    = 0,  // TODO
            [IR3_DP_WORKGROUP_ID_Y]    = 0,  // TODO
            [IR3_DP_WORKGROUP_ID_Z]    = 0,  // TODO
         };
         uint32_t size =
            MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4);

         emit_const_user(ring, v, offset * 4, size, compute_params);
      }
   }
}

/* emit compute-shader consts: */
static inline void
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info) assert_dt
{
   assert(gl_shader_stage_is_compute(v->type));

   emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);

   ir3_emit_cs_driver_params(v, ring, ctx, info);
}