xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2013 Nouveau Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Christoph Bumiller, Samuel Pitoiset
23  */
24 
25 #include "nvc0/nvc0_context.h"
26 
27 #include "nvc0/nvc0_compute.xml.h"
28 
/* One-time compute channel setup: binds the compute class object to its
 * subchannel and programs the memory layout (temp/local, shared, code,
 * TIC/TSC tables and the driver aux constbuf) that later launches rely on.
 * Always returns 0 (there are no failure paths).
 */
int
nvc0_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   int i;

   /* Bind the compute class object on the compute subchannel. */
   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   /* hardware limit */
   BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
   PUSH_DATA (push, screen->mp_count);
   BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
   PUSH_DATA (push, 0xf);

   /* NOTE(review): 0x02a0 is an undocumented method; meaning unknown. */
   BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
   PUSH_DATA (push, 0x8000);

   /* global memory setup */
   /* 0x02c4 appears to gate global memory access while the 0x100 GLOBAL_BASE
    * window entries are rewritten — undocumented method, TODO confirm. */
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 0);
   BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
   for (i = 0; i <= 0xff; i++)
      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 1);

   /* local memory and cstack setup */
   BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
   PUSH_DATAh(push, screen->tls->size);
   PUSH_DATA (push, screen->tls->size);
   BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
   PUSH_DATA (push, 0xff << 24);

   /* shared memory setup */
   BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
   BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
   PUSH_DATA (push, 0xfe << 24);
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, 0);

   /* code segment setup */
   BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* textures: TIC table shared with 3D lives in txc */
   BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);

   /* samplers: TSC table starts 64 KiB into txc */
   BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   /* MS sample coordinate offsets: eight (x, y) pairs uploaded into the
    * compute-stage aux constbuf at NVC0_CB_AUX_MS_INFO */
   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
   PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);

   return 0;
}
119 
120 static void
nvc0_compute_validate_samplers(struct nvc0_context * nvc0)121 nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
122 {
123    bool need_flush = nvc0_validate_tsc(nvc0, 5);
124    if (need_flush) {
125       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
126       PUSH_DATA (nvc0->base.pushbuf, 0);
127    }
128 
129    /* Invalidate all 3D samplers because they are aliased. */
130    for (int s = 0; s < 5; s++)
131       nvc0->samplers_dirty[s] = ~0;
132    nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
133 }
134 
135 static void
nvc0_compute_validate_textures(struct nvc0_context * nvc0)136 nvc0_compute_validate_textures(struct nvc0_context *nvc0)
137 {
138    bool need_flush = nvc0_validate_tic(nvc0, 5);
139    if (need_flush) {
140       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
141       PUSH_DATA (nvc0->base.pushbuf, 0);
142    }
143 
144    /* Invalidate all 3D textures because they are aliased. */
145    for (int s = 0; s < 5; s++) {
146       for (int i = 0; i < nvc0->num_textures[s]; i++)
147          nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
148       nvc0->textures_dirty[s] = ~0;
149    }
150    nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
151 }
152 
153 static inline void
nvc0_compute_invalidate_constbufs(struct nvc0_context * nvc0)154 nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
155 {
156    int s;
157 
158    /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
159    for (s = 0; s < 5; s++) {
160       nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
161       nvc0->state.uniform_buffer_bound[s] = false;
162    }
163    nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
164 }
165 
/* Re-emit compute-stage (s == 5) constant buffer bindings for every slot
 * flagged in constbuf_dirty.  Slot 0 may carry user uniforms streamed
 * inline through uniform_bo; other slots bind real buffer resources or get
 * unbound when no buffer is attached.  Ends with a CB flush.
 */
static void
nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const int s = 5;

   while (nvc0->constbuf_dirty[s]) {
      /* Handle the lowest set dirty bit, then clear it. */
      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
      nvc0->constbuf_dirty[s] &= ~(1 << i);

      if (nvc0->constbuf[s][i].user) {
         /* User uniforms live in a fixed slice of the screen's uniform_bo. */
         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
         const unsigned base = NVC0_CB_USR_INFO(s);
         const unsigned size = nvc0->constbuf[s][0].size;
         assert(i == 0); /* we really only want OpenGL uniforms here */
         assert(nvc0->constbuf[s][0].u.data);

         if (!nvc0->state.uniform_buffer_bound[s]) {
            nvc0->state.uniform_buffer_bound[s] = true;

            /* Bind the uniform_bo slice as compute constbuf 0. */
            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
            PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);
            PUSH_DATAh(push, bo->offset + base);
            PUSH_DATA (push, bo->offset + base);
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (0 << 8) | 1);
         }
         /* Stream the user data; size is rounded up to whole dwords. */
         nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
      } else {
         struct nv04_resource *res =
            nv04_resource(nvc0->constbuf[s][i].u.buf);
         if (res) {
            /* Bind the buffer resource as compute constbuf i. */
            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
            PUSH_DATA (push, nvc0->constbuf[s][i].size);
            PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
            PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (i << 8) | 1);

            /* Keep the buffer resident (read-only) for the dispatch. */
            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);

            res->cb_bindings[s] |= 1 << i;
         } else {
            /* No buffer attached: unbind slot i. */
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (i << 8) | 0);
         }
         if (i == 0)
            nvc0->state.uniform_buffer_bound[s] = false;
      }
   }

   /* Bindings are aliased with 3D, so force 3D constbuf revalidation. */
   nvc0_compute_invalidate_constbufs(nvc0);

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
224 
225 static void
nvc0_compute_validate_driverconst(struct nvc0_context * nvc0)226 nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
227 {
228    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
229    struct nvc0_screen *screen = nvc0->screen;
230 
231    BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
232    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
233    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
234    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
235    BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
236    PUSH_DATA (push, (15 << 8) | 1);
237 
238    nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
239 }
240 
241 static void
nvc0_compute_validate_buffers(struct nvc0_context * nvc0)242 nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
243 {
244    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
245    struct nvc0_screen *screen = nvc0->screen;
246    const int s = 5;
247    int i;
248 
249    BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
250    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
251    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
252    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
253    BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
254    PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
255 
256    for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
257       if (nvc0->buffers[s][i].buffer) {
258          struct nv04_resource *res =
259             nv04_resource(nvc0->buffers[s][i].buffer);
260          PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
261          PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
262          PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
263          PUSH_DATA (push, 0);
264          BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
265          util_range_add(&res->base, &res->valid_buffer_range,
266                         nvc0->buffers[s][i].buffer_offset,
267                         nvc0->buffers[s][i].buffer_offset +
268                         nvc0->buffers[s][i].buffer_size);
269       } else {
270          PUSH_DATA (push, 0);
271          PUSH_DATA (push, 0);
272          PUSH_DATA (push, 0);
273          PUSH_DATA (push, 0);
274       }
275    }
276 }
277 
278 void
nvc0_compute_validate_globals(struct nvc0_context * nvc0)279 nvc0_compute_validate_globals(struct nvc0_context *nvc0)
280 {
281    unsigned i;
282 
283    for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
284         ++i) {
285       struct pipe_resource *res = *util_dynarray_element(
286          &nvc0->global_residents, struct pipe_resource *, i);
287       if (res)
288          nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
289                            nv04_resource(res), NOUVEAU_BO_RDWR);
290    }
291 }
292 
293 static inline void
nvc0_compute_invalidate_surfaces(struct nvc0_context * nvc0,const int s)294 nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)
295 {
296    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
297    int i;
298 
299    for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
300       if (s == 5)
301          BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
302       else
303          BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
304       PUSH_DATA(push, 0);
305       PUSH_DATA(push, 0);
306       PUSH_DATA(push, 0);
307       PUSH_DATA(push, 0);
308       PUSH_DATA(push, 0x14000);
309       PUSH_DATA(push, 0);
310    }
311 }
312 
313 static void
nvc0_compute_validate_surfaces(struct nvc0_context * nvc0)314 nvc0_compute_validate_surfaces(struct nvc0_context *nvc0)
315 {
316    /* TODO: Invalidating both 3D and CP surfaces before validating surfaces for
317     * compute is probably not really necessary, but we didn't find any better
318     * solutions for now. This fixes some invalidation issues when compute and
319     * fragment shaders are used inside the same context. Anyway, we definitely
320     * have invalidation issues between 3D and CP for other resources like SSBO
321     * and atomic counters. */
322    nvc0_compute_invalidate_surfaces(nvc0, 4);
323    nvc0_compute_invalidate_surfaces(nvc0, 5);
324 
325    nvc0_validate_suf(nvc0, 5);
326 
327    /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */
328    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
329    nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
330    nvc0->images_dirty[4] |= nvc0->images_valid[4];
331 }
332 
/* Dispatch table mapping compute dirty-state bits to their validation
 * functions; consumed by nvc0_state_validate() below. */
static struct nvc0_state_validate
validate_list_cp[] = {
   { nvc0_compprog_validate,              NVC0_NEW_CP_PROGRAM     },
   { nvc0_compute_validate_constbufs,     NVC0_NEW_CP_CONSTBUF    },
   { nvc0_compute_validate_driverconst,   NVC0_NEW_CP_DRIVERCONST },
   { nvc0_compute_validate_buffers,       NVC0_NEW_CP_BUFFERS     },
   { nvc0_compute_validate_textures,      NVC0_NEW_CP_TEXTURES    },
   { nvc0_compute_validate_samplers,      NVC0_NEW_CP_SAMPLERS    },
   { nvc0_compute_validate_globals,       NVC0_NEW_CP_GLOBALS     },
   { nvc0_compute_validate_surfaces,      NVC0_NEW_CP_SURFACES    },
};
344 
345 static bool
nvc0_state_validate_cp(struct nvc0_context * nvc0,uint32_t mask)346 nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
347 {
348    bool ret;
349 
350    ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
351                              ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
352                              nvc0->bufctx_cp);
353 
354    if (unlikely(nvc0->state.flushed))
355       nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
356    return ret;
357 }
358 
/* Upload the kernel's inputs: user kernel arguments (if any) are streamed
 * into constbuf 0, and the grid's work_dim is written into the driver aux
 * constbuf.  Ends with a CB flush so the data is visible to the kernel.
 */
static void
nvc0_compute_upload_input(struct nvc0_context *nvc0,
                          const struct pipe_grid_info *info)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp->parm_size) {
      struct nouveau_bo *bo = screen->uniform_bo;
      const unsigned base = NVC0_CB_USR_INFO(5);

      /* Bind constbuf 0 over the compute user-uniform slice of uniform_bo. */
      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      PUSH_DATA (push, align(cp->parm_size, 0x100));
      PUSH_DATAh(push, bo->offset + base);
      PUSH_DATA (push, bo->offset + base);
      BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
      PUSH_DATA (push, (0 << 8) | 1);
      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
      PUSH_DATA (push, 0);
      PUSH_DATAp(push, info->input, cp->parm_size / 4);

      /* Constbuf 0 was clobbered; force 3D to rebind its constbufs. */
      nvc0_compute_invalidate_constbufs(nvc0);
   }

   /* Point CB writes at the compute-stage aux constbuf. */
   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));

   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
   /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
   PUSH_DATA (push, info->work_dim);

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
398 
/* pipe_context::launch_grid — validate compute state, upload kernel inputs,
 * program the per-launch parameters (code entry, local/shared allocation,
 * block/grid dimensions) and kick off the kernel, either directly or via
 * the indirect-dispatch macro.  The pushbuf is always kicked on exit, even
 * when validation fails.
 */
void
nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   int ret;

   /* Screen state is shared across contexts; serialize validation+submit. */
   simple_mtx_lock(&screen->state_lock);
   ret = !nvc0_state_validate_cp(nvc0, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid !\n");
      goto out;
   }

   nvc0_compute_upload_input(nvc0, info);

   /* Kernel entry point inside the code segment. */
   BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
   PUSH_DATA (push, cp->code_base);

   /* Per-thread local memory allocation is taken from the shader header;
    * NOTE(review): the 0xfffff0 mask presumably extracts the size field of
    * hdr[1] — confirm against the shader header layout. */
   BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
   PUSH_DATA (push, cp->hdr[1] & 0xfffff0);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */

   /* Shared memory (static + variable, 256-byte aligned), threads per
    * block, barrier count and GPR allocation. */
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
   PUSH_DATA (push, align(cp->cp.smem_size + info->variable_shared_mem, 0x100));
   PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
   PUSH_DATA (push, cp->num_barriers);
   BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
   PUSH_DATA (push, cp->num_gprs);

   /* launch preliminary setup */
   BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
   PUSH_DATA (push, 0x1);
   /* NOTE(review): 0x036c is an undocumented method. */
   BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);

   /* block setup */
   BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
   PUSH_DATA (push, info->block[2]);

   PUSH_SPACE_EX(push, 32, 2, 1);
   PUSH_REF1(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);

   if (unlikely(info->indirect)) {
      /* Grid dimensions live in the indirect buffer: feed its 3 dwords to
       * the LAUNCH_GRID_INDIRECT macro without prefetching them. */
      struct nv04_resource *res = nv04_resource(info->indirect);
      uint32_t offset = res->offset + info->indirect_offset;
      unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;

      PUSH_REF1(push, res->bo, NOUVEAU_BO_RD | res->domain);
      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
      nouveau_pushbuf_data(push, res->bo, offset,
                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
   } else {
      /* grid setup */
      BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
      PUSH_DATA (push, info->grid[2]);

      /* kernel launching */
      BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
      PUSH_DATA (push, 0);
      /* NOTE(review): 0x0a08 and 0x0360 below are undocumented methods. */
      BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
      PUSH_DATA (push, 0x1000);
      BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
      PUSH_DATA (push, 0x1);
   }

   /* TODO: Not sure if this is really necessary. */
   nvc0_compute_invalidate_surfaces(nvc0, 5);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
   nvc0->images_dirty[5] |= nvc0->images_valid[5];

   /* Account this dispatch in the PIPE_QUERY compute-invocations counter. */
   nvc0_update_compute_invocations_counter(nvc0, info);

out:
   PUSH_KICK(push);
   simple_mtx_unlock(&screen->state_lock);
}
488 
/* Accumulate the compute-invocations counter on the GPU for an indirect
 * launch: the grid dimensions are only known inside the indirect buffer,
 * so a 3D-engine macro combines them with the CPU-known block dimensions.
 */
static void
nvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0,
                                         const struct pipe_grid_info *info) {
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv04_resource *res = nv04_resource(info->indirect);
   uint32_t offset = res->offset + info->indirect_offset;

   PUSH_SPACE_EX(push, 16, 0, 8);
   PUSH_REF1(push, res->bo, NOUVEAU_BO_RD | res->domain);
   /* Macro args: an opcode/count of 6, the three block dimensions, then the
    * three grid dimensions streamed from the indirect buffer (3 dwords,
    * not prefetched). */
   BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7);
   PUSH_DATA(push, 6);
   PUSH_DATA(push, info->block[0]);
   PUSH_DATA(push, info->block[1]);
   PUSH_DATA(push, info->block[2]);
   nouveau_pushbuf_data(push, res->bo, offset,
                        NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
}
506 
507 void
nvc0_update_compute_invocations_counter(struct nvc0_context * nvc0,const struct pipe_grid_info * info)508 nvc0_update_compute_invocations_counter(struct nvc0_context *nvc0,
509                                         const struct pipe_grid_info *info) {
510    if (unlikely(info->indirect)) {
511       nvc0_compute_update_indirect_invocations(nvc0, info);
512    } else {
513       uint64_t invocations = info->block[0] * info->block[1] * info->block[2];
514       invocations *= info->grid[0] * info->grid[1] * info->grid[2];
515       nvc0->compute_invocations += invocations;
516    }
517 }
518