/*
 * Copyright 2013 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christoph Bumiller, Samuel Pitoiset
 */

#include "nvc0/nvc0_context.h"

#include "nvc0/nvc0_compute.xml.h"

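/* One-time setup of the compute engine for a screen: bind the compute class
 * on its subchannel, then program the screen-wide buffers (TLS/call stack,
 * code segment, TIC/TSC tables) and upload the default MS sample coordinate
 * offsets into the driver's auxiliary constant buffer. */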
int
nvc0_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   int i;

   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   /* hardware limit */
   BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
   PUSH_DATA (push, screen->mp_count);
   BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
   PUSH_DATA (push, 0xf);

   BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
   PUSH_DATA (push, 0x8000);

   /* global memory setup */
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 0);
   BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
   for (i = 0; i <= 0xff; i++)
      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 1);

   /* local memory and cstack setup */
   BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
   PUSH_DATAh(push, screen->tls->size);
   PUSH_DATA (push, screen->tls->size);
   BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
   PUSH_DATA (push, 0xff << 24);

   /* shared memory setup */
   BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
   BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
   PUSH_DATA (push, 0xfe << 24);
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, 0);

   /* code segment setup */
   BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* textures */
   BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);

   /* samplers */
   BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   /* MS sample coordinate offsets */
   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
   PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);

   return 0;
}

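/* Sampler (TSC) state is shared between 3D and compute. Validating the
 * compute samplers may require a TSC cache flush, and always forces the
 * aliased 3D samplers to be re-validated on the next draw. */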
static void
nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
{
   bool need_flush = nvc0_validate_tsc(nvc0, 5);
   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all 3D samplers because they are aliased. */
   for (int s = 0; s < 5; s++)
      nvc0->samplers_dirty[s] = ~0;
   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
}

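/* Texture (TIC) state is likewise shared with 3D: flush the TIC cache if
 * needed, then drop the 3D texture bindings so they get re-emitted. */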
static void
nvc0_compute_validate_textures(struct nvc0_context *nvc0)
{
   bool need_flush = nvc0_validate_tic(nvc0, 5);
   if (need_flush) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all 3D textures because they are aliased. */
   for (int s = 0; s < 5; s++) {
      for (int i = 0; i < nvc0->num_textures[s]; i++)
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
      nvc0->textures_dirty[s] = ~0;
   }
   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}

static inline void
nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0)
{
   int s;

   /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
   for (s = 0; s < 5; s++) {
      nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s];
      nvc0->state.uniform_buffer_bound[s] = false;
   }
   nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF;
}

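/* Bind the dirty constant buffers for the compute stage (s == 5). User
 * uniforms (slot 0 only) are copied through the screen's uniform_bo; buffer
 * resources are bound in place via CB_BIND. */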
static void
nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   const int s = 5;

   while (nvc0->constbuf_dirty[s]) {
      int i = ffs(nvc0->constbuf_dirty[s]) - 1;
      nvc0->constbuf_dirty[s] &= ~(1 << i);

      if (nvc0->constbuf[s][i].user) {
         struct nouveau_bo *bo = nvc0->screen->uniform_bo;
         const unsigned base = NVC0_CB_USR_INFO(s);
         const unsigned size = nvc0->constbuf[s][0].size;
         assert(i == 0); /* we really only want OpenGL uniforms here */
         assert(nvc0->constbuf[s][0].u.data);

         if (!nvc0->state.uniform_buffer_bound[s]) {
            nvc0->state.uniform_buffer_bound[s] = true;

            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
            PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE);
            PUSH_DATAh(push, bo->offset + base);
            PUSH_DATA (push, bo->offset + base);
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (0 << 8) | 1);
         }
         nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
      } else {
         struct nv04_resource *res =
            nv04_resource(nvc0->constbuf[s][i].u.buf);
         if (res) {
            BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
            PUSH_DATA (push, nvc0->constbuf[s][i].size);
            PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
            PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (i << 8) | 1);

            BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD);

            res->cb_bindings[s] |= 1 << i;
         } else {
            BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
            PUSH_DATA (push, (i << 8) | 0);
         }
         if (i == 0)
            nvc0->state.uniform_buffer_bound[s] = false;
      }
   }

   nvc0_compute_invalidate_constbufs(nvc0);

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}

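/* Bind the driver's auxiliary constant buffer to binding 15 of the compute
 * stage; it carries driver-generated data such as buffer addresses and grid
 * info, and aliases the 3D driverconst binding. */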
static void
nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
   PUSH_DATA (push, (15 << 8) | 1);

   nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST;
}

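/* Upload one {address lo, address hi, size, 0} record per shader buffer
 * (SSBO) slot into the auxiliary constant buffer, reference the backing BOs
 * for read/write, and extend each buffer's valid range. Empty slots get an
 * all-zero record. */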
static void
nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   const int s = 5;
   int i;

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
   PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));

   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
      if (nvc0->buffers[s][i].buffer) {
         struct nv04_resource *res =
            nv04_resource(nvc0->buffers[s][i].buffer);
         PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
         PUSH_DATA (push, 0);
         BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
         util_range_add(&res->base, &res->valid_buffer_range,
                        nvc0->buffers[s][i].buffer_offset,
                        nvc0->buffers[s][i].buffer_offset +
                        nvc0->buffers[s][i].buffer_size);
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}

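/* Re-add every resident global buffer to the compute bufctx so it stays
 * referenced (and thus accessible by address) across pushbuf submissions. */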
void
nvc0_compute_validate_globals(struct nvc0_context *nvc0)
{
   unsigned i;

   for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
        ++i) {
      struct pipe_resource *res = *util_dynarray_element(
         &nvc0->global_residents, struct pipe_resource *, i);
      if (res)
         nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
                           nv04_resource(res), NOUVEAU_BO_RDWR);
   }
}

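/* Write a null descriptor to each image slot of the given stage (5 selects
 * the compute engine, anything else the 3D engine). The 0x14000 dword looks
 * like a default format/layout value; its exact meaning is undocumented. */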
static inline void
nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   int i;

   for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
      else
         BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0);
      PUSH_DATA(push, 0x14000);
      PUSH_DATA(push, 0);
   }
}

static void
nvc0_compute_validate_surfaces(struct nvc0_context *nvc0)
{
   /* TODO: Invalidating both 3D and CP surfaces before validating surfaces
    * for compute is probably not strictly necessary, but we haven't found a
    * better solution so far. This fixes some invalidation issues when
    * compute and fragment shaders are used inside the same context. Anyway,
    * we definitely have invalidation issues between 3D and CP for other
    * resources like SSBOs and atomic counters. */
   nvc0_compute_invalidate_surfaces(nvc0, 4);
   nvc0_compute_invalidate_surfaces(nvc0, 5);

   nvc0_validate_suf(nvc0, 5);

   /* Invalidate all FRAGMENT images because they are aliased with COMPUTE. */
   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
   nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
   nvc0->images_dirty[4] |= nvc0->images_valid[4];
}

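/* Compute state validation table: each entry pairs a validate function with
 * the dirty bits that trigger it, in the order the functions are run. */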
static struct nvc0_state_validate
validate_list_cp[] = {
   { nvc0_compprog_validate,            NVC0_NEW_CP_PROGRAM     },
   { nvc0_compute_validate_constbufs,   NVC0_NEW_CP_CONSTBUF    },
   { nvc0_compute_validate_driverconst, NVC0_NEW_CP_DRIVERCONST },
   { nvc0_compute_validate_buffers,     NVC0_NEW_CP_BUFFERS     },
   { nvc0_compute_validate_textures,    NVC0_NEW_CP_TEXTURES    },
   { nvc0_compute_validate_samplers,    NVC0_NEW_CP_SAMPLERS    },
   { nvc0_compute_validate_globals,     NVC0_NEW_CP_GLOBALS     },
   { nvc0_compute_validate_surfaces,    NVC0_NEW_CP_SURFACES    },
};

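/* Run the compute validation list; if validation caused the pushbuf to be
 * flushed, the buffers referenced so far need their fences re-emitted. */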
static bool
nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask)
{
   bool ret;

   ret = nvc0_state_validate(nvc0, mask, validate_list_cp,
                             ARRAY_SIZE(validate_list_cp), &nvc0->dirty_cp,
                             nvc0->bufctx_cp);

   if (unlikely(nvc0->state.flushed))
      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
   return ret;
}

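/* Upload the kernel inputs: user parameters go into the user constbuf
 * (binding 0) and the grid's work_dim into the auxiliary constbuf, followed
 * by a CB cache flush. */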
static void
nvc0_compute_upload_input(struct nvc0_context *nvc0,
                          const struct pipe_grid_info *info)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp->parm_size) {
      struct nouveau_bo *bo = screen->uniform_bo;
      const unsigned base = NVC0_CB_USR_INFO(5);

      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      PUSH_DATA (push, align(cp->parm_size, 0x100));
      PUSH_DATAh(push, bo->offset + base);
      PUSH_DATA (push, bo->offset + base);
      BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
      PUSH_DATA (push, (0 << 8) | 1);
      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
      PUSH_DATA (push, 0);
      PUSH_DATAp(push, info->input, cp->parm_size / 4);

      nvc0_compute_invalidate_constbufs(nvc0);
   }

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));

   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
   /* (7) because we only upload work_dim on nvc0; the rest uses special regs */
   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
   PUSH_DATA (push, info->work_dim);

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}

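/* Dispatch a compute grid: validate state, upload the inputs, program the
 * per-launch resources (local/cstack allocation, shared size, GPRs), then
 * either fire an indirect launch through a macro that reads the grid
 * dimensions from a buffer, or set them directly and hit LAUNCH. */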
void
nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_screen *screen = nvc0->screen;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   int ret;

   simple_mtx_lock(&screen->state_lock);
   ret = !nvc0_state_validate_cp(nvc0, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid!\n");
      goto out;
   }

   nvc0_compute_upload_input(nvc0, info);

   BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
   PUSH_DATA (push, cp->code_base);

   BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
   PUSH_DATA (push, cp->hdr[1] & 0xfffff0);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */

   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
   PUSH_DATA (push, align(cp->cp.smem_size + info->variable_shared_mem, 0x100));
   PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
   PUSH_DATA (push, cp->num_barriers);
   BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
   PUSH_DATA (push, cp->num_gprs);

   /* launch preliminary setup */
   BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
   PUSH_DATA (push, 0x1);
   BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);

   /* block setup */
   BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
   PUSH_DATA (push, info->block[2]);

   PUSH_SPACE_EX(push, 32, 2, 1);
   PUSH_REF1(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD);

   if (unlikely(info->indirect)) {
      struct nv04_resource *res = nv04_resource(info->indirect);
      uint32_t offset = res->offset + info->indirect_offset;
      unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;

      PUSH_REF1(push, res->bo, NOUVEAU_BO_RD | res->domain);
      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
      nouveau_pushbuf_data(push, res->bo, offset,
                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
   } else {
      /* grid setup */
      BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
      PUSH_DATA (push, info->grid[2]);

      /* kernel launching */
      BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
      PUSH_DATA (push, 0x1000);
      BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
      PUSH_DATA (push, 0x1);
   }

   /* TODO: Not sure if this is really necessary. */
   nvc0_compute_invalidate_surfaces(nvc0, 5);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
   nvc0->images_dirty[5] |= nvc0->images_valid[5];

   nvc0_update_compute_invocations_counter(nvc0, info);

out:
   PUSH_KICK(push);
   simple_mtx_unlock(&screen->state_lock);
}

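/* For indirect launches the grid dimensions live in a GPU buffer, so the
 * invocation count is accumulated on the GPU via MACRO_COMPUTE_COUNTER,
 * which combines the fixed block size pushed here with the three grid
 * dimensions read from the indirect buffer. */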
static void
nvc0_compute_update_indirect_invocations(struct nvc0_context *nvc0,
                                         const struct pipe_grid_info *info)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv04_resource *res = nv04_resource(info->indirect);
   uint32_t offset = res->offset + info->indirect_offset;

   PUSH_SPACE_EX(push, 16, 0, 8);
   PUSH_REF1(push, res->bo, NOUVEAU_BO_RD | res->domain);
   BEGIN_1IC0(push, NVC0_3D(MACRO_COMPUTE_COUNTER), 7);
   PUSH_DATA(push, 6);
   PUSH_DATA(push, info->block[0]);
   PUSH_DATA(push, info->block[1]);
   PUSH_DATA(push, info->block[2]);
   nouveau_pushbuf_data(push, res->bo, offset,
                        NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
}

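/* Track the number of compute shader invocations for pipeline statistics:
 * direct launches are counted on the CPU, indirect ones on the GPU. */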
void
nvc0_update_compute_invocations_counter(struct nvc0_context *nvc0,
                                        const struct pipe_grid_info *info)
{
   if (unlikely(info->indirect)) {
      nvc0_compute_update_indirect_invocations(nvc0, info);
   } else {
      uint64_t invocations = info->block[0] * info->block[1] * info->block[2];
      invocations *= info->grid[0] * info->grid[1] * info->grid[2];
      nvc0->compute_invocations += invocations;
   }
}