/*
 * Copyright © 2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/format/u_format.h"
#include "util/macros.h"
#include "v3d_context.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/common/v3d_tiling.h"
#include "broadcom/common/v3d_util.h"
#include "broadcom/cle/v3dx_pack.h"

#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 | \
                                  PIPE_CLEAR_COLOR1 | \
                                  PIPE_CLEAR_COLOR2 | \
                                  PIPE_CLEAR_COLOR3)

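/* Bit index of the first color buffer bit in a PIPE_CLEAR_* mask. */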
#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)

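/* Emits a LOAD_TILE_BUFFER_GENERAL packet to load one buffer (a color RT,
 * Z/S, or separate stencil) for the given layer, clearing its bit in
 * *loads_pending.
 */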
static void
load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
             int layer, uint32_t pipe_bit, uint32_t *loads_pending)
{
        struct v3d_surface *surf = v3d_surface(psurf);
        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
        if (separate_stencil) {
                psurf = surf->separate_stencil;
                surf = v3d_surface(psurf);
        }

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

        uint32_t layer_offset =
                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
                                 psurf->u.tex.first_layer + layer);
        cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
                load.buffer_to_load = buffer;
                load.address = cl_address(rsc->bo, layer_offset);

                load.memory_format = surf->tiling;
                if (separate_stencil)
                        load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                else
                        load.input_image_format = surf->format;
                load.r_b_swap = surf->swap_rb;
                load.force_alpha_1 = util_format_has_alpha1(psurf->format);
                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        load.height_in_ub_or_stride =
                                surf->padded_height_of_output_image_in_uif_blocks;
                } else if (surf->tiling == V3D_TILING_RASTER) {
                        struct v3d_resource_slice *slice =
                                &rsc->slices[psurf->u.tex.level];
                        load.height_in_ub_or_stride = slice->stride;
                }

                if (psurf->texture->nr_samples > 1)
                        load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
                else
                        load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
        }

        *loads_pending &= ~pipe_bit;
}

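/* Emits a STORE_TILE_BUFFER_GENERAL packet to store one buffer for the
 * given layer, marking the resource as written. If stores_pending is
 * non-NULL, the buffer's bit is cleared from it.
 */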
static void
store_general(struct v3d_job *job,
              struct v3d_cl *cl, struct pipe_surface *psurf,
              int layer, int buffer, int pipe_bit,
              uint32_t *stores_pending, bool general_color_clear,
              bool resolve_4x)
{
        struct v3d_surface *surf = v3d_surface(psurf);
        bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
        if (separate_stencil) {
                psurf = surf->separate_stencil;
                surf = v3d_surface(psurf);
        }

        if (stores_pending)
                *stores_pending &= ~pipe_bit;

        struct v3d_resource *rsc = v3d_resource(psurf->texture);

        rsc->writes++;
        rsc->graphics_written = true;

        uint32_t layer_offset =
                v3d_layer_offset(&rsc->base, psurf->u.tex.level,
                                 psurf->u.tex.first_layer + layer);
        cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                store.buffer_to_store = buffer;
                store.address = cl_address(rsc->bo, layer_offset);

                store.clear_buffer_being_stored = false;

                if (separate_stencil)
                        store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
                else
                        store.output_image_format = surf->format;

                store.r_b_swap = surf->swap_rb;
                store.memory_format = surf->tiling;

                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        store.height_in_ub_or_stride =
                                surf->padded_height_of_output_image_in_uif_blocks;
                } else if (surf->tiling == V3D_TILING_RASTER) {
                        struct v3d_resource_slice *slice =
                                &rsc->slices[psurf->u.tex.level];
                        store.height_in_ub_or_stride = slice->stride;
                }

                if (psurf->texture->nr_samples > 1) {
                        store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
                } else if (resolve_4x) {
                        /* We are either resolving from an MSAA blit buffer or
                         * resolving directly from the TLB to a resolve buffer.
                         */
                        assert((job->bbuf && job->bbuf->texture->nr_samples > 1) ||
                               (job->dbuf && job->dbuf->texture->nr_samples <= 1));
                        store.decimate_mode = V3D_DECIMATE_MODE_4X;
                } else {
                        store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
                }
        }
}

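/* Maps the depth/stencil bits of a PIPE_CLEAR_* mask to the TLB buffer
 * enum used by the LOAD/STORE_TILE_BUFFER_GENERAL packets.
 */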
static int
zs_buffer_from_pipe_bits(int pipe_clear_bits)
{
        switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
        case PIPE_CLEAR_DEPTHSTENCIL:
                return ZSTENCIL;
        case PIPE_CLEAR_DEPTH:
                return Z;
        case PIPE_CLEAR_STENCIL:
                return STENCIL;
        default:
                return NONE;
        }
}

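/* Emits the loads for all pending buffers of this layer, followed by the
 * END_OF_LOADS packet that switches the tile from loading to rendering.
 */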
static void
v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
{
        /* When blitting, no color or zs buffer is loaded; instead the blit
         * source buffer is loaded for the aspects that we are going to blit.
         */
        assert(!job->bbuf || job->load == 0);
        assert(!job->bbuf || job->nr_cbufs <= 1);

        uint32_t loads_pending = job->bbuf ? job->store : job->load;

        for (int i = 0; i < job->nr_cbufs; i++) {
                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                if (!(loads_pending & bit))
                        continue;

                struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
                assert(!job->bbuf || i == 0);

                if (!psurf)
                        continue;

                load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
                             bit, &loads_pending);
        }

        if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                assert(!job->early_zs_clear);
                struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
                struct v3d_resource *rsc = v3d_resource(src->texture);

                if (rsc->separate_stencil &&
                    (loads_pending & PIPE_CLEAR_STENCIL)) {
                        load_general(cl, src,
                                     STENCIL, layer,
                                     PIPE_CLEAR_STENCIL,
                                     &loads_pending);
                }

                if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
                        load_general(cl, src,
                                     zs_buffer_from_pipe_bits(loads_pending),
                                     layer,
                                     loads_pending & PIPE_CLEAR_DEPTHSTENCIL,
                                     &loads_pending);
                }
        }

        assert(!loads_pending);
        cl_emit(cl, END_OF_LOADS, end);
}

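/* Emits the stores for all buffers in job->store (plus any TLB blit
 * destinations), and the TLB clear for buffers being cleared.
 */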
static void
v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
{
        bool general_color_clear = false;
        uint32_t stores_pending = job->store;

        /* For V3D 4.1, use general stores for all TLB stores.
         *
         * For V3D 3.3, we only use general stores to do raw stores for any
         * MSAA surfaces. These output UIF tiled images where each 4x MSAA
         * pixel is a 2x2 quad, and the format will be that of the
         * internal_type/internal_bpp, rather than the format from GL's
         * perspective. Non-MSAA surfaces will use
         * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
         */
        assert((!job->bbuf && !job->dbuf) || job->nr_cbufs <= 1);
        for (int i = 0; i < job->nr_cbufs; i++) {
                struct pipe_surface *psurf = job->cbufs[i];
                if (!psurf)
                        continue;

                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
                if (job->blit_tlb & bit) {
                        assert(job->dbuf);
                        bool blit_resolve =
                                job->dbuf->texture->nr_samples <= 1 &&
                                psurf->texture->nr_samples > 1;
                        store_general(job, cl, job->dbuf, layer,
                                      RENDER_TARGET_0 + i, bit, NULL,
                                      false, blit_resolve);
                }

                if (!(job->store & bit))
                        continue;

                bool blit_resolve =
                        job->bbuf && job->bbuf->texture->nr_samples > 1 &&
                        psurf->texture->nr_samples <= 1;
                store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
                              &stores_pending, general_color_clear, blit_resolve);
        }

        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
                assert(!job->early_zs_clear);
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil) {
                        if (job->store & PIPE_CLEAR_DEPTH) {
                                store_general(job, cl, job->zsbuf, layer,
                                              Z, PIPE_CLEAR_DEPTH,
                                              &stores_pending,
                                              general_color_clear,
                                              false);
                        }

                        if (job->store & PIPE_CLEAR_STENCIL) {
                                store_general(job, cl, job->zsbuf, layer,
                                              STENCIL, PIPE_CLEAR_STENCIL,
                                              &stores_pending,
                                              general_color_clear,
                                              false);
                        }
                } else {
                        store_general(job, cl, job->zsbuf, layer,
                                      zs_buffer_from_pipe_bits(job->store),
                                      job->store & PIPE_CLEAR_DEPTHSTENCIL,
                                      &stores_pending, general_color_clear,
                                      false);
                }
        }

        /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
         * we still need to emit some sort of store.
         */
        if (!job->store) {
                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
                        store.buffer_to_store = NONE;
                }
        }

        assert(!stores_pending);

        /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
         * buffer bit is broken for depth/stencil. In addition, the
         * clear packet's Z/S bit is broken, but the RTs bit ends up
         * clearing Z/S.
         */
        if (job->clear_tlb) {
#if V3D_VERSION == 42
                cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
                        clear.clear_z_stencil_buffer = !job->early_zs_clear;
                        clear.clear_all_render_targets = true;
                }
#endif
#if V3D_VERSION >= 71
                cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
#endif
        }
}

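/* Builds the generic per-tile list (loads, branch into the binner's tile
 * list, stores) in the job's indirect CL and points the RCL at it.
 */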
static void
v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
{
        /* Emit the generic list in our indirect state -- the rcl will just
         * have pointers into it.
         */
        struct v3d_cl *cl = &job->indirect;
        v3d_cl_ensure_space(cl, 200, 1);
        struct v3d_cl_reloc tile_list_start = cl_get_address(cl);

        /* V3D 4.x/7.x only requires a single set of tile coordinates, and
         * END_OF_LOADS switches us between loading and rendering.
         */
        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

        v3d_rcl_emit_loads(job, cl, layer);

        /* The binner starts out writing tiles assuming that the initial mode
         * is triangles, so make sure that's the case.
         */
        cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
                fmt.primitive_type = LIST_TRIANGLES;
        }

        /* The PTB assumes this value is 0, but the HW will not set it. */
        cl_emit(cl, SET_INSTANCEID, set) {
                set.instance_id = 0;
        }

        cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

        v3d_rcl_emit_stores(job, cl, layer);

        cl_emit(cl, END_OF_TILE_MARKER, end);

        cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

        cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
                branch.start = tile_list_start;
                branch.end = cl_get_address(cl);
        }
}

/* Note that for v71, the render target cfg packets have just one field that
 * combines the internal type and clamp mode. For simplicity we keep just one
 * helper.
 *
 * Note: rt_type is in fact an "enum V3DX(Internal_Type)".
 */
static uint32_t
v3dX(clamp_for_format_and_type)(uint32_t rt_type,
                                enum pipe_format format)
{
#if V3D_VERSION == 42
        if (util_format_is_srgb(format)) {
                return V3D_RENDER_TARGET_CLAMP_NORM;
        } else if (util_format_is_pure_integer(format)) {
                return V3D_RENDER_TARGET_CLAMP_INT;
        } else {
                return V3D_RENDER_TARGET_CLAMP_NONE;
        }
#endif
#if V3D_VERSION >= 71
        switch (rt_type) {
        case V3D_INTERNAL_TYPE_8I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
        case V3D_INTERNAL_TYPE_8UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
        case V3D_INTERNAL_TYPE_8:
                return V3D_RENDER_TARGET_TYPE_CLAMP_8;
        case V3D_INTERNAL_TYPE_16I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
        case V3D_INTERNAL_TYPE_16UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
        case V3D_INTERNAL_TYPE_16F:
                return util_format_is_srgb(format) ?
                        V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
                        V3D_RENDER_TARGET_TYPE_CLAMP_16F;
        case V3D_INTERNAL_TYPE_32I:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
        case V3D_INTERNAL_TYPE_32UI:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
        case V3D_INTERNAL_TYPE_32F:
                return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
        default:
                unreachable("Unknown internal render target type");
        }
        return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
#endif
        unreachable("Wrong V3D_VERSION");
}

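/* Per-version helpers that fill the render target fields of the RCL config
 * packets from the surface's internal bpp and type.
 */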
#if V3D_VERSION >= 71
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type_clamp)
{
        if (!job->cbufs[cbuf])
                return;

        struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]);
        *rt_bpp = surf->internal_bpp;
        if (job->bbuf) {
                struct v3d_surface *bsurf = v3d_surface(job->bbuf);
                *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp);
        }
        *rt_type_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type,
                                                         surf->base.format);
}
#endif

#if V3D_VERSION == 42
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type,
                        uint32_t *rt_clamp)
{
        if (!job->cbufs[cbuf])
                return;

        struct v3d_surface *surf = v3d_surface(job->cbufs[cbuf]);
        *rt_bpp = surf->internal_bpp;
        if (job->bbuf) {
                struct v3d_surface *bsurf = v3d_surface(job->bbuf);
                *rt_bpp = MAX2(*rt_bpp, bsurf->internal_bpp);
        }
        *rt_type = surf->internal_type;
        *rt_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type,
                                                    surf->base.format);
}
#endif

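/* Returns whether the supertile at grid position (x, y), with supertiles of
 * w x h pixels, overlaps any of the job's scissor rects (or whether
 * scissoring is disabled entirely).
 */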
static bool
supertile_in_job_scissors(struct v3d_job *job,
                          uint32_t x, uint32_t y, uint32_t w, uint32_t h)
{
        if (job->scissor.disabled || job->scissor.count == 0)
                return true;

        const uint32_t min_x = x * w;
        const uint32_t min_y = y * h;
        const uint32_t max_x = min_x + w - 1;
        const uint32_t max_y = min_y + h - 1;

        for (uint32_t i = 0; i < job->scissor.count; i++) {
                const uint32_t min_s_x = job->scissor.rects[i].min_x;
                const uint32_t min_s_y = job->scissor.rects[i].min_y;
                const uint32_t max_s_x = job->scissor.rects[i].max_x;
                const uint32_t max_s_y = job->scissor.rects[i].max_y;

                if (max_x < min_s_x || min_x > max_s_x ||
                    max_y < min_s_y || min_y > max_s_y) {
                        continue;
                }

                return true;
        }

        return false;
}

static inline bool
do_double_initial_tile_clear(const struct v3d_job *job)
{
        /* Our rendering code emits an initial clear per layer, unlike the
         * Vulkan driver, which only executes a single initial clear for all
         * layers. This is because in GL we don't use the
         * 'clear_buffer_being_stored' bit when storing tiles, so each layer
         * needs the initial clear. This is also why this helper, unlike the
         * Vulkan version, doesn't check the layer count to decide if a
         * double clear for double buffer mode is required.
         */
        return job->double_buffer &&
               (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
}

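/* Emits the RCL commands for a single layer: supertile configuration, the
 * initial tile buffer clears (including the GFXH-1742 workaround), the
 * generic per-tile list, and the supertile coordinates to render.
 */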
static void
emit_render_layer(struct v3d_job *job, uint32_t layer)
{
        uint32_t supertile_w = 1, supertile_h = 1;

        /* If doing multicore binning, we would need to initialize each
         * core's tile list here.
         */
        uint32_t tile_alloc_offset =
                layer * job->draw_tiles_x * job->draw_tiles_y * 64;
        cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
                list.address = cl_address(job->tile_alloc, tile_alloc_offset);
        }

        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
                uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
                const uint32_t max_supertiles = 256;

                /* Size up our supertiles until we get under the limit. */
                for (;;) {
                        frame_w_in_supertiles = DIV_ROUND_UP(job->draw_tiles_x,
                                                             supertile_w);
                        frame_h_in_supertiles = DIV_ROUND_UP(job->draw_tiles_y,
                                                             supertile_h);
                        if (frame_w_in_supertiles *
                            frame_h_in_supertiles < max_supertiles) {
                                break;
                        }

                        if (supertile_w < supertile_h)
                                supertile_w++;
                        else
                                supertile_h++;
                }

                config.number_of_bin_tile_lists = 1;
                config.total_frame_width_in_tiles = job->draw_tiles_x;
                config.total_frame_height_in_tiles = job->draw_tiles_y;

                config.supertile_width_in_tiles = supertile_w;
                config.supertile_height_in_tiles = supertile_h;

                config.total_frame_width_in_supertiles = frame_w_in_supertiles;
                config.total_frame_height_in_supertiles = frame_h_in_supertiles;
        }

        /* Start by clearing the tile buffer. */
        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
                coords.tile_column_number = 0;
                coords.tile_row_number = 0;
        }

        /* Emit an initial clear of the tile buffers. This is necessary
         * for any buffers that should be cleared (since clearing
         * normally happens at the *end* of the generic tile list), but
         * it's also nice to clear everything so the first tile doesn't
         * inherit any contents from some previous frame.
         *
         * Also, implement the GFXH-1742 workaround. There's a race in
         * the HW between the RCL updating the TLB's internal type/size
         * and the spawning of the QPU instances using the TLB's current
         * internal type/size. To make sure the QPUs get the right
         * state, we need 1 dummy store in between internal type/size
         * changes on V3D 3.x, and 2 dummy stores on 4.x.
         */
        for (int i = 0; i < 2; i++) {
                if (i > 0)
                        cl_emit(&job->rcl, TILE_COORDINATES, coords);
                cl_emit(&job->rcl, END_OF_LOADS, end);
                cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
                        store.buffer_to_store = NONE;
                }

                if (i == 0 || do_double_initial_tile_clear(job)) {
#if V3D_VERSION < 71
                        cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
                                clear.clear_z_stencil_buffer = !job->early_zs_clear;
                                clear.clear_all_render_targets = true;
                        }
#else
                        cl_emit(&job->rcl, CLEAR_RENDER_TARGETS, clear);
#endif
                }
                cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
        }
        cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);

        v3d_rcl_emit_generic_per_tile_list(job, layer);

        /* XXX perf: We should expose GL_MESA_tile_raster_order to
         * improve X11 performance, but we should use Morton order
         * otherwise to improve cache locality.
         */
        uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
        uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
        uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
        uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;

        uint32_t max_x_supertile = 0;
        uint32_t max_y_supertile = 0;
        if (job->draw_max_x != 0 && job->draw_max_y != 0) {
                max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
                max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
        }

        for (int y = min_y_supertile; y <= max_y_supertile; y++) {
                for (int x = min_x_supertile; x <= max_x_supertile; x++) {
                        if (supertile_in_job_scissors(job, x, y,
                                                      supertile_w_in_pixels,
                                                      supertile_h_in_pixels)) {
                                cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
                                        coords.column_number_in_supertiles = x;
                                        coords.row_number_in_supertiles = y;
                                }
                        }
                }
        }
}

void
v3dX(emit_rcl)(struct v3d_job *job)
{
        /* The RCL list should be empty. */
        assert(!job->rcl.bo);

        v3d_cl_ensure_space_with_branch(&job->rcl, 200 +
                                        MAX2(job->num_layers, 1) * 256 *
                                        cl_packet_length(SUPERTILE_COORDINATES));
        job->submit.rcl_start = job->rcl.bo->offset;
        v3d_job_add_bo(job, job->rcl.bo);

        /* Common config must be the first TILE_RENDERING_MODE_CFG
         * and Z_STENCIL_CLEAR_VALUES must be last. The ones in between are
         * optional updates to the previous HW state.
         */
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
                if (job->zsbuf) {
                        struct v3d_surface *surf = v3d_surface(job->zsbuf);
                        config.internal_depth_type = surf->internal_type;
                }

                if (job->decided_global_ez_enable) {
                        switch (job->first_ez_state) {
                        case V3D_EZ_UNDECIDED:
                        case V3D_EZ_LT_LE:
                                config.early_z_disable = false;
                                config.early_z_test_and_update_direction =
                                        EARLY_Z_DIRECTION_LT_LE;
                                break;
                        case V3D_EZ_GT_GE:
                                config.early_z_disable = false;
                                config.early_z_test_and_update_direction =
                                        EARLY_Z_DIRECTION_GT_GE;
                                break;
                        case V3D_EZ_DISABLED:
                                config.early_z_disable = true;
                        }
                } else {
                        assert(job->draw_calls_queued == 0);
                        config.early_z_disable = true;
                }

                assert(job->zsbuf || config.early_z_disable);

                job->early_zs_clear = (job->clear_tlb & PIPE_CLEAR_DEPTHSTENCIL) &&
                        !(job->load & PIPE_CLEAR_DEPTHSTENCIL) &&
                        !(job->store & PIPE_CLEAR_DEPTHSTENCIL);

                config.early_depth_stencil_clear = job->early_zs_clear;

                config.image_width_pixels = job->draw_width;
                config.image_height_pixels = job->draw_height;

                config.number_of_render_targets = MAX2(job->nr_cbufs, 1);

                assert(!job->msaa || !job->double_buffer);
                config.multisample_mode_4x = job->msaa;
                config.double_buffer_in_non_ms_mode = job->double_buffer;

#if V3D_VERSION == 42
                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
#endif
#if V3D_VERSION >= 71
                config.log2_tile_width = log2_tile_size(job->tile_width);
                config.log2_tile_height = log2_tile_size(job->tile_height);

                /* FIXME: ideally we would like this assert on the packet
                 * header (as it is generic, so it also applies to GL). We
                 * would need to expand gen_pack_header for that.
                 */
                assert(config.log2_tile_width == config.log2_tile_height ||
                       config.log2_tile_width == config.log2_tile_height + 1);
#endif
        }

#if V3D_VERSION >= 71
        uint32_t base_addr = 0;

        /* If we don't have any color RTs, we still need to emit one and
         * flag it as unused by setting stride = 1.
         */
        if (job->nr_cbufs == 0) {
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
                        rt.stride = 1; /* Unused */
                }
        }
#endif
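        /* Emit the per-render-target configuration. On V3D 4.2 the clear
         * colors go in the CLEAR_COLORS_PART1-3 packets here and the RT
         * config in a single TILE_RENDERING_MODE_CFG_COLOR packet below; on
         * V3D 7.1 each RT gets RENDER_TARGET_PART1-3 packets combining the
         * clear color, internal bpp/type, and the RT's tile buffer base
         * address and stride.
         */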
        for (int i = 0; i < job->nr_cbufs; i++) {
                struct pipe_surface *psurf = job->cbufs[i];
                if (!psurf) {
#if V3D_VERSION >= 71
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
                                rt.render_target_number = i;
                                rt.stride = 1; /* Unused */
                        }
#endif
                        continue;
                }

                struct v3d_surface *surf = v3d_surface(psurf);
                struct v3d_resource *rsc = v3d_resource(psurf->texture);

                UNUSED uint32_t config_pad = 0;
                UNUSED uint32_t clear_pad = 0;

                /* XXX: Set the pad for raster. */
                if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
                    surf->tiling == V3D_TILING_UIF_XOR) {
                        int uif_block_height = v3d_utile_height(rsc->cpp) * 2;
                        uint32_t implicit_padded_height =
                                align(job->draw_height, uif_block_height) /
                                uif_block_height;
                        if (surf->padded_height_of_output_image_in_uif_blocks -
                            implicit_padded_height < 15) {
                                config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
                                              implicit_padded_height);
                        } else {
                                config_pad = 15;
                                clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
                        }
                }

#if V3D_VERSION == 42
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
                        clear) {
                        clear.clear_color_low_32_bits = job->clear_color[i][0];
                        clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
                        clear.render_target_number = i;
                };

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2,
                                clear) {
                                clear.clear_color_mid_low_32_bits =
                                        ((job->clear_color[i][1] >> 24) |
                                         (job->clear_color[i][2] << 8));
                                clear.clear_color_mid_high_24_bits =
                                        ((job->clear_color[i][2] >> 24) |
                                         ((job->clear_color[i][3] & 0xffff) << 8));
                                clear.render_target_number = i;
                        };
                }

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3,
                                clear) {
                                clear.uif_padded_height_in_uif_blocks = clear_pad;
                                clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
                                clear.render_target_number = i;
                        };
                }
#endif
#if V3D_VERSION >= 71
                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
                        rt.clear_color_low_bits = job->clear_color[i][0];
                        v3d_setup_render_target(job, i, &rt.internal_bpp,
                                                &rt.internal_type_and_clamping);
                        rt.stride =
                                v3d_compute_rt_row_row_stride_128_bits(job->tile_width,
                                                                       v3d_internal_bpp_words(rt.internal_bpp));
                        rt.base_address = base_addr;
                        rt.render_target_number = i;

                        base_addr += (job->tile_height * rt.stride) / 8;
                }

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
                                rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
                                        ((uint64_t)job->clear_color[i][1]) |
                                        (((uint64_t)(job->clear_color[i][2] & 0xff)) << 32);
                                rt.render_target_number = i;
                        }
                }

                if (surf->internal_bpp >= V3D_INTERNAL_BPP_128) {
                        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
                                rt.clear_color_top_bits = /* 56 bits (24 + 32) */
                                        (((uint64_t)(job->clear_color[i][2] & 0xffffff00)) >> 8) |
                                        (((uint64_t)(job->clear_color[i][3])) << 24);
                                rt.render_target_number = i;
                        }
                }
#endif
        }

#if V3D_VERSION == 42
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                v3d_setup_render_target(job, 0,
                                        &rt.render_target_0_internal_bpp,
                                        &rt.render_target_0_internal_type,
                                        &rt.render_target_0_clamp);
                v3d_setup_render_target(job, 1,
                                        &rt.render_target_1_internal_bpp,
                                        &rt.render_target_1_internal_type,
                                        &rt.render_target_1_clamp);
                v3d_setup_render_target(job, 2,
                                        &rt.render_target_2_internal_bpp,
                                        &rt.render_target_2_internal_type,
                                        &rt.render_target_2_clamp);
                v3d_setup_render_target(job, 3,
                                        &rt.render_target_3_internal_bpp,
                                        &rt.render_target_3_internal_type,
                                        &rt.render_target_3_clamp);
        }
#endif

        /* Ends rendering mode config. */
        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
                clear) {
                clear.z_clear_value = job->clear_z;
                clear.stencil_clear_value = job->clear_s;
        };

        /* Always set initial block size before the first branch, which needs
         * to match the value from binning mode config.
         */
        cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
                init.use_auto_chained_tile_lists = true;
                init.size_of_first_block_in_chained_tile_lists =
                        TILE_ALLOCATION_BLOCK_SIZE_64B;
        }

        /* ARB_framebuffer_no_attachments allows rendering to happen even when
         * the framebuffer has no attachments, the idea being that fragment
         * shaders can still do image load/store, ssbo, etc without having to
         * write to actual attachments, so always run at least one iteration
         * of the loop.
         */
        assert(job->num_layers > 0 || (job->load == 0 && job->store == 0));
        for (int layer = 0; layer < MAX2(1, job->num_layers); layer++)
                emit_render_layer(job, layer);

        cl_emit(&job->rcl, END_OF_RENDERING, end);
}