xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/v3d/v3dx_rcl.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/format/u_format.h"
25 #include "util/macros.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/common/v3d_tiling.h"
29 #include "broadcom/common/v3d_util.h"
30 #include "broadcom/cle/v3dx_pack.h"
31 
/* Mask made of the clear bits of color buffers 0 through 3. */
#define PIPE_CLEAR_COLOR_BUFFERS (PIPE_CLEAR_COLOR0 |                   \
                                  PIPE_CLEAR_COLOR1 |                   \
                                  PIPE_CLEAR_COLOR2 |                   \
                                  PIPE_CLEAR_COLOR3)

/* Zero-based index of the PIPE_CLEAR_COLOR0 bit; ffs() counts from one. */
#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)
38 
39 static void
load_general(struct v3d_cl * cl,struct pipe_surface * psurf,int buffer,int layer,uint32_t pipe_bit,uint32_t * loads_pending)40 load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
41              int layer, uint32_t pipe_bit, uint32_t *loads_pending)
42 {
43         struct v3d_surface *surf = v3d_surface(psurf);
44         bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
45         if (separate_stencil) {
46                 psurf = surf->separate_stencil;
47                 surf = v3d_surface(psurf);
48         }
49 
50         struct v3d_resource *rsc = v3d_resource(psurf->texture);
51 
52         uint32_t layer_offset =
53                 v3d_layer_offset(&rsc->base, psurf->u.tex.level,
54                                  psurf->u.tex.first_layer + layer);
55         cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
56                 load.buffer_to_load = buffer;
57                 load.address = cl_address(rsc->bo, layer_offset);
58 
59                 load.memory_format = surf->tiling;
60                 if (separate_stencil)
61                         load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
62                 else
63                         load.input_image_format = surf->format;
64                 load.r_b_swap = surf->swap_rb;
65                 load.force_alpha_1 = util_format_has_alpha1(psurf->format);
66                 if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
67                     surf->tiling == V3D_TILING_UIF_XOR) {
68                         load.height_in_ub_or_stride =
69                                 surf->padded_height_of_output_image_in_uif_blocks;
70                 } else if (surf->tiling == V3D_TILING_RASTER) {
71                         struct v3d_resource_slice *slice =
72                                 &rsc->slices[psurf->u.tex.level];
73                         load.height_in_ub_or_stride = slice->stride;
74                 }
75 
76                 if (psurf->texture->nr_samples > 1)
77                         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
78                 else
79                         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
80 
81         }
82 
83         *loads_pending &= ~pipe_bit;
84 }
85 
86 static void
store_general(struct v3d_job * job,struct v3d_cl * cl,struct pipe_surface * psurf,int layer,int buffer,int pipe_bit,uint32_t * stores_pending,bool general_color_clear,bool resolve_4x)87 store_general(struct v3d_job *job,
88               struct v3d_cl *cl, struct pipe_surface *psurf,
89               int layer, int buffer, int pipe_bit,
90               uint32_t *stores_pending, bool general_color_clear,
91               bool resolve_4x)
92 {
93         struct v3d_surface *surf = v3d_surface(psurf);
94         bool separate_stencil = surf->separate_stencil && buffer == STENCIL;
95         if (separate_stencil) {
96                 psurf = surf->separate_stencil;
97                 surf = v3d_surface(psurf);
98         }
99 
100         if (stores_pending)
101                 *stores_pending &= ~pipe_bit;
102 
103         struct v3d_resource *rsc = v3d_resource(psurf->texture);
104 
105         rsc->writes++;
106         rsc->graphics_written = true;
107 
108         uint32_t layer_offset =
109                 v3d_layer_offset(&rsc->base, psurf->u.tex.level,
110                                  psurf->u.tex.first_layer + layer);
111         cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
112                 store.buffer_to_store = buffer;
113                 store.address = cl_address(rsc->bo, layer_offset);
114 
115                 store.clear_buffer_being_stored = false;
116 
117                 if (separate_stencil)
118                         store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
119                 else
120                         store.output_image_format = surf->format;
121 
122                 store.r_b_swap = surf->swap_rb;
123                 store.memory_format = surf->tiling;
124 
125                 if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
126                     surf->tiling == V3D_TILING_UIF_XOR) {
127                         store.height_in_ub_or_stride =
128                                 surf->padded_height_of_output_image_in_uif_blocks;
129                 } else if (surf->tiling == V3D_TILING_RASTER) {
130                         struct v3d_resource_slice *slice =
131                                 &rsc->slices[psurf->u.tex.level];
132                         store.height_in_ub_or_stride = slice->stride;
133                 }
134 
135                 if (psurf->texture->nr_samples > 1) {
136                         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
137                 } else if (resolve_4x) {
138                         /* We are resolving from a MSAA blit buffer or we are
139                          * resolving directly from TLB to a resolve buffer
140                          */
141                         assert((job->bbuf && job->bbuf->texture->nr_samples > 1) ||
142                                (job->dbuf && job->dbuf->texture->nr_samples <= 1));
143                         store.decimate_mode = V3D_DECIMATE_MODE_4X;
144                 } else {
145                         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
146                 }
147         }
148 }
149 
150 static int
zs_buffer_from_pipe_bits(int pipe_clear_bits)151 zs_buffer_from_pipe_bits(int pipe_clear_bits)
152 {
153         switch (pipe_clear_bits & PIPE_CLEAR_DEPTHSTENCIL) {
154         case PIPE_CLEAR_DEPTHSTENCIL:
155                 return ZSTENCIL;
156         case PIPE_CLEAR_DEPTH:
157                 return Z;
158         case PIPE_CLEAR_STENCIL:
159                 return STENCIL;
160         default:
161                 return NONE;
162         }
163 }
164 
165 static void
v3d_rcl_emit_loads(struct v3d_job * job,struct v3d_cl * cl,int layer)166 v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
167 {
168         /* When blitting, no color or zs buffer is loaded; instead the blit
169          * source buffer is loaded for the aspects that we are going to blit.
170          */
171         assert(!job->bbuf || job->load == 0);
172         assert(!job->bbuf || job->nr_cbufs <= 1);
173 
174         uint32_t loads_pending = job->bbuf ? job->store : job->load;
175 
176         for (int i = 0; i < job->nr_cbufs; i++) {
177                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
178                 if (!(loads_pending & bit))
179                         continue;
180 
181                 struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
182                 assert(!job->bbuf || i == 0);
183 
184                 if (!psurf)
185                         continue;
186 
187                 load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
188                              bit, &loads_pending);
189         }
190 
191         if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
192                 assert(!job->early_zs_clear);
193                 struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
194                 struct v3d_resource *rsc = v3d_resource(src->texture);
195 
196                 if (rsc->separate_stencil &&
197                     (loads_pending & PIPE_CLEAR_STENCIL)) {
198                         load_general(cl, src,
199                                      STENCIL, layer,
200                                      PIPE_CLEAR_STENCIL,
201                                      &loads_pending);
202                 }
203 
204                 if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
205                         load_general(cl, src,
206                                      zs_buffer_from_pipe_bits(loads_pending),
207                                      layer,
208                                      loads_pending & PIPE_CLEAR_DEPTHSTENCIL,
209                                      &loads_pending);
210                 }
211         }
212 
213         assert(!loads_pending);
214         cl_emit(cl, END_OF_LOADS, end);
215 }
216 
217 static void
v3d_rcl_emit_stores(struct v3d_job * job,struct v3d_cl * cl,int layer)218 v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
219 {
220         bool general_color_clear = false;
221         uint32_t stores_pending = job->store;
222 
223         /* For V3D 4.1, use general stores for all TLB stores.
224          *
225          * For V3D 3.3, we only use general stores to do raw stores for any
226          * MSAA surfaces.  These output UIF tiled images where each 4x MSAA
227          * pixel is a 2x2 quad, and the format will be that of the
228          * internal_type/internal_bpp, rather than the format from GL's
229          * perspective.  Non-MSAA surfaces will use
230          * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED.
231          */
232         assert((!job->bbuf && !job->dbuf) || job->nr_cbufs <= 1);
233         for (int i = 0; i < job->nr_cbufs; i++) {
234                 struct pipe_surface *psurf = job->cbufs[i];
235                 if (!psurf)
236                         continue;
237 
238                 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
239                 if (job->blit_tlb & bit) {
240                         assert(job->dbuf);
241                         bool blit_resolve =
242                                 job->dbuf->texture->nr_samples <= 1 &&
243                                 psurf->texture->nr_samples > 1;
244                         store_general(job, cl, job->dbuf, layer,
245                                       RENDER_TARGET_0 + i, bit, NULL,
246                                       false, blit_resolve);
247                 }
248 
249                 if (!(job->store & bit))
250                         continue;
251 
252                 bool blit_resolve =
253                         job->bbuf && job->bbuf->texture->nr_samples > 1 &&
254                         psurf->texture->nr_samples <= 1;
255                 store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
256                               &stores_pending, general_color_clear, blit_resolve);
257         }
258 
259         if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
260                 assert(!job->early_zs_clear);
261                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
262                 if (rsc->separate_stencil) {
263                         if (job->store & PIPE_CLEAR_DEPTH) {
264                                 store_general(job, cl, job->zsbuf, layer,
265                                               Z, PIPE_CLEAR_DEPTH,
266                                               &stores_pending,
267                                               general_color_clear,
268                                               false);
269                         }
270 
271                         if (job->store & PIPE_CLEAR_STENCIL) {
272                                 store_general(job, cl, job->zsbuf, layer,
273                                               STENCIL, PIPE_CLEAR_STENCIL,
274                                               &stores_pending,
275                                               general_color_clear,
276                                               false);
277                         }
278                 } else {
279                         store_general(job, cl, job->zsbuf, layer,
280                                       zs_buffer_from_pipe_bits(job->store),
281                                       job->store & PIPE_CLEAR_DEPTHSTENCIL,
282                                       &stores_pending, general_color_clear,
283                                       false);
284                 }
285         }
286 
287 
288         /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
289          * we still need to emit some sort of store.
290          */
291         if (!job->store) {
292                 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
293                         store.buffer_to_store = NONE;
294                 }
295         }
296 
297         assert(!stores_pending);
298 
299         /* GFXH-1461/GFXH-1689: The per-buffer store command's clear
300          * buffer bit is broken for depth/stencil.  In addition, the
301          * clear packet's Z/S bit is broken, but the RTs bit ends up
302          * clearing Z/S.
303          */
304         if (job->clear_tlb) {
305 #if V3D_VERSION == 42
306                 cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
307                         clear.clear_z_stencil_buffer = !job->early_zs_clear;
308                         clear.clear_all_render_targets = true;
309                 }
310 #endif
311 #if V3D_VERSION >= 71
312                 cl_emit(cl, CLEAR_RENDER_TARGETS, clear);
313 #endif
314 
315         }
316 }
317 
318 static void
v3d_rcl_emit_generic_per_tile_list(struct v3d_job * job,int layer)319 v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
320 {
321         /* Emit the generic list in our indirect state -- the rcl will just
322          * have pointers into it.
323          */
324         struct v3d_cl *cl = &job->indirect;
325         v3d_cl_ensure_space(cl, 200, 1);
326         struct v3d_cl_reloc tile_list_start = cl_get_address(cl);
327 
328         /* V3D 4.x/7.x only requires a single tile coordinates, and
329          * END_OF_LOADS switches us between loading and rendering.
330          */
331         cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
332 
333         v3d_rcl_emit_loads(job, cl, layer);
334 
335         /* The binner starts out writing tiles assuming that the initial mode
336          * is triangles, so make sure that's the case.
337          */
338         cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
339                 fmt.primitive_type = LIST_TRIANGLES;
340         }
341 
342         /* PTB assumes that value to be 0, but hw will not set it. */
343         cl_emit(cl, SET_INSTANCEID, set) {
344            set.instance_id = 0;
345         }
346 
347         cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
348 
349         v3d_rcl_emit_stores(job, cl, layer);
350 
351         cl_emit(cl, END_OF_TILE_MARKER, end);
352 
353         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
354 
355         cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
356                 branch.start = tile_list_start;
357                 branch.end = cl_get_address(cl);
358         }
359 }
360 
361 /* Note that for v71, render target cfg packets has just one field that
362  * combined the internal type and clamp mode. For simplicity we keep just one
363  * helper.
364  *
365  * Note: rt_type is in fact a "enum V3DX(Internal_Type)".
366  *
367  */
368 static uint32_t
v3dX(clamp_for_format_and_type)369 v3dX(clamp_for_format_and_type)(uint32_t rt_type,
370                                 enum pipe_format format)
371 {
372 #if V3D_VERSION == 42
373         if (util_format_is_srgb(format)) {
374                 return V3D_RENDER_TARGET_CLAMP_NORM;
375         } else if (util_format_is_pure_integer(format)) {
376                 return V3D_RENDER_TARGET_CLAMP_INT;
377         } else {
378                 return V3D_RENDER_TARGET_CLAMP_NONE;
379         }
380 #endif
381 #if V3D_VERSION >= 71
382         switch (rt_type) {
383         case V3D_INTERNAL_TYPE_8I:
384                 return V3D_RENDER_TARGET_TYPE_CLAMP_8I_CLAMPED;
385         case V3D_INTERNAL_TYPE_8UI:
386                 return V3D_RENDER_TARGET_TYPE_CLAMP_8UI_CLAMPED;
387         case V3D_INTERNAL_TYPE_8:
388                 return V3D_RENDER_TARGET_TYPE_CLAMP_8;
389         case V3D_INTERNAL_TYPE_16I:
390                 return V3D_RENDER_TARGET_TYPE_CLAMP_16I_CLAMPED;
391         case V3D_INTERNAL_TYPE_16UI:
392                 return V3D_RENDER_TARGET_TYPE_CLAMP_16UI_CLAMPED;
393         case V3D_INTERNAL_TYPE_16F:
394                 return util_format_is_srgb(format) ?
395                         V3D_RENDER_TARGET_TYPE_CLAMP_16F_CLAMP_NORM :
396                         V3D_RENDER_TARGET_TYPE_CLAMP_16F;
397         case V3D_INTERNAL_TYPE_32I:
398                 return V3D_RENDER_TARGET_TYPE_CLAMP_32I_CLAMPED;
399         case V3D_INTERNAL_TYPE_32UI:
400                 return V3D_RENDER_TARGET_TYPE_CLAMP_32UI_CLAMPED;
401         case V3D_INTERNAL_TYPE_32F:
402                 return V3D_RENDER_TARGET_TYPE_CLAMP_32F;
403         default:
404                 unreachable("Unknown internal render target type");
405         }
406         return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
407 #endif
408         unreachable("Wrong V3D_VERSION");
409 }
410 
#if V3D_VERSION >= 71
/**
 * Fills in the internal bpp and the combined type/clamp value for one color
 * buffer of the job (V3D 7.1+ variant).
 *
 * The outputs are left untouched when the color buffer slot is empty.  When
 * the job has a blit source, the reported bpp is the larger of the color
 * buffer's and the blit source's internal bpp.
 *
 * \param job            job being recorded
 * \param cbuf           index into job->cbufs
 * \param rt_bpp         receives the internal bpp
 * \param rt_type_clamp  receives the combined internal type/clamp value
 */
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type_clamp)
{
        struct pipe_surface *color = job->cbufs[cbuf];
        if (!color)
                return;

        struct v3d_surface *surf = v3d_surface(color);
        uint32_t bpp = surf->internal_bpp;

        if (job->bbuf) {
                struct v3d_surface *blit_surf = v3d_surface(job->bbuf);
                bpp = MAX2(bpp, blit_surf->internal_bpp);
        }

        *rt_bpp = bpp;
        *rt_type_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type,
                                                         surf->base.format);
}
#endif
431 
#if V3D_VERSION == 42
/**
 * Fills in the internal bpp, internal type and clamp mode for one color
 * buffer of the job (V3D 4.2 variant).
 *
 * The outputs are left untouched when the color buffer slot is empty.  When
 * the job has a blit source, the reported bpp is the larger of the color
 * buffer's and the blit source's internal bpp.
 *
 * \param job       job being recorded
 * \param cbuf      index into job->cbufs
 * \param rt_bpp    receives the internal bpp
 * \param rt_type   receives the internal type
 * \param rt_clamp  receives the clamp mode
 */
static void
v3d_setup_render_target(struct v3d_job *job,
                        int cbuf,
                        uint32_t *rt_bpp,
                        uint32_t *rt_type,
                        uint32_t *rt_clamp)
{
        struct pipe_surface *color = job->cbufs[cbuf];
        if (!color)
                return;

        struct v3d_surface *surf = v3d_surface(color);
        uint32_t bpp = surf->internal_bpp;

        if (job->bbuf) {
                struct v3d_surface *blit_surf = v3d_surface(job->bbuf);
                bpp = MAX2(bpp, blit_surf->internal_bpp);
        }

        *rt_bpp = bpp;
        *rt_type = surf->internal_type;
        *rt_clamp = v3dX(clamp_for_format_and_type)(surf->internal_type,
                                                    surf->base.format);
}
#endif
454 
455 static bool
supertile_in_job_scissors(struct v3d_job * job,uint32_t x,uint32_t y,uint32_t w,uint32_t h)456 supertile_in_job_scissors(struct v3d_job *job,
457                           uint32_t x, uint32_t y, uint32_t w, uint32_t h)
458 {
459    if (job->scissor.disabled || job->scissor.count == 0)
460       return true;
461 
462    const uint32_t min_x = x * w;
463    const uint32_t min_y = y * h;
464    const uint32_t max_x = min_x + w - 1;
465    const uint32_t max_y = min_y + h - 1;
466 
467    for (uint32_t i = 0; i < job->scissor.count; i++) {
468            const uint32_t min_s_x = job->scissor.rects[i].min_x;
469            const uint32_t min_s_y = job->scissor.rects[i].min_y;
470            const uint32_t max_s_x = job->scissor.rects[i].max_x;
471            const uint32_t max_s_y = job->scissor.rects[i].max_y;
472 
473            if (max_x < min_s_x || min_x > max_s_x ||
474                max_y < min_s_y || min_y > max_s_y) {
475                    continue;
476            }
477 
478            return true;
479    }
480 
481    return false;
482 }
483 
484 static inline bool
do_double_initial_tile_clear(const struct v3d_job * job)485 do_double_initial_tile_clear(const struct v3d_job *job)
486 {
487         /* Our rendering code emits an initial clear per layer, unlike the
488          * Vulkan driver, which only executes a single initial clear for all
489          * layers. This is because in GL we don't use the
490          * 'clear_buffer_being_stored' bit when storing tiles, so each layer
491          * needs the iniital clear. This is also why this helper, unlike the
492          * Vulkan version, doesn't check the layer count to decide if double
493          * clear for double buffer mode is required.
494          */
495         return job->double_buffer &&
496                (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
497 }
498 
499 static void
emit_render_layer(struct v3d_job * job,uint32_t layer)500 emit_render_layer(struct v3d_job *job, uint32_t layer)
501 {
502         uint32_t supertile_w = 1, supertile_h = 1;
503 
504         /* If doing multicore binning, we would need to initialize each
505          * core's tile list here.
506          */
507         uint32_t tile_alloc_offset =
508                 layer * job->draw_tiles_x * job->draw_tiles_y * 64;
509         cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
510                 list.address = cl_address(job->tile_alloc, tile_alloc_offset);
511         }
512 
513         cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
514                 uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
515                 const uint32_t max_supertiles = 256;
516 
517                 /* Size up our supertiles until we get under the limit. */
518                 for (;;) {
519                         frame_w_in_supertiles = DIV_ROUND_UP(job->draw_tiles_x,
520                                                              supertile_w);
521                         frame_h_in_supertiles = DIV_ROUND_UP(job->draw_tiles_y,
522                                                              supertile_h);
523                         if (frame_w_in_supertiles *
524                                 frame_h_in_supertiles < max_supertiles) {
525                                 break;
526                         }
527 
528                         if (supertile_w < supertile_h)
529                                 supertile_w++;
530                         else
531                                 supertile_h++;
532                 }
533 
534                 config.number_of_bin_tile_lists = 1;
535                 config.total_frame_width_in_tiles = job->draw_tiles_x;
536                 config.total_frame_height_in_tiles = job->draw_tiles_y;
537 
538                 config.supertile_width_in_tiles = supertile_w;
539                 config.supertile_height_in_tiles = supertile_h;
540 
541                 config.total_frame_width_in_supertiles = frame_w_in_supertiles;
542                 config.total_frame_height_in_supertiles = frame_h_in_supertiles;
543         }
544 
545         /* Start by clearing the tile buffer. */
546         cl_emit(&job->rcl, TILE_COORDINATES, coords) {
547                 coords.tile_column_number = 0;
548                 coords.tile_row_number = 0;
549         }
550 
551         /* Emit an initial clear of the tile buffers.  This is necessary
552          * for any buffers that should be cleared (since clearing
553          * normally happens at the *end* of the generic tile list), but
554          * it's also nice to clear everything so the first tile doesn't
555          * inherit any contents from some previous frame.
556          *
557          * Also, implement the GFXH-1742 workaround.  There's a race in
558          * the HW between the RCL updating the TLB's internal type/size
559          * and thespawning of the QPU instances using the TLB's current
560          * internal type/size.  To make sure the QPUs get the right
561          * state, we need 1 dummy store in between internal type/size
562          * changes on V3D 3.x, and 2 dummy stores on 4.x.
563          */
564         for (int i = 0; i < 2; i++) {
565                 if (i > 0)
566                         cl_emit(&job->rcl, TILE_COORDINATES, coords);
567                 cl_emit(&job->rcl, END_OF_LOADS, end);
568                 cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
569                         store.buffer_to_store = NONE;
570                 }
571 
572                 if (i == 0 || do_double_initial_tile_clear(job)) {
573 #if V3D_VERSION < 71
574                         cl_emit(&job->rcl, CLEAR_TILE_BUFFERS, clear) {
575                                 clear.clear_z_stencil_buffer = !job->early_zs_clear;
576                                 clear.clear_all_render_targets = true;
577                         }
578 #else
579                         cl_emit(&job->rcl, CLEAR_RENDER_TARGETS, clear);
580 #endif
581                 }
582                 cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
583         }
584         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
585 
586         v3d_rcl_emit_generic_per_tile_list(job, layer);
587 
588         /* XXX perf: We should expose GL_MESA_tile_raster_order to
589          * improve X11 performance, but we should use Morton order
590          * otherwise to improve cache locality.
591          */
592         uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
593         uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
594         uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
595         uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
596 
597         uint32_t max_x_supertile = 0;
598         uint32_t max_y_supertile = 0;
599         if (job->draw_max_x != 0 && job->draw_max_y != 0) {
600                 max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
601                 max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
602         }
603 
604         for (int y = min_y_supertile; y <= max_y_supertile; y++) {
605                 for (int x = min_x_supertile; x <= max_x_supertile; x++) {
606                         if (supertile_in_job_scissors(job, x, y,
607                                                       supertile_w_in_pixels,
608                                                       supertile_h_in_pixels)) {
609                                 cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
610                                       coords.column_number_in_supertiles = x;
611                                       coords.row_number_in_supertiles = y;
612                                 }
613                         }
614                 }
615         }
616 }
617 
618 void
v3dX(emit_rcl)619 v3dX(emit_rcl)(struct v3d_job *job)
620 {
621         /* The RCL list should be empty. */
622         assert(!job->rcl.bo);
623 
624         v3d_cl_ensure_space_with_branch(&job->rcl, 200 +
625                                         MAX2(job->num_layers, 1) * 256 *
626                                         cl_packet_length(SUPERTILE_COORDINATES));
627         job->submit.rcl_start = job->rcl.bo->offset;
628         v3d_job_add_bo(job, job->rcl.bo);
629 
630         /* Common config must be the first TILE_RENDERING_MODE_CFG
631          * and Z_STENCIL_CLEAR_VALUES must be last.  The ones in between are
632          * optional updates to the previous HW state.
633          */
634         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
635                 if (job->zsbuf) {
636                         struct v3d_surface *surf = v3d_surface(job->zsbuf);
637                         config.internal_depth_type = surf->internal_type;
638                 }
639 
640                 if (job->decided_global_ez_enable) {
641                         switch (job->first_ez_state) {
642                         case V3D_EZ_UNDECIDED:
643                         case V3D_EZ_LT_LE:
644                                 config.early_z_disable = false;
645                                 config.early_z_test_and_update_direction =
646                                         EARLY_Z_DIRECTION_LT_LE;
647                                 break;
648                         case V3D_EZ_GT_GE:
649                                 config.early_z_disable = false;
650                                 config.early_z_test_and_update_direction =
651                                         EARLY_Z_DIRECTION_GT_GE;
652                                 break;
653                         case V3D_EZ_DISABLED:
654                                 config.early_z_disable = true;
655                         }
656                 } else {
657                         assert(job->draw_calls_queued == 0);
658                         config.early_z_disable = true;
659                 }
660 
661                 assert(job->zsbuf || config.early_z_disable);
662 
663                 job->early_zs_clear = (job->clear_tlb & PIPE_CLEAR_DEPTHSTENCIL) &&
664                         !(job->load & PIPE_CLEAR_DEPTHSTENCIL) &&
665                         !(job->store & PIPE_CLEAR_DEPTHSTENCIL);
666 
667                 config.early_depth_stencil_clear = job->early_zs_clear;
668 
669                 config.image_width_pixels = job->draw_width;
670                 config.image_height_pixels = job->draw_height;
671 
672                 config.number_of_render_targets = MAX2(job->nr_cbufs, 1);
673 
674                 assert(!job->msaa || !job->double_buffer);
675                 config.multisample_mode_4x = job->msaa;
676                 config.double_buffer_in_non_ms_mode = job->double_buffer;
677 
678 #if V3D_VERSION == 42
679                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
680 #endif
681 #if V3D_VERSION >= 71
682                 config.log2_tile_width = log2_tile_size(job->tile_width);
683                 config.log2_tile_height = log2_tile_size(job->tile_height);
684 
685                 /* FIXME: ideallly we would like next assert on the packet header (as is
686                  * general, so also applies to GL). We would need to expand
687                  * gen_pack_header for that.
688                  */
689                 assert(config.log2_tile_width == config.log2_tile_height ||
690                        config.log2_tile_width == config.log2_tile_height + 1);
691 #endif
692 
693         }
694 
695 #if V3D_VERSION >= 71
696         uint32_t base_addr = 0;
697 
698         /* If we don't have any color RTs, we still need to emit one and flag
699          * it as not used using stride = 1
700          */
701         if (job->nr_cbufs == 0) {
702            cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
703               rt.stride = 1; /* Unused */
704            }
705         }
706 #endif
707         for (int i = 0; i < job->nr_cbufs; i++) {
708                 struct pipe_surface *psurf = job->cbufs[i];
709                 if (!psurf) {
710 #if V3D_VERSION >= 71
711                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
712                                 rt.render_target_number = i;
713                                 rt.stride = 1; /* Unused */
714                         }
715 #endif
716                         continue;
717                 }
718 
719                 struct v3d_surface *surf = v3d_surface(psurf);
720                 struct v3d_resource *rsc = v3d_resource(psurf->texture);
721 
722                 UNUSED uint32_t config_pad = 0;
723                 UNUSED uint32_t clear_pad = 0;
724 
725                 /* XXX: Set the pad for raster. */
726                 if (surf->tiling == V3D_TILING_UIF_NO_XOR ||
727                     surf->tiling == V3D_TILING_UIF_XOR) {
728                         int uif_block_height = v3d_utile_height(rsc->cpp) * 2;
729                         uint32_t implicit_padded_height = (align(job->draw_height, uif_block_height) /
730                                                            uif_block_height);
731                         if (surf->padded_height_of_output_image_in_uif_blocks -
732                             implicit_padded_height < 15) {
733                                 config_pad = (surf->padded_height_of_output_image_in_uif_blocks -
734                                               implicit_padded_height);
735                         } else {
736                                 config_pad = 15;
737                                 clear_pad = surf->padded_height_of_output_image_in_uif_blocks;
738                         }
739                 }
740 
741 #if V3D_VERSION == 42
742                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
743                         clear) {
744                         clear.clear_color_low_32_bits = job->clear_color[i][0];
745                         clear.clear_color_next_24_bits = job->clear_color[i][1] & 0xffffff;
746                         clear.render_target_number = i;
747                 };
748 
749                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
750                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2,
751                                 clear) {
752                                 clear.clear_color_mid_low_32_bits =
753                                         ((job->clear_color[i][1] >> 24) |
754                                          (job->clear_color[i][2] << 8));
755                                 clear.clear_color_mid_high_24_bits =
756                                         ((job->clear_color[i][2] >> 24) |
757                                          ((job->clear_color[i][3] & 0xffff) << 8));
758                                 clear.render_target_number = i;
759                         };
760                 }
761 
762                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
763                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3,
764                                 clear) {
765                                 clear.uif_padded_height_in_uif_blocks = clear_pad;
766                                 clear.clear_color_high_16_bits = job->clear_color[i][3] >> 16;
767                                 clear.render_target_number = i;
768                         };
769                 }
770 #endif
771 #if V3D_VERSION >= 71
772                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
773                         rt.clear_color_low_bits = job->clear_color[i][0];
774                         v3d_setup_render_target(job, i, &rt.internal_bpp,
775                                                 &rt.internal_type_and_clamping);
776                         rt.stride =
777                                 v3d_compute_rt_row_row_stride_128_bits(job->tile_width,
778                                                                        v3d_internal_bpp_words(rt.internal_bpp));
779                         rt.base_address = base_addr;
780                         rt.render_target_number = i;
781 
782                         base_addr += (job->tile_height * rt.stride) / 8;
783                 }
784 
785                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_64) {
786                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
787                                 rt.clear_color_mid_bits = /* 40 bits (32 + 8)  */
788                                         ((uint64_t) job->clear_color[i][1]) |
789                                         (((uint64_t) (job->clear_color[i][2] & 0xff)) << 32);
790                                 rt.render_target_number = i;
791                         }
792                 }
793 
794                 if (surf->internal_bpp >= V3D_INTERNAL_BPP_128) {
795                         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
796                                 rt.clear_color_top_bits = /* 56 bits (24 + 32) */
797                                         (((uint64_t) (job->clear_color[i][2] & 0xffffff00)) >> 8) |
798                                         (((uint64_t) (job->clear_color[i][3])) << 24);
799                                 rt.render_target_number = i;
800                         }
801                 }
802 #endif
803         }
804 
805 #if V3D_VERSION == 42
806         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
807                 v3d_setup_render_target(job, 0,
808                                         &rt.render_target_0_internal_bpp,
809                                         &rt.render_target_0_internal_type,
810                                         &rt.render_target_0_clamp);
811                 v3d_setup_render_target(job, 1,
812                                         &rt.render_target_1_internal_bpp,
813                                         &rt.render_target_1_internal_type,
814                                         &rt.render_target_1_clamp);
815                 v3d_setup_render_target(job, 2,
816                                         &rt.render_target_2_internal_bpp,
817                                         &rt.render_target_2_internal_type,
818                                         &rt.render_target_2_clamp);
819                 v3d_setup_render_target(job, 3,
820                                         &rt.render_target_3_internal_bpp,
821                                         &rt.render_target_3_internal_type,
822                                         &rt.render_target_3_clamp);
823         }
824 #endif
825 
826         /* Ends rendering mode config. */
827         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
828                 clear) {
829                 clear.z_clear_value = job->clear_z;
830                 clear.stencil_clear_value = job->clear_s;
831         };
832 
833         /* Always set initial block size before the first branch, which needs
834          * to match the value from binning mode config.
835          */
836         cl_emit(&job->rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
837                 init.use_auto_chained_tile_lists = true;
838                 init.size_of_first_block_in_chained_tile_lists =
839                         TILE_ALLOCATION_BLOCK_SIZE_64B;
840         }
841 
842         /* ARB_framebuffer_no_attachments allows rendering to happen even when
843          * the framebuffer has no attachments, the idea being that fragment
844          * shaders can still do image load/store, ssbo, etc without having to
845          * write to actual attachments, so always run at least one iteration
846          * of the loop.
847          */
848         assert(job->num_layers > 0 || (job->load == 0 && job->store == 0));
849         for (int layer = 0; layer < MAX2(1, job->num_layers); layer++)
850                 emit_render_layer(job, layer);
851 
852         cl_emit(&job->rcl, END_OF_RENDERING, end);
853 }
854