xref: /aosp_15_r20/external/mesa3d/src/intel/blorp/blorp_clear.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/ralloc.h"
25 
26 #include "util/macros.h" /* Needed for MAX3 and MAX2 for format_rgb9e5 */
27 #include "util/format_rgb9e5.h"
28 #include "util/format_srgb.h"
29 #include "util/u_math.h"
30 
31 #include "blorp_priv.h"
32 #include "dev/intel_debug.h"
33 #include "dev/intel_device_info.h"
34 
35 #include "blorp_nir_builder.h"
36 
37 #define FILE_DEBUG_FLAG DEBUG_BLORP
38 
39 #pragma pack(push, 1)
40 struct blorp_const_color_prog_key
41 {
42    struct blorp_base_key base;
43    bool use_simd16_replicated_data;
44    bool clear_rgb_as_red;
45    uint8_t local_y;
46 };
47 #pragma pack(pop)
48 
49 static bool
blorp_params_get_clear_kernel_fs(struct blorp_batch * batch,struct blorp_params * params,bool want_replicated_data,bool clear_rgb_as_red)50 blorp_params_get_clear_kernel_fs(struct blorp_batch *batch,
51                                  struct blorp_params *params,
52                                  bool want_replicated_data,
53                                  bool clear_rgb_as_red)
54 {
55    const bool use_replicated_data = want_replicated_data &&
56       batch->blorp->isl_dev->info->ver < 20;
57    struct blorp_context *blorp = batch->blorp;
58 
59    const struct blorp_const_color_prog_key blorp_key = {
60       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
61       .base.shader_pipeline = BLORP_SHADER_PIPELINE_RENDER,
62       .use_simd16_replicated_data = use_replicated_data,
63       .clear_rgb_as_red = clear_rgb_as_red,
64       .local_y = 0,
65    };
66 
67    params->shader_type = blorp_key.base.shader_type;
68    params->shader_pipeline = blorp_key.base.shader_pipeline;
69 
70    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
71                             &params->wm_prog_kernel, &params->wm_prog_data))
72       return true;
73 
74    void *mem_ctx = ralloc_context(NULL);
75 
76    nir_builder b;
77    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_FRAGMENT,
78                          blorp_shader_type_to_name(blorp_key.base.shader_type));
79 
80    nir_variable *v_color =
81       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
82    nir_def *color = nir_load_var(&b, v_color);
83 
84    if (clear_rgb_as_red) {
85       nir_def *pos = nir_f2i32(&b, nir_load_frag_coord(&b));
86       nir_def *comp = nir_umod_imm(&b, nir_channel(&b, pos, 0), 3);
87       color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
88    }
89 
90    nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out,
91                                                   glsl_vec4_type(),
92                                                   "gl_FragColor");
93    frag_color->data.location = FRAG_RESULT_COLOR;
94    nir_store_var(&b, frag_color, color, 0xf);
95 
96    const bool multisample_fbo = false;
97    struct blorp_program p =
98       blorp_compile_fs(blorp, mem_ctx, b.shader, multisample_fbo, use_replicated_data);
99 
100    bool result =
101       blorp->upload_shader(batch, MESA_SHADER_FRAGMENT,
102                            &blorp_key, sizeof(blorp_key),
103                            p.kernel, p.kernel_size,
104                            p.prog_data, p.prog_data_size,
105                            &params->wm_prog_kernel, &params->wm_prog_data);
106 
107    ralloc_free(mem_ctx);
108    return result;
109 }
110 
111 static bool
blorp_params_get_clear_kernel_cs(struct blorp_batch * batch,struct blorp_params * params,bool clear_rgb_as_red)112 blorp_params_get_clear_kernel_cs(struct blorp_batch *batch,
113                                  struct blorp_params *params,
114                                  bool clear_rgb_as_red)
115 {
116    struct blorp_context *blorp = batch->blorp;
117 
118    const struct blorp_const_color_prog_key blorp_key = {
119       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
120       .base.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE,
121       .use_simd16_replicated_data = false,
122       .clear_rgb_as_red = clear_rgb_as_red,
123       .local_y = blorp_get_cs_local_y(params),
124    };
125 
126    params->shader_type = blorp_key.base.shader_type;
127    params->shader_pipeline = blorp_key.base.shader_pipeline;
128 
129    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
130                             &params->cs_prog_kernel, &params->cs_prog_data))
131       return true;
132 
133    void *mem_ctx = ralloc_context(NULL);
134 
135    nir_builder b;
136    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_COMPUTE,
137                          "BLORP-gpgpu-clear");
138    blorp_set_cs_dims(b.shader, blorp_key.local_y);
139 
140    nir_def *dst_pos = nir_load_global_invocation_id(&b, 32);
141 
142    nir_variable *v_color =
143       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
144    nir_def *color = nir_load_var(&b, v_color);
145 
146    nir_variable *v_bounds_rect =
147       BLORP_CREATE_NIR_INPUT(b.shader, bounds_rect, glsl_vec4_type());
148    nir_def *bounds_rect = nir_load_var(&b, v_bounds_rect);
149    nir_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect, dst_pos);
150 
151    if (clear_rgb_as_red) {
152       nir_def *comp = nir_umod_imm(&b, nir_channel(&b, dst_pos, 0), 3);
153       color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
154    }
155 
156    nir_push_if(&b, in_bounds);
157 
158    nir_image_store(&b, nir_imm_int(&b, 0),
159                    nir_pad_vector_imm_int(&b, dst_pos, 0, 4),
160                    nir_imm_int(&b, 0),
161                    nir_pad_vector_imm_int(&b, color, 0, 4),
162                    nir_imm_int(&b, 0),
163                    .image_dim = GLSL_SAMPLER_DIM_2D,
164                    .image_array = true,
165                    .access = ACCESS_NON_READABLE);
166 
167    nir_pop_if(&b, NULL);
168 
169    const struct blorp_program p =
170       blorp_compile_cs(blorp, mem_ctx, b.shader);
171 
172    bool result =
173       blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
174                            &blorp_key, sizeof(blorp_key),
175                            p.kernel, p.kernel_size,
176                            p.prog_data, p.prog_data_size,
177                            &params->cs_prog_kernel, &params->cs_prog_data);
178 
179    ralloc_free(mem_ctx);
180    return result;
181 }
182 
183 static bool
blorp_params_get_clear_kernel(struct blorp_batch * batch,struct blorp_params * params,bool use_replicated_data,bool clear_rgb_as_red)184 blorp_params_get_clear_kernel(struct blorp_batch *batch,
185                               struct blorp_params *params,
186                               bool use_replicated_data,
187                               bool clear_rgb_as_red)
188 {
189    if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
190       assert(!use_replicated_data);
191       return blorp_params_get_clear_kernel_cs(batch, params, clear_rgb_as_red);
192    } else {
193       return blorp_params_get_clear_kernel_fs(batch, params,
194                                               use_replicated_data,
195                                               clear_rgb_as_red);
196    }
197 }
198 
199 /* The x0, y0, x1, and y1 parameters must already be populated with the render
200  * area of the framebuffer to be cleared.
201  */
202 static void
get_fast_clear_rect(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * aux_surf,unsigned * x0,unsigned * y0,unsigned * x1,unsigned * y1)203 get_fast_clear_rect(const struct isl_device *dev,
204                     const struct isl_surf *surf,
205                     const struct isl_surf *aux_surf,
206                     unsigned *x0, unsigned *y0,
207                     unsigned *x1, unsigned *y1)
208 {
209    unsigned int x_align, y_align;
210    unsigned int x_scaledown, y_scaledown;
211 
212    /* Only single sampled surfaces need to (and actually can) be resolved. */
213    if (surf->samples == 1) {
214       const uint32_t bs = isl_format_get_layout(surf->format)->bpb / 8;
215       if (dev->info->ver >= 20) {
216          /* From Bspec 57340, "MCS/CCS Buffers, Fast Clear for Render Target(s)":
217           *
218           *    Table "Tile4/Tile64 2D/2D Array/Cube Surface"
219           *    Table "Tile64 3D/Volumetric"
220           *
221           * The below calculation is derived from these tables.
222           */
223          assert(surf->tiling == ISL_TILING_4 ||
224                 surf->tiling == ISL_TILING_64_XE2);
225          x_align = x_scaledown = 64 / bs;
226          y_align = y_scaledown = 4;
227       } else if (dev->info->verx10 >= 125) {
228          /* From Bspec 47709, "MCS/CCS Buffer for Render Target(s)":
229           *
230           *    SW must ensure that clearing rectangle dimensions cover the
231           *    entire area desired, to accomplish this task initial X/Y
232           *    dimensions need to be rounded up to next multiple of scaledown
233           *    factor before dividing by scale down factor:
234           *
235           * The X and Y scale down factors in the table that follows are used
236           * for both alignment and scaling down.
237           */
238          if (surf->tiling == ISL_TILING_4) {
239             x_align = x_scaledown = 1024 / bs;
240             y_align = y_scaledown = 16;
241          } else if (surf->tiling == ISL_TILING_64) {
242             switch (bs) {
243             case 1:
244                x_align = x_scaledown = 128;
245                y_align = y_scaledown = 128;
246                break;
247             case 2:
248                x_align = x_scaledown = 128;
249                y_align = y_scaledown = 64;
250                break;
251             case 4:
252                x_align = x_scaledown = 64;
253                y_align = y_scaledown = 64;
254                break;
255             case 8:
256                x_align = x_scaledown = 64;
257                y_align = y_scaledown = 32;
258                break;
259             case 16:
260                x_align = x_scaledown = 32;
261                y_align = y_scaledown = 32;
262                break;
263             default:
264                unreachable("unsupported bpp");
265             }
266          } else {
267             unreachable("Unsupported tiling format");
268          }
269       } else {
270          /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
271           * Target(s)", beneath the "Fast Color Clear" bullet (p327):
272           *
273           *     Clear pass must have a clear rectangle that must follow
274           *     alignment rules in terms of pixels and lines as shown in the
275           *     table below. Further, the clear-rectangle height and width
276           *     must be multiple of the following dimensions. If the height
277           *     and width of the render target being cleared do not meet these
278           *     requirements, an MCS buffer can be created such that it
279           *     follows the requirement and covers the RT.
280           *
281           * The alignment size in the table that follows is a multiple of the
282           * alignment size that is baked into the CCS surface format.
283           */
284          enum isl_format ccs_format;
285          if (ISL_GFX_VERX10(dev) == 120) {
286             assert(surf->tiling == ISL_TILING_Y0);
287             switch (isl_format_get_layout(surf->format)->bpb) {
288             case   8: ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0;   break;
289             case  16: ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0;  break;
290             case  32: ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0;  break;
291             case  64: ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0;  break;
292             case 128: ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0; break;
293             default:  unreachable("Invalid surface bpb for fast clearing");
294             }
295          } else {
296             assert(aux_surf->usage == ISL_SURF_USAGE_CCS_BIT);
297             ccs_format = aux_surf->format;
298          }
299 
300          x_align = isl_format_get_layout(ccs_format)->bw * 16;
301          y_align = isl_format_get_layout(ccs_format)->bh * 32 /
302                    isl_format_get_layout(ccs_format)->bpb;
303 
304          /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
305           * Target(s)", beneath the "Fast Color Clear" bullet (p327):
306           *
307           *     In order to optimize the performance MCS buffer (when bound to
308           *     1X RT) clear similarly to MCS buffer clear for MSRT case,
309           *     clear rect is required to be scaled by the following factors
310           *     in the horizontal and vertical directions:
311           *
312           * The X and Y scale down factors in the table that follows are each
313           * equal to half the alignment value computed above.
314           */
315          x_scaledown = x_align / 2;
316          y_scaledown = y_align / 2;
317       }
318 
319       if (ISL_DEV_IS_HASWELL(dev)) {
320          /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
321           * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
322           * Clear of Non-MultiSampled Render Target Restrictions":
323           *
324           *   Clear rectangle must be aligned to two times the number of
325           *   pixels in the table shown below due to 16x16 hashing across the
326           *   slice.
327           *
328           * This restriction is only documented to exist on HSW GT3 but
329           * empirical evidence suggests that it's also needed GT2.
330           */
331          x_align *= 2;
332          y_align *= 2;
333       }
334    } else {
335       assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT);
336 
337       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
338        * Target(s)", beneath the "MSAA Compression" bullet (p326):
339        *
340        *     Clear pass for this case requires that scaled down primitive
341        *     is sent down with upper left coordinate to coincide with
342        *     actual rectangle being cleared. For MSAA, clear rectangle’s
343        *     height and width need to as show in the following table in
344        *     terms of (width,height) of the RT.
345        *
346        *     MSAA  Width of Clear Rect  Height of Clear Rect
347        *      2X     Ceil(1/8*width)      Ceil(1/2*height)
348        *      4X     Ceil(1/8*width)      Ceil(1/2*height)
349        *      8X     Ceil(1/2*width)      Ceil(1/2*height)
350        *     16X         width            Ceil(1/2*height)
351        *
352        * The text "with upper left coordinate to coincide with actual
353        * rectangle being cleared" is a little confusing--it seems to imply
354        * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
355        * feed the pipeline using the rectangle (x,y) to
356        * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on
357        * the number of samples.  Experiments indicate that this is not
358        * quite correct; actually, what the hardware appears to do is to
359        * align whatever rectangle is sent down the pipeline to the nearest
360        * multiple of 2x2 blocks, and then scale it up by a factor of N
361        * horizontally and 2 vertically.  So the resulting alignment is 4
362        * vertically and either 4 or 16 horizontally, and the scaledown
363        * factor is 2 vertically and either 2 or 8 horizontally.
364        *
365        * On Xe2+:
366        * Bspec 57340 (r59562):
367        *
368        *    Fast Clear MCS Surface
369        *    (Table)
370        *
371        * The scaled down values in the Xe2 table are different from what's in
372        * the previous platforms.
373        */
374       switch (aux_surf->format) {
375       case ISL_FORMAT_MCS_2X:
376       case ISL_FORMAT_MCS_4X:
377          x_scaledown = dev->info->ver >= 20 ? 64 : 8;
378          break;
379       case ISL_FORMAT_MCS_8X:
380          x_scaledown = dev->info->ver >= 20 ? 16 : 2;
381          break;
382       case ISL_FORMAT_MCS_16X:
383          x_scaledown = dev->info->ver >= 20 ? 8 : 1;
384          break;
385       default:
386          unreachable("Unexpected MCS format for fast clear");
387       }
388       y_scaledown = dev->info->ver >= 20 ? 4 : 2;
389       x_align = x_scaledown * 2;
390       y_align = y_scaledown * 2;
391    }
392 
393    *x0 = ROUND_DOWN_TO(*x0,  x_align) / x_scaledown;
394    *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown;
395    *x1 = ALIGN(*x1, x_align) / x_scaledown;
396    *y1 = ALIGN(*y1, y_align) / y_scaledown;
397 }
398 
399 void
blorp_fast_clear(struct blorp_batch * batch,const struct blorp_surf * surf,enum isl_format format,struct isl_swizzle swizzle,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1)400 blorp_fast_clear(struct blorp_batch *batch,
401                  const struct blorp_surf *surf,
402                  enum isl_format format, struct isl_swizzle swizzle,
403                  uint32_t level, uint32_t start_layer, uint32_t num_layers,
404                  uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
405 {
406    struct blorp_params params;
407    blorp_params_init(&params);
408    params.num_layers = num_layers;
409    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
410 
411    params.x0 = x0;
412    params.y0 = y0;
413    params.x1 = x1;
414    params.y1 = y1;
415 
416    if (batch->blorp->isl_dev->info->ver >= 20) {
417       /* Bspec 57340 (r59562):
418        *
419        *   Overview of Fast Clear:
420        *      Pixel shader's color output is treated as Clear Value, value
421        *      should be a constant.
422        */
423       memcpy(&params.wm_inputs.clear_color, &surf->clear_color,
424              4 * sizeof(float));
425    } else {
426       /* BSpec: 2423 (r153658):
427        *
428        *   The pixel shader kernel requires no attributes, and delivers a
429        *   value of 0xFFFFFFFF in all channels of the render target write
430        *   message The replicated color message should be used.
431        */
432       memset(&params.wm_inputs.clear_color, 0xff, 4 * sizeof(float));
433    }
434 
435    params.fast_clear_op = ISL_AUX_OP_FAST_CLEAR;
436 
437    get_fast_clear_rect(batch->blorp->isl_dev, surf->surf, surf->aux_surf,
438                        &params.x0, &params.y0, &params.x1, &params.y1);
439 
440    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
441       return;
442 
443    blorp_surface_info_init(batch, &params.dst, surf, level,
444                                start_layer, format, true);
445    params.num_samples = params.dst.surf.samples;
446 
447    assert(params.num_samples != 0);
448    if (params.num_samples == 1)
449       params.op = BLORP_OP_CCS_COLOR_CLEAR;
450    else
451       params.op = BLORP_OP_MCS_COLOR_CLEAR;
452 
453    /* If a swizzle was provided, we need to swizzle the clear color so that
454     * the hardware color format conversion will work properly.
455     */
456    params.dst.clear_color =
457       isl_color_value_swizzle_inv(params.dst.clear_color, swizzle);
458 
459    batch->blorp->exec(batch, &params);
460 }
461 
462 bool
blorp_clear_supports_blitter(struct blorp_context * blorp,const struct blorp_surf * surf,uint8_t color_write_disable,bool blend_enabled)463 blorp_clear_supports_blitter(struct blorp_context *blorp,
464                              const struct blorp_surf *surf,
465                              uint8_t color_write_disable,
466                              bool blend_enabled)
467 {
468    const struct intel_device_info *devinfo = blorp->isl_dev->info;
469 
470    if (devinfo->ver < 12)
471       return false;
472 
473    if (surf->surf->samples > 1)
474       return false;
475 
476    if (color_write_disable != 0 || blend_enabled)
477       return false;
478 
479    if (!blorp_blitter_supports_aux(devinfo, surf->aux_usage))
480       return false;
481 
482    const struct isl_format_layout *fmtl =
483       isl_format_get_layout(surf->surf->format);
484 
485    /* We can only support linear mode for 96bpp. */
486    if (fmtl->bpb == 96 && surf->surf->tiling != ISL_TILING_LINEAR)
487       return false;
488 
489    return true;
490 }
491 
492 bool
blorp_clear_supports_compute(struct blorp_context * blorp,uint8_t color_write_disable,bool blend_enabled,enum isl_aux_usage aux_usage)493 blorp_clear_supports_compute(struct blorp_context *blorp,
494                              uint8_t color_write_disable, bool blend_enabled,
495                              enum isl_aux_usage aux_usage)
496 {
497    if (blorp->isl_dev->info->ver < 7)
498       return false;
499    if (color_write_disable != 0 || blend_enabled)
500       return false;
501    if (blorp->isl_dev->info->ver >= 12) {
502       return aux_usage == ISL_AUX_USAGE_FCV_CCS_E ||
503              aux_usage == ISL_AUX_USAGE_CCS_E ||
504              aux_usage == ISL_AUX_USAGE_NONE;
505    } else {
506       return aux_usage == ISL_AUX_USAGE_NONE;
507    }
508 }
509 
510 void
blorp_clear(struct blorp_batch * batch,const struct blorp_surf * surf,enum isl_format format,struct isl_swizzle swizzle,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,union isl_color_value clear_color,uint8_t color_write_disable)511 blorp_clear(struct blorp_batch *batch,
512             const struct blorp_surf *surf,
513             enum isl_format format, struct isl_swizzle swizzle,
514             uint32_t level, uint32_t start_layer, uint32_t num_layers,
515             uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
516             union isl_color_value clear_color,
517             uint8_t color_write_disable)
518 {
519    struct blorp_params params;
520    blorp_params_init(&params);
521    params.op = BLORP_OP_SLOW_COLOR_CLEAR;
522 
523    const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
524    if (compute) {
525       assert(blorp_clear_supports_compute(batch->blorp, color_write_disable,
526                                           false, surf->aux_usage));
527    } else if (batch->flags & BLORP_BATCH_USE_BLITTER) {
528       assert(blorp_clear_supports_blitter(batch->blorp, surf,
529                                           color_write_disable, false));
530    }
531 
532    /* Manually apply the clear destination swizzle.  This way swizzled clears
533     * will work for swizzles which we can't normally use for rendering and it
534     * also ensures that they work on pre-Haswell hardware which can't swizlle
535     * at all.
536     */
537    clear_color = isl_color_value_swizzle_inv(clear_color, swizzle);
538    swizzle = ISL_SWIZZLE_IDENTITY;
539 
540    bool clear_rgb_as_red = false;
541    if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) {
542       clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32);
543       format = ISL_FORMAT_R32_UINT;
544    } else if (format == ISL_FORMAT_L8_UNORM_SRGB) {
545       clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]);
546       format = ISL_FORMAT_R8_UNORM;
547    } else if (format == ISL_FORMAT_A4B4G4R4_UNORM) {
548       /* Broadwell and earlier cannot render to this format so we need to work
549        * around it by swapping the colors around and using B4G4R4A4 instead.
550        */
551       const struct isl_swizzle ARGB = ISL_SWIZZLE(ALPHA, RED, GREEN, BLUE);
552       clear_color = isl_color_value_swizzle_inv(clear_color, ARGB);
553       format = ISL_FORMAT_B4G4R4A4_UNORM;
554    } else if (isl_format_get_layout(format)->bpb % 3 == 0) {
555       clear_rgb_as_red = true;
556       if (format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
557          clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]);
558          clear_color.f32[1] = util_format_linear_to_srgb_float(clear_color.f32[1]);
559          clear_color.f32[2] = util_format_linear_to_srgb_float(clear_color.f32[2]);
560       }
561    }
562 
563    memcpy(&params.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4);
564 
565    bool use_simd16_replicated_data = true;
566 
567    /* From the SNB PRM (Vol4_Part1):
568     *
569     *     "Replicated data (Message Type = 111) is only supported when
570     *      accessing tiled memory.  Using this Message Type to access linear
571     *      (untiled) memory is UNDEFINED."
572     */
573    if (surf->surf->tiling == ISL_TILING_LINEAR)
574       use_simd16_replicated_data = false;
575 
576    /* Replicated clears don't work before gfx6 */
577    if (batch->blorp->isl_dev->info->ver < 6)
578       use_simd16_replicated_data = false;
579 
580    /* From the BSpec: 47719 (TGL/DG2/MTL) Replicate Data:
581     *
582     * "Replicate Data Render Target Write message should not be used
583     *  on all projects TGL+."
584     *
585     * Xe2 spec (57350) does not mention this restriction.
586     *
587     *  See 14017879046, 14017880152 for additional information.
588     */
589    if (batch->blorp->isl_dev->info->ver >= 12 &&
590        batch->blorp->isl_dev->info->ver < 20)
591       use_simd16_replicated_data = false;
592 
593    if (compute)
594       use_simd16_replicated_data = false;
595 
596    /* Constant color writes ignore everything in blend and color calculator
597     * state.  This is not documented.
598     */
599    params.color_write_disable = color_write_disable & BITFIELD_MASK(4);
600    if (color_write_disable)
601       use_simd16_replicated_data = false;
602 
603    if (!blorp_params_get_clear_kernel(batch, &params,
604                                       use_simd16_replicated_data,
605                                       clear_rgb_as_red))
606       return;
607 
608    if (!compute && !blorp_ensure_sf_program(batch, &params))
609       return;
610 
611    while (num_layers > 0) {
612       blorp_surface_info_init(batch, &params.dst, surf, level,
613                                   start_layer, format, true);
614       params.dst.view.swizzle = swizzle;
615 
616       params.x0 = x0;
617       params.y0 = y0;
618       params.x1 = x1;
619       params.y1 = y1;
620 
621       if (compute) {
622          params.wm_inputs.bounds_rect.x0 = x0;
623          params.wm_inputs.bounds_rect.y0 = y0;
624          params.wm_inputs.bounds_rect.x1 = x1;
625          params.wm_inputs.bounds_rect.y1 = y1;
626       }
627 
628       if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
629          assert(params.dst.surf.samples == 1);
630          assert(num_layers == 1);
631          params.x0 += params.dst.tile_x_sa;
632          params.y0 += params.dst.tile_y_sa;
633          params.x1 += params.dst.tile_x_sa;
634          params.y1 += params.dst.tile_y_sa;
635       }
636 
637       /* The MinLOD and MinimumArrayElement don't work properly for cube maps.
638        * Convert them to a single slice on gfx4.
639        */
640       if (batch->blorp->isl_dev->info->ver == 4 &&
641           (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) {
642          blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params.dst);
643       }
644 
645       if (clear_rgb_as_red) {
646          surf_fake_rgb_with_red(batch->blorp->isl_dev, &params.dst);
647          params.x0 *= 3;
648          params.x1 *= 3;
649       }
650 
651       if (isl_format_is_compressed(params.dst.surf.format)) {
652          blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, &params.dst,
653                                             NULL, NULL, NULL, NULL);
654                                             //&dst_x, &dst_y, &dst_w, &dst_h);
655       }
656 
657       if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
658          /* Either we're on gfx4 where there is no multisampling or the
659           * surface is compressed which also implies no multisampling.
660           * Therefore, sa == px and we don't need to do a conversion.
661           */
662          assert(params.dst.surf.samples == 1);
663          params.x0 += params.dst.tile_x_sa;
664          params.y0 += params.dst.tile_y_sa;
665          params.x1 += params.dst.tile_x_sa;
666          params.y1 += params.dst.tile_y_sa;
667       }
668 
669       params.num_samples = params.dst.surf.samples;
670 
671       /* We may be restricted on the number of layers we can bind at any one
672        * time.  In particular, Sandy Bridge has a maximum number of layers of
673        * 512 but a maximum 3D texture size is much larger.
674        */
675       params.num_layers = MIN2(params.dst.view.array_len, num_layers);
676 
677       const unsigned max_image_width = 16 * 1024;
678       if (params.dst.surf.logical_level0_px.width > max_image_width) {
679          /* Clearing an RGB image as red multiplies the surface width by 3
680           * so it may now be too wide for the hardware surface limits.  We
681           * have to break the clear up into pieces in order to clear wide
682           * images.
683           */
684          assert(clear_rgb_as_red);
685          assert(params.dst.surf.dim == ISL_SURF_DIM_2D);
686          assert(params.dst.surf.tiling == ISL_TILING_LINEAR);
687          assert(params.dst.surf.logical_level0_px.depth == 1);
688          assert(params.dst.surf.logical_level0_px.array_len == 1);
689          assert(params.dst.surf.levels == 1);
690          assert(params.dst.surf.samples == 1);
691          assert(params.dst.tile_x_sa == 0 || params.dst.tile_y_sa == 0);
692          assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE);
693 
694          /* max_image_width rounded down to a multiple of 3 */
695          const unsigned max_fake_rgb_width = (max_image_width / 3) * 3;
696          const unsigned cpp =
697             isl_format_get_layout(params.dst.surf.format)->bpb / 8;
698 
699          params.dst.surf.logical_level0_px.width = max_fake_rgb_width;
700          params.dst.surf.phys_level0_sa.width = max_fake_rgb_width;
701 
702          uint32_t orig_x0 = params.x0, orig_x1 = params.x1;
703          uint64_t orig_offset = params.dst.addr.offset;
704          for (uint32_t x = orig_x0; x < orig_x1; x += max_fake_rgb_width) {
705             /* Offset to the surface.  It's easy because we're linear */
706             params.dst.addr.offset = orig_offset + x * cpp;
707 
708             params.x0 = 0;
709             params.x1 = MIN2(orig_x1 - x, max_image_width);
710 
711             batch->blorp->exec(batch, &params);
712          }
713       } else {
714          batch->blorp->exec(batch, &params);
715       }
716 
717       start_layer += params.num_layers;
718       num_layers -= params.num_layers;
719    }
720 }
721 
722 static bool
blorp_clear_stencil_as_rgba(struct blorp_batch * batch,const struct blorp_surf * surf,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,uint8_t stencil_mask,uint8_t stencil_value)723 blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
724                             const struct blorp_surf *surf,
725                             uint32_t level, uint32_t start_layer,
726                             uint32_t num_layers,
727                             uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
728                             uint8_t stencil_mask, uint8_t stencil_value)
729 {
730    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
731 
732    /* Stencil mask support would require piles of shader magic */
733    if (stencil_mask != 0xff)
734       return false;
735 
736    /* We only support separate W-tiled stencil for now */
737    if (surf->surf->format != ISL_FORMAT_R8_UINT ||
738        surf->surf->tiling != ISL_TILING_W)
739       return false;
740 
741    if (surf->surf->samples > 1) {
742       /* Adjust x0, y0, x1, and y1 to be in units of samples */
743       assert(surf->surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
744       struct isl_extent2d msaa_px_size_sa =
745          isl_get_interleaved_msaa_px_size_sa(surf->surf->samples);
746 
747       x0 *= msaa_px_size_sa.w;
748       y0 *= msaa_px_size_sa.h;
749       x1 *= msaa_px_size_sa.w;
750       y1 *= msaa_px_size_sa.h;
751    }
752 
753    /* W-tiles and Y-tiles have the same layout as far as cache lines are
754     * concerned: both are 8x8 cache lines laid out Y-major.  The difference is
755     * entirely in how the data is arranged within the cache line.  W-tiling
756     * is 8x8 pixels in a swizzled pattern while Y-tiling is 16B by 4 rows
757     * regardless of image format size.  As long as everything is aligned to 8,
758     * we can just treat the W-tiled image as Y-tiled, ignore the layout
759     * difference within a cache line, and blast out data.
760     */
761    if (x0 % 8 != 0 || y0 % 8 != 0 || x1 % 8 != 0 || y1 % 8 != 0)
762       return false;
763 
764    struct blorp_params params;
765    blorp_params_init(&params);
766    params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
767 
768    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
769       return false;
770 
771    memset(&params.wm_inputs.clear_color, stencil_value,
772           sizeof(params.wm_inputs.clear_color));
773 
774    /* The Sandy Bridge PRM Vol. 4 Pt. 2, section 2.11.2.1.1 has the
775     * following footnote to the format table:
776     *
777     *    128 BPE Formats cannot be Tiled Y when used as render targets
778     *
779     * We have to use RGBA16_UINT on SNB.
780     */
781    enum isl_format wide_format;
782    if (ISL_GFX_VER(batch->blorp->isl_dev) <= 6) {
783       wide_format = ISL_FORMAT_R16G16B16A16_UINT;
784 
785       /* For RGBA16_UINT, we need to mask the stencil value otherwise, we risk
786        * clamping giving us the wrong values
787        */
788       for (unsigned i = 0; i < 4; i++)
789          params.wm_inputs.clear_color[i] &= 0xffff;
790    } else {
791       wide_format = ISL_FORMAT_R32G32B32A32_UINT;
792    }
793 
794    for (uint32_t a = 0; a < num_layers; a++) {
795       uint32_t layer = start_layer + a;
796 
797       blorp_surface_info_init(batch, &params.dst, surf, level,
798                                   layer, ISL_FORMAT_UNSUPPORTED, true);
799 
800       if (surf->surf->samples > 1)
801          blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params.dst);
802 
803       /* Make it Y-tiled */
804       blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params.dst);
805 
806       unsigned wide_Bpp =
807          isl_format_get_layout(wide_format)->bpb / 8;
808 
809       params.dst.view.format = params.dst.surf.format = wide_format;
810       assert(params.dst.surf.logical_level0_px.width % wide_Bpp == 0);
811       params.dst.surf.logical_level0_px.width /= wide_Bpp;
812       assert(params.dst.tile_x_sa % wide_Bpp == 0);
813       params.dst.tile_x_sa /= wide_Bpp;
814 
815       params.x0 = params.dst.tile_x_sa + x0 / (wide_Bpp / 2);
816       params.y0 = params.dst.tile_y_sa + y0 / 2;
817       params.x1 = params.dst.tile_x_sa + x1 / (wide_Bpp / 2);
818       params.y1 = params.dst.tile_y_sa + y1 / 2;
819 
820       batch->blorp->exec(batch, &params);
821    }
822 
823    return true;
824 }
825 
826 void
blorp_clear_depth_stencil(struct blorp_batch * batch,const struct blorp_surf * depth,const struct blorp_surf * stencil,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,float depth_value,uint8_t stencil_mask,uint8_t stencil_value)827 blorp_clear_depth_stencil(struct blorp_batch *batch,
828                           const struct blorp_surf *depth,
829                           const struct blorp_surf *stencil,
830                           uint32_t level, uint32_t start_layer,
831                           uint32_t num_layers,
832                           uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
833                           bool clear_depth, float depth_value,
834                           uint8_t stencil_mask, uint8_t stencil_value)
835 {
836    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
837 
838    if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
839                                                    start_layer, num_layers,
840                                                    x0, y0, x1, y1,
841                                                    stencil_mask,
842                                                    stencil_value))
843       return;
844 
845    struct blorp_params params;
846    blorp_params_init(&params);
847    params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
848 
849    params.x0 = x0;
850    params.y0 = y0;
851    params.x1 = x1;
852    params.y1 = y1;
853 
854    if (ISL_GFX_VER(batch->blorp->isl_dev) == 6) {
855       /* For some reason, Sandy Bridge gets occlusion queries wrong if we
856        * don't have a shader.  In particular, it records samples even though
857        * we disable statistics in 3DSTATE_WM.  Give it the usual clear shader
858        * to work around the issue.
859        */
860       if (!blorp_params_get_clear_kernel(batch, &params, false, false))
861          return;
862    }
863 
864    while (num_layers > 0) {
865       params.num_layers = num_layers;
866 
867       if (stencil_mask) {
868          blorp_surface_info_init(batch, &params.stencil, stencil,
869                                      level, start_layer,
870                                      ISL_FORMAT_UNSUPPORTED, true);
871          params.stencil_mask = stencil_mask;
872          params.stencil_ref = stencil_value;
873 
874          params.dst.surf.samples = params.stencil.surf.samples;
875          params.dst.surf.logical_level0_px =
876             params.stencil.surf.logical_level0_px;
877          params.dst.view = params.stencil.view;
878 
879          params.num_samples = params.stencil.surf.samples;
880 
881          /* We may be restricted on the number of layers we can bind at any
882           * one time.  In particular, Sandy Bridge has a maximum number of
883           * layers of 512 but a maximum 3D texture size is much larger.
884           */
885          if (params.stencil.view.array_len < params.num_layers)
886             params.num_layers = params.stencil.view.array_len;
887       }
888 
889       if (clear_depth) {
890          blorp_surface_info_init(batch, &params.depth, depth,
891                                      level, start_layer,
892                                      ISL_FORMAT_UNSUPPORTED, true);
893          params.z = depth_value;
894          params.depth_format =
895             isl_format_get_depth_format(depth->surf->format, false);
896 
897          params.dst.surf.samples = params.depth.surf.samples;
898          params.dst.surf.logical_level0_px =
899             params.depth.surf.logical_level0_px;
900          params.dst.view = params.depth.view;
901 
902          params.num_samples = params.depth.surf.samples;
903 
904          /* We may be restricted on the number of layers we can bind at any
905           * one time.  In particular, Sandy Bridge has a maximum number of
906           * layers of 512 but a maximum 3D texture size is much larger.
907           */
908          if (params.depth.view.array_len < params.num_layers)
909             params.num_layers = params.depth.view.array_len;
910       }
911 
912       batch->blorp->exec(batch, &params);
913 
914       start_layer += params.num_layers;
915       num_layers -= params.num_layers;
916    }
917 }
918 
919 static bool
blorp_can_clear_full_surface(const struct blorp_surf * depth,const struct blorp_surf * stencil,uint32_t level,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,bool clear_stencil)920 blorp_can_clear_full_surface(const struct blorp_surf *depth,
921                              const struct blorp_surf *stencil,
922                              uint32_t level,
923                              uint32_t x0, uint32_t y0,
924                              uint32_t x1, uint32_t y1,
925                              bool clear_depth,
926                              bool clear_stencil)
927 {
928    uint32_t width = 0, height = 0;
929    if (clear_stencil) {
930       width = u_minify(stencil->surf->logical_level0_px.width, level);
931       height = u_minify(stencil->surf->logical_level0_px.height, level);
932    }
933 
934    if (clear_depth && !(width || height)) {
935       width = u_minify(depth->surf->logical_level0_px.width, level);
936       height = u_minify(depth->surf->logical_level0_px.height, level);
937    }
938 
939    return x0 == 0 && y0 == 0 && width == x1 && height == y1;
940 }
941 
942 void
blorp_hiz_clear_depth_stencil(struct blorp_batch * batch,const struct blorp_surf * depth,const struct blorp_surf * stencil,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,float depth_value,bool clear_stencil,uint8_t stencil_value)943 blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
944                               const struct blorp_surf *depth,
945                               const struct blorp_surf *stencil,
946                               uint32_t level,
947                               uint32_t start_layer, uint32_t num_layers,
948                               uint32_t x0, uint32_t y0,
949                               uint32_t x1, uint32_t y1,
950                               bool clear_depth, float depth_value,
951                               bool clear_stencil, uint8_t stencil_value)
952 {
953    struct blorp_params params;
954    blorp_params_init(&params);
955    params.op = BLORP_OP_HIZ_CLEAR;
956 
957    /* This requires WM_HZ_OP which only exists on gfx8+ */
958    assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 8);
959 
960    params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
961    /* From BSpec: 3DSTATE_WM_HZ_OP_BODY >> Full Surface Depth and Stencil Clear
962     *
963     *    "Software must set this only when the APP requires the entire Depth
964     *    surface to be cleared."
965     */
966    params.full_surface_hiz_op =
967       blorp_can_clear_full_surface(depth, stencil, level, x0, y0, x1, y1,
968                                    clear_depth, clear_stencil);
969    params.num_layers = 1;
970 
971    params.x0 = x0;
972    params.y0 = y0;
973    params.x1 = x1;
974    params.y1 = y1;
975 
976    for (uint32_t l = 0; l < num_layers; l++) {
977       const uint32_t layer = start_layer + l;
978       if (clear_stencil) {
979          blorp_surface_info_init(batch, &params.stencil, stencil,
980                                      level, layer,
981                                      ISL_FORMAT_UNSUPPORTED, true);
982          params.stencil_mask = 0xff;
983          params.stencil_ref = stencil_value;
984          params.num_samples = params.stencil.surf.samples;
985       }
986 
987       if (clear_depth) {
988          /* If we're clearing depth, we must have HiZ */
989          assert(depth && isl_aux_usage_has_hiz(depth->aux_usage));
990 
991          blorp_surface_info_init(batch, &params.depth, depth,
992                                      level, layer,
993                                      ISL_FORMAT_UNSUPPORTED, true);
994          params.depth.clear_color.f32[0] = depth_value;
995          params.depth_format =
996             isl_format_get_depth_format(depth->surf->format, false);
997          params.num_samples = params.depth.surf.samples;
998       }
999 
1000       batch->blorp->exec(batch, &params);
1001    }
1002 }
1003 
1004 /* Given a depth stencil attachment, this function performs a fast depth clear
1005  * on a depth portion and a regular clear on the stencil portion. When
1006  * performing a fast depth clear on the depth portion, the HiZ buffer is simply
1007  * tagged as cleared so the depth clear value is not actually needed.
1008  */
1009 void
blorp_gfx8_hiz_clear_attachments(struct blorp_batch * batch,uint32_t num_samples,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,bool clear_stencil,uint8_t stencil_value)1010 blorp_gfx8_hiz_clear_attachments(struct blorp_batch *batch,
1011                                  uint32_t num_samples,
1012                                  uint32_t x0, uint32_t y0,
1013                                  uint32_t x1, uint32_t y1,
1014                                  bool clear_depth, bool clear_stencil,
1015                                  uint8_t stencil_value)
1016 {
1017    assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1018 
1019    struct blorp_params params;
1020    blorp_params_init(&params);
1021    params.op = BLORP_OP_HIZ_CLEAR;
1022    params.num_layers = 1;
1023    params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
1024    params.x0 = x0;
1025    params.y0 = y0;
1026    params.x1 = x1;
1027    params.y1 = y1;
1028    params.num_samples = num_samples;
1029    params.depth.enabled = clear_depth;
1030    params.stencil.enabled = clear_stencil;
1031    params.stencil_ref = stencil_value;
1032    batch->blorp->exec(batch, &params);
1033 }
1034 
1035 /** Clear active color/depth/stencili attachments
1036  *
1037  * This function performs a clear operation on the currently bound
1038  * color/depth/stencil attachments.  It is assumed that any information passed
1039  * in here is valid, consistent, and in-bounds relative to the currently
1040  * attached depth/stencil.  The binding_table_offset parameter is the 32-bit
1041  * offset relative to surface state base address where pre-baked binding table
1042  * that we are to use lives.  If clear_color is false, binding_table_offset
1043  * must point to a binding table with one entry which is a valid null surface
1044  * that matches the currently bound depth and stencil.
1045  */
1046 void
blorp_clear_attachments(struct blorp_batch * batch,uint32_t binding_table_offset,enum isl_format depth_format,uint32_t num_samples,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_color,union isl_color_value color_value,bool clear_depth,float depth_value,uint8_t stencil_mask,uint8_t stencil_value)1047 blorp_clear_attachments(struct blorp_batch *batch,
1048                         uint32_t binding_table_offset,
1049                         enum isl_format depth_format,
1050                         uint32_t num_samples,
1051                         uint32_t start_layer, uint32_t num_layers,
1052                         uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
1053                         bool clear_color, union isl_color_value color_value,
1054                         bool clear_depth, float depth_value,
1055                         uint8_t stencil_mask, uint8_t stencil_value)
1056 {
1057    struct blorp_params params;
1058    blorp_params_init(&params);
1059 
1060    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1061    assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1062 
1063    params.x0 = x0;
1064    params.y0 = y0;
1065    params.x1 = x1;
1066    params.y1 = y1;
1067 
1068    params.use_pre_baked_binding_table = true;
1069    params.pre_baked_binding_table_offset = binding_table_offset;
1070 
1071    params.num_layers = num_layers;
1072    params.num_samples = num_samples;
1073 
1074    if (clear_color) {
1075       params.dst.enabled = true;
1076       params.op = BLORP_OP_SLOW_COLOR_CLEAR;
1077 
1078       memcpy(&params.wm_inputs.clear_color, color_value.f32, sizeof(float) * 4);
1079 
1080       /* Unfortunately, without knowing whether or not our destination surface
1081        * is tiled or not, we have to assume it may be linear.  This means no
1082        * SIMD16_REPDATA for us. :-(
1083        */
1084       if (!blorp_params_get_clear_kernel(batch, &params, false, false))
1085          return;
1086    }
1087 
1088    if (clear_depth) {
1089       params.depth.enabled = true;
1090       params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
1091 
1092       params.z = depth_value;
1093       params.depth_format = isl_format_get_depth_format(depth_format, false);
1094    }
1095 
1096    if (stencil_mask) {
1097       params.stencil.enabled = true;
1098       params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
1099 
1100       params.stencil_mask = stencil_mask;
1101       params.stencil_ref = stencil_value;
1102    }
1103 
1104    if (!blorp_params_get_layer_offset_vs(batch, &params))
1105       return;
1106 
1107    params.vs_inputs.base_layer = start_layer;
1108 
1109    batch->blorp->exec(batch, &params);
1110 }
1111 
1112 void
blorp_ccs_resolve(struct blorp_batch * batch,struct blorp_surf * surf,uint32_t level,uint32_t start_layer,uint32_t num_layers,enum isl_format format,enum isl_aux_op resolve_op)1113 blorp_ccs_resolve(struct blorp_batch *batch,
1114                   struct blorp_surf *surf, uint32_t level,
1115                   uint32_t start_layer, uint32_t num_layers,
1116                   enum isl_format format,
1117                   enum isl_aux_op resolve_op)
1118 {
1119    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1120    struct blorp_params params;
1121 
1122    blorp_params_init(&params);
1123    switch(resolve_op) {
1124    case ISL_AUX_OP_AMBIGUATE:
1125       params.op = BLORP_OP_CCS_AMBIGUATE;
1126       break;
1127    case ISL_AUX_OP_FULL_RESOLVE:
1128       params.op = BLORP_OP_CCS_RESOLVE;
1129       break;
1130    case ISL_AUX_OP_PARTIAL_RESOLVE:
1131       params.op = BLORP_OP_CCS_PARTIAL_RESOLVE;
1132       break;
1133    default:
1134       assert(false);
1135    }
1136    blorp_surface_info_init(batch, &params.dst, surf,
1137                                level, start_layer, format, true);
1138 
1139    params.x0 = params.y0 = 0;
1140    params.x1 = u_minify(params.dst.surf.logical_level0_px.width, level);
1141    params.y1 = u_minify(params.dst.surf.logical_level0_px.height, level);
1142    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 9) {
1143       /* From Bspec 2424, "Render Target Resolve":
1144        *
1145        *    The Resolve Rectangle size is same as Clear Rectangle size from
1146        *    SKL+.
1147        *
1148        * Note that this differs from Vol7 of the Sky Lake PRM, which only
1149        * specifies aligning by the scaledown factors.
1150        */
1151       get_fast_clear_rect(batch->blorp->isl_dev, surf->surf, surf->aux_surf,
1152                           &params.x0, &params.y0, &params.x1, &params.y1);
1153    } else {
1154       /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
1155        *
1156        *    A rectangle primitive must be scaled down by the following factors
1157        *    with respect to render target being resolved.
1158        *
1159        * The scaledown factors in the table that follows are related to the
1160        * block size of the CCS format. For IVB and HSW, we divide by two, for
1161        * BDW we multiply by 8 and 16.
1162        */
1163       const struct isl_format_layout *aux_fmtl =
1164          isl_format_get_layout(params.dst.aux_surf.format);
1165       assert(aux_fmtl->txc == ISL_TXC_CCS);
1166 
1167       unsigned x_scaledown, y_scaledown;
1168       if (ISL_GFX_VER(batch->blorp->isl_dev) >= 8) {
1169          x_scaledown = aux_fmtl->bw * 8;
1170          y_scaledown = aux_fmtl->bh * 16;
1171       } else {
1172          x_scaledown = aux_fmtl->bw / 2;
1173          y_scaledown = aux_fmtl->bh / 2;
1174       }
1175       params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
1176       params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
1177    }
1178 
1179    if (batch->blorp->isl_dev->info->ver >= 10) {
1180       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
1181              resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE ||
1182              resolve_op == ISL_AUX_OP_AMBIGUATE);
1183    } else if (batch->blorp->isl_dev->info->ver >= 9) {
1184       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
1185              resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
1186    } else {
1187       /* Broadwell and earlier do not have a partial resolve */
1188       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE);
1189    }
1190    params.fast_clear_op = resolve_op;
1191    params.num_layers = num_layers;
1192 
1193    /* Note: there is no need to initialize push constants because it doesn't
1194     * matter what data gets dispatched to the render target.  However, we must
1195     * ensure that the fragment shader delivers the data using the "replicated
1196     * color" message.
1197     */
1198 
1199    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
1200       return;
1201 
1202    batch->blorp->exec(batch, &params);
1203 
1204    if (batch->blorp->isl_dev->info->ver <= 8) {
1205       assert(surf->aux_usage == ISL_AUX_USAGE_CCS_D);
1206       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE);
1207       /* ISL's state-machine of CCS_D describes full resolves as leaving the
1208        * aux buffer in the pass-through state. Hardware doesn't behave this
1209        * way on Broadwell however. On that platform, full resolves transition
1210        * the aux buffer to the resolved state. We assume that gfx7 behaves the
1211        * same. Use an ambiguate to match driver expectations.
1212        */
1213       for (int l = 0; l < num_layers; l++)
1214          blorp_ccs_ambiguate(batch, surf, level, start_layer + l);
1215    }
1216 }
1217 
1218 static nir_def *
blorp_nir_bit(nir_builder * b,nir_def * src,unsigned bit)1219 blorp_nir_bit(nir_builder *b, nir_def *src, unsigned bit)
1220 {
1221    return nir_iand_imm(b, nir_ushr_imm(b, src, bit), 1);
1222 }
1223 
1224 #pragma pack(push, 1)
1225 struct blorp_mcs_partial_resolve_key
1226 {
1227    struct blorp_base_key base;
1228    bool indirect_clear_color;
1229    bool int_format;
1230    uint32_t num_samples;
1231 };
1232 #pragma pack(pop)
1233 
1234 static bool
blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch * batch,struct blorp_params * params)1235 blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch,
1236                                             struct blorp_params *params)
1237 {
1238    struct blorp_context *blorp = batch->blorp;
1239    const struct blorp_mcs_partial_resolve_key blorp_key = {
1240       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE),
1241       .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL,
1242       .int_format = isl_format_has_int_channel(params->dst.view.format),
1243       .num_samples = params->num_samples,
1244    };
1245 
1246    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
1247                             &params->wm_prog_kernel, &params->wm_prog_data))
1248       return true;
1249 
1250    void *mem_ctx = ralloc_context(NULL);
1251 
1252    nir_builder b;
1253    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_FRAGMENT,
1254                          blorp_shader_type_to_name(blorp_key.base.shader_type));
1255 
1256    nir_variable *v_color =
1257       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
1258 
1259    nir_variable *frag_color =
1260       nir_variable_create(b.shader, nir_var_shader_out,
1261                           glsl_vec4_type(), "gl_FragColor");
1262    frag_color->data.location = FRAG_RESULT_COLOR;
1263 
1264    /* Do an MCS fetch and check if it is equal to the magic clear value */
1265    nir_def *mcs =
1266       blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, nir_load_frag_coord(&b)),
1267                                nir_load_layer_id(&b));
1268    nir_def *is_clear =
1269       blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples);
1270 
1271    /* If we aren't the clear value, discard. */
1272    nir_discard_if(&b, nir_inot(&b, is_clear));
1273 
1274    nir_def *clear_color = nir_load_var(&b, v_color);
1275    if (blorp_key.indirect_clear_color && blorp->isl_dev->info->ver <= 8) {
1276       /* Gfx7-8 clear colors are stored as single 0/1 bits */
1277       clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
1278                                  blorp_nir_bit(&b, clear_color, 30),
1279                                  blorp_nir_bit(&b, clear_color, 29),
1280                                  blorp_nir_bit(&b, clear_color, 28));
1281 
1282       if (!blorp_key.int_format)
1283          clear_color = nir_i2f32(&b, clear_color);
1284    }
1285    nir_store_var(&b, frag_color, clear_color, 0xf);
1286 
1287    const bool multisample_fbo = true;
1288    const struct blorp_program p =
1289       blorp_compile_fs(blorp, mem_ctx, b.shader, multisample_fbo, false);
1290 
1291    bool result =
1292       blorp->upload_shader(batch, MESA_SHADER_FRAGMENT,
1293                            &blorp_key, sizeof(blorp_key),
1294                            p.kernel, p.kernel_size,
1295                            p.prog_data, p.prog_data_size,
1296                            &params->wm_prog_kernel, &params->wm_prog_data);
1297 
1298    ralloc_free(mem_ctx);
1299    return result;
1300 }
1301 
1302 void
blorp_mcs_partial_resolve(struct blorp_batch * batch,struct blorp_surf * surf,enum isl_format format,uint32_t start_layer,uint32_t num_layers)1303 blorp_mcs_partial_resolve(struct blorp_batch *batch,
1304                           struct blorp_surf *surf,
1305                           enum isl_format format,
1306                           uint32_t start_layer, uint32_t num_layers)
1307 {
1308    struct blorp_params params;
1309    blorp_params_init(&params);
1310    params.op = BLORP_OP_MCS_PARTIAL_RESOLVE;
1311 
1312    assert(batch->blorp->isl_dev->info->ver >= 7);
1313 
1314    params.x0 = 0;
1315    params.y0 = 0;
1316    params.x1 = surf->surf->logical_level0_px.width;
1317    params.y1 = surf->surf->logical_level0_px.height;
1318 
1319    blorp_surface_info_init(batch, &params.src, surf, 0,
1320                                start_layer, format, false);
1321    blorp_surface_info_init(batch, &params.dst, surf, 0,
1322                                start_layer, format, true);
1323 
1324    params.num_samples = params.dst.surf.samples;
1325    params.num_layers = num_layers;
1326    params.dst_clear_color_as_input = surf->clear_color_addr.buffer != NULL;
1327 
1328    memcpy(&params.wm_inputs.clear_color,
1329           surf->clear_color.f32, sizeof(float) * 4);
1330 
1331    if (!blorp_params_get_mcs_partial_resolve_kernel(batch, &params))
1332       return;
1333 
1334    batch->blorp->exec(batch, &params);
1335 }
1336 
/* Build the MCS element value that encodes the "ambiguated" state for the
 * given sample count (2..16).
 *
 * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed
 * Multisample Surfaces".
 */
static uint64_t
get_mcs_ambiguate_pixel(int sample_count)
{
   assert(sample_count >= 2);
   assert(sample_count <= 16);

   /* An MCS element is an array of sample slice (SS) entries, one per
    * sample, each just wide enough to index every slice.
    */
   const int bits_per_entry = util_logbase2(sample_count);

   /* In the ambiguated encoding every SS entry stores its own index
    * (SS[0] = 0, SS[1] = 1, ...), packed in little endian order.  The unused
    * bits are documented as Reserved or Reserved (MBZ); both are treated as
    * MBZ here, so they stay zero.
    */
   uint64_t pixel = 0;
   uint64_t slice = 0;
   while (slice < (uint64_t)sample_count) {
      pixel |= slice << (slice * bits_per_entry);
      slice++;
   }

   return pixel;
}
1365 
1366 /** Clear an MCS to the "uncompressed" state
1367  *
1368  * This pass is the MCS equivalent of a "HiZ resolve".  It sets the MCS values
1369  * for a given layer of a surface to a sample-count dependent value which is
1370  * the "uncompressed" state which tells the sampler to go look at the main
1371  * surface.
1372  */
1373 void
blorp_mcs_ambiguate(struct blorp_batch * batch,struct blorp_surf * surf,uint32_t start_layer,uint32_t num_layers)1374 blorp_mcs_ambiguate(struct blorp_batch *batch,
1375                     struct blorp_surf *surf,
1376                     uint32_t start_layer, uint32_t num_layers)
1377 {
1378    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1379 
1380    struct blorp_params params;
1381    blorp_params_init(&params);
1382    params.op = BLORP_OP_MCS_AMBIGUATE;
1383 
1384    assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 7);
1385 
1386    enum isl_format renderable_format;
1387    switch (isl_format_get_layout(surf->aux_surf->format)->bpb) {
1388    case 8:  renderable_format = ISL_FORMAT_R8_UINT;     break;
1389    case 32: renderable_format = ISL_FORMAT_R32_UINT;    break;
1390    case 64: renderable_format = ISL_FORMAT_R32G32_UINT; break;
1391    default: unreachable("Unexpected MCS format size for ambiguate");
1392    }
1393 
1394    /* From Bspec 57340 (r59562):
1395     *
1396     *   To the calculated MCS size we add 4kb page to be used as clear value
1397     *   storage.
1398     *
1399     * and
1400     *
1401     *   When allocating memory, MCS buffer size is extended by 4KB over its
1402     *   original calculated size. First 4KB page of the MCS is reserved for
1403     *   internal HW usage.
1404     *
1405     * We shift aux buffer's start address by 4KB, accordingly.
1406     */
1407    struct blorp_address aux_addr = surf->aux_addr;
1408    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 20)
1409       aux_addr.offset += 4096;
1410 
1411    params.dst = (struct blorp_surface_info) {
1412       .enabled = true,
1413       .surf = *surf->aux_surf,
1414       .addr = aux_addr,
1415       .view = {
1416          .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1417          .format = renderable_format,
1418          .base_level = 0,
1419          .base_array_layer = start_layer,
1420          .levels = 1,
1421          .array_len = num_layers,
1422          .swizzle = ISL_SWIZZLE_IDENTITY,
1423       },
1424    };
1425 
1426    params.x0 = 0;
1427    params.y0 = 0;
1428    params.x1 = params.dst.surf.logical_level0_px.width;
1429    params.y1 = params.dst.surf.logical_level0_px.height;
1430    params.num_layers = params.dst.view.array_len;
1431 
1432    const uint64_t pixel = get_mcs_ambiguate_pixel(surf->surf->samples);
1433    params.wm_inputs.clear_color[0] = pixel & 0xFFFFFFFF;
1434    params.wm_inputs.clear_color[1] = pixel >> 32;
1435 
1436    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
1437       return;
1438 
1439    batch->blorp->exec(batch, &params);
1440 }
1441 
1442 /** Clear a CCS to the "uncompressed" state
1443  *
1444  * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS values
1445  * for a given layer/level of a surface to 0x0 which is the "uncompressed"
1446  * state which tells the sampler to go look at the main surface.
1447  */
1448 void
blorp_ccs_ambiguate(struct blorp_batch * batch,struct blorp_surf * surf,uint32_t level,uint32_t layer)1449 blorp_ccs_ambiguate(struct blorp_batch *batch,
1450                     struct blorp_surf *surf,
1451                     uint32_t level, uint32_t layer)
1452 {
1453    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1454 
1455    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
1456       /* On gfx10 and above, we have a hardware resolve op for this */
1457       return blorp_ccs_resolve(batch, surf, level, layer, 1,
1458                                surf->surf->format, ISL_AUX_OP_AMBIGUATE);
1459    }
1460 
1461    struct blorp_params params;
1462    blorp_params_init(&params);
1463    params.op = BLORP_OP_CCS_AMBIGUATE;
1464 
1465    assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 7);
1466 
1467    const struct isl_format_layout *aux_fmtl =
1468       isl_format_get_layout(surf->aux_surf->format);
1469    assert(aux_fmtl->txc == ISL_TXC_CCS);
1470 
1471    params.dst = (struct blorp_surface_info) {
1472       .enabled = true,
1473       .addr = surf->aux_addr,
1474       .view = {
1475          .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1476          .format = ISL_FORMAT_R32G32B32A32_UINT,
1477          .base_level = 0,
1478          .base_array_layer = 0,
1479          .levels = 1,
1480          .array_len = 1,
1481          .swizzle = ISL_SWIZZLE_IDENTITY,
1482       },
1483    };
1484 
1485    uint32_t z = 0;
1486    if (surf->surf->dim == ISL_SURF_DIM_3D) {
1487       z = layer;
1488       layer = 0;
1489    }
1490 
1491    uint64_t offset_B;
1492    uint32_t x_offset_el, y_offset_el;
1493    isl_surf_get_image_offset_B_tile_el(surf->aux_surf, level, layer, z,
1494                                        &offset_B, &x_offset_el, &y_offset_el);
1495    params.dst.addr.offset += offset_B;
1496 
1497    const uint32_t width_px =
1498       u_minify(surf->aux_surf->logical_level0_px.width, level);
1499    const uint32_t height_px =
1500       u_minify(surf->aux_surf->logical_level0_px.height, level);
1501    const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
1502    const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
1503 
1504    struct isl_tile_info ccs_tile_info;
1505    isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
1506 
1507    /* We're going to map it as a regular RGBA32_UINT surface.  We need to
1508     * downscale a good deal.  We start by computing the area on the CCS to
1509     * clear in units of Y-tiled cache lines.
1510     */
1511    uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl;
1512    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 8) {
1513       /* From the Sky Lake PRM Vol. 12 in the section on planes:
1514        *
1515        *    "The Color Control Surface (CCS) contains the compression status
1516        *    of the cache-line pairs. The compression state of the cache-line
1517        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
1518        *    represents an area on the main surface of 16x16 sets of 128 byte
1519        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
1520        *
1521        * Each 2-bit surface element in the CCS corresponds to a single
1522        * cache-line pair in the main surface.  This means that 16x16 el block
1523        * in the CCS maps to a Y-tiled cache line.  Fortunately, CCS layouts
1524        * are calculated with a very large alignment so we can round up to a
1525        * whole cache line without worrying about overdraw.
1526        */
1527 
1528       /* On Broadwell and above, a CCS tile is the same as a Y tile when
1529        * viewed at the cache-line granularity.  Fortunately, the horizontal
1530        * and vertical alignment requirements of the CCS are such that we can
1531        * align to an entire cache line without worrying about crossing over
1532        * from one LOD to another.
1533        */
1534       const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8;
1535       const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8;
1536       assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0);
1537       assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0);
1538 
1539       assert(x_offset_el % x_el_per_cl == 0);
1540       assert(y_offset_el % y_el_per_cl == 0);
1541       x_offset_cl = x_offset_el / x_el_per_cl;
1542       y_offset_cl = y_offset_el / y_el_per_cl;
1543       width_cl = DIV_ROUND_UP(width_el, x_el_per_cl);
1544       height_cl = DIV_ROUND_UP(height_el, y_el_per_cl);
1545    } else {
1546       /* On gfx7, the CCS tiling is not so nice.  However, there we are
1547        * guaranteed that we only have a single level and slice so we don't
1548        * have to worry about it and can just align to a whole tile.
1549        */
1550       assert(surf->aux_surf->logical_level0_px.depth == 1);
1551       assert(surf->aux_surf->logical_level0_px.array_len == 1);
1552       assert(x_offset_el == 0 && y_offset_el == 0);
1553       const uint32_t width_tl =
1554          DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w);
1555       const uint32_t height_tl =
1556          DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h);
1557       x_offset_cl = 0;
1558       y_offset_cl = 0;
1559       width_cl = width_tl * 8;
1560       height_cl = height_tl * 8;
1561    }
1562 
1563    /* We're going to use a RGBA32 format so as to write data as quickly as
1564     * possible.  A y-tiled cache line will then be 1x4 px.
1565     */
1566    const uint32_t x_offset_rgba_px = x_offset_cl;
1567    const uint32_t y_offset_rgba_px = y_offset_cl * 4;
1568    const uint32_t width_rgba_px = width_cl;
1569    const uint32_t height_rgba_px = height_cl * 4;
1570 
1571    ASSERTED bool ok =
1572       isl_surf_init(batch->blorp->isl_dev, &params.dst.surf,
1573                     .dim = ISL_SURF_DIM_2D,
1574                     .format = ISL_FORMAT_R32G32B32A32_UINT,
1575                     .width = width_rgba_px + x_offset_rgba_px,
1576                     .height = height_rgba_px + y_offset_rgba_px,
1577                     .depth = 1,
1578                     .levels = 1,
1579                     .array_len = 1,
1580                     .samples = 1,
1581                     .row_pitch_B = surf->aux_surf->row_pitch_B,
1582                     .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1583                     .tiling_flags = ISL_TILING_Y0_BIT);
1584    assert(ok);
1585 
1586    params.x0 = x_offset_rgba_px;
1587    params.y0 = y_offset_rgba_px;
1588    params.x1 = x_offset_rgba_px + width_rgba_px;
1589    params.y1 = y_offset_rgba_px + height_rgba_px;
1590 
1591    /* A CCS value of 0 means "uncompressed." */
1592    memset(&params.wm_inputs.clear_color, 0,
1593           sizeof(params.wm_inputs.clear_color));
1594 
1595    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
1596       return;
1597 
1598    batch->blorp->exec(batch, &params);
1599 }
1600