xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/meta/radv_meta_clear.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "nir/nir_builder.h"
8 #include "radv_debug.h"
9 #include "radv_entrypoints.h"
10 #include "radv_formats.h"
11 #include "radv_meta.h"
12 
13 #include "util/format_rgb9e5.h"
14 #include "vk_common_entrypoints.h"
15 #include "vk_format.h"
16 #include "vk_shader_module.h"
17 
18 #include "ac_formats.h"
19 
/* Index into the per-sample depth/stencil clear pipeline arrays: SLOW is the
 * regular rasterized clear, FAST selects the pipeline variant created with
 * db_depth_clear/db_stencil_clear set (see create_depthstencil_pipeline). */
enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST };
21 
22 static void
build_color_shaders(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)23 build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
24                     uint32_t frag_output)
25 {
26    nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
27    nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
28 
29    const struct glsl_type *position_type = glsl_vec4_type();
30    const struct glsl_type *color_type = glsl_vec4_type();
31 
32    nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
33    vs_out_pos->data.location = VARYING_SLOT_POS;
34 
35    nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
36 
37    nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
38    fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
39 
40    nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
41 
42    nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
43    nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
44 
45    const struct glsl_type *layer_type = glsl_int_type();
46    nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
47    vs_out_layer->data.location = VARYING_SLOT_LAYER;
48    vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
49    nir_def *inst_id = nir_load_instance_id(&vs_b);
50    nir_def *base_instance = nir_load_base_instance(&vs_b);
51 
52    nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
53    nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
54 
55    *out_vs = vs_b.shader;
56    *out_fs = fs_b.shader;
57 }
58 
/*
 * Shared helper that creates one meta-clear graphics pipeline from the given
 * shaders and per-variant state. Viewport, scissor and the stencil reference
 * are left dynamic so the same pipeline can serve any clear rect. Consumes
 * (ralloc_free's) both NIR shaders regardless of success. Returns the result
 * of radv_graphics_pipeline_create().
 */
static VkResult
create_pipeline(struct radv_device *device, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineRenderingCreateInfo *dyn_state,
                const VkPipelineLayout layout, const struct radv_graphics_pipeline_create_info *extra,
                const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
{
   VkDevice device_h = radv_device_to_handle(device);
   VkResult result;

   result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
                                          &(VkGraphicsPipelineCreateInfo){
                                             .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
                                             .pNext = dyn_state,
                                             /* The fragment stage is optional (fs_nir may be NULL);
                                              * only the first stage entry is used in that case. */
                                             .stageCount = fs_nir ? 2 : 1,
                                             .pStages =
                                                (VkPipelineShaderStageCreateInfo[]){
                                                   {
                                                      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                                                      .stage = VK_SHADER_STAGE_VERTEX_BIT,
                                                      .module = vk_shader_module_handle_from_nir(vs_nir),
                                                      .pName = "main",
                                                   },
                                                   {
                                                      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                                                      .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                                                      .module = vk_shader_module_handle_from_nir(fs_nir),
                                                      .pName = "main",
                                                   },
                                                },
                                             .pVertexInputState = vi_state,
                                             .pInputAssemblyState =
                                                &(VkPipelineInputAssemblyStateCreateInfo){
                                                   .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
                                                   .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
                                                   .primitiveRestartEnable = false,
                                                },
                                             /* Counts only; the actual viewport/scissor are dynamic. */
                                             .pViewportState =
                                                &(VkPipelineViewportStateCreateInfo){
                                                   .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
                                                   .viewportCount = 1,
                                                   .scissorCount = 1,
                                                },
                                             .pRasterizationState =
                                                &(VkPipelineRasterizationStateCreateInfo){
                                                   .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
                                                   .rasterizerDiscardEnable = false,
                                                   .polygonMode = VK_POLYGON_MODE_FILL,
                                                   .cullMode = VK_CULL_MODE_NONE,
                                                   .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
                                                   .depthBiasEnable = false,
                                                   .depthBiasConstantFactor = 0.0f,
                                                   .depthBiasClamp = 0.0f,
                                                   .depthBiasSlopeFactor = 0.0f,
                                                   .lineWidth = 1.0f,
                                                },
                                             .pMultisampleState =
                                                &(VkPipelineMultisampleStateCreateInfo){
                                                   .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
                                                   .rasterizationSamples = samples,
                                                   .sampleShadingEnable = false,
                                                   .pSampleMask = NULL,
                                                   .alphaToCoverageEnable = false,
                                                   .alphaToOneEnable = false,
                                                },
                                             .pDepthStencilState = ds_state,
                                             .pColorBlendState = cb_state,
                                             /* Stencil reference is dynamic so depth/stencil clears can
                                              * set the clear value per draw (see emit_depthstencil_clear). */
                                             .pDynamicState =
                                                &(VkPipelineDynamicStateCreateInfo){
                                                   .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
                                                   .dynamicStateCount = 3,
                                                   .pDynamicStates =
                                                      (VkDynamicState[]){
                                                         VK_DYNAMIC_STATE_VIEWPORT,
                                                         VK_DYNAMIC_STATE_SCISSOR,
                                                         VK_DYNAMIC_STATE_STENCIL_REFERENCE,
                                                      },
                                                },
                                             .layout = layout,
                                             .flags = 0,
                                             /* Dynamic rendering: formats come from dyn_state (pNext). */
                                             .renderPass = VK_NULL_HANDLE,
                                             .subpass = 0,
                                          },
                                          extra, alloc, pipeline);

   /* The pipeline owns compiled code from here on; the NIR is no longer needed. */
   ralloc_free(vs_nir);
   ralloc_free(fs_nir);

   return result;
}
150 
151 static VkResult
create_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline)152 create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
153                       VkPipeline *pipeline)
154 {
155    struct nir_shader *vs_nir;
156    struct nir_shader *fs_nir;
157    VkResult result;
158 
159    if (!device->meta_state.clear_color_p_layout) {
160       const VkPushConstantRange pc_range_color = {
161          .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
162          .size = 16,
163       };
164 
165       result =
166          radv_meta_create_pipeline_layout(device, NULL, 1, &pc_range_color, &device->meta_state.clear_color_p_layout);
167       if (result != VK_SUCCESS)
168          return result;
169    }
170 
171    build_color_shaders(device, &vs_nir, &fs_nir, frag_output);
172 
173    const VkPipelineVertexInputStateCreateInfo vi_state = {
174       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
175       .vertexBindingDescriptionCount = 0,
176       .vertexAttributeDescriptionCount = 0,
177    };
178 
179    const VkPipelineDepthStencilStateCreateInfo ds_state = {
180       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
181       .depthTestEnable = false,
182       .depthWriteEnable = false,
183       .depthBoundsTestEnable = false,
184       .stencilTestEnable = false,
185       .minDepthBounds = 0.0f,
186       .maxDepthBounds = 1.0f,
187    };
188 
189    VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
190    blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
191       .blendEnable = false,
192       .colorWriteMask =
193          VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
194    };
195 
196    const VkPipelineColorBlendStateCreateInfo cb_state = {
197       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
198       .logicOpEnable = false,
199       .attachmentCount = MAX_RTS,
200       .pAttachments = blend_attachment_state,
201       .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}};
202 
203    VkFormat att_formats[MAX_RTS] = {0};
204    att_formats[frag_output] = format;
205 
206    const VkPipelineRenderingCreateInfo rendering_create_info = {
207       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
208       .colorAttachmentCount = MAX_RTS,
209       .pColorAttachmentFormats = att_formats,
210    };
211 
212    struct radv_graphics_pipeline_create_info extra = {
213       .use_rectlist = true,
214    };
215    result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
216                             device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline);
217 
218    return result;
219 }
220 
221 static VkResult
get_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline_out)222 get_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
223                    VkPipeline *pipeline_out)
224 {
225    struct radv_meta_state *state = &device->meta_state;
226    const uint32_t fs_key = radv_format_meta_fs_key(device, format);
227    const uint32_t samples_log2 = ffs(samples) - 1;
228    VkResult result = VK_SUCCESS;
229    VkPipeline *pipeline;
230 
231    mtx_lock(&state->mtx);
232    pipeline = &state->color_clear[samples_log2][frag_output].color_pipelines[fs_key];
233    if (!*pipeline) {
234       result = create_color_pipeline(device, samples, frag_output, radv_fs_key_format_exemplars[fs_key], pipeline);
235       if (result != VK_SUCCESS)
236          goto fail;
237    }
238 
239    *pipeline_out = *pipeline;
240 
241 fail:
242    mtx_unlock(&state->mtx);
243    return result;
244 }
245 
246 static void
finish_meta_clear_htile_mask_state(struct radv_device * device)247 finish_meta_clear_htile_mask_state(struct radv_device *device)
248 {
249    struct radv_meta_state *state = &device->meta_state;
250 
251    radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, &state->alloc);
252    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, &state->alloc);
253    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
254                                                         state->clear_htile_mask_ds_layout, &state->alloc);
255 }
256 
257 static void
finish_meta_clear_dcc_comp_to_single_state(struct radv_device * device)258 finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
259 {
260    struct radv_meta_state *state = &device->meta_state;
261 
262    for (uint32_t i = 0; i < 2; i++) {
263       radv_DestroyPipeline(radv_device_to_handle(device), state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
264    }
265    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, &state->alloc);
266    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
267                                                         state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
268 }
269 
270 void
radv_device_finish_meta_clear_state(struct radv_device * device)271 radv_device_finish_meta_clear_state(struct radv_device *device)
272 {
273    struct radv_meta_state *state = &device->meta_state;
274 
275    for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
276       for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) {
277          for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) {
278             radv_DestroyPipeline(radv_device_to_handle(device), state->color_clear[i][j].color_pipelines[k],
279                                  &state->alloc);
280          }
281       }
282    }
283    for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
284       for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
285          radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
286          radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_pipeline[j],
287                               &state->alloc);
288          radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_pipeline[j],
289                               &state->alloc);
290 
291          radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_unrestricted_pipeline[j],
292                               &state->alloc);
293          radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
294                               &state->alloc);
295          radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
296                               &state->alloc);
297       }
298    }
299    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, &state->alloc);
300    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc);
301    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_unrestricted_p_layout, &state->alloc);
302 
303    finish_meta_clear_htile_mask_state(device);
304    finish_meta_clear_dcc_comp_to_single_state(device);
305 }
306 
307 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)308 emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
309                  uint32_t view_mask)
310 {
311    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
312    const struct radv_rendering_state *render = &cmd_buffer->state.render;
313    uint32_t samples;
314    VkFormat format;
315    VkClearColorValue clear_value = clear_att->clearValue.color;
316    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
317    VkPipeline pipeline;
318    VkResult result;
319 
320    assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
321    assert(clear_att->colorAttachment < render->color_att_count);
322    const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
323 
324    /* When a framebuffer is bound to the current command buffer, get the
325     * number of samples from it. Otherwise, get the number of samples from
326     * the render pass because it's likely a secondary command buffer.
327     */
328    if (color_att->iview) {
329       samples = color_att->iview->image->vk.samples;
330       format = color_att->iview->vk.format;
331    } else {
332       samples = render->max_samples;
333       format = color_att->format;
334    }
335    assert(format != VK_FORMAT_UNDEFINED);
336 
337    assert(util_is_power_of_two_nonzero(samples));
338 
339    result = get_color_pipeline(device, samples, clear_att->colorAttachment, format, &pipeline);
340    if (result != VK_SUCCESS) {
341       vk_command_buffer_set_error(&cmd_buffer->vk, result);
342       return;
343    }
344 
345    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_color_p_layout,
346                               VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &clear_value);
347 
348    radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
349 
350    radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
351                        &(VkViewport){.x = clear_rect->rect.offset.x,
352                                      .y = clear_rect->rect.offset.y,
353                                      .width = clear_rect->rect.extent.width,
354                                      .height = clear_rect->rect.extent.height,
355                                      .minDepth = 0.0f,
356                                      .maxDepth = 1.0f});
357 
358    radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
359 
360    if (view_mask) {
361       u_foreach_bit (i, view_mask)
362          radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
363    } else {
364       radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
365    }
366 }
367 
368 static void
build_depthstencil_shader(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,bool unrestricted)369 build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
370                           bool unrestricted)
371 {
372    nir_builder vs_b = radv_meta_init_shader(
373       dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
374    nir_builder fs_b =
375       radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT,
376                             unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
377 
378    const struct glsl_type *position_out_type = glsl_vec4_type();
379 
380    nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
381    vs_out_pos->data.location = VARYING_SLOT_POS;
382 
383    nir_def *z;
384    if (unrestricted) {
385       nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
386 
387       nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
388       fs_out_depth->data.location = FRAG_RESULT_DEPTH;
389       nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
390 
391       z = nir_imm_float(&vs_b, 0.0);
392    } else {
393       z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
394    }
395 
396    nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
397    nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
398 
399    const struct glsl_type *layer_type = glsl_int_type();
400    nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
401    vs_out_layer->data.location = VARYING_SLOT_LAYER;
402    vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
403    nir_def *inst_id = nir_load_instance_id(&vs_b);
404    nir_def *base_instance = nir_load_base_instance(&vs_b);
405 
406    nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
407    nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
408 
409    *out_vs = vs_b.shader;
410    *out_fs = fs_b.shader;
411 }
412 
413 static VkResult
create_depthstencil_pipeline(struct radv_device * device,VkImageAspectFlags aspects,uint32_t samples,int index,bool unrestricted,VkPipeline * pipeline)414 create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, uint32_t samples, int index,
415                              bool unrestricted, VkPipeline *pipeline)
416 {
417    struct nir_shader *vs_nir, *fs_nir;
418    VkResult result;
419 
420    if (!device->meta_state.clear_depth_p_layout) {
421       const VkPushConstantRange pc_range_depth = {
422          .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
423          .size = 4,
424       };
425 
426       result =
427          radv_meta_create_pipeline_layout(device, NULL, 1, &pc_range_depth, &device->meta_state.clear_depth_p_layout);
428       if (result != VK_SUCCESS)
429          return result;
430    }
431 
432    if (!device->meta_state.clear_depth_unrestricted_p_layout) {
433       const VkPushConstantRange pc_range_depth_unrestricted = {
434          .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
435          .size = 4,
436       };
437 
438       result = radv_meta_create_pipeline_layout(device, NULL, 1, &pc_range_depth_unrestricted,
439                                                 &device->meta_state.clear_depth_unrestricted_p_layout);
440       if (result != VK_SUCCESS)
441          return result;
442    }
443 
444    build_depthstencil_shader(device, &vs_nir, &fs_nir, unrestricted);
445 
446    const VkPipelineVertexInputStateCreateInfo vi_state = {
447       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
448       .vertexBindingDescriptionCount = 0,
449       .vertexAttributeDescriptionCount = 0,
450    };
451 
452    const VkPipelineDepthStencilStateCreateInfo ds_state = {
453       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
454       .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
455       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
456       .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
457       .depthBoundsTestEnable = false,
458       .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
459       .front =
460          {
461             .passOp = VK_STENCIL_OP_REPLACE,
462             .compareOp = VK_COMPARE_OP_ALWAYS,
463             .writeMask = UINT32_MAX,
464             .reference = 0, /* dynamic */
465          },
466       .back = {0 /* dont care */},
467       .minDepthBounds = 0.0f,
468       .maxDepthBounds = 1.0f,
469    };
470 
471    const VkPipelineColorBlendStateCreateInfo cb_state = {
472       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
473       .logicOpEnable = false,
474       .attachmentCount = 0,
475       .pAttachments = NULL,
476       .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
477    };
478 
479    const VkPipelineRenderingCreateInfo rendering_create_info = {
480       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
481       .depthAttachmentFormat = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
482       .stencilAttachmentFormat = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
483    };
484 
485    struct radv_graphics_pipeline_create_info extra = {
486       .use_rectlist = true,
487    };
488 
489    if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
490       extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
491    }
492    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
493       extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
494    }
495    result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
496                             device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline);
497 
498    return result;
499 }
500 
501 static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
502                                       VkImageLayout image_layout, VkImageAspectFlags aspects,
503                                       const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value,
504                                       uint32_t view_mask);
505 
506 static VkResult
get_depth_stencil_pipeline(struct radv_device * device,int samples_log2,VkImageAspectFlags aspects,bool fast,VkPipeline * pipeline_out)507 get_depth_stencil_pipeline(struct radv_device *device, int samples_log2, VkImageAspectFlags aspects, bool fast,
508                            VkPipeline *pipeline_out)
509 {
510    struct radv_meta_state *meta_state = &device->meta_state;
511    bool unrestricted = device->vk.enabled_extensions.EXT_depth_range_unrestricted;
512    int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW;
513    VkResult result = VK_SUCCESS;
514    VkPipeline *pipeline;
515 
516    mtx_lock(&meta_state->mtx);
517 
518    switch (aspects) {
519    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
520       pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
521                               : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
522       break;
523    case VK_IMAGE_ASPECT_DEPTH_BIT:
524       pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
525                               : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
526       break;
527    case VK_IMAGE_ASPECT_STENCIL_BIT:
528       pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
529                               : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
530       break;
531    default:
532       unreachable("expected depth or stencil aspect");
533    }
534 
535    if (!*pipeline) {
536       result = create_depthstencil_pipeline(device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
537       if (result != VK_SUCCESS)
538          goto fail;
539    }
540 
541    *pipeline_out = *pipeline;
542 
543 fail:
544    mtx_unlock(&meta_state->mtx);
545    return result;
546 }
547 
548 static void
emit_depthstencil_clear(struct radv_cmd_buffer * cmd_buffer,VkClearDepthStencilValue clear_value,VkImageAspectFlags aspects,const VkClearRect * clear_rect,uint32_t view_mask,bool can_fast_clear)549 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, VkClearDepthStencilValue clear_value,
550                         VkImageAspectFlags aspects, const VkClearRect *clear_rect, uint32_t view_mask,
551                         bool can_fast_clear)
552 {
553    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
554    const struct radv_rendering_state *render = &cmd_buffer->state.render;
555    uint32_t samples, samples_log2;
556    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
557    VkPipeline pipeline;
558    VkResult result;
559 
560    /* When a framebuffer is bound to the current command buffer, get the
561     * number of samples from it. Otherwise, get the number of samples from
562     * the render pass because it's likely a secondary command buffer.
563     */
564    struct radv_image_view *iview = render->ds_att.iview;
565    if (iview) {
566       samples = iview->image->vk.samples;
567    } else {
568       assert(render->ds_att.format != VK_FORMAT_UNDEFINED);
569       samples = render->max_samples;
570    }
571 
572    assert(util_is_power_of_two_nonzero(samples));
573    samples_log2 = ffs(samples) - 1;
574 
575    result = get_depth_stencil_pipeline(device, samples_log2, aspects, can_fast_clear, &pipeline);
576    if (result != VK_SUCCESS) {
577       vk_command_buffer_set_error(&cmd_buffer->vk, result);
578       return;
579    }
580 
581    if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
582       clear_value.depth = 1.0f;
583 
584    if (device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
585       vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
586                                  device->meta_state.clear_depth_unrestricted_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
587                                  4, &clear_value.depth);
588    } else {
589       vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_p_layout,
590                                  VK_SHADER_STAGE_VERTEX_BIT, 0, 4, &clear_value.depth);
591    }
592 
593    uint32_t prev_reference = cmd_buffer->state.dynamic.vk.ds.stencil.front.reference;
594    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
595       radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
596    }
597 
598    radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
599 
600    if (can_fast_clear)
601       radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
602 
603    radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
604                        &(VkViewport){.x = clear_rect->rect.offset.x,
605                                      .y = clear_rect->rect.offset.y,
606                                      .width = clear_rect->rect.extent.width,
607                                      .height = clear_rect->rect.extent.height,
608                                      .minDepth = 0.0f,
609                                      .maxDepth = 1.0f});
610 
611    radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
612 
613    if (view_mask) {
614       u_foreach_bit (i, view_mask)
615          radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
616    } else {
617       radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
618    }
619 
620    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
621       radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
622    }
623 }
624 
625 static nir_shader *
build_clear_htile_mask_shader(struct radv_device * dev)626 build_clear_htile_mask_shader(struct radv_device *dev)
627 {
628    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
629    b.shader->info.workgroup_size[0] = 64;
630 
631    nir_def *global_id = get_global_ids(&b, 1);
632 
633    nir_def *offset = nir_imul_imm(&b, global_id, 16);
634    offset = nir_channel(&b, offset, 0);
635 
636    nir_def *buf = radv_meta_load_descriptor(&b, 0, 0);
637 
638    nir_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
639 
640    nir_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
641 
642    /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
643    nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
644    data = nir_ior(&b, data, nir_channel(&b, constants, 0));
645 
646    nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
647 
648    return b.shader;
649 }
650 
651 static VkResult
create_clear_htile_mask_pipeline(struct radv_device * device)652 create_clear_htile_mask_pipeline(struct radv_device *device)
653 {
654    struct radv_meta_state *state = &device->meta_state;
655    VkResult result;
656 
657    const VkDescriptorSetLayoutBinding binding = {
658       .binding = 0,
659       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
660       .descriptorCount = 1,
661       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
662    };
663 
664    result = radv_meta_create_descriptor_set_layout(device, 1, &binding, &state->clear_htile_mask_ds_layout);
665    if (result != VK_SUCCESS)
666       return result;
667 
668    const VkPushConstantRange pc_range = {
669       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
670       .size = 8,
671    };
672 
673    result = radv_meta_create_pipeline_layout(device, &state->clear_htile_mask_ds_layout, 1, &pc_range,
674                                              &state->clear_htile_mask_p_layout);
675    if (result != VK_SUCCESS)
676       return result;
677 
678    nir_shader *cs = build_clear_htile_mask_shader(device);
679 
680    result = radv_meta_create_compute_pipeline(device, cs, state->clear_htile_mask_p_layout,
681                                               &state->clear_htile_mask_pipeline);
682 
683    ralloc_free(cs);
684    return result;
685 }
686 
687 static VkResult
get_clear_htile_mask_pipeline(struct radv_device * device,VkPipeline * pipeline_out)688 get_clear_htile_mask_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
689 {
690    struct radv_meta_state *state = &device->meta_state;
691    VkResult result = VK_SUCCESS;
692 
693    mtx_lock(&state->mtx);
694    if (!state->clear_htile_mask_pipeline) {
695       result = create_clear_htile_mask_pipeline(device);
696       if (result != VK_SUCCESS)
697          goto fail;
698    }
699 
700    *pipeline_out = state->clear_htile_mask_pipeline;
701 
702 fail:
703    mtx_unlock(&state->mtx);
704    return result;
705 }
706 
/* Partially clear an HTILE range with a compute shader: only the bits of
 * each HTILE dword selected by htile_mask are replaced with the matching
 * bits of htile_value; the other bits are preserved via read-modify-write.
 *
 * Returns the flush bits the caller must apply before the updated HTILE
 * data is consumed. Returns 0 (with the command buffer marked in error) if
 * the pipeline could not be created.
 */
static uint32_t
clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo,
                 uint64_t offset, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_meta_state *state = &device->meta_state;
   /* One 64-thread workgroup covers 64 x 16 bytes = 1024 bytes. */
   uint64_t block_count = DIV_ROUND_UP(size, 1024);
   struct radv_meta_saved_state saved_state;
   struct radv_buffer dst_buffer;
   VkPipeline pipeline;
   VkResult result;

   result = get_clear_htile_mask_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return 0;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   /* Wrap the HTILE range in a temporary buffer for the SSBO descriptor. */
   radv_buffer_init(&dst_buffer, device, bo, size, offset);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, 1,
      (VkWriteDescriptorSet[]){
         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstBinding = 0,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .pBufferInfo =
             &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), .offset = 0, .range = size}}});

   /* The shader computes: data = (data & constants[1]) | constants[0]. */
   const unsigned constants[2] = {
      htile_value & htile_mask,
      ~htile_mask,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);

   vk_common_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

   radv_buffer_finish(&dst_buffer);

   radv_meta_restore(&saved_state, cmd_buffer);

   return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                                 VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
760 
761 static uint32_t
radv_get_htile_fast_clear_value(const struct radv_device * device,const struct radv_image * image,VkClearDepthStencilValue value)762 radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
763                                 VkClearDepthStencilValue value)
764 {
765    uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
766    uint32_t zmask = 0, smem = 0;
767    uint32_t htile_value;
768    uint32_t zmin, zmax;
769 
770    /* Convert the depth value to 14-bit zmin/zmax values. */
771    zmin = lroundf(value.depth * max_zval);
772    zmax = zmin;
773 
774    if (radv_image_tile_stencil_disabled(device, image)) {
775       /* Z only (no stencil):
776        *
777        * |31     18|17      4|3     0|
778        * +---------+---------+-------+
779        * |  Max Z  |  Min Z  | ZMask |
780        */
781       htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
782    } else {
783 
784       /* Z and stencil:
785        *
786        * |31       12|11 10|9    8|7   6|5   4|3     0|
787        * +-----------+-----+------+-----+-----+-------+
788        * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
789        *
790        * Z, stencil, 4 bit VRS encoding:
791        * |31       12| 11      10 |9    8|7         6 |5   4|3     0|
792        * +-----------+------------+------+------------+-----+-------+
793        * |  Z Range  | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
794        */
795       uint32_t delta = 0;
796       uint32_t zrange = ((zmax << 6) | delta);
797       uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
798 
799       if (radv_image_has_vrs_htile(device, image))
800          sresults = 0x3;
801 
802       htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
803    }
804 
805    return htile_value;
806 }
807 
808 static uint32_t
radv_get_htile_mask(const struct radv_device * device,const struct radv_image * image,VkImageAspectFlags aspects)809 radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, VkImageAspectFlags aspects)
810 {
811    uint32_t mask = 0;
812 
813    if (radv_image_tile_stencil_disabled(device, image)) {
814       /* All the HTILE buffer is used when there is no stencil. */
815       mask = UINT32_MAX;
816    } else {
817       if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
818          mask |= 0xfffffc0f;
819       if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
820          mask |= 0x000003f0;
821    }
822 
823    return mask;
824 }
825 
826 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)827 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
828 {
829    return value.depth == 1.0f || value.depth == 0.0f;
830 }
831 
832 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)833 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
834 {
835    return value.stencil == 0;
836 }
837 
/* Check whether a depth/stencil clear can be performed as an HTILE fast
 * clear (metadata-only) instead of a draw-based slow clear.
 */
static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect,
                          const VkClearDepthStencilValue clear_value, uint32_t view_mask)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (!iview || !iview->support_fast_clear)
      return false;

   /* HTILE must be compressed in this layout for the current queue family. */
   if (!radv_layout_is_htile_compressed(device, iview->image, image_layout,
                                        radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
      return false;

   /* The clear must cover the whole image extent... */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->vk.extent.width ||
       clear_rect->rect.extent.height != iview->image->vk.extent.height)
      return false;

   /* ...and all of its layers, either through a view mask selecting every
    * layer or through a full baseArrayLayer/layerCount range. */
   if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
      return false;
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
      return false;

   /* The HTILE zmin/zmax encoding only covers [0, 1]. */
   if (device->vk.enabled_extensions.EXT_depth_range_unrestricted && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       (clear_value.depth < 0.0 || clear_value.depth > 1.0))
      return false;

   /* TC-compat HTILE only supports the canonical fast-clear values
    * (depth 0.0/1.0, stencil 0). */
   if (radv_image_is_tc_compat_htile(iview->image) &&
       (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
        ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && !radv_is_fast_clear_stencil_allowed(clear_value))))
      return false;

   /* Every level in the view must have HTILE metadata. */
   if (iview->image->vk.mip_levels > 1) {
      uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
      if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
         /* Do not fast clears if one level can't be fast cleared. */
         return false;
      }
   }

   return true;
}
883 
/* Perform an HTILE-based fast depth/stencil clear: fill HTILE with the
 * fast-clear word and record the clear value in the image's DS clear
 * metadata.
 *
 * pre_flush/post_flush, when non-NULL, accumulate flush bits across
 * multiple clears so the caller can skip redundant cache flushes.
 */
static void
radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      VkClearDepthStencilValue clear_value, VkImageAspectFlags aspects,
                      enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint32_t clear_word, flush_bits;

   clear_word = radv_get_htile_fast_clear_value(device, iview->image, clear_value);

   if (pre_flush) {
      /* Flush pending depth/stencil writes before touching HTILE, skipping
       * bits a previous clear in this batch already requested. */
      enum radv_cmd_flush_bits bits =
         radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                               VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
         radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT,
                               iview->image);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);

   if (iview->image->planes[0].surface.has_stencil &&
       !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      /* Synchronize after performing a depth-only or a stencil-only
       * fast clear because the driver uses an optimized path which
       * performs a read-modify-write operation, and the two separate
       * aspects might use the same HTILE memory.
       */
      cmd_buffer->state.flush_bits |= flush_bits;
   }

   radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
   if (post_flush) {
      *post_flush |= flush_bits;
   }
}
929 
930 /* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
931  * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
932  */
933 static nir_shader *
build_clear_dcc_comp_to_single_shader(struct radv_device * dev,bool is_msaa)934 build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
935 {
936    enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
937    const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);
938 
939    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
940                                          is_msaa ? "multisampled" : "singlesampled");
941    b.shader->info.workgroup_size[0] = 8;
942    b.shader->info.workgroup_size[1] = 8;
943 
944    nir_def *global_id = get_global_ids(&b, 3);
945 
946    /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
947    nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
948 
949    /* Compute the coordinates. */
950    nir_def *coord = nir_trim_vector(&b, global_id, 2);
951    coord = nir_imul(&b, coord, dcc_block_size);
952    coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
953                     nir_undef(&b, 1, 32));
954 
955    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
956    output_img->data.descriptor_set = 0;
957    output_img->data.binding = 0;
958 
959    /* Load the clear color values. */
960    nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24);
961 
962    nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
963                             nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3));
964 
965    /* Store the clear color values. */
966    nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
967    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0),
968                          .image_dim = dim, .image_array = true);
969 
970    return b.shader;
971 }
972 
/* Create the comp-to-single DCC clear compute pipeline for the given sample
 * count variant; the descriptor set layout and pipeline layout are shared by
 * both variants and created lazily on first call.
 */
static VkResult
create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result;

   if (!state->clear_dcc_comp_to_single_ds_layout) {
      /* Binding 0: the storage image the clear color is written through. */
      const VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      };

      result = radv_meta_create_descriptor_set_layout(device, 1, &binding, &state->clear_dcc_comp_to_single_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!state->clear_dcc_comp_to_single_p_layout) {
      /* 24 bytes of push constants: DCC block size (8) + clear color (16). */
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 24,
      };

      result = radv_meta_create_pipeline_layout(device, &state->clear_dcc_comp_to_single_ds_layout, 1, &pc_range,
                                                &state->clear_dcc_comp_to_single_p_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa);

   result = radv_meta_create_compute_pipeline(device, cs, state->clear_dcc_comp_to_single_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}
1011 
1012 static VkResult
init_meta_clear_dcc_comp_to_single_state(struct radv_device * device)1013 init_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
1014 {
1015    struct radv_meta_state *state = &device->meta_state;
1016    VkResult result;
1017 
1018    for (uint32_t i = 0; i < 2; i++) {
1019       result = create_dcc_comp_to_single_pipeline(device, !!i, &state->clear_dcc_comp_to_single_pipeline[i]);
1020       if (result != VK_SUCCESS)
1021          return result;
1022    }
1023 
1024    return result;
1025 }
1026 
/* Precompile the meta clear pipelines (DCC comp-to-single, HTILE mask,
 * color and depth/stencil clear pipelines). When on_demand is set, nothing
 * is compiled here and pipelines are created lazily on first use.
 */
VkResult
radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
{
   VkResult res;
   struct radv_meta_state *state = &device->meta_state;

   if (on_demand)
      return VK_SUCCESS;

   res = init_meta_clear_dcc_comp_to_single_state(device);
   if (res != VK_SUCCESS)
      return res;

   res = create_clear_htile_mask_pipeline(device);
   if (res != VK_SUCCESS)
      return res;

   /* One set of color clear pipelines per sample count (1 << i samples),
    * one per fragment-shader format key. */
   for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
      uint32_t samples = 1 << i;

      /* Only precompile meta pipelines for attachment 0 as other are uncommon. */
      for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
         VkFormat format = radv_fs_key_format_exemplars[j];
         unsigned fs_key = radv_format_meta_fs_key(device, format);
         assert(!state->color_clear[i][0].color_pipelines[fs_key]);

         res = create_color_pipeline(device, samples, 0, format, &state->color_clear[i][0].color_pipelines[fs_key]);
         if (res != VK_SUCCESS)
            return res;
      }
   }
   /* Depth-only, stencil-only and combined depth/stencil pipelines, each in
    * a regular and a depth-range-unrestricted flavor, per sample count and
    * clear-pipeline variant. */
   for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
      uint32_t samples = 1 << i;

      for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
         res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
                                            &state->ds_clear[i].depth_only_pipeline[j]);
         if (res != VK_SUCCESS)
            return res;

         res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
                                            &state->ds_clear[i].stencil_only_pipeline[j]);
         if (res != VK_SUCCESS)
            return res;

         res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
                                            false, &state->ds_clear[i].depthstencil_pipeline[j]);
         if (res != VK_SUCCESS)
            return res;

         res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
                                            &state->ds_clear[i].depth_only_unrestricted_pipeline[j]);
         if (res != VK_SUCCESS)
            return res;

         res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
                                            &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
         if (res != VK_SUCCESS)
            return res;

         res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
                                            true, &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
         if (res != VK_SUCCESS)
            return res;
      }
   }
   return VK_SUCCESS;
}
1095 
1096 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)1097 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1098 {
1099    uint32_t value = 0; /* Default value when no DCC. */
1100 
1101    /* The fast-clear value is different for images that have both DCC and
1102     * CMASK metadata.
1103     */
1104    if (radv_image_has_dcc(image)) {
1105       /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1106       return image->vk.samples > 1 ? 0xcccccccc : 0xffffffff;
1107    }
1108 
1109    return value;
1110 }
1111 
1112 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1113 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1114                  uint32_t value)
1115 {
1116    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1117    const struct radv_physical_device *pdev = radv_device_physical(device);
1118    uint64_t cmask_offset = image->planes[0].surface.cmask_offset;
1119    uint64_t size;
1120 
1121    if (pdev->info.gfx_level == GFX9) {
1122       /* TODO: clear layers. */
1123       size = image->planes[0].surface.cmask_size;
1124    } else {
1125       unsigned slice_size = image->planes[0].surface.cmask_slice_size;
1126 
1127       cmask_offset += slice_size * range->baseArrayLayer;
1128       size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1129    }
1130 
1131    return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + cmask_offset, size,
1132                            value);
1133 }
1134 
1135 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1136 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1137                  uint32_t value)
1138 {
1139    uint64_t fmask_offset = image->planes[0].surface.fmask_offset;
1140    unsigned slice_size = image->planes[0].surface.fmask_slice_size;
1141    uint64_t size;
1142 
1143    /* MSAA images do not support mipmap levels. */
1144    assert(range->baseMipLevel == 0 && vk_image_subresource_level_count(&image->vk, range) == 1);
1145 
1146    fmask_offset += slice_size * range->baseArrayLayer;
1147    size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1148 
1149    return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + fmask_offset, size,
1150                            value);
1151 }
1152 
/* Fill the DCC metadata of an image with the given value for the mip/layer
 * range described by 'range'. Returns the flush bits required afterwards.
 */
uint32_t
radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
               uint32_t value)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
   uint32_t flush_bits = 0;

   /* Mark the image as being compressed. */
   radv_update_dcc_metadata(cmd_buffer, image, range, true);

   /* The DCC layout differs per GPU generation, so compute the byte range
    * to fill level by level. */
   for (uint32_t l = 0; l < level_count; l++) {
      uint64_t dcc_offset = image->planes[0].surface.meta_offset;
      uint32_t level = range->baseMipLevel + l;
      uint64_t size;

      if (pdev->info.gfx_level >= GFX10) {
         /* DCC for mipmaps+layers is currently disabled. */
         dcc_offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
                       image->planes[0].surface.u.gfx9.meta_levels[level].offset;
         size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
      } else if (pdev->info.gfx_level == GFX9) {
         /* Mipmap levels and layers aren't implemented. */
         assert(level == 0);
         size = image->planes[0].surface.meta_size;
      } else {
         const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[level];

         /* If dcc_fast_clear_size is 0 (which might happens for
          * mipmaps) the fill buffer operation below is a no-op.
          * This can only happen during initialization as the
          * fast clear path fallbacks to slow clears if one
          * level can't be fast cleared.
          */
         dcc_offset += dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
         size = dcc_level->dcc_slice_fast_clear_size * vk_image_subresource_layer_count(&image->vk, range);
      }

      /* Do not clear this level if it can't be compressed. */
      if (!size)
         continue;

      flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + dcc_offset,
                                     size, value);
   }

   return flush_bits;
}
1203 
1204 static VkResult
get_clear_dcc_comp_to_single_pipeline(struct radv_device * device,bool is_msaa,VkPipeline * pipeline_out)1205 get_clear_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline_out)
1206 {
1207    struct radv_meta_state *state = &device->meta_state;
1208    VkResult result = VK_SUCCESS;
1209 
1210    mtx_lock(&state->mtx);
1211    if (!state->clear_dcc_comp_to_single_pipeline[is_msaa]) {
1212       result = create_dcc_comp_to_single_pipeline(device, is_msaa, &state->clear_dcc_comp_to_single_pipeline[is_msaa]);
1213       if (result != VK_SUCCESS)
1214          goto fail;
1215    }
1216 
1217    *pipeline_out = state->clear_dcc_comp_to_single_pipeline[is_msaa];
1218 
1219 fail:
1220    mtx_unlock(&state->mtx);
1221    return result;
1222 }
1223 
/* Clear DCC in comp-to-single mode: for every selected mip level, dispatch
 * a compute shader that writes the raw clear color to the first pixel of
 * each DCC block, through an image view with compression disabled.
 *
 * Returns the flush bits required after the dispatches. Returns 0 (with
 * the command buffer marked in error) if the pipeline could not be created.
 */
static uint32_t
radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, uint32_t color_values[4])
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
   unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
   struct radv_meta_saved_state saved_state;
   bool is_msaa = image->vk.samples > 1;
   struct radv_image_view iview;
   VkPipeline pipeline;
   VkResult result;
   VkFormat format;

   /* Pick a uint format with the same pixel size so the raw color values
    * can be stored without conversion. */
   switch (bytes_per_pixel) {
   case 1:
      format = VK_FORMAT_R8_UINT;
      break;
   case 2:
      format = VK_FORMAT_R16_UINT;
      break;
   case 4:
      format = VK_FORMAT_R32_UINT;
      break;
   case 8:
      format = VK_FORMAT_R32G32_UINT;
      break;
   case 16:
      format = VK_FORMAT_R32G32B32A32_UINT;
      break;
   default:
      unreachable("Unsupported number of bytes per pixel");
   }

   result = get_clear_dcc_comp_to_single_pipeline(device, is_msaa, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return 0;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); l++) {
      uint32_t width, height;

      /* Do not write the clear color value for levels without DCC. */
      if (!radv_dcc_enabled(image, range->baseMipLevel + l))
         continue;

      width = u_minify(image->vk.extent.width, range->baseMipLevel + l);
      height = u_minify(image->vk.extent.height, range->baseMipLevel + l);

      /* Single-level view over all selected layers, with compression
       * disabled so the stores land in the color data directly. */
      radv_image_view_init(&iview, device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(image),
                              .viewType = VK_IMAGE_VIEW_TYPE_2D,
                              .format = format,
                              .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                                   .baseMipLevel = range->baseMipLevel + l,
                                                   .levelCount = 1,
                                                   .baseArrayLayer = range->baseArrayLayer,
                                                   .layerCount = layer_count},
                           },
                           0, &(struct radv_image_view_extra_create_info){.disable_compression = true});

      radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                    device->meta_state.clear_dcc_comp_to_single_p_layout, 0, 1,
                                    (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                              .dstBinding = 0,
                                                              .dstArrayElement = 0,
                                                              .descriptorCount = 1,
                                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                              .pImageInfo = (VkDescriptorImageInfo[]){
                                                                 {
                                                                    .sampler = VK_NULL_HANDLE,
                                                                    .imageView = radv_image_view_to_handle(&iview),
                                                                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                                 },
                                                              }}});

      /* One invocation per DCC block. */
      unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
      unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);

      /* Push constants: DCC block dimensions followed by the clear color. */
      const unsigned constants[6] = {
         image->planes[0].surface.u.gfx9.color.dcc_block_width,
         image->planes[0].surface.u.gfx9.color.dcc_block_height,
         color_values[0],
         color_values[1],
         color_values[2],
         color_values[3],
      };

      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                                 device->meta_state.clear_dcc_comp_to_single_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                 24, constants);

      radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);

      radv_image_view_finish(&iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                                 VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
1334 
/**
 * Clear the HTILE (depth/stencil metadata) buffer of an image to \p value for
 * the subresources described by \p range.
 *
 * Returns the cache flush bits the caller must apply before the cleared
 * metadata can be consumed.
 */
uint32_t
radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                 const VkImageSubresourceRange *range, uint32_t value)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t flush_bits = 0;
   uint32_t htile_mask;

   /* Mask of the HTILE bits owned by the requested aspects (depth and/or
    * stencil); UINT_MAX means every bit is covered and a plain fill suffices.
    */
   htile_mask = radv_get_htile_mask(device, image, range->aspectMask);

   if (level_count != image->vk.mip_levels) {
      /* Partial mip ranges rely on per-level HTILE metadata (GFX10+). */
      assert(pdev->info.gfx_level >= GFX10);

      /* Clear individual levels separately. */
      for (uint32_t l = 0; l < level_count; l++) {
         uint32_t level = range->baseMipLevel + l;
         uint64_t htile_offset =
            image->planes[0].surface.meta_offset + image->planes[0].surface.u.gfx9.meta_levels[level].offset;
         uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;

         /* Do not clear this level if it can be compressed. */
         if (!size)
            continue;

         if (htile_mask == UINT_MAX) {
            /* Clear the whole HTILE buffer. */
            flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
                                           radv_image_get_va(image, 0) + htile_offset, size, value);
         } else {
            /* Only clear depth or stencil bytes in the HTILE buffer. */
            flush_bits |= clear_htile_mask(cmd_buffer, image, image->bindings[0].bo,
                                           image->bindings[0].offset + htile_offset, size, value, htile_mask);
         }
      }
   } else {
      /* All levels are cleared: clear whole slices in one go. */
      unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
      uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;
      uint64_t htile_offset =
         image->planes[0].surface.meta_offset + image->planes[0].surface.meta_slice_size * range->baseArrayLayer;

      if (htile_mask == UINT_MAX) {
         /* Clear the whole HTILE buffer. */
         flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
                                       radv_image_get_va(image, 0) + htile_offset, size, value);
      } else {
         /* Only clear depth or stencil bytes in the HTILE buffer. */
         flush_bits = clear_htile_mask(cmd_buffer, image, image->bindings[0].bo,
                                       image->bindings[0].offset + htile_offset, size, value, htile_mask);
      }
   }

   return flush_bits;
}
1390 
/* DCC "clear codes": values written over the DCC metadata buffer to perform a
 * fast clear.  The name suffix encodes the per-channel clear pattern the code
 * stands for (e.g. _0001 = color channels 0, alpha 1).  See
 * gfx8_get_fast_clear_parameters() / gfx11_get_fast_clear_parameters() for how
 * a VkClearColorValue is mapped onto these codes.
 */
enum {
   /* All channels zero (used by both the GFX8 and GFX11 paths). */
   RADV_DCC_CLEAR_0000 = 0x00000000U,
   /* Pre-GFX11 fixed clear patterns. */
   RADV_DCC_GFX8_CLEAR_0001 = 0x40404040U,
   RADV_DCC_GFX8_CLEAR_1110 = 0x80808080U,
   RADV_DCC_GFX8_CLEAR_1111 = 0xC0C0C0C0U,
   /* Clear color comes from registers; requires a fast-clear eliminate pass. */
   RADV_DCC_GFX8_CLEAR_REG = 0x20202020U,
   /* comp-to-single: clear color is stored in the surface itself (see
    * radv_clear_dcc_comp_to_single usage), no FCE needed.
    */
   RADV_DCC_GFX9_CLEAR_SINGLE = 0x10101010U,
   /* GFX11 equivalents of the codes above. */
   RADV_DCC_GFX11_CLEAR_SINGLE = 0x01010101U,
   RADV_DCC_GFX11_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX11_CLEAR_1111_UNORM = 0x02020202U,
   RADV_DCC_GFX11_CLEAR_1111_FP16 = 0x04040404U,
   RADV_DCC_GFX11_CLEAR_1111_FP32 = 0x06060606U,
   RADV_DCC_GFX11_CLEAR_0001_UNORM = 0x08080808U,
   RADV_DCC_GFX11_CLEAR_1110_UNORM = 0x0A0A0A0AU,
};
1406 
1407 static uint32_t
radv_dcc_single_clear_value(const struct radv_device * device)1408 radv_dcc_single_clear_value(const struct radv_device *device)
1409 {
1410    const struct radv_physical_device *pdev = radv_device_physical(device);
1411    return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
1412 }
1413 
/* Compute the pre-GFX11 DCC clear code for \p clear_value and report through
 * \p can_avoid_fast_clear_elim whether a fast-clear-eliminate (FCE) pass can
 * be skipped.
 *
 * Only clear colors whose channels are all 0 or all 1 (per the format's
 * swizzle, with one "extra" channel allowed to differ — alpha, or the MSB
 * channel) map onto the fixed 0000/0001/1110/1111 codes.  Anything else keeps
 * the default chosen below: comp-to-single when supported, otherwise the
 * register-based code which requires an FCE.
 */
static void
gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                               const VkClearColorValue *clear_value, uint32_t *reset_value,
                               bool *can_avoid_fast_clear_elim)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool values[4] = {0};
   int extra_channel;
   bool main_value = false;
   bool extra_value = false;
   bool has_color = false;
   bool has_alpha = false;

   /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX9_CLEAR_SINGLE;
      *can_avoid_fast_clear_elim = true;
   } else {
      *reset_value = RADV_DCC_GFX8_CLEAR_REG;
      *can_avoid_fast_clear_elim = false;
   }

   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   /* Formats with no distinguishable alpha channel: no "extra" channel. */
   if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
       iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
      extra_channel = -1;
   else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      if (ac_alpha_is_on_msb(&pdev->info, vk_format_to_pipe_format(iview->vk.format)))
         extra_channel = desc->nr_channels - 1;
      else
         extra_channel = 0;
   } else
      /* Non-plain layouts: keep the default reset value chosen above. */
      return;

   for (int i = 0; i < 4; i++) {
      int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
      /* Skip constant swizzles — they hold no stored channel. */
      if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
         continue;

      /* Each channel must be exactly 0 or the representable "1" for its type;
       * otherwise only the default code is usable.
       */
      if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
         /* Use the maximum value for clamping the clear color. */
         int max = u_bit_consecutive(0, desc->channel[i].size - 1);

         values[i] = clear_value->int32[i] != 0;
         if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
            return;
      } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         /* Use the maximum value for clamping the clear color. */
         unsigned max = u_bit_consecutive(0, desc->channel[i].size);

         values[i] = clear_value->uint32[i] != 0U;
         if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
            return;
      } else {
         values[i] = clear_value->float32[i] != 0.0F;
         if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
            return;
      }

      if (index == extra_channel) {
         extra_value = values[i];
         has_alpha = true;
      } else {
         main_value = values[i];
         has_color = true;
      }
   }

   /* If alpha isn't present, make it the same as color, and vice versa. */
   if (!has_alpha)
      extra_value = main_value;
   else if (!has_color)
      main_value = extra_value;

   /* All non-extra channels must agree, or no fixed code applies. */
   for (int i = 0; i < 4; ++i)
      if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
          desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
         return;

   /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */
   if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret)
      return;

   *can_avoid_fast_clear_elim = true;

   if (main_value) {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_1111;
      else
         *reset_value = RADV_DCC_GFX8_CLEAR_1110;
   } else {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_0001;
      else
         *reset_value = RADV_DCC_CLEAR_0000;
   }
}
1511 
/* Try to map \p clear_value onto one of the GFX11 DCC clear codes.
 *
 * Returns true and writes the code to \p *reset_value on success; returns
 * false when the value cannot be expressed as a GFX11 fast clear (the caller
 * must then fall back to a slow clear).
 */
static bool
gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                                const VkClearColorValue *clear_value, uint32_t *reset_value)
{
   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   unsigned start_bit = UINT_MAX;
   unsigned end_bit = 0;

   /* TODO: 8bpp and 16bpp fast DCC clears don't work. */
   if (desc->block.bits <= 16)
      return false;

   /* Find the used bit range. */
   for (unsigned i = 0; i < 4; i++) {
      unsigned swizzle = desc->swizzle[i];

      /* Skip constant swizzles — they carry no stored bits. */
      if (swizzle >= PIPE_SWIZZLE_0)
         continue;

      start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
      end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
   }

   /* Pack the clear color into the format's in-memory representation so the
    * raw bit pattern can be inspected below.
    */
   union {
      uint8_t ub[16];
      uint16_t us[8];
      uint32_t ui[4];
   } value;
   memset(&value, 0, sizeof(value));
   util_format_pack_rgba(vk_format_to_pipe_format(iview->vk.format), &value, clear_value, 1);

   /* Check the cases where all components or bits are either all 0 or all 1. */
   bool all_bits_are_0 = true;
   bool all_bits_are_1 = true;
   bool all_words_are_fp16_1 = false;
   bool all_words_are_fp32_1 = false;

   for (unsigned i = start_bit; i < end_bit; i++) {
      bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);

      all_bits_are_0 &= !bit;
      all_bits_are_1 &= bit;
   }

   /* 0x3c00 is 1.0 in FP16; only checked when the used bits are 16-bit aligned. */
   if (start_bit % 16 == 0 && end_bit % 16 == 0) {
      all_words_are_fp16_1 = true;
      for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
         all_words_are_fp16_1 &= value.us[i] == 0x3c00;
   }

   /* 0x3f800000 is 1.0 in FP32; only checked when the used bits are 32-bit aligned. */
   if (start_bit % 32 == 0 && end_bit % 32 == 0) {
      all_words_are_fp32_1 = true;
      for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
         all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
   }

   if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
      if (all_bits_are_0)
         *reset_value = RADV_DCC_CLEAR_0000;
      else if (all_bits_are_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_UNORM;
      else if (all_words_are_fp16_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP16;
      else if (all_words_are_fp32_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP32;
      return true;
   }

   /* Special-case the 0001/1110 patterns for 8-bit and 16-bit channel layouts. */
   if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
      if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
      if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && value.ub[2] == 0x00 && value.ub[3] == 0xff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && value.ub[2] == 0xff && value.ub[3] == 0x00) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
      if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && value.us[2] == 0x0000 && value.us[3] == 0xffff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && value.us[2] == 0xffff && value.us[3] == 0x0000) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   }

   /* Arbitrary colors still fast clear when the clear value can be stored
    * per-block (comp-to-single).
    */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX11_CLEAR_SINGLE;
      return true;
   }

   return false;
}
1613 
/* Check whether a color attachment clear can be performed as a fast clear
 * (updating CMASK/DCC metadata instead of rasterizing), given the attachment
 * layout, the clear rect/value and the multiview mask.
 */
static bool
radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
                          uint32_t view_mask)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t clear_color[2];

   if (!iview || !iview->support_fast_clear)
      return false;

   /* The image layout/queue families must allow fast-clearable access. */
   if (!radv_layout_can_fast_clear(device, iview->image, iview->vk.base_mip_level, image_layout,
                                   radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
      return false;

   /* Fast clears only work when the whole image extent is cleared. */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->vk.extent.width ||
       clear_rect->rect.extent.height != iview->image->vk.extent.height)
      return false;

   /* With multiview, the view mask must cover every array layer; without it,
    * the clear rect must cover every layer.
    */
   if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
      return false;
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
      return false;

   /* DCC */

   /* Images that support comp-to-single clears don't have clear values. */
   if (!iview->image->support_comp_to_single) {
      if (!radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value))
         return false;

      /* Without clear-value metadata, only an all-zero packed color works. */
      if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
         return false;
   }

   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      bool can_avoid_fast_clear_elim;
      uint32_t reset_value;

      if (pdev->info.gfx_level >= GFX11) {
         /* GFX11 rejects clear values it has no DCC clear code for. */
         if (!gfx11_get_fast_clear_parameters(device, iview, &clear_value, &reset_value))
            return false;
      } else {
         gfx8_get_fast_clear_parameters(device, iview, &clear_value, &reset_value, &can_avoid_fast_clear_elim);
      }

      if (iview->image->vk.mip_levels > 1) {
         if (pdev->info.gfx_level >= GFX9) {
            uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
            if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
               /* Do not fast clear if one level can't be fast cleared. */
               return false;
            }
         } else {
            for (uint32_t l = 0; l < iview->vk.level_count; l++) {
               uint32_t level = iview->vk.base_mip_level + l;
               struct legacy_surf_dcc_level *dcc_level =
                  &iview->image->planes[0].surface.u.legacy.color.dcc_level[level];

               /* Do not fast clear if one level can't be
                * fast cleared.
                */
               if (!dcc_level->dcc_fast_clear_size)
                  return false;
            }
         }
      }
   }

   return true;
}
1689 
/* Perform a color fast clear by writing CMASK/DCC metadata instead of
 * rasterizing, and record whether a fast-clear-eliminate (decompress) pass is
 * still needed later.  Callers must have validated the clear with
 * radv_can_fast_clear_color() first.
 */
static void
radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
                      enum radv_cmd_flush_bits *post_flush)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkClearColorValue clear_value = clear_att->clearValue.color;
   uint32_t clear_color[4], flush_bits = 0;
   uint32_t cmask_clear_value;
   VkImageSubresourceRange range = {
      .aspectMask = iview->vk.aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   /* Flush prior color-attachment writes before touching the metadata,
    * skipping bits the caller already flushed (tracked in *pre_flush).
    */
   if (pre_flush) {
      enum radv_cmd_flush_bits bits = radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
                                                            VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   /* DCC */
   radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value);

   cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);

   /* clear cmask buffer */
   bool need_decompress_pass = false;
   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      uint32_t reset_value;
      bool can_avoid_fast_clear_elim = true;

      if (pdev->info.gfx_level >= GFX11) {
         /* Must succeed: radv_can_fast_clear_color() already validated this. */
         ASSERTED bool result = gfx11_get_fast_clear_parameters(device, iview, &clear_value, &reset_value);
         assert(result);
      } else {
         gfx8_get_fast_clear_parameters(device, iview, &clear_value, &reset_value, &can_avoid_fast_clear_elim);
      }

      if (radv_image_has_cmask(iview->image)) {
         flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
      }

      if (!can_avoid_fast_clear_elim)
         need_decompress_pass = true;

      flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);

      if (reset_value == radv_dcc_single_clear_value(device)) {
         /* Write the clear color to the first byte of each 256B block when the image supports DCC
          * fast clears with comp-to-single.
          */
         if (vk_format_get_blocksize(iview->image->vk.format) == 16) {
            /* 128-bit formats store the raw clear value as-is. */
            flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_value.uint32);
         } else {
            /* Smaller formats store the packed color; the upper dwords are unused. */
            clear_color[2] = clear_color[3] = 0;
            flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color);
         }
      }
   } else {
      flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);

      /* Fast clearing with CMASK should always be eliminated. */
      need_decompress_pass = true;
   }

   /* Hand the required flushes to the caller instead of flushing now. */
   if (post_flush) {
      *post_flush |= flush_bits;
   }

   /* Update the FCE predicate to perform a fast-clear eliminate. */
   radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);

   radv_update_color_clear_metadata(cmd_buffer, iview, clear_att->colorAttachment, clear_color);
}
1769 
/**
 * Clear one attachment, choosing between a metadata fast clear and a
 * rasterized clear per aspect.  The parameters mean the same as those in
 * vkCmdClearAttachments.
 */
static void
emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
           enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush, uint32_t view_mask)
{
   const struct radv_rendering_state *render = &cmd_buffer->state.render;
   VkImageAspectFlags aspects = clear_att->aspectMask;

   if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      assert(clear_att->colorAttachment < render->color_att_count);
      const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];

      /* Unbound attachment: nothing to clear. */
      if (color_att->format == VK_FORMAT_UNDEFINED)
         return;

      VkClearColorValue clear_value = clear_att->clearValue.color;

      if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, clear_value,
                                    view_mask)) {
         radv_fast_clear_color(cmd_buffer, color_att->iview, clear_att, pre_flush, post_flush);
      } else {
         emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
      }
   } else {
      const struct radv_attachment *ds_att = &render->ds_att;

      /* Unbound depth/stencil attachment: nothing to clear. */
      if (ds_att->format == VK_FORMAT_UNDEFINED)
         return;

      VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;

      assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
      bool can_fast_clear_depth = false;
      bool can_fast_clear_stencil = false;
      /* When depth and stencil sit in different layouts, each aspect must be
       * checked against its own layout; otherwise one check covers both.
       */
      if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) &&
          ds_att->layout != ds_att->stencil_layout) {
         can_fast_clear_depth = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects,
                                                          clear_rect, clear_value, view_mask);
         can_fast_clear_stencil = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->stencil_layout, aspects,
                                                            clear_rect, clear_value, view_mask);
      } else {
         VkImageLayout layout = aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? ds_att->layout : ds_att->stencil_layout;
         can_fast_clear_depth =
            radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, layout, aspects, clear_rect, clear_value, view_mask);
         can_fast_clear_stencil = can_fast_clear_depth;
      }

      if (can_fast_clear_depth && can_fast_clear_stencil) {
         /* Both aspects fast clearable: one combined fast clear. */
         radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil, clear_att->aspectMask,
                               pre_flush, post_flush);
      } else if (!can_fast_clear_depth && !can_fast_clear_stencil) {
         /* Neither: one combined slow (rasterized) clear. */
         emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, clear_att->aspectMask, clear_rect,
                                 view_mask, false);
      } else {
         /* Mixed: clear each aspect separately along its own path. */
         if (can_fast_clear_depth) {
            radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
                                  VK_IMAGE_ASPECT_DEPTH_BIT, pre_flush, post_flush);
         } else {
            emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_DEPTH_BIT,
                                    clear_rect, view_mask, can_fast_clear_depth);
         }

         if (can_fast_clear_stencil) {
            radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
                                  VK_IMAGE_ASPECT_STENCIL_BIT, pre_flush, post_flush);
         } else {
            emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_STENCIL_BIT,
                                    clear_rect, view_mask, can_fast_clear_stencil);
         }
      }
   }
}
1844 
1845 static bool
radv_rendering_needs_clear(const VkRenderingInfo * pRenderingInfo)1846 radv_rendering_needs_clear(const VkRenderingInfo *pRenderingInfo)
1847 {
1848    for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
1849       if (pRenderingInfo->pColorAttachments[i].imageView != VK_NULL_HANDLE &&
1850           pRenderingInfo->pColorAttachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1851          return true;
1852    }
1853 
1854    if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
1855        pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1856       return true;
1857 
1858    if (pRenderingInfo->pStencilAttachment != NULL && pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
1859        pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1860       return true;
1861 
1862    return false;
1863 }
1864 
1865 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1866 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
1867                               enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
1868 {
1869    const struct radv_rendering_state *render = &cmd_buffer->state.render;
1870 
1871    VkClearRect clear_rect = {
1872       .rect = render->area,
1873       .baseArrayLayer = 0,
1874       .layerCount = render->layer_count,
1875    };
1876 
1877    radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1878 
1879    emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush, render->view_mask);
1880 
1881    radv_describe_end_render_pass_clear(cmd_buffer);
1882 }
1883 
/**
 * Emit the VK_ATTACHMENT_LOAD_OP_CLEAR clears requested by the current
 * vkCmdBeginRendering() call, for the color attachments and the depth/stencil
 * attachment.  Does nothing when no attachment uses LOAD_OP_CLEAR.
 */
void
radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo)
{
   const struct radv_rendering_state *render = &cmd_buffer->state.render;
   struct radv_meta_saved_state saved_state;
   enum radv_cmd_flush_bits pre_flush = 0;
   enum radv_cmd_flush_bits post_flush = 0;

   if (!radv_rendering_needs_clear(pRenderingInfo))
      return;

   /* Subpass clear should not be affected by conditional rendering. */
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);

   assert(render->color_att_count == pRenderingInfo->colorAttachmentCount);
   for (uint32_t i = 0; i < render->color_att_count; i++) {
      /* Skip unbound attachments and those not configured for clearing. */
      if (render->color_att[i].iview == NULL ||
          pRenderingInfo->pColorAttachments[i].loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
         continue;

      VkClearAttachment clear_att = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .colorAttachment = i,
         .clearValue = pRenderingInfo->pColorAttachments[i].clearValue,
      };

      radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
   }

   if (render->ds_att.iview != NULL) {
      /* Depth and stencil are combined into a single clear when both request it. */
      VkClearAttachment clear_att = {.aspectMask = 0};

      if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
          pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         clear_att.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
         clear_att.clearValue.depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
      }

      if (pRenderingInfo->pStencilAttachment != NULL &&
          pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
          pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         clear_att.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
         clear_att.clearValue.depthStencil.stencil =
            pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
      }

      if (clear_att.aspectMask != 0) {
         radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);
   /* Flushes produced by the fast clears are deferred to here. */
   cmd_buffer->state.flush_bits |= post_flush;
}
1944 
1945 static void
radv_clear_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkImageSubresourceRange * range,VkFormat format,int level,unsigned layer_count,const VkClearValue * clear_val)1946 radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
1947                        const VkImageSubresourceRange *range, VkFormat format, int level, unsigned layer_count,
1948                        const VkClearValue *clear_val)
1949 {
1950    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1951    struct radv_image_view iview;
1952    uint32_t width = u_minify(image->vk.extent.width, range->baseMipLevel + level);
1953    uint32_t height = u_minify(image->vk.extent.height, range->baseMipLevel + level);
1954 
1955    radv_image_view_init(&iview, device,
1956                         &(VkImageViewCreateInfo){
1957                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1958                            .image = radv_image_to_handle(image),
1959                            .viewType = radv_meta_get_view_type(image),
1960                            .format = format,
1961                            .subresourceRange = {.aspectMask = range->aspectMask,
1962                                                 .baseMipLevel = range->baseMipLevel + level,
1963                                                 .levelCount = 1,
1964                                                 .baseArrayLayer = range->baseArrayLayer,
1965                                                 .layerCount = layer_count},
1966                         },
1967                         0, NULL);
1968 
1969    VkClearAttachment clear_att = {
1970       .aspectMask = range->aspectMask,
1971       .colorAttachment = 0,
1972       .clearValue = *clear_val,
1973    };
1974 
1975    VkClearRect clear_rect = {
1976       .rect =
1977          {
1978             .offset = {0, 0},
1979             .extent = {width, height},
1980          },
1981       .baseArrayLayer = 0,
1982       .layerCount = layer_count,
1983    };
1984 
1985    VkRenderingAttachmentInfo att = {
1986       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1987       .imageView = radv_image_view_to_handle(&iview),
1988       .imageLayout = image_layout,
1989       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1990       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1991    };
1992 
1993    VkRenderingInfo rendering_info = {
1994       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
1995       .flags = VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA,
1996       .renderArea =
1997          {
1998             .offset = {0, 0},
1999             .extent = {width, height},
2000          },
2001       .layerCount = layer_count,
2002    };
2003 
2004    if (image->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
2005       rendering_info.colorAttachmentCount = 1;
2006       rendering_info.pColorAttachments = &att;
2007    }
2008    if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
2009       rendering_info.pDepthAttachment = &att;
2010    if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
2011       rendering_info.pStencilAttachment = &att;
2012 
2013    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
2014 
2015    emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0);
2016 
2017    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
2018 
2019    radv_image_view_finish(&iview);
2020 }
2021 
2022 /**
2023  * Return TRUE if a fast color or depth clear has been performed.
2024  */
2025 static bool
radv_fast_clear_range(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkFormat format,VkImageLayout image_layout,const VkImageSubresourceRange * range,const VkClearValue * clear_val)2026 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
2027                       VkImageLayout image_layout, const VkImageSubresourceRange *range, const VkClearValue *clear_val)
2028 {
2029    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2030    struct radv_image_view iview;
2031    bool fast_cleared = false;
2032 
2033    radv_image_view_init(&iview, device,
2034                         &(VkImageViewCreateInfo){
2035                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2036                            .image = radv_image_to_handle(image),
2037                            .viewType = radv_meta_get_view_type(image),
2038                            .format = image->vk.format,
2039                            .subresourceRange =
2040                               {
2041                                  .aspectMask = range->aspectMask,
2042                                  .baseMipLevel = range->baseMipLevel,
2043                                  .levelCount = vk_image_subresource_level_count(&image->vk, range),
2044                                  .baseArrayLayer = range->baseArrayLayer,
2045                                  .layerCount = vk_image_subresource_layer_count(&image->vk, range),
2046                               },
2047                         },
2048                         0, NULL);
2049 
2050    VkClearRect clear_rect = {
2051       .rect =
2052          {
2053             .offset = {0, 0},
2054             .extent =
2055                {
2056                   u_minify(image->vk.extent.width, range->baseMipLevel),
2057                   u_minify(image->vk.extent.height, range->baseMipLevel),
2058                },
2059          },
2060       .baseArrayLayer = range->baseArrayLayer,
2061       .layerCount = vk_image_subresource_layer_count(&image->vk, range),
2062    };
2063 
2064    VkClearAttachment clear_att = {
2065       .aspectMask = range->aspectMask,
2066       .colorAttachment = 0,
2067       .clearValue = *clear_val,
2068    };
2069 
2070    if (vk_format_is_color(format)) {
2071       if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, clear_att.clearValue.color, 0)) {
2072          radv_fast_clear_color(cmd_buffer, &iview, &clear_att, NULL, NULL);
2073          fast_cleared = true;
2074       }
2075    } else {
2076       if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, range->aspectMask, &clear_rect,
2077                                     clear_att.clearValue.depthStencil, 0)) {
2078          radv_fast_clear_depth(cmd_buffer, &iview, clear_att.clearValue.depthStencil, clear_att.aspectMask, NULL, NULL);
2079          fast_cleared = true;
2080       }
2081    }
2082 
2083    radv_image_view_finish(&iview);
2084    return fast_cleared;
2085 }
2086 
2087 static void
radv_cmd_clear_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkClearValue * clear_value,uint32_t range_count,const VkImageSubresourceRange * ranges,bool cs)2088 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
2089                      const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
2090                      bool cs)
2091 {
2092    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2093    const struct radv_physical_device *pdev = radv_device_physical(device);
2094    VkFormat format = image->vk.format;
2095    VkClearValue internal_clear_value;
2096 
2097    if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
2098       internal_clear_value.color = clear_value->color;
2099    else
2100       internal_clear_value.depthStencil = clear_value->depthStencil;
2101 
2102    bool disable_compression = false;
2103 
2104    if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
2105       if (cs ? !radv_is_storage_image_format_supported(pdev, format)
2106              : !radv_is_colorbuffer_format_supported(pdev, format)) {
2107          format = VK_FORMAT_R32_UINT;
2108          internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);
2109 
2110          uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
2111 
2112          for (uint32_t r = 0; r < range_count; r++) {
2113             const VkImageSubresourceRange *range = &ranges[r];
2114 
2115             /* Don't use compressed image stores because they will use an incompatible format. */
2116             if (radv_layout_dcc_compressed(device, image, range->baseMipLevel, image_layout, queue_mask)) {
2117                disable_compression = cs;
2118                break;
2119             }
2120          }
2121       }
2122    }
2123 
2124    if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
2125       uint8_t r, g;
2126       format = VK_FORMAT_R8_UINT;
2127       r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
2128       g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
2129       internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
2130    }
2131 
2132    for (uint32_t r = 0; r < range_count; r++) {
2133       const VkImageSubresourceRange *range = &ranges[r];
2134 
2135       /* Try to perform a fast clear first, otherwise fallback to
2136        * the legacy path.
2137        */
2138       if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, &internal_clear_value)) {
2139          continue;
2140       }
2141 
2142       for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); ++l) {
2143          const uint32_t layer_count = image->vk.image_type == VK_IMAGE_TYPE_3D
2144                                          ? u_minify(image->vk.extent.depth, range->baseMipLevel + l)
2145                                          : vk_image_subresource_layer_count(&image->vk, range);
2146          if (cs) {
2147             for (uint32_t s = 0; s < layer_count; ++s) {
2148                struct radv_meta_blit2d_surf surf;
2149                surf.format = format;
2150                surf.image = image;
2151                surf.level = range->baseMipLevel + l;
2152                surf.layer = range->baseArrayLayer + s;
2153                surf.aspect_mask = range->aspectMask;
2154                surf.disable_compression = disable_compression;
2155                radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
2156             }
2157          } else {
2158             assert(!disable_compression);
2159             radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count,
2160                                    &internal_clear_value);
2161          }
2162       }
2163    }
2164 
2165    if (disable_compression) {
2166       enum radv_cmd_flush_bits flush_bits = 0;
2167       for (unsigned i = 0; i < range_count; i++) {
2168          if (radv_dcc_enabled(image, ranges[i].baseMipLevel))
2169             flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);
2170       }
2171       cmd_buffer->state.flush_bits |= flush_bits;
2172    }
2173 }
2174 
2175 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2176 radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2177                         const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
2178 {
2179    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2180    VK_FROM_HANDLE(radv_image, image, image_h);
2181    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2182    struct radv_meta_saved_state saved_state;
2183    bool cs;
2184 
2185    cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(device, image);
2186 
2187    /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
2188     */
2189    enum radv_meta_save_flags save_flags = RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING;
2190    if (cs)
2191       save_flags |= RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS;
2192    else
2193       save_flags |= RADV_META_SAVE_GRAPHICS_PIPELINE;
2194 
2195    radv_meta_save(&saved_state, cmd_buffer, save_flags);
2196 
2197    radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, pRanges, cs);
2198 
2199    radv_meta_restore(&saved_state, cmd_buffer);
2200 }
2201 
2202 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2203 radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2204                                const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
2205                                const VkImageSubresourceRange *pRanges)
2206 {
2207    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2208    VK_FROM_HANDLE(radv_image, image, image_h);
2209    struct radv_meta_saved_state saved_state;
2210 
2211    /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
2212    radv_meta_save(&saved_state, cmd_buffer,
2213                   RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2214 
2215    radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, rangeCount, pRanges,
2216                         false);
2217 
2218    radv_meta_restore(&saved_state, cmd_buffer);
2219 }
2220 
2221 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)2222 radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
2223                          uint32_t rectCount, const VkClearRect *pRects)
2224 {
2225    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2226    struct radv_meta_saved_state saved_state;
2227    enum radv_cmd_flush_bits pre_flush = 0;
2228    enum radv_cmd_flush_bits post_flush = 0;
2229 
2230    if (!cmd_buffer->state.render.active)
2231       return;
2232 
2233    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
2234 
2235    /* FINISHME: We can do better than this dumb loop. It thrashes too much
2236     * state.
2237     */
2238    for (uint32_t a = 0; a < attachmentCount; ++a) {
2239       for (uint32_t r = 0; r < rectCount; ++r) {
2240          emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2241                     cmd_buffer->state.render.view_mask);
2242       }
2243    }
2244 
2245    radv_meta_restore(&saved_state, cmd_buffer);
2246    cmd_buffer->state.flush_bits |= post_flush;
2247 }
2248