1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "nir/nir_builder.h"
8 #include "radv_debug.h"
9 #include "radv_entrypoints.h"
10 #include "radv_formats.h"
11 #include "radv_meta.h"
12
13 #include "util/format_rgb9e5.h"
14 #include "vk_common_entrypoints.h"
15 #include "vk_format.h"
16 #include "vk_shader_module.h"
17
18 #include "ac_formats.h"
19
/* Index of the depth/stencil clear pipeline variant: SLOW is the plain
 * draw-based clear, FAST is the variant created with the DB depth/stencil
 * clear bits set (see create_depthstencil_pipeline).
 */
enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST };
21
22 static void
build_color_shaders(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)23 build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
24 uint32_t frag_output)
25 {
26 nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
27 nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
28
29 const struct glsl_type *position_type = glsl_vec4_type();
30 const struct glsl_type *color_type = glsl_vec4_type();
31
32 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
33 vs_out_pos->data.location = VARYING_SLOT_POS;
34
35 nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
36
37 nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
38 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
39
40 nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
41
42 nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
43 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
44
45 const struct glsl_type *layer_type = glsl_int_type();
46 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
47 vs_out_layer->data.location = VARYING_SLOT_LAYER;
48 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
49 nir_def *inst_id = nir_load_instance_id(&vs_b);
50 nir_def *base_instance = nir_load_base_instance(&vs_b);
51
52 nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
53 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
54
55 *out_vs = vs_b.shader;
56 *out_fs = fs_b.shader;
57 }
58
59 static VkResult
create_pipeline(struct radv_device * device,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineRenderingCreateInfo * dyn_state,const VkPipelineLayout layout,const struct radv_graphics_pipeline_create_info * extra,const VkAllocationCallbacks * alloc,VkPipeline * pipeline)60 create_pipeline(struct radv_device *device, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *fs_nir,
61 const VkPipelineVertexInputStateCreateInfo *vi_state,
62 const VkPipelineDepthStencilStateCreateInfo *ds_state,
63 const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineRenderingCreateInfo *dyn_state,
64 const VkPipelineLayout layout, const struct radv_graphics_pipeline_create_info *extra,
65 const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
66 {
67 VkDevice device_h = radv_device_to_handle(device);
68 VkResult result;
69
70 result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
71 &(VkGraphicsPipelineCreateInfo){
72 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
73 .pNext = dyn_state,
74 .stageCount = fs_nir ? 2 : 1,
75 .pStages =
76 (VkPipelineShaderStageCreateInfo[]){
77 {
78 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
79 .stage = VK_SHADER_STAGE_VERTEX_BIT,
80 .module = vk_shader_module_handle_from_nir(vs_nir),
81 .pName = "main",
82 },
83 {
84 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
85 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
86 .module = vk_shader_module_handle_from_nir(fs_nir),
87 .pName = "main",
88 },
89 },
90 .pVertexInputState = vi_state,
91 .pInputAssemblyState =
92 &(VkPipelineInputAssemblyStateCreateInfo){
93 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
94 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
95 .primitiveRestartEnable = false,
96 },
97 .pViewportState =
98 &(VkPipelineViewportStateCreateInfo){
99 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
100 .viewportCount = 1,
101 .scissorCount = 1,
102 },
103 .pRasterizationState =
104 &(VkPipelineRasterizationStateCreateInfo){
105 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
106 .rasterizerDiscardEnable = false,
107 .polygonMode = VK_POLYGON_MODE_FILL,
108 .cullMode = VK_CULL_MODE_NONE,
109 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
110 .depthBiasEnable = false,
111 .depthBiasConstantFactor = 0.0f,
112 .depthBiasClamp = 0.0f,
113 .depthBiasSlopeFactor = 0.0f,
114 .lineWidth = 1.0f,
115 },
116 .pMultisampleState =
117 &(VkPipelineMultisampleStateCreateInfo){
118 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
119 .rasterizationSamples = samples,
120 .sampleShadingEnable = false,
121 .pSampleMask = NULL,
122 .alphaToCoverageEnable = false,
123 .alphaToOneEnable = false,
124 },
125 .pDepthStencilState = ds_state,
126 .pColorBlendState = cb_state,
127 .pDynamicState =
128 &(VkPipelineDynamicStateCreateInfo){
129 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
130 .dynamicStateCount = 3,
131 .pDynamicStates =
132 (VkDynamicState[]){
133 VK_DYNAMIC_STATE_VIEWPORT,
134 VK_DYNAMIC_STATE_SCISSOR,
135 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
136 },
137 },
138 .layout = layout,
139 .flags = 0,
140 .renderPass = VK_NULL_HANDLE,
141 .subpass = 0,
142 },
143 extra, alloc, pipeline);
144
145 ralloc_free(vs_nir);
146 ralloc_free(fs_nir);
147
148 return result;
149 }
150
151 static VkResult
create_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline)152 create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
153 VkPipeline *pipeline)
154 {
155 struct nir_shader *vs_nir;
156 struct nir_shader *fs_nir;
157 VkResult result;
158
159 if (!device->meta_state.clear_color_p_layout) {
160 const VkPushConstantRange pc_range_color = {
161 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
162 .size = 16,
163 };
164
165 result =
166 radv_meta_create_pipeline_layout(device, NULL, 1, &pc_range_color, &device->meta_state.clear_color_p_layout);
167 if (result != VK_SUCCESS)
168 return result;
169 }
170
171 build_color_shaders(device, &vs_nir, &fs_nir, frag_output);
172
173 const VkPipelineVertexInputStateCreateInfo vi_state = {
174 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
175 .vertexBindingDescriptionCount = 0,
176 .vertexAttributeDescriptionCount = 0,
177 };
178
179 const VkPipelineDepthStencilStateCreateInfo ds_state = {
180 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
181 .depthTestEnable = false,
182 .depthWriteEnable = false,
183 .depthBoundsTestEnable = false,
184 .stencilTestEnable = false,
185 .minDepthBounds = 0.0f,
186 .maxDepthBounds = 1.0f,
187 };
188
189 VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
190 blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
191 .blendEnable = false,
192 .colorWriteMask =
193 VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
194 };
195
196 const VkPipelineColorBlendStateCreateInfo cb_state = {
197 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
198 .logicOpEnable = false,
199 .attachmentCount = MAX_RTS,
200 .pAttachments = blend_attachment_state,
201 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}};
202
203 VkFormat att_formats[MAX_RTS] = {0};
204 att_formats[frag_output] = format;
205
206 const VkPipelineRenderingCreateInfo rendering_create_info = {
207 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
208 .colorAttachmentCount = MAX_RTS,
209 .pColorAttachmentFormats = att_formats,
210 };
211
212 struct radv_graphics_pipeline_create_info extra = {
213 .use_rectlist = true,
214 };
215 result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
216 device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline);
217
218 return result;
219 }
220
221 static VkResult
get_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline_out)222 get_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
223 VkPipeline *pipeline_out)
224 {
225 struct radv_meta_state *state = &device->meta_state;
226 const uint32_t fs_key = radv_format_meta_fs_key(device, format);
227 const uint32_t samples_log2 = ffs(samples) - 1;
228 VkResult result = VK_SUCCESS;
229 VkPipeline *pipeline;
230
231 mtx_lock(&state->mtx);
232 pipeline = &state->color_clear[samples_log2][frag_output].color_pipelines[fs_key];
233 if (!*pipeline) {
234 result = create_color_pipeline(device, samples, frag_output, radv_fs_key_format_exemplars[fs_key], pipeline);
235 if (result != VK_SUCCESS)
236 goto fail;
237 }
238
239 *pipeline_out = *pipeline;
240
241 fail:
242 mtx_unlock(&state->mtx);
243 return result;
244 }
245
246 static void
finish_meta_clear_htile_mask_state(struct radv_device * device)247 finish_meta_clear_htile_mask_state(struct radv_device *device)
248 {
249 struct radv_meta_state *state = &device->meta_state;
250
251 radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, &state->alloc);
252 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, &state->alloc);
253 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
254 state->clear_htile_mask_ds_layout, &state->alloc);
255 }
256
257 static void
finish_meta_clear_dcc_comp_to_single_state(struct radv_device * device)258 finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
259 {
260 struct radv_meta_state *state = &device->meta_state;
261
262 for (uint32_t i = 0; i < 2; i++) {
263 radv_DestroyPipeline(radv_device_to_handle(device), state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
264 }
265 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, &state->alloc);
266 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
267 state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
268 }
269
270 void
radv_device_finish_meta_clear_state(struct radv_device * device)271 radv_device_finish_meta_clear_state(struct radv_device *device)
272 {
273 struct radv_meta_state *state = &device->meta_state;
274
275 for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
276 for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) {
277 for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) {
278 radv_DestroyPipeline(radv_device_to_handle(device), state->color_clear[i][j].color_pipelines[k],
279 &state->alloc);
280 }
281 }
282 }
283 for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
284 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
285 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
286 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_pipeline[j],
287 &state->alloc);
288 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_pipeline[j],
289 &state->alloc);
290
291 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_unrestricted_pipeline[j],
292 &state->alloc);
293 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
294 &state->alloc);
295 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
296 &state->alloc);
297 }
298 }
299 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, &state->alloc);
300 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc);
301 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_unrestricted_p_layout, &state->alloc);
302
303 finish_meta_clear_htile_mask_state(device);
304 finish_meta_clear_dcc_comp_to_single_state(device);
305 }
306
307 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)308 emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
309 uint32_t view_mask)
310 {
311 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
312 const struct radv_rendering_state *render = &cmd_buffer->state.render;
313 uint32_t samples;
314 VkFormat format;
315 VkClearColorValue clear_value = clear_att->clearValue.color;
316 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
317 VkPipeline pipeline;
318 VkResult result;
319
320 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
321 assert(clear_att->colorAttachment < render->color_att_count);
322 const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
323
324 /* When a framebuffer is bound to the current command buffer, get the
325 * number of samples from it. Otherwise, get the number of samples from
326 * the render pass because it's likely a secondary command buffer.
327 */
328 if (color_att->iview) {
329 samples = color_att->iview->image->vk.samples;
330 format = color_att->iview->vk.format;
331 } else {
332 samples = render->max_samples;
333 format = color_att->format;
334 }
335 assert(format != VK_FORMAT_UNDEFINED);
336
337 assert(util_is_power_of_two_nonzero(samples));
338
339 result = get_color_pipeline(device, samples, clear_att->colorAttachment, format, &pipeline);
340 if (result != VK_SUCCESS) {
341 vk_command_buffer_set_error(&cmd_buffer->vk, result);
342 return;
343 }
344
345 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_color_p_layout,
346 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &clear_value);
347
348 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
349
350 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
351 &(VkViewport){.x = clear_rect->rect.offset.x,
352 .y = clear_rect->rect.offset.y,
353 .width = clear_rect->rect.extent.width,
354 .height = clear_rect->rect.extent.height,
355 .minDepth = 0.0f,
356 .maxDepth = 1.0f});
357
358 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
359
360 if (view_mask) {
361 u_foreach_bit (i, view_mask)
362 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
363 } else {
364 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
365 }
366 }
367
368 static void
build_depthstencil_shader(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,bool unrestricted)369 build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
370 bool unrestricted)
371 {
372 nir_builder vs_b = radv_meta_init_shader(
373 dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
374 nir_builder fs_b =
375 radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT,
376 unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
377
378 const struct glsl_type *position_out_type = glsl_vec4_type();
379
380 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
381 vs_out_pos->data.location = VARYING_SLOT_POS;
382
383 nir_def *z;
384 if (unrestricted) {
385 nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
386
387 nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
388 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
389 nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
390
391 z = nir_imm_float(&vs_b, 0.0);
392 } else {
393 z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
394 }
395
396 nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
397 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
398
399 const struct glsl_type *layer_type = glsl_int_type();
400 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
401 vs_out_layer->data.location = VARYING_SLOT_LAYER;
402 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
403 nir_def *inst_id = nir_load_instance_id(&vs_b);
404 nir_def *base_instance = nir_load_base_instance(&vs_b);
405
406 nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
407 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
408
409 *out_vs = vs_b.shader;
410 *out_fs = fs_b.shader;
411 }
412
413 static VkResult
create_depthstencil_pipeline(struct radv_device * device,VkImageAspectFlags aspects,uint32_t samples,int index,bool unrestricted,VkPipeline * pipeline)414 create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, uint32_t samples, int index,
415 bool unrestricted, VkPipeline *pipeline)
416 {
417 struct nir_shader *vs_nir, *fs_nir;
418 VkResult result;
419
420 if (!device->meta_state.clear_depth_p_layout) {
421 const VkPushConstantRange pc_range_depth = {
422 .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
423 .size = 4,
424 };
425
426 result =
427 radv_meta_create_pipeline_layout(device, NULL, 1, &pc_range_depth, &device->meta_state.clear_depth_p_layout);
428 if (result != VK_SUCCESS)
429 return result;
430 }
431
432 if (!device->meta_state.clear_depth_unrestricted_p_layout) {
433 const VkPushConstantRange pc_range_depth_unrestricted = {
434 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
435 .size = 4,
436 };
437
438 result = radv_meta_create_pipeline_layout(device, NULL, 1, &pc_range_depth_unrestricted,
439 &device->meta_state.clear_depth_unrestricted_p_layout);
440 if (result != VK_SUCCESS)
441 return result;
442 }
443
444 build_depthstencil_shader(device, &vs_nir, &fs_nir, unrestricted);
445
446 const VkPipelineVertexInputStateCreateInfo vi_state = {
447 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
448 .vertexBindingDescriptionCount = 0,
449 .vertexAttributeDescriptionCount = 0,
450 };
451
452 const VkPipelineDepthStencilStateCreateInfo ds_state = {
453 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
454 .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
455 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
456 .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
457 .depthBoundsTestEnable = false,
458 .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
459 .front =
460 {
461 .passOp = VK_STENCIL_OP_REPLACE,
462 .compareOp = VK_COMPARE_OP_ALWAYS,
463 .writeMask = UINT32_MAX,
464 .reference = 0, /* dynamic */
465 },
466 .back = {0 /* dont care */},
467 .minDepthBounds = 0.0f,
468 .maxDepthBounds = 1.0f,
469 };
470
471 const VkPipelineColorBlendStateCreateInfo cb_state = {
472 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
473 .logicOpEnable = false,
474 .attachmentCount = 0,
475 .pAttachments = NULL,
476 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
477 };
478
479 const VkPipelineRenderingCreateInfo rendering_create_info = {
480 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
481 .depthAttachmentFormat = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
482 .stencilAttachmentFormat = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
483 };
484
485 struct radv_graphics_pipeline_create_info extra = {
486 .use_rectlist = true,
487 };
488
489 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
490 extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
491 }
492 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
493 extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
494 }
495 result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
496 device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline);
497
498 return result;
499 }
500
501 static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
502 VkImageLayout image_layout, VkImageAspectFlags aspects,
503 const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value,
504 uint32_t view_mask);
505
506 static VkResult
get_depth_stencil_pipeline(struct radv_device * device,int samples_log2,VkImageAspectFlags aspects,bool fast,VkPipeline * pipeline_out)507 get_depth_stencil_pipeline(struct radv_device *device, int samples_log2, VkImageAspectFlags aspects, bool fast,
508 VkPipeline *pipeline_out)
509 {
510 struct radv_meta_state *meta_state = &device->meta_state;
511 bool unrestricted = device->vk.enabled_extensions.EXT_depth_range_unrestricted;
512 int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW;
513 VkResult result = VK_SUCCESS;
514 VkPipeline *pipeline;
515
516 mtx_lock(&meta_state->mtx);
517
518 switch (aspects) {
519 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
520 pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
521 : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
522 break;
523 case VK_IMAGE_ASPECT_DEPTH_BIT:
524 pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
525 : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
526 break;
527 case VK_IMAGE_ASPECT_STENCIL_BIT:
528 pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
529 : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
530 break;
531 default:
532 unreachable("expected depth or stencil aspect");
533 }
534
535 if (!*pipeline) {
536 result = create_depthstencil_pipeline(device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
537 if (result != VK_SUCCESS)
538 goto fail;
539 }
540
541 *pipeline_out = *pipeline;
542
543 fail:
544 mtx_unlock(&meta_state->mtx);
545 return result;
546 }
547
548 static void
emit_depthstencil_clear(struct radv_cmd_buffer * cmd_buffer,VkClearDepthStencilValue clear_value,VkImageAspectFlags aspects,const VkClearRect * clear_rect,uint32_t view_mask,bool can_fast_clear)549 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, VkClearDepthStencilValue clear_value,
550 VkImageAspectFlags aspects, const VkClearRect *clear_rect, uint32_t view_mask,
551 bool can_fast_clear)
552 {
553 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
554 const struct radv_rendering_state *render = &cmd_buffer->state.render;
555 uint32_t samples, samples_log2;
556 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
557 VkPipeline pipeline;
558 VkResult result;
559
560 /* When a framebuffer is bound to the current command buffer, get the
561 * number of samples from it. Otherwise, get the number of samples from
562 * the render pass because it's likely a secondary command buffer.
563 */
564 struct radv_image_view *iview = render->ds_att.iview;
565 if (iview) {
566 samples = iview->image->vk.samples;
567 } else {
568 assert(render->ds_att.format != VK_FORMAT_UNDEFINED);
569 samples = render->max_samples;
570 }
571
572 assert(util_is_power_of_two_nonzero(samples));
573 samples_log2 = ffs(samples) - 1;
574
575 result = get_depth_stencil_pipeline(device, samples_log2, aspects, can_fast_clear, &pipeline);
576 if (result != VK_SUCCESS) {
577 vk_command_buffer_set_error(&cmd_buffer->vk, result);
578 return;
579 }
580
581 if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
582 clear_value.depth = 1.0f;
583
584 if (device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
585 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
586 device->meta_state.clear_depth_unrestricted_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
587 4, &clear_value.depth);
588 } else {
589 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_p_layout,
590 VK_SHADER_STAGE_VERTEX_BIT, 0, 4, &clear_value.depth);
591 }
592
593 uint32_t prev_reference = cmd_buffer->state.dynamic.vk.ds.stencil.front.reference;
594 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
595 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
596 }
597
598 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
599
600 if (can_fast_clear)
601 radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
602
603 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
604 &(VkViewport){.x = clear_rect->rect.offset.x,
605 .y = clear_rect->rect.offset.y,
606 .width = clear_rect->rect.extent.width,
607 .height = clear_rect->rect.extent.height,
608 .minDepth = 0.0f,
609 .maxDepth = 1.0f});
610
611 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
612
613 if (view_mask) {
614 u_foreach_bit (i, view_mask)
615 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
616 } else {
617 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
618 }
619
620 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
621 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
622 }
623 }
624
625 static nir_shader *
build_clear_htile_mask_shader(struct radv_device * dev)626 build_clear_htile_mask_shader(struct radv_device *dev)
627 {
628 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
629 b.shader->info.workgroup_size[0] = 64;
630
631 nir_def *global_id = get_global_ids(&b, 1);
632
633 nir_def *offset = nir_imul_imm(&b, global_id, 16);
634 offset = nir_channel(&b, offset, 0);
635
636 nir_def *buf = radv_meta_load_descriptor(&b, 0, 0);
637
638 nir_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
639
640 nir_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
641
642 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
643 nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
644 data = nir_ior(&b, data, nir_channel(&b, constants, 0));
645
646 nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
647
648 return b.shader;
649 }
650
651 static VkResult
create_clear_htile_mask_pipeline(struct radv_device * device)652 create_clear_htile_mask_pipeline(struct radv_device *device)
653 {
654 struct radv_meta_state *state = &device->meta_state;
655 VkResult result;
656
657 const VkDescriptorSetLayoutBinding binding = {
658 .binding = 0,
659 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
660 .descriptorCount = 1,
661 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
662 };
663
664 result = radv_meta_create_descriptor_set_layout(device, 1, &binding, &state->clear_htile_mask_ds_layout);
665 if (result != VK_SUCCESS)
666 return result;
667
668 const VkPushConstantRange pc_range = {
669 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
670 .size = 8,
671 };
672
673 result = radv_meta_create_pipeline_layout(device, &state->clear_htile_mask_ds_layout, 1, &pc_range,
674 &state->clear_htile_mask_p_layout);
675 if (result != VK_SUCCESS)
676 return result;
677
678 nir_shader *cs = build_clear_htile_mask_shader(device);
679
680 result = radv_meta_create_compute_pipeline(device, cs, state->clear_htile_mask_p_layout,
681 &state->clear_htile_mask_pipeline);
682
683 ralloc_free(cs);
684 return result;
685 }
686
687 static VkResult
get_clear_htile_mask_pipeline(struct radv_device * device,VkPipeline * pipeline_out)688 get_clear_htile_mask_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
689 {
690 struct radv_meta_state *state = &device->meta_state;
691 VkResult result = VK_SUCCESS;
692
693 mtx_lock(&state->mtx);
694 if (!state->clear_htile_mask_pipeline) {
695 result = create_clear_htile_mask_pipeline(device);
696 if (result != VK_SUCCESS)
697 goto fail;
698 }
699
700 *pipeline_out = state->clear_htile_mask_pipeline;
701
702 fail:
703 mtx_unlock(&state->mtx);
704 return result;
705 }
706
707 static uint32_t
clear_htile_mask(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t htile_value,uint32_t htile_mask)708 clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo,
709 uint64_t offset, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
710 {
711 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
712 struct radv_meta_state *state = &device->meta_state;
713 uint64_t block_count = DIV_ROUND_UP(size, 1024);
714 struct radv_meta_saved_state saved_state;
715 struct radv_buffer dst_buffer;
716 VkPipeline pipeline;
717 VkResult result;
718
719 result = get_clear_htile_mask_pipeline(device, &pipeline);
720 if (result != VK_SUCCESS) {
721 vk_command_buffer_set_error(&cmd_buffer->vk, result);
722 return 0;
723 }
724
725 radv_meta_save(&saved_state, cmd_buffer,
726 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
727
728 radv_buffer_init(&dst_buffer, device, bo, size, offset);
729
730 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
731
732 radv_meta_push_descriptor_set(
733 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, 1,
734 (VkWriteDescriptorSet[]){
735 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
736 .dstBinding = 0,
737 .dstArrayElement = 0,
738 .descriptorCount = 1,
739 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
740 .pBufferInfo =
741 &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), .offset = 0, .range = size}}});
742
743 const unsigned constants[2] = {
744 htile_value & htile_mask,
745 ~htile_mask,
746 };
747
748 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
749 VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);
750
751 vk_common_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
752
753 radv_buffer_finish(&dst_buffer);
754
755 radv_meta_restore(&saved_state, cmd_buffer);
756
757 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
758 VK_ACCESS_2_SHADER_WRITE_BIT, image);
759 }
760
761 static uint32_t
radv_get_htile_fast_clear_value(const struct radv_device * device,const struct radv_image * image,VkClearDepthStencilValue value)762 radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
763 VkClearDepthStencilValue value)
764 {
765 uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
766 uint32_t zmask = 0, smem = 0;
767 uint32_t htile_value;
768 uint32_t zmin, zmax;
769
770 /* Convert the depth value to 14-bit zmin/zmax values. */
771 zmin = lroundf(value.depth * max_zval);
772 zmax = zmin;
773
774 if (radv_image_tile_stencil_disabled(device, image)) {
775 /* Z only (no stencil):
776 *
777 * |31 18|17 4|3 0|
778 * +---------+---------+-------+
779 * | Max Z | Min Z | ZMask |
780 */
781 htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
782 } else {
783
784 /* Z and stencil:
785 *
786 * |31 12|11 10|9 8|7 6|5 4|3 0|
787 * +-----------+-----+------+-----+-----+-------+
788 * | Z Range | | SMem | SR1 | SR0 | ZMask |
789 *
790 * Z, stencil, 4 bit VRS encoding:
791 * |31 12| 11 10 |9 8|7 6 |5 4|3 0|
792 * +-----------+------------+------+------------+-----+-------+
793 * | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
794 */
795 uint32_t delta = 0;
796 uint32_t zrange = ((zmax << 6) | delta);
797 uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
798
799 if (radv_image_has_vrs_htile(device, image))
800 sresults = 0x3;
801
802 htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
803 }
804
805 return htile_value;
806 }
807
808 static uint32_t
radv_get_htile_mask(const struct radv_device * device,const struct radv_image * image,VkImageAspectFlags aspects)809 radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, VkImageAspectFlags aspects)
810 {
811 uint32_t mask = 0;
812
813 if (radv_image_tile_stencil_disabled(device, image)) {
814 /* All the HTILE buffer is used when there is no stencil. */
815 mask = UINT32_MAX;
816 } else {
817 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
818 mask |= 0xfffffc0f;
819 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
820 mask |= 0x000003f0;
821 }
822
823 return mask;
824 }
825
826 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)827 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
828 {
829 return value.depth == 1.0f || value.depth == 0.0f;
830 }
831
832 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)833 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
834 {
835 return value.stencil == 0;
836 }
837
/* Return true if the depth/stencil clear described by the parameters can be
 * performed as a HTILE fast clear instead of a draw-based slow clear.
 */
static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect,
                          const VkClearDepthStencilValue clear_value, uint32_t view_mask)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (!iview || !iview->support_fast_clear)
      return false;

   /* HTILE must be compressed in this layout for this queue family. */
   if (!radv_layout_is_htile_compressed(device, iview->image, image_layout,
                                        radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
      return false;

   /* Fast clears only operate on the full image extent. */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->vk.extent.width ||
       clear_rect->rect.extent.height != iview->image->vk.extent.height)
      return false;

   /* All array layers must be covered: with multiview, the view mask must
    * select every layer (and fit in 32 bits); without it, the clear rect
    * must span layer 0 through the last layer.
    */
   if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
      return false;
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
      return false;

   /* With VK_EXT_depth_range_unrestricted, depth values outside [0,1] are
    * legal but cannot be encoded for a fast clear.
    */
   if (device->vk.enabled_extensions.EXT_depth_range_unrestricted && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       (clear_value.depth < 0.0 || clear_value.depth > 1.0))
      return false;

   /* TC-compatible HTILE only supports canonical clear values (depth 0/1,
    * stencil 0).
    */
   if (radv_image_is_tc_compat_htile(iview->image) &&
       (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
        ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && !radv_is_fast_clear_stencil_allowed(clear_value))))
      return false;

   if (iview->image->vk.mip_levels > 1) {
      /* Every level touched by the view must have HTILE metadata. */
      uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
      if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
         /* Do not fast clears if one level can't be fast cleared. */
         return false;
      }
   }

   return true;
}
883
/* Perform a HTILE-based fast depth/stencil clear of the given image view.
 *
 * pre_flush/post_flush, when non-NULL, accumulate flush bits across multiple
 * clears so that redundant synchronization can be skipped by the caller.
 */
static void
radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      VkClearDepthStencilValue clear_value, VkImageAspectFlags aspects,
                      enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint32_t clear_word, flush_bits;

   /* Encode the clear value in the HTILE word layout. */
   clear_word = radv_get_htile_fast_clear_value(device, iview->image, clear_value);

   if (pre_flush) {
      /* Only add the flush bits not already requested by a previous clear. */
      enum radv_cmd_flush_bits bits =
         radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                               VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
         radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT,
                               iview->image);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);

   if (iview->image->planes[0].surface.has_stencil &&
       !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      /* Synchronize after performing a depth-only or a stencil-only
       * fast clear because the driver uses an optimized path which
       * performs a read-modify-write operation, and the two separate
       * aspects might use the same HTILE memory.
       */
      cmd_buffer->state.flush_bits |= flush_bits;
   }

   /* Record the clear value so it can be read back from metadata later. */
   radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
   if (post_flush) {
      *post_flush |= flush_bits;
   }
}
929
/* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
 * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
 */
static nir_shader *
build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
{
   enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
                                         is_msaa ? "multisampled" : "singlesampled");
   /* 8x8 invocations per workgroup; each invocation handles one DCC block. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_def *global_id = get_global_ids(&b, 3);

   /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
   nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

   /* Compute the coordinates: scale the invocation ID to the block's first
    * pixel; the Z component selects the array layer.
    */
   nir_def *coord = nir_trim_vector(&b, global_id, 2);
   coord = nir_imul(&b, coord, dcc_block_size);
   coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
                    nir_undef(&b, 1, 32));

   /* Destination storage image at descriptor set 0, binding 0. */
   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   /* Load the clear color values (4 x 32 bits at push-constant offset 8). */
   nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24);

   nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
                            nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3));

   /* Store the clear color values; only sample 0 is written for MSAA. */
   nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0),
                         .image_dim = dim, .image_array = true);

   return b.shader;
}
972
973 static VkResult
create_dcc_comp_to_single_pipeline(struct radv_device * device,bool is_msaa,VkPipeline * pipeline)974 create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline)
975 {
976 struct radv_meta_state *state = &device->meta_state;
977 VkResult result;
978
979 if (!state->clear_dcc_comp_to_single_ds_layout) {
980 const VkDescriptorSetLayoutBinding binding = {
981 .binding = 0,
982 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
983 .descriptorCount = 1,
984 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
985 };
986
987 result = radv_meta_create_descriptor_set_layout(device, 1, &binding, &state->clear_dcc_comp_to_single_ds_layout);
988 if (result != VK_SUCCESS)
989 return result;
990 }
991
992 if (!state->clear_dcc_comp_to_single_p_layout) {
993 const VkPushConstantRange pc_range = {
994 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
995 .size = 24,
996 };
997
998 result = radv_meta_create_pipeline_layout(device, &state->clear_dcc_comp_to_single_ds_layout, 1, &pc_range,
999 &state->clear_dcc_comp_to_single_p_layout);
1000 if (result != VK_SUCCESS)
1001 return result;
1002 }
1003
1004 nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa);
1005
1006 result = radv_meta_create_compute_pipeline(device, cs, state->clear_dcc_comp_to_single_p_layout, pipeline);
1007
1008 ralloc_free(cs);
1009 return result;
1010 }
1011
1012 static VkResult
init_meta_clear_dcc_comp_to_single_state(struct radv_device * device)1013 init_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
1014 {
1015 struct radv_meta_state *state = &device->meta_state;
1016 VkResult result;
1017
1018 for (uint32_t i = 0; i < 2; i++) {
1019 result = create_dcc_comp_to_single_pipeline(device, !!i, &state->clear_dcc_comp_to_single_pipeline[i]);
1020 if (result != VK_SUCCESS)
1021 return result;
1022 }
1023
1024 return result;
1025 }
1026
1027 VkResult
radv_device_init_meta_clear_state(struct radv_device * device,bool on_demand)1028 radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1029 {
1030 VkResult res;
1031 struct radv_meta_state *state = &device->meta_state;
1032
1033 if (on_demand)
1034 return VK_SUCCESS;
1035
1036 res = init_meta_clear_dcc_comp_to_single_state(device);
1037 if (res != VK_SUCCESS)
1038 return res;
1039
1040 res = create_clear_htile_mask_pipeline(device);
1041 if (res != VK_SUCCESS)
1042 return res;
1043
1044 for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
1045 uint32_t samples = 1 << i;
1046
1047 /* Only precompile meta pipelines for attachment 0 as other are uncommon. */
1048 for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1049 VkFormat format = radv_fs_key_format_exemplars[j];
1050 unsigned fs_key = radv_format_meta_fs_key(device, format);
1051 assert(!state->color_clear[i][0].color_pipelines[fs_key]);
1052
1053 res = create_color_pipeline(device, samples, 0, format, &state->color_clear[i][0].color_pipelines[fs_key]);
1054 if (res != VK_SUCCESS)
1055 return res;
1056 }
1057 }
1058 for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
1059 uint32_t samples = 1 << i;
1060
1061 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1062 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
1063 &state->ds_clear[i].depth_only_pipeline[j]);
1064 if (res != VK_SUCCESS)
1065 return res;
1066
1067 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
1068 &state->ds_clear[i].stencil_only_pipeline[j]);
1069 if (res != VK_SUCCESS)
1070 return res;
1071
1072 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
1073 false, &state->ds_clear[i].depthstencil_pipeline[j]);
1074 if (res != VK_SUCCESS)
1075 return res;
1076
1077 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
1078 &state->ds_clear[i].depth_only_unrestricted_pipeline[j]);
1079 if (res != VK_SUCCESS)
1080 return res;
1081
1082 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
1083 &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
1084 if (res != VK_SUCCESS)
1085 return res;
1086
1087 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
1088 true, &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
1089 if (res != VK_SUCCESS)
1090 return res;
1091 }
1092 }
1093 return VK_SUCCESS;
1094 }
1095
1096 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)1097 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1098 {
1099 uint32_t value = 0; /* Default value when no DCC. */
1100
1101 /* The fast-clear value is different for images that have both DCC and
1102 * CMASK metadata.
1103 */
1104 if (radv_image_has_dcc(image)) {
1105 /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1106 return image->vk.samples > 1 ? 0xcccccccc : 0xffffffff;
1107 }
1108
1109 return value;
1110 }
1111
1112 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1113 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1114 uint32_t value)
1115 {
1116 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1117 const struct radv_physical_device *pdev = radv_device_physical(device);
1118 uint64_t cmask_offset = image->planes[0].surface.cmask_offset;
1119 uint64_t size;
1120
1121 if (pdev->info.gfx_level == GFX9) {
1122 /* TODO: clear layers. */
1123 size = image->planes[0].surface.cmask_size;
1124 } else {
1125 unsigned slice_size = image->planes[0].surface.cmask_slice_size;
1126
1127 cmask_offset += slice_size * range->baseArrayLayer;
1128 size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1129 }
1130
1131 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + cmask_offset, size,
1132 value);
1133 }
1134
1135 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1136 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1137 uint32_t value)
1138 {
1139 uint64_t fmask_offset = image->planes[0].surface.fmask_offset;
1140 unsigned slice_size = image->planes[0].surface.fmask_slice_size;
1141 uint64_t size;
1142
1143 /* MSAA images do not support mipmap levels. */
1144 assert(range->baseMipLevel == 0 && vk_image_subresource_level_count(&image->vk, range) == 1);
1145
1146 fmask_offset += slice_size * range->baseArrayLayer;
1147 size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1148
1149 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + fmask_offset, size,
1150 value);
1151 }
1152
/* Fill the DCC metadata of every level/layer in 'range' with 'value', mark
 * the image as DCC-compressed, and return the flush bits required afterwards.
 */
uint32_t
radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
               uint32_t value)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
   uint32_t flush_bits = 0;

   /* Mark the image as being compressed. */
   radv_update_dcc_metadata(cmd_buffer, image, range, true);

   for (uint32_t l = 0; l < level_count; l++) {
      uint64_t dcc_offset = image->planes[0].surface.meta_offset;
      uint32_t level = range->baseMipLevel + l;
      uint64_t size;

      if (pdev->info.gfx_level >= GFX10) {
         /* GFX10+: per-level metadata offsets within each slice.
          * DCC for mipmaps+layers is currently disabled.
          */
         dcc_offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
                       image->planes[0].surface.u.gfx9.meta_levels[level].offset;
         size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
      } else if (pdev->info.gfx_level == GFX9) {
         /* GFX9: single metadata blob. Mipmap levels and layers aren't implemented. */
         assert(level == 0);
         size = image->planes[0].surface.meta_size;
      } else {
         /* Pre-GFX9: legacy per-level DCC layout. */
         const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[level];

         /* If dcc_fast_clear_size is 0 (which might happens for
          * mipmaps) the fill buffer operation below is a no-op.
          * This can only happen during initialization as the
          * fast clear path fallbacks to slow clears if one
          * level can't be fast cleared.
          */
         dcc_offset += dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
         size = dcc_level->dcc_slice_fast_clear_size * vk_image_subresource_layer_count(&image->vk, range);
      }

      /* Do not clear this level if it can't be compressed. */
      if (!size)
         continue;

      flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + dcc_offset,
                                     size, value);
   }

   return flush_bits;
}
1203
1204 static VkResult
get_clear_dcc_comp_to_single_pipeline(struct radv_device * device,bool is_msaa,VkPipeline * pipeline_out)1205 get_clear_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline_out)
1206 {
1207 struct radv_meta_state *state = &device->meta_state;
1208 VkResult result = VK_SUCCESS;
1209
1210 mtx_lock(&state->mtx);
1211 if (!state->clear_dcc_comp_to_single_pipeline[is_msaa]) {
1212 result = create_dcc_comp_to_single_pipeline(device, is_msaa, &state->clear_dcc_comp_to_single_pipeline[is_msaa]);
1213 if (result != VK_SUCCESS)
1214 goto fail;
1215 }
1216
1217 *pipeline_out = state->clear_dcc_comp_to_single_pipeline[is_msaa];
1218
1219 fail:
1220 mtx_unlock(&state->mtx);
1221 return result;
1222 }
1223
/* Clear DCC-compressed levels by writing the clear color into the image via
 * the comp-to-single compute shader (one store per DCC block). Returns the
 * flush bits required afterwards; returns 0 if the pipeline can't be built.
 */
static uint32_t
radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, uint32_t color_values[4])
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
   unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
   struct radv_meta_saved_state saved_state;
   bool is_msaa = image->vk.samples > 1;
   struct radv_image_view iview;
   VkPipeline pipeline;
   VkResult result;
   VkFormat format;

   /* Pick a same-size UINT format so the raw clear words can be stored
    * without any format conversion.
    */
   switch (bytes_per_pixel) {
   case 1:
      format = VK_FORMAT_R8_UINT;
      break;
   case 2:
      format = VK_FORMAT_R16_UINT;
      break;
   case 4:
      format = VK_FORMAT_R32_UINT;
      break;
   case 8:
      format = VK_FORMAT_R32G32_UINT;
      break;
   case 16:
      format = VK_FORMAT_R32G32B32A32_UINT;
      break;
   default:
      unreachable("Unsupported number of bytes per pixel");
   }

   result = get_clear_dcc_comp_to_single_pipeline(device, is_msaa, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return 0;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); l++) {
      uint32_t width, height;

      /* Do not write the clear color value for levels without DCC. */
      if (!radv_dcc_enabled(image, range->baseMipLevel + l))
         continue;

      width = u_minify(image->vk.extent.width, range->baseMipLevel + l);
      height = u_minify(image->vk.extent.height, range->baseMipLevel + l);

      /* Create an uncompressed view of this level so the stores bypass DCC. */
      radv_image_view_init(&iview, device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(image),
                              .viewType = VK_IMAGE_VIEW_TYPE_2D,
                              .format = format,
                              .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                                   .baseMipLevel = range->baseMipLevel + l,
                                                   .levelCount = 1,
                                                   .baseArrayLayer = range->baseArrayLayer,
                                                   .layerCount = layer_count},
                           },
                           0, &(struct radv_image_view_extra_create_info){.disable_compression = true});

      radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                    device->meta_state.clear_dcc_comp_to_single_p_layout, 0, 1,
                                    (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                              .dstBinding = 0,
                                                              .dstArrayElement = 0,
                                                              .descriptorCount = 1,
                                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                              .pImageInfo = (VkDescriptorImageInfo[]){
                                                                 {
                                                                    .sampler = VK_NULL_HANDLE,
                                                                    .imageView = radv_image_view_to_handle(&iview),
                                                                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                                 },
                                                              }}});

      /* One shader invocation per DCC block. */
      unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
      unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);

      /* Push constants: DCC block dimensions followed by the clear color. */
      const unsigned constants[6] = {
         image->planes[0].surface.u.gfx9.color.dcc_block_width,
         image->planes[0].surface.u.gfx9.color.dcc_block_height,
         color_values[0],
         color_values[1],
         color_values[2],
         color_values[3],
      };

      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                                 device->meta_state.clear_dcc_comp_to_single_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
                                 24, constants);

      radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);

      radv_image_view_finish(&iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                                 VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
1334
1335 uint32_t
radv_clear_htile(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1336 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1337 const VkImageSubresourceRange *range, uint32_t value)
1338 {
1339 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1340 const struct radv_physical_device *pdev = radv_device_physical(device);
1341 uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
1342 uint32_t flush_bits = 0;
1343 uint32_t htile_mask;
1344
1345 htile_mask = radv_get_htile_mask(device, image, range->aspectMask);
1346
1347 if (level_count != image->vk.mip_levels) {
1348 assert(pdev->info.gfx_level >= GFX10);
1349
1350 /* Clear individuals levels separately. */
1351 for (uint32_t l = 0; l < level_count; l++) {
1352 uint32_t level = range->baseMipLevel + l;
1353 uint64_t htile_offset =
1354 image->planes[0].surface.meta_offset + image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1355 uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;
1356
1357 /* Do not clear this level if it can be compressed. */
1358 if (!size)
1359 continue;
1360
1361 if (htile_mask == UINT_MAX) {
1362 /* Clear the whole HTILE buffer. */
1363 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1364 radv_image_get_va(image, 0) + htile_offset, size, value);
1365 } else {
1366 /* Only clear depth or stencil bytes in the HTILE buffer. */
1367 flush_bits |= clear_htile_mask(cmd_buffer, image, image->bindings[0].bo,
1368 image->bindings[0].offset + htile_offset, size, value, htile_mask);
1369 }
1370 }
1371 } else {
1372 unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
1373 uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;
1374 uint64_t htile_offset =
1375 image->planes[0].surface.meta_offset + image->planes[0].surface.meta_slice_size * range->baseArrayLayer;
1376
1377 if (htile_mask == UINT_MAX) {
1378 /* Clear the whole HTILE buffer. */
1379 flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1380 radv_image_get_va(image, 0) + htile_offset, size, value);
1381 } else {
1382 /* Only clear depth or stencil bytes in the HTILE buffer. */
1383 flush_bits = clear_htile_mask(cmd_buffer, image, image->bindings[0].bo,
1384 image->bindings[0].offset + htile_offset, size, value, htile_mask);
1385 }
1386 }
1387
1388 return flush_bits;
1389 }
1390
/* DCC fast-clear codes written into the DCC metadata. The GFXn_CLEAR_xxxx
 * names encode the per-channel clear value (e.g. "0001" = RGB 0, alpha 1).
 * The "SINGLE" codes are used with comp-to-single, where the clear color is
 * stored at the beginning of each 256B block. GFX8_CLEAR_REG presumably
 * takes the color from the clear registers and requires a fast-clear
 * eliminate (see gfx8_get_fast_clear_parameters, which only sets
 * can_avoid_fast_clear_elim = false for it).
 */
enum {
   RADV_DCC_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX8_CLEAR_0001 = 0x40404040U,
   RADV_DCC_GFX8_CLEAR_1110 = 0x80808080U,
   RADV_DCC_GFX8_CLEAR_1111 = 0xC0C0C0C0U,
   RADV_DCC_GFX8_CLEAR_REG = 0x20202020U,
   RADV_DCC_GFX9_CLEAR_SINGLE = 0x10101010U,
   RADV_DCC_GFX11_CLEAR_SINGLE = 0x01010101U,
   RADV_DCC_GFX11_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX11_CLEAR_1111_UNORM = 0x02020202U,
   RADV_DCC_GFX11_CLEAR_1111_FP16 = 0x04040404U,
   RADV_DCC_GFX11_CLEAR_1111_FP32 = 0x06060606U,
   RADV_DCC_GFX11_CLEAR_0001_UNORM = 0x08080808U,
   RADV_DCC_GFX11_CLEAR_1110_UNORM = 0x0A0A0A0AU,
};
1406
1407 static uint32_t
radv_dcc_single_clear_value(const struct radv_device * device)1408 radv_dcc_single_clear_value(const struct radv_device *device)
1409 {
1410 const struct radv_physical_device *pdev = radv_device_physical(device);
1411 return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
1412 }
1413
/* Compute the pre-GFX11 DCC fast-clear code for 'clear_value' and report in
 * *can_avoid_fast_clear_elim whether a fast-clear-eliminate pass can be
 * skipped. If the color doesn't map onto one of the special all-0/all-1 clear
 * codes, the defaults set at the top (comp-to-single, or the clear-register
 * code which requires an FCE) are left in place.
 */
static void
gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                               const VkClearColorValue *clear_value, uint32_t *reset_value,
                               bool *can_avoid_fast_clear_elim)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool values[4] = {0};           /* per-channel: is the clear value the "1" pattern? */
   int extra_channel;              /* index of the alpha-like channel, or -1 if none */
   bool main_value = false;        /* pattern shared by the color channels */
   bool extra_value = false;       /* pattern of the alpha-like channel */
   bool has_color = false;
   bool has_alpha = false;

   /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX9_CLEAR_SINGLE;
      *can_avoid_fast_clear_elim = true;
   } else {
      *reset_value = RADV_DCC_GFX8_CLEAR_REG;
      *can_avoid_fast_clear_elim = false;
   }

   /* Determine which channel plays the "alpha" role for the clear code. */
   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
       iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
      extra_channel = -1;
   else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      if (ac_alpha_is_on_msb(&pdev->info, vk_format_to_pipe_format(iview->vk.format)))
         extra_channel = desc->nr_channels - 1;
      else
         extra_channel = 0;
   } else
      return;

   /* Classify each channel's clear value as 0 or "max"; bail out for any
    * value that is neither, since no clear code can represent it.
    */
   for (int i = 0; i < 4; i++) {
      int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
      if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
         continue;

      if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
         /* Use the maximum value for clamping the clear color. */
         int max = u_bit_consecutive(0, desc->channel[i].size - 1);

         values[i] = clear_value->int32[i] != 0;
         if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
            return;
      } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         /* Use the maximum value for clamping the clear color. */
         unsigned max = u_bit_consecutive(0, desc->channel[i].size);

         values[i] = clear_value->uint32[i] != 0U;
         if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
            return;
      } else {
         values[i] = clear_value->float32[i] != 0.0F;
         if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
            return;
      }

      if (index == extra_channel) {
         extra_value = values[i];
         has_alpha = true;
      } else {
         main_value = values[i];
         has_color = true;
      }
   }

   /* If alpha isn't present, make it the same as color, and vice versa. */
   if (!has_alpha)
      extra_value = main_value;
   else if (!has_color)
      main_value = extra_value;

   /* All color channels must share the same 0/1 pattern. */
   for (int i = 0; i < 4; ++i)
      if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
          desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
         return;

   /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */
   if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret)
      return;

   *can_avoid_fast_clear_elim = true;

   if (main_value) {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_1111;
      else
         *reset_value = RADV_DCC_GFX8_CLEAR_1110;
   } else {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_0001;
      else
         *reset_value = RADV_DCC_CLEAR_0000;
   }
}
1511
/* Find a GFX11 DCC fast-clear code for 'clear_value'. Returns true and sets
 * *reset_value on success; returns false when no code matches and the image
 * doesn't support comp-to-single.
 */
static bool
gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                                const VkClearColorValue *clear_value, uint32_t *reset_value)
{
   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   unsigned start_bit = UINT_MAX;
   unsigned end_bit = 0;

   /* TODO: 8bpp and 16bpp fast DCC clears don't work. */
   if (desc->block.bits <= 16)
      return false;

   /* Find the used bit range (bits covered by real channels, skipping
    * constant-0/1 swizzles).
    */
   for (unsigned i = 0; i < 4; i++) {
      unsigned swizzle = desc->swizzle[i];

      if (swizzle >= PIPE_SWIZZLE_0)
         continue;

      start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
      end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
   }

   /* Pack the clear color into the format's bit layout so the raw bits can
    * be inspected.
    */
   union {
      uint8_t ub[16];
      uint16_t us[8];
      uint32_t ui[4];
   } value;
   memset(&value, 0, sizeof(value));
   util_format_pack_rgba(vk_format_to_pipe_format(iview->vk.format), &value, clear_value, 1);

   /* Check the cases where all components or bits are either all 0 or all 1. */
   bool all_bits_are_0 = true;
   bool all_bits_are_1 = true;
   bool all_words_are_fp16_1 = false;
   bool all_words_are_fp32_1 = false;

   for (unsigned i = start_bit; i < end_bit; i++) {
      bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);

      all_bits_are_0 &= !bit;
      all_bits_are_1 &= bit;
   }

   /* 0x3c00 is 1.0 in FP16; only meaningful for 16-bit-aligned channels. */
   if (start_bit % 16 == 0 && end_bit % 16 == 0) {
      all_words_are_fp16_1 = true;
      for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
         all_words_are_fp16_1 &= value.us[i] == 0x3c00;
   }

   /* 0x3f800000 is 1.0 in FP32; only meaningful for 32-bit-aligned channels. */
   if (start_bit % 32 == 0 && end_bit % 32 == 0) {
      all_words_are_fp32_1 = true;
      for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
         all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
   }

   if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
      if (all_bits_are_0)
         *reset_value = RADV_DCC_CLEAR_0000;
      else if (all_bits_are_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_UNORM;
      else if (all_words_are_fp16_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP16;
      else if (all_words_are_fp32_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP32;
      return true;
   }

   /* Check the special "RGB 0, alpha max" / "RGB max, alpha 0" patterns for
    * the supported 8-bit and 16-bit channel layouts.
    */
   if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
      if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
      if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && value.ub[2] == 0x00 && value.ub[3] == 0xff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && value.ub[2] == 0xff && value.ub[3] == 0x00) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
      if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && value.us[2] == 0x0000 && value.us[3] == 0xffff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && value.us[2] == 0xffff && value.us[3] == 0x0000) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   }

   /* Fall back to comp-to-single, which handles arbitrary colors. */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX11_CLEAR_SINGLE;
      return true;
   }

   return false;
}
1613
static bool
radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
                          uint32_t view_mask)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t clear_color[2];

   /* Fast clears require an image view whose creation determined fast-clear support. */
   if (!iview || !iview->support_fast_clear)
      return false;

   /* The current layout (for this queue family) must allow fast clears. */
   if (!radv_layout_can_fast_clear(device, iview->image, iview->vk.base_mip_level, image_layout,
                                   radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
      return false;

   /* Fast clears only work on the whole surface: the clear rect must cover the full extent. */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->vk.extent.width ||
       clear_rect->rect.extent.height != iview->image->vk.extent.height)
      return false;

   /* With multiview, every array layer must be targeted (a full mask of all layers,
    * and fewer than 32 layers so the mask is representable).
    */
   if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
      return false;
   /* Without multiview, all array layers must be cleared, starting at layer 0. */
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
      return false;

   /* DCC */

   /* Images that support comp-to-single clears don't have clear values. */
   if (!iview->image->support_comp_to_single) {
      if (!radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value))
         return false;

      /* Without clear-value metadata, only an all-zero packed color can be fast cleared. */
      if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
         return false;
   }

   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      bool can_avoid_fast_clear_elim;
      uint32_t reset_value;

      if (pdev->info.gfx_level >= GFX11) {
         /* On GFX11+ the clear value must map to one of the supported DCC clear codes. */
         if (!gfx11_get_fast_clear_parameters(device, iview, &clear_value, &reset_value))
            return false;
      } else {
         gfx8_get_fast_clear_parameters(device, iview, &clear_value, &reset_value, &can_avoid_fast_clear_elim);
      }

      if (iview->image->vk.mip_levels > 1) {
         if (pdev->info.gfx_level >= GFX9) {
            uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
            if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
               /* Do not fast clear if one level can't be fast cleared. */
               return false;
            }
         } else {
            for (uint32_t l = 0; l < iview->vk.level_count; l++) {
               uint32_t level = iview->vk.base_mip_level + l;
               struct legacy_surf_dcc_level *dcc_level =
                  &iview->image->planes[0].surface.u.legacy.color.dcc_level[level];

               /* Do not fast clear if one level can't be fast cleared. */
               if (!dcc_level->dcc_fast_clear_size)
                  return false;
            }
         }
      }
   }

   return true;
}
1689
/* Perform a fast color clear by initializing the image's CMASK and/or DCC
 * metadata to the appropriate clear codes instead of rendering. Accumulates
 * the required cache flushes into *pre_flush/*post_flush when provided.
 */
static void
radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
                      enum radv_cmd_flush_bits *post_flush)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkClearColorValue clear_value = clear_att->clearValue.color;
   uint32_t clear_color[4], flush_bits = 0;
   uint32_t cmask_clear_value;
   VkImageSubresourceRange range = {
      .aspectMask = iview->vk.aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   if (pre_flush) {
      /* Flush pending color-attachment writes before touching the metadata,
       * but only the bits the caller hasn't already flushed.
       */
      enum radv_cmd_flush_bits bits = radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
                                                            VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   /* DCC */
   radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value);

   cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);

   /* clear cmask buffer */
   bool need_decompress_pass = false;
   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      uint32_t reset_value;
      bool can_avoid_fast_clear_elim = true;

      if (pdev->info.gfx_level >= GFX11) {
         /* radv_can_fast_clear_color() already validated the clear value, so this must succeed. */
         ASSERTED bool result = gfx11_get_fast_clear_parameters(device, iview, &clear_value, &reset_value);
         assert(result);
      } else {
         gfx8_get_fast_clear_parameters(device, iview, &clear_value, &reset_value, &can_avoid_fast_clear_elim);
      }

      if (radv_image_has_cmask(iview->image)) {
         flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
      }

      if (!can_avoid_fast_clear_elim)
         need_decompress_pass = true;

      flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);

      if (reset_value == radv_dcc_single_clear_value(device)) {
         /* Write the clear color to the first byte of each 256B block when the image supports DCC
          * fast clears with comp-to-single.
          */
         if (vk_format_get_blocksize(iview->image->vk.format) == 16) {
            /* 128-bit formats: store the raw 4-dword clear value. */
            flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_value.uint32);
         } else {
            /* Smaller formats: the packed color occupies at most 2 dwords; zero the rest. */
            clear_color[2] = clear_color[3] = 0;
            flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color);
         }
      }
   } else {
      flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);

      /* Fast clearing with CMASK should always be eliminated. */
      need_decompress_pass = true;
   }

   if (post_flush) {
      *post_flush |= flush_bits;
   }

   /* Update the FCE predicate to perform a fast-clear eliminate. */
   radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);

   radv_update_color_clear_metadata(cmd_buffer, iview, clear_att->colorAttachment, clear_color);
}
1769
1770 /**
1771 * The parameters mean that same as those in vkCmdClearAttachments.
1772 */
1773 static void
emit_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,uint32_t view_mask)1774 emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
1775 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush, uint32_t view_mask)
1776 {
1777 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1778 VkImageAspectFlags aspects = clear_att->aspectMask;
1779
1780 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1781 assert(clear_att->colorAttachment < render->color_att_count);
1782 const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
1783
1784 if (color_att->format == VK_FORMAT_UNDEFINED)
1785 return;
1786
1787 VkClearColorValue clear_value = clear_att->clearValue.color;
1788
1789 if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, clear_value,
1790 view_mask)) {
1791 radv_fast_clear_color(cmd_buffer, color_att->iview, clear_att, pre_flush, post_flush);
1792 } else {
1793 emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1794 }
1795 } else {
1796 const struct radv_attachment *ds_att = &render->ds_att;
1797
1798 if (ds_att->format == VK_FORMAT_UNDEFINED)
1799 return;
1800
1801 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1802
1803 assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1804 bool can_fast_clear_depth = false;
1805 bool can_fast_clear_stencil = false;
1806 if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) &&
1807 ds_att->layout != ds_att->stencil_layout) {
1808 can_fast_clear_depth = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects,
1809 clear_rect, clear_value, view_mask);
1810 can_fast_clear_stencil = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->stencil_layout, aspects,
1811 clear_rect, clear_value, view_mask);
1812 } else {
1813 VkImageLayout layout = aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? ds_att->layout : ds_att->stencil_layout;
1814 can_fast_clear_depth =
1815 radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, layout, aspects, clear_rect, clear_value, view_mask);
1816 can_fast_clear_stencil = can_fast_clear_depth;
1817 }
1818
1819 if (can_fast_clear_depth && can_fast_clear_stencil) {
1820 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil, clear_att->aspectMask,
1821 pre_flush, post_flush);
1822 } else if (!can_fast_clear_depth && !can_fast_clear_stencil) {
1823 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, clear_att->aspectMask, clear_rect,
1824 view_mask, false);
1825 } else {
1826 if (can_fast_clear_depth) {
1827 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
1828 VK_IMAGE_ASPECT_DEPTH_BIT, pre_flush, post_flush);
1829 } else {
1830 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_DEPTH_BIT,
1831 clear_rect, view_mask, can_fast_clear_depth);
1832 }
1833
1834 if (can_fast_clear_stencil) {
1835 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
1836 VK_IMAGE_ASPECT_STENCIL_BIT, pre_flush, post_flush);
1837 } else {
1838 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_STENCIL_BIT,
1839 clear_rect, view_mask, can_fast_clear_stencil);
1840 }
1841 }
1842 }
1843 }
1844
1845 static bool
radv_rendering_needs_clear(const VkRenderingInfo * pRenderingInfo)1846 radv_rendering_needs_clear(const VkRenderingInfo *pRenderingInfo)
1847 {
1848 for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
1849 if (pRenderingInfo->pColorAttachments[i].imageView != VK_NULL_HANDLE &&
1850 pRenderingInfo->pColorAttachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1851 return true;
1852 }
1853
1854 if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
1855 pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1856 return true;
1857
1858 if (pRenderingInfo->pStencilAttachment != NULL && pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
1859 pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1860 return true;
1861
1862 return false;
1863 }
1864
1865 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1866 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
1867 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
1868 {
1869 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1870
1871 VkClearRect clear_rect = {
1872 .rect = render->area,
1873 .baseArrayLayer = 0,
1874 .layerCount = render->layer_count,
1875 };
1876
1877 radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1878
1879 emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush, render->view_mask);
1880
1881 radv_describe_end_render_pass_clear(cmd_buffer);
1882 }
1883
1884 /**
1885 * Emit any pending attachment clears for the current subpass.
1886 *
1887 * @see radv_attachment_state::pending_clear_aspects
1888 */
1889 void
radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer * cmd_buffer,const VkRenderingInfo * pRenderingInfo)1890 radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo)
1891 {
1892 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1893 struct radv_meta_saved_state saved_state;
1894 enum radv_cmd_flush_bits pre_flush = 0;
1895 enum radv_cmd_flush_bits post_flush = 0;
1896
1897 if (!radv_rendering_needs_clear(pRenderingInfo))
1898 return;
1899
1900 /* Subpass clear should not be affected by conditional rendering. */
1901 radv_meta_save(&saved_state, cmd_buffer,
1902 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
1903
1904 assert(render->color_att_count == pRenderingInfo->colorAttachmentCount);
1905 for (uint32_t i = 0; i < render->color_att_count; i++) {
1906 if (render->color_att[i].iview == NULL ||
1907 pRenderingInfo->pColorAttachments[i].loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
1908 continue;
1909
1910 VkClearAttachment clear_att = {
1911 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1912 .colorAttachment = i,
1913 .clearValue = pRenderingInfo->pColorAttachments[i].clearValue,
1914 };
1915
1916 radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
1917 }
1918
1919 if (render->ds_att.iview != NULL) {
1920 VkClearAttachment clear_att = {.aspectMask = 0};
1921
1922 if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
1923 pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1924 clear_att.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
1925 clear_att.clearValue.depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
1926 }
1927
1928 if (pRenderingInfo->pStencilAttachment != NULL &&
1929 pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
1930 pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1931 clear_att.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
1932 clear_att.clearValue.depthStencil.stencil =
1933 pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
1934 }
1935
1936 if (clear_att.aspectMask != 0) {
1937 radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
1938 }
1939 }
1940
1941 radv_meta_restore(&saved_state, cmd_buffer);
1942 cmd_buffer->state.flush_bits |= post_flush;
1943 }
1944
/* Slow-path clear of one mip level of an image: create a temporary image view,
 * begin a rendering with it bound as the matching attachment type, and emit a
 * draw-based clear covering the whole level.
 */
static void
radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
                       const VkImageSubresourceRange *range, VkFormat format, int level, unsigned layer_count,
                       const VkClearValue *clear_val)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view iview;
   uint32_t width = u_minify(image->vk.extent.width, range->baseMipLevel + level);
   uint32_t height = u_minify(image->vk.extent.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = format,
                           .subresourceRange = {.aspectMask = range->aspectMask,
                                                .baseMipLevel = range->baseMipLevel + level,
                                                .levelCount = 1,
                                                .baseArrayLayer = range->baseArrayLayer,
                                                .layerCount = layer_count},
                        },
                        0, NULL);

   VkClearAttachment clear_att = {
      .aspectMask = range->aspectMask,
      .colorAttachment = 0,
      .clearValue = *clear_val,
   };

   VkClearRect clear_rect = {
      .rect =
         {
            .offset = {0, 0},
            .extent = {width, height},
         },
      .baseArrayLayer = 0,
      .layerCount = layer_count,
   };

   /* LOAD_OP_LOAD/STORE_OP_STORE: emit_clear() below does the actual clearing. */
   VkRenderingAttachmentInfo att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = radv_image_view_to_handle(&iview),
      .imageLayout = image_layout,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
   };

   VkRenderingInfo rendering_info = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .flags = VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA,
      .renderArea =
         {
            .offset = {0, 0},
            .extent = {width, height},
         },
      .layerCount = layer_count,
   };

   /* Bind the single temporary view as whichever attachment kinds the image's aspects require. */
   if (image->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      rendering_info.colorAttachmentCount = 1;
      rendering_info.pColorAttachments = &att;
   }
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
      rendering_info.pDepthAttachment = &att;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
      rendering_info.pStencilAttachment = &att;

   radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);

   emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0);

   radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));

   radv_image_view_finish(&iview);
}
2021
2022 /**
2023 * Return TRUE if a fast color or depth clear has been performed.
2024 */
2025 static bool
radv_fast_clear_range(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkFormat format,VkImageLayout image_layout,const VkImageSubresourceRange * range,const VkClearValue * clear_val)2026 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
2027 VkImageLayout image_layout, const VkImageSubresourceRange *range, const VkClearValue *clear_val)
2028 {
2029 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2030 struct radv_image_view iview;
2031 bool fast_cleared = false;
2032
2033 radv_image_view_init(&iview, device,
2034 &(VkImageViewCreateInfo){
2035 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2036 .image = radv_image_to_handle(image),
2037 .viewType = radv_meta_get_view_type(image),
2038 .format = image->vk.format,
2039 .subresourceRange =
2040 {
2041 .aspectMask = range->aspectMask,
2042 .baseMipLevel = range->baseMipLevel,
2043 .levelCount = vk_image_subresource_level_count(&image->vk, range),
2044 .baseArrayLayer = range->baseArrayLayer,
2045 .layerCount = vk_image_subresource_layer_count(&image->vk, range),
2046 },
2047 },
2048 0, NULL);
2049
2050 VkClearRect clear_rect = {
2051 .rect =
2052 {
2053 .offset = {0, 0},
2054 .extent =
2055 {
2056 u_minify(image->vk.extent.width, range->baseMipLevel),
2057 u_minify(image->vk.extent.height, range->baseMipLevel),
2058 },
2059 },
2060 .baseArrayLayer = range->baseArrayLayer,
2061 .layerCount = vk_image_subresource_layer_count(&image->vk, range),
2062 };
2063
2064 VkClearAttachment clear_att = {
2065 .aspectMask = range->aspectMask,
2066 .colorAttachment = 0,
2067 .clearValue = *clear_val,
2068 };
2069
2070 if (vk_format_is_color(format)) {
2071 if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, clear_att.clearValue.color, 0)) {
2072 radv_fast_clear_color(cmd_buffer, &iview, &clear_att, NULL, NULL);
2073 fast_cleared = true;
2074 }
2075 } else {
2076 if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, range->aspectMask, &clear_rect,
2077 clear_att.clearValue.depthStencil, 0)) {
2078 radv_fast_clear_depth(cmd_buffer, &iview, clear_att.clearValue.depthStencil, clear_att.aspectMask, NULL, NULL);
2079 fast_cleared = true;
2080 }
2081 }
2082
2083 radv_image_view_finish(&iview);
2084 return fast_cleared;
2085 }
2086
/* Common implementation for vkCmdClearColorImage/vkCmdClearDepthStencilImage.
 * For each range, try a metadata fast clear first and otherwise clear every
 * mip level either with compute (cs=true) or with draw-based clears.
 */
static void
radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
                     const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
                     bool cs)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkFormat format = image->vk.format;
   VkClearValue internal_clear_value;

   if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
      internal_clear_value.color = clear_value->color;
   else
      internal_clear_value.depthStencil = clear_value->depthStencil;

   bool disable_compression = false;

   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
      /* When E5B9G9R9 isn't supported by the chosen path, pre-pack the clear
       * color into the shared-exponent encoding and clear as R32_UINT instead.
       */
      if (cs ? !radv_is_storage_image_format_supported(pdev, format)
             : !radv_is_colorbuffer_format_supported(pdev, format)) {
         format = VK_FORMAT_R32_UINT;
         internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

         uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);

         for (uint32_t r = 0; r < range_count; r++) {
            const VkImageSubresourceRange *range = &ranges[r];

            /* Don't use compressed image stores because they will use an incompatible format. */
            if (radv_layout_dcc_compressed(device, image, range->baseMipLevel, image_layout, queue_mask)) {
               disable_compression = cs;
               break;
            }
         }
      }
   }

   if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
      /* R4G4 isn't clearable directly: pack both 4-bit channels into one byte
       * and clear as R8_UINT.
       */
      uint8_t r, g;
      format = VK_FORMAT_R8_UINT;
      r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
      g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
      internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];

      /* Try to perform a fast clear first, otherwise fallback to
       * the legacy path.
       */
      if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, &internal_clear_value)) {
         continue;
      }

      for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); ++l) {
         /* 3D images clear every depth slice of the level; array images clear the selected layers. */
         const uint32_t layer_count = image->vk.image_type == VK_IMAGE_TYPE_3D
                                         ? u_minify(image->vk.extent.depth, range->baseMipLevel + l)
                                         : vk_image_subresource_layer_count(&image->vk, range);
         if (cs) {
            for (uint32_t s = 0; s < layer_count; ++s) {
               struct radv_meta_blit2d_surf surf;
               surf.format = format;
               surf.image = image;
               surf.level = range->baseMipLevel + l;
               surf.layer = range->baseArrayLayer + s;
               surf.aspect_mask = range->aspectMask;
               surf.disable_compression = disable_compression;
               radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
            }
         } else {
            assert(!disable_compression);
            radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count,
                                   &internal_clear_value);
         }
      }
   }

   if (disable_compression) {
      /* Uncompressed stores were used on a DCC-compressed image: reset DCC to
       * the uncompressed state so later compressed accesses see valid metadata.
       */
      enum radv_cmd_flush_bits flush_bits = 0;
      for (unsigned i = 0; i < range_count; i++) {
         if (radv_dcc_enabled(image, ranges[i].baseMipLevel))
            flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);
      }
      cmd_buffer->state.flush_bits |= flush_bits;
   }
}
2174
2175 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2176 radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2177 const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
2178 {
2179 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2180 VK_FROM_HANDLE(radv_image, image, image_h);
2181 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2182 struct radv_meta_saved_state saved_state;
2183 bool cs;
2184
2185 cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(device, image);
2186
2187 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
2188 */
2189 enum radv_meta_save_flags save_flags = RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING;
2190 if (cs)
2191 save_flags |= RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS;
2192 else
2193 save_flags |= RADV_META_SAVE_GRAPHICS_PIPELINE;
2194
2195 radv_meta_save(&saved_state, cmd_buffer, save_flags);
2196
2197 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, pRanges, cs);
2198
2199 radv_meta_restore(&saved_state, cmd_buffer);
2200 }
2201
2202 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2203 radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2204 const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
2205 const VkImageSubresourceRange *pRanges)
2206 {
2207 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2208 VK_FROM_HANDLE(radv_image, image, image_h);
2209 struct radv_meta_saved_state saved_state;
2210
2211 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
2212 radv_meta_save(&saved_state, cmd_buffer,
2213 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2214
2215 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, rangeCount, pRanges,
2216 false);
2217
2218 radv_meta_restore(&saved_state, cmd_buffer);
2219 }
2220
2221 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)2222 radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
2223 uint32_t rectCount, const VkClearRect *pRects)
2224 {
2225 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2226 struct radv_meta_saved_state saved_state;
2227 enum radv_cmd_flush_bits pre_flush = 0;
2228 enum radv_cmd_flush_bits post_flush = 0;
2229
2230 if (!cmd_buffer->state.render.active)
2231 return;
2232
2233 radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
2234
2235 /* FINISHME: We can do better than this dumb loop. It thrashes too much
2236 * state.
2237 */
2238 for (uint32_t a = 0; a < attachmentCount; ++a) {
2239 for (uint32_t r = 0; r < rectCount; ++r) {
2240 emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2241 cmd_buffer->state.render.view_mask);
2242 }
2243 }
2244
2245 radv_meta_restore(&saved_state, cmd_buffer);
2246 cmd_buffer->state.flush_bits |= post_flush;
2247 }
2248