/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * SPDX-License-Identifier: MIT
 */
#include "nir/nir_builder.h"
#include "radv_entrypoints.h"
#include "radv_meta.h"
#include "vk_common_entrypoints.h"
#include "vk_shader_module.h"

/*
 * GFX queue: compute shader implementation of image->buffer copies.
 * Compute queue: also implements buffer->image and image->image copies, and image clears.
 */

static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *img_coord = nir_iadd(&b, global_id, offset);
   nir_def *outval =
      nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *tmp = nir_imul(&b, pos_y, stride);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_def *coord = nir_replicate(&b, tmp, 4);

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);

   return b.shader;
}
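
/* For reference, the 16-byte push-constant block consumed by the itob
 * shaders (reconstructed from the nir_load_push_constant offsets above; the
 * struct name is illustrative, not part of the driver):
 *
 *    struct itob_push_constants {
 *       int32_t offset[3]; // source image offset; .z only loaded by the 3D variant
 *       int32_t stride;    // destination buffer row stride, in texels (byte 12)
 *    };
 */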

static VkResult
create_itob_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.itob.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.itob.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.itob.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.itob.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.itob.img_p_layout);
   }

   return result;
}

static VkResult
create_itob_pipeline(struct radv_device *device, bool is_3d, VkPipeline *pipeline)
{
   VkResult result;

   result = create_itob_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_itob_compute_shader(device, is_3d);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.itob.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_itob_pipeline(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   pipeline = is_3d ? &state->itob.pipeline_3d : &state->itob.pipeline;
   if (!*pipeline) {
      result = create_itob_pipeline(device, is_3d, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}
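
/* This and the other get_*_pipeline() helpers below follow the same pattern:
 * when meta state is initialized on demand, the pipeline is created lazily on
 * first use, under the meta-state mutex so that concurrent command-buffer
 * recording cannot race the creation. */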

/* Image to buffer - don't write using image accessors. */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
   VkResult result;

   result = create_itob_pipeline(device, false, &device->meta_state.itob.pipeline);
   if (result != VK_SUCCESS)
      return result;

   return create_itob_pipeline(device, true, &device->meta_state.itob.pipeline_3d);
}

static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
                                                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
}

static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *buf_coord = nir_imul(&b, pos_y, stride);
   buf_coord = nir_iadd(&b, buf_coord, pos_x);

   nir_def *coord = nir_iadd(&b, global_id, offset);
   nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
                                 is_3d ? nir_channel(&b, coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = dim);

   return b.shader;
}
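
/* The btoi shaders mirror itob: the 16-byte push-constant block holds the
 * destination image offset at byte 0 (.z only loaded by the 3D variant) and
 * the source buffer row stride, in texels, at byte 12 (reconstructed from
 * the nir_load_push_constant offsets above). */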

static VkResult
create_btoi_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.btoi.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.btoi.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.btoi.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.btoi.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.btoi.img_p_layout);
   }

   return result;
}

static VkResult
create_btoi_pipeline(struct radv_device *device, bool is_3d, VkPipeline *pipeline)
{
   VkResult result;

   result = create_btoi_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_btoi_compute_shader(device, is_3d);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.btoi.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_btoi_pipeline(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   pipeline = is_3d ? &state->btoi.pipeline_3d : &state->btoi.pipeline;
   if (!*pipeline) {
      result = create_btoi_pipeline(device, is_3d, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

/* Buffer to image - don't write using image accessors. */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
   VkResult result;

   result = create_btoi_pipeline(device, false, &device->meta_state.btoi.pipeline);
   if (result != VK_SUCCESS)
      return result;

   return create_btoi_pipeline(device, true, &device->meta_state.btoi.pipeline_3d);
}

static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
                                                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
}

/* Buffer to image - special path for R32G32B32 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_btoi_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *buf_coord = nir_imul(&b, pos_y, stride);
   buf_coord = nir_iadd(&b, buf_coord, pos_x);

   nir_def *img_coord = nir_iadd(&b, global_id, offset);

   nir_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
                                  nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));

   nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

   for (int chan = 0; chan < 3; chan++) {
      nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

      nir_def *coord = nir_replicate(&b, local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}
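
/* No 96-bit format can be written through a storage texel buffer, so the
 * R32G32B32 paths read the source through a 96-bit uniform texel buffer but
 * write the destination as three consecutive R32 texels per pixel (see
 * create_bview_for_r32g32b32() below) - hence the x coordinate scaled by 3.
 * Push constants (16 bytes): destination offset at byte 0, destination pitch
 * in R32 elements at byte 8, source row stride in texels at byte 12. */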

static VkResult
create_btoi_r32g32b32_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.btoi_r32g32b32.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result =
         radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.btoi_r32g32b32.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.btoi_r32g32b32.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.btoi_r32g32b32.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.btoi_r32g32b32.img_p_layout);
   }

   return result;
}

static VkResult
create_btoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   result = create_btoi_r32g32b32_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.btoi_r32g32b32.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_btoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);

   if (!state->btoi_r32g32b32.pipeline) {
      result = create_btoi_r32g32b32_pipeline(device, &state->btoi_r32g32b32.pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->btoi_r32g32b32.pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   return create_btoi_r32g32b32_pipeline(device, &device->meta_state.btoi_r32g32b32.pipeline);
}

static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->btoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, &state->alloc);
}

static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool src_3d, bool dst_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim src_dim = src_3d            ? GLSL_SAMPLER_DIM_3D
                                   : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                     : GLSL_SAMPLER_DIM_2D;
   enum glsl_sampler_dim dst_dim = dst_3d            ? GLSL_SAMPLER_DIM_3D
                                   : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                     : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(src_dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dst_dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_cs-%dd-%dd-%d", src_3d ? 3 : 2,
                                         dst_3d ? 3 : 2, samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, (src_3d || dst_3d) ? 3 : 2);

   nir_def *src_offset = nir_load_push_constant(&b, src_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = src_3d ? 12 : 8);
   nir_def *dst_offset = nir_load_push_constant(&b, dst_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = dst_3d ? 24 : 20);

   nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);

   nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_def *tex_vals[8];
   if (is_multisampled) {
      for (uint32_t i = 0; i < samples; i++) {
         tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i));
      }
   } else {
      tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + src_3d), nir_imm_int(&b, 0));
   }

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
                                 dst_3d ? nir_channel(&b, dst_coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   for (uint32_t i = 0; i < samples; i++) {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_imm_int(&b, i), tex_vals[i],
                            nir_imm_int(&b, 0), .image_dim = dst_dim);
   }

   return b.shader;
}
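
/* Push constants for the itoi shaders (24 bytes, reconstructed from the load
 * offsets above): source offset at byte 0 and destination offset at byte 12,
 * each loaded as two ints, or three for the 3D variants. For multisampled 2D
 * copies every sample is fetched with txf_ms and stored to the matching
 * sample of the destination. */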

static VkResult
create_itoi_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.itoi.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.itoi.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.itoi.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 24,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.itoi.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.itoi.img_p_layout);
   }

   return result;
}

static VkResult
create_itoi_pipeline(struct radv_device *device, bool src_3d, bool dst_3d, int samples, VkPipeline *pipeline)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result;

   result = create_itoi_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_itoi_compute_shader(device, src_3d, dst_3d, samples);

   result = radv_meta_create_compute_pipeline(device, cs, state->itoi.img_p_layout, pipeline);
   ralloc_free(cs);
   return result;
}

static VkResult
get_itoi_pipeline(struct radv_device *device, const struct radv_image *src_image, const struct radv_image *dst_image,
                  int samples, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool src_3d = src_image->vk.image_type == VK_IMAGE_TYPE_3D;
   const bool dst_3d = dst_image->vk.image_type == VK_IMAGE_TYPE_3D;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   if (src_3d && dst_3d)
      pipeline = &device->meta_state.itoi.pipeline_3d_3d;
   else if (src_3d)
      pipeline = &device->meta_state.itoi.pipeline_3d_2d;
   else if (dst_3d)
      pipeline = &device->meta_state.itoi.pipeline_2d_3d;
   else
      pipeline = &state->itoi.pipeline[samples_log2];

   if (!*pipeline) {
      result = create_itoi_pipeline(device, src_3d, dst_3d, samples, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

/* Image to image - don't write using image accessors. */
static VkResult
radv_device_init_meta_itoi_state(struct radv_device *device)
{
   VkResult result;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      uint32_t samples = 1 << i;
      result = create_itoi_pipeline(device, false, false, samples, &device->meta_state.itoi.pipeline[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   for (uint32_t src_3d = 0; src_3d < 2; src_3d++) {
      for (uint32_t dst_3d = 0; dst_3d < 2; dst_3d++) {
         VkPipeline *pipeline;
         if (src_3d && dst_3d)
            pipeline = &device->meta_state.itoi.pipeline_3d_3d;
         else if (src_3d)
            pipeline = &device->meta_state.itoi.pipeline_3d_2d;
         else if (dst_3d)
            pipeline = &device->meta_state.itoi.pipeline_2d_3d;
         else
            continue;

         result = create_itoi_pipeline(device, src_3d, dst_3d, 1, pipeline);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   return result;
}

static void
radv_device_finish_meta_itoi_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_2d_3d, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d_2d, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d_3d, &state->alloc);
}

static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "output_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
   nir_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);

   nir_def *src_stride = nir_channel(&b, src_offset, 2);
   nir_def *dst_stride = nir_channel(&b, dst_offset, 2);

   nir_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
   nir_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);

   nir_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
                                      nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));

   nir_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
                                      nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));

   for (int chan = 0; chan < 3; chan++) {
      /* src */
      nir_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
      nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);

      /* dst */
      nir_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);

      nir_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}
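
/* Same three-R32-texels-per-pixel trick as the btoi path above, applied on
 * both sides: source and destination are addressed in R32 elements, so both
 * x coordinates are scaled by 3, and the per-row strides (packed as the
 * third push-constant component of each offset) are in R32 elements. */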

/* Image to image - special path for R32G32B32 */
static VkResult
create_itoi_r32g32b32_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.itoi_r32g32b32.img_ds_layout) {
      const VkDescriptorSetLayoutBinding bindings[] = {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
         {
            .binding = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         },
      };

      result =
         radv_meta_create_descriptor_set_layout(device, 2, bindings, &device->meta_state.itoi_r32g32b32.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.itoi_r32g32b32.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 24,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.itoi_r32g32b32.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.itoi_r32g32b32.img_p_layout);
   }

   return result;
}

static VkResult
create_itoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   result = create_itoi_r32g32b32_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.itoi_r32g32b32.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_itoi_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);
   if (!state->itoi_r32g32b32.pipeline) {
      result = create_itoi_r32g32b32_pipeline(device, &state->itoi_r32g32b32.pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->itoi_r32g32b32.pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   return create_itoi_r32g32b32_pipeline(device, &device->meta_state.itoi_r32g32b32.pipeline);
}

static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->itoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, &state->alloc);
}

static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                 : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
   nir_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);

   nir_def *comps[4];
   comps[0] = nir_channel(&b, global_id, 0);
   comps[1] = nir_channel(&b, global_id, 1);
   comps[2] = layer;
   comps[3] = nir_undef(&b, 1, 32);
   global_id = nir_vec(&b, comps, 4);

   for (uint32_t i = 0; i < samples; i++) {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), clear_val,
                            nir_imm_int(&b, 0), .image_dim = dim);
   }

   return b.shader;
}
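
/* Push constants for the clear shaders (20 bytes): the clear value as four
 * 32-bit words at byte 0 and the destination layer at byte 16; every sample
 * of a multisampled destination is stored with the same value. */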

static VkResult
create_cleari_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.cleari.img_ds_layout) {
      const VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      };

      result = radv_meta_create_descriptor_set_layout(device, 1, &binding, &device->meta_state.cleari.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.cleari.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 20,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.cleari.img_ds_layout, 1, &pc_range,
                                                &device->meta_state.cleari.img_p_layout);
   }

   return result;
}

static VkResult
create_cleari_pipeline(struct radv_device *device, bool is_3d, int samples, VkPipeline *pipeline)
{
   VkResult result;

   result = create_cleari_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_cleari_compute_shader(device, is_3d, samples);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.cleari.img_p_layout, pipeline);
   ralloc_free(cs);
   return result;
}

static VkResult
get_cleari_pipeline(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
   const uint32_t samples = image->vk.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkResult result = VK_SUCCESS;
   VkPipeline *pipeline;

   mtx_lock(&state->mtx);

   if (is_3d) {
      pipeline = &state->cleari.pipeline_3d;
   } else {
      pipeline = &state->cleari.pipeline[samples_log2];
   }

   if (!*pipeline) {
      result = create_cleari_pipeline(device, is_3d, samples, pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = *pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
   VkResult result;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      uint32_t samples = 1 << i;
      result = create_cleari_pipeline(device, false, samples, &device->meta_state.cleari.pipeline[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   return create_cleari_pipeline(device, true, 1, &device->meta_state.cleari.pipeline_3d);
}

static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
}

/* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_cleari_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *global_x = nir_channel(&b, global_id, 0);
   nir_def *global_y = nir_channel(&b, global_id, 1);

   nir_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));

   for (unsigned chan = 0; chan < 3; chan++) {
      nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

      nir_def *coord = nir_replicate(&b, local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}
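
/* As in the copy paths, the R32G32B32 clear writes three R32 texels per
 * pixel. Push constants (16 bytes): the three clear words at byte 0 and the
 * row stride, in R32 elements, at byte 12. */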

static VkResult
create_cleari_r32g32b32_layout(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;

   if (!device->meta_state.cleari_r32g32b32.img_ds_layout) {
      const VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      };

      result = radv_meta_create_descriptor_set_layout(device, 1, &binding,
                                                      &device->meta_state.cleari_r32g32b32.img_ds_layout);
      if (result != VK_SUCCESS)
         return result;
   }

   if (!device->meta_state.cleari_r32g32b32.img_p_layout) {
      const VkPushConstantRange pc_range = {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .size = 16,
      };

      result = radv_meta_create_pipeline_layout(device, &device->meta_state.cleari_r32g32b32.img_ds_layout, 1,
                                                &pc_range, &device->meta_state.cleari_r32g32b32.img_p_layout);
   }

   return result;
}

static VkResult
create_cleari_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline)
{
   VkResult result;

   result = create_cleari_r32g32b32_layout(device);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);

   result = radv_meta_create_compute_pipeline(device, cs, device->meta_state.cleari_r32g32b32.img_p_layout, pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_cleari_r32g32b32_pipeline(struct radv_device *device, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);

   if (!state->cleari_r32g32b32.pipeline) {
      result = create_cleari_r32g32b32_pipeline(device, &state->cleari_r32g32b32.pipeline);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->cleari_r32g32b32.pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   return create_cleari_r32g32b32_pipeline(device, &device->meta_state.cleari_r32g32b32.pipeline);
}

static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->cleari_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, &state->alloc);
}

void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
   radv_device_finish_meta_itob_state(device);
   radv_device_finish_meta_btoi_state(device);
   radv_device_finish_meta_btoi_r32g32b32_state(device);
   radv_device_finish_meta_itoi_state(device);
   radv_device_finish_meta_itoi_r32g32b32_state(device);
   radv_device_finish_meta_cleari_state(device);
   radv_device_finish_meta_cleari_r32g32b32_state(device);
}

VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device, bool on_demand)
{
   VkResult result;

   if (on_demand)
      return VK_SUCCESS;

   result = radv_device_init_meta_itob_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_btoi_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_btoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_itoi_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_itoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_cleari_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_cleari_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static void
create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview,
             VkFormat format, VkImageAspectFlagBits aspects)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (format == VK_FORMAT_UNDEFINED)
      format = surf->format;

   radv_image_view_init(iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(surf->image),
                           .viewType = radv_meta_get_view_type(surf->image),
                           .format = format,
                           .subresourceRange = {.aspectMask = aspects,
                                                .baseMipLevel = surf->level,
                                                .levelCount = 1,
                                                .baseArrayLayer = surf->layer,
                                                .layerCount = 1},
                        },
                        0,
                        &(struct radv_image_view_extra_create_info){
                           .disable_compression = surf->disable_compression,
                        });
}

static void
create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, VkFormat format,
             struct radv_buffer_view *bview)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_buffer_view_init(bview, device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
                         VkBufferUsageFlagBits2KHR usage, VkBuffer *buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_device_memory mem;

   radv_device_memory_init(&mem, device, surf->image->bindings[0].bo);

   radv_create_buffer(device,
                      &(VkBufferCreateInfo){
                         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                         .pNext =
                            &(VkBufferUsageFlags2CreateInfoKHR){
                               .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
                               .usage = usage,
                            },
                         .flags = 0,
                         .size = surf->image->size,
                         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
                      },
                      NULL, buffer, true);

   radv_BindBufferMemory2(radv_device_to_handle(device), 1,
                          (VkBindBufferMemoryInfo[]){{
                             .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
                             .buffer = *buffer,
                             .memory = radv_device_memory_to_handle(&mem),
                             .memoryOffset = surf->image->bindings[0].offset,
                          }});

   radv_device_memory_finish(&mem);
}
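
/* create_buffer_from_image() aliases the image's memory rather than copying:
 * the temporary VkBuffer is bound to the image's BO at the image's own
 * offset, so buffer views created on it address the image's texels in
 * place. */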

static void
create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
                           VkFormat src_format, struct radv_buffer_view *bview)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   VkFormat format;

   switch (src_format) {
   case VK_FORMAT_R32G32B32_UINT:
      format = VK_FORMAT_R32_UINT;
      break;
   case VK_FORMAT_R32G32B32_SINT:
      format = VK_FORMAT_R32_SINT;
      break;
   case VK_FORMAT_R32G32B32_SFLOAT:
      format = VK_FORMAT_R32_SFLOAT;
      break;
   default:
      unreachable("invalid R32G32B32 format");
   }

   radv_buffer_view_init(bview, device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

/* GFX9+ has an issue where the HW does not calculate mipmap degradations
 * for block-compressed images correctly (see the comment in
 * radv_image_view_init). Some texels are unaddressable and cannot be copied
 * to/from by a compute shader. Here we will perform a buffer copy to copy the
 * texels that the hardware missed.
 *
 * GFX10 will not use this workaround because it can be fixed by adjusting its
 * image view descriptors instead.
 */
static void
fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_blit2d_buffer *buf_bsurf,
                   const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
                   bool to_image)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const unsigned mip_level = img_bsurf->level;
   const struct radv_image *image = img_bsurf->image;
   const struct radeon_surf *surf = &image->planes[0].surface;
   const struct radeon_info *gpu_info = &pdev->info;
   struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
   struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);

   /* GFX10+ handles 2D images with a different workaround, so only non-2D images take this path there. */
   if (gpu_info->gfx_level < GFX9 || (gpu_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
       image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format))
      return;

   /* The physical extent of the base mip */
   VkExtent2D hw_base_extent = {surf->u.gfx9.base_mip_width, surf->u.gfx9.base_mip_height};

   /* The hardware-calculated extent of the selected mip
    * (naive divide-by-two integer math)
    */
   VkExtent2D hw_mip_extent = {u_minify(hw_base_extent.width, mip_level), u_minify(hw_base_extent.height, mip_level)};

   /* The actual extent we want to copy */
   VkExtent2D mip_extent = {rect->width, rect->height};

   VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x, to_image ? rect->dst_y : rect->src_y};

   if (hw_mip_extent.width >= mip_offset.x + mip_extent.width &&
       hw_mip_extent.height >= mip_offset.y + mip_extent.height)
      return;

   if (!to_image) {
      /* If we are writing to a buffer, then we need to wait for the compute
       * shader to finish because it may write over the unaddressable texels
       * while we're fixing them. If we're writing to an image, we do not need
       * to wait because the compute shader cannot write to those texels.
       */
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
   }

   for (uint32_t y = 0; y < mip_extent.height; y++) {
      uint32_t coordY = y + mip_offset.y;
      /* If the default copy algorithm (done previously) has already seen this
       * scanline, then we can bias the starting X coordinate over to skip the
       * region already copied by the default copy.
       */
      uint32_t x = (coordY < hw_mip_extent.height) ? hw_mip_extent.width : 0;
      for (; x < mip_extent.width; x++) {
         uint32_t coordX = x + mip_offset.x;
         uint64_t addr = ac_surface_addr_from_coord(addrlib, gpu_info, surf, &surf_info, mip_level, coordX, coordY,
                                                    img_bsurf->layer, image->vk.image_type == VK_IMAGE_TYPE_3D);
         struct radeon_winsys_bo *img_bo = image->bindings[0].bo;
         struct radeon_winsys_bo *mem_bo = buf_bsurf->buffer->bo;
         const uint64_t img_offset = image->bindings[0].offset + addr;
         /* buf_bsurf->offset already includes the layer offset */
         const uint64_t mem_offset =
            buf_bsurf->buffer->offset + buf_bsurf->offset + y * buf_bsurf->pitch * surf->bpe + x * surf->bpe;
         if (to_image) {
            radv_copy_buffer(cmd_buffer, mem_bo, img_bo, mem_offset, img_offset, surf->bpe);
         } else {
            radv_copy_buffer(cmd_buffer, img_bo, mem_bo, img_offset, mem_offset, surf->bpe);
         }
      }
   }
}
1301 
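/* Note (assumption): the stride computed here is in units of R32 texels,
 * matching the R32-based copy/clear strategy of the R32G32B32 paths below;
 * the x3 on the legacy path converts a per-texel pitch into 32-bit
 * components.
 */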
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   unsigned stride;

   if (pdev->info.gfx_level >= GFX9) {
      stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
   } else {
      stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
   }

   return stride;
}

static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_buffer_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {
                                         .sampler = VK_NULL_HANDLE,
                                         .imageView = radv_image_view_to_handle(src),
                                         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                      },
                                   }},
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                          struct radv_meta_blit2d_buffer *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view src_view;
   struct radv_buffer_view dst_view;
   VkPipeline pipeline;
   VkResult result;

   result = get_itob_pipeline(device, src->image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask);
   create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
   itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

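   /* Four words: the source texel offset (x, y), the base layer and the
    * destination buffer pitch in texels.
    */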
   unsigned push_constants[4] = {rect->src_x, rect->src_y, src->layer, dst->pitch};
   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itob.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);
   fixup_gfx9_cs_copy(cmd_buffer, dst, src, rect, false);

   radv_image_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
}

static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
                                       struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view src_view, dst_view;
   unsigned dst_offset = 0;
   VkPipeline pipeline;
   unsigned stride;
   VkBuffer buffer;
   VkResult result;

   result = get_btoi_r32g32b32_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   /* This special btoi path for R32G32B32 formats accesses the linear image
    * as a buffer bound to the same underlying memory. The compute shader
    * copies each component separately using an R32 format.
    */
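   /* For example (hypothetical numbers): a row of 5 R32G32B32 texels is seen
    * by the shader as 15 consecutive R32 texels, so component c of texel x
    * lives at R32 index 3 * x + c.
    */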
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &buffer);

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, &dst_view);
   btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   unsigned push_constants[4] = {
      rect->dst_x,
      rect->dst_y,
      stride,
      src->pitch,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}

static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, struct radv_image_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dst),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});
}

void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
                             struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view src_view;
   struct radv_image_view dst_view;
   VkPipeline pipeline;
   VkResult result;

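   /* R32G32B32 (96-bit) formats are routed to the special buffer-based path
    * below rather than the regular storage-image path (presumably because
    * 96-bit storage images are not supported).
    */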
   if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT || dst->image->vk.format == VK_FORMAT_R32G32B32_SINT ||
       dst->image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, rect);
      return;
   }

   result = get_btoi_pipeline(device, dst->image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   unsigned push_constants[4] = {
      rect->dst_x,
      rect->dst_y,
      dst->layer,
      src->pitch,
   };
   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);
   fixup_gfx9_cs_copy(cmd_buffer, src, dst, rect, true);

   radv_image_view_finish(&dst_view);
   radv_buffer_view_finish(&src_view);
}

static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                                      struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view src_view, dst_view;
   unsigned src_offset = 0, dst_offset = 0;
   unsigned src_stride, dst_stride;
   VkBuffer src_buffer, dst_buffer;
   VkPipeline pipeline;
   VkResult result;

   result = get_itoi_r32g32b32_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   /* 96-bit formats are only compatible with themselves. */
   assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
          dst->format == VK_FORMAT_R32G32B32_SFLOAT);

   /* This special itoi path for R32G32B32 formats accesses the linear images
    * as buffers bound to the same underlying memory. The compute shader
    * copies each component separately using an R32 format.
    */
   create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_2_UNIFORM_TEXEL_BUFFER_BIT_KHR, &src_buffer);
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &dst_buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, dst->format, &dst_view);
   itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
   dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   unsigned push_constants[6] = {
      rect->src_x, rect->src_y, src_stride, rect->dst_x, rect->dst_y, dst_stride,
   };
   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

   radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
   radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}

static void
itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_image_view *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, 2,
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {
                                                                    .sampler = VK_NULL_HANDLE,
                                                                    .imageView = radv_image_view_to_handle(src),
                                                                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                                 },
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {
                                                                 .sampler = VK_NULL_HANDLE,
                                                                 .imageView = radv_image_view_to_handle(dst),
                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                              },
                                                           }}});
}

void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                            struct radv_meta_blit2d_surf *dst, struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view src_view, dst_view;
   uint32_t samples = src->image->vk.samples;
   VkPipeline pipeline;
   VkResult result;

   if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
       src->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, rect);
      return;
   }

   result = get_itoi_pipeline(device, src->image, dst->image, samples, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   u_foreach_bit (i, dst->aspect_mask) {
      unsigned dst_aspect_mask = 1u << i;
      unsigned src_aspect_mask = dst_aspect_mask;
      VkFormat depth_format = 0;
      if (dst_aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
         depth_format = vk_format_stencil_only(dst->image->vk.format);
      else if (dst_aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
         depth_format = vk_format_depth_only(dst->image->vk.format);
      else {
         /*
          * "Multi-planar images can only be copied on a per-plane basis, and the subresources used in each region when
          * copying to or from such images must specify only one plane, though different regions can specify different
          * planes."
          */
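         /* x & (x - 1) clears the lowest set bit, so the expression is zero
          * exactly when at most one aspect (one plane) is selected.
          */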
         assert((dst->aspect_mask & (dst->aspect_mask - 1)) == 0);
         assert((src->aspect_mask & (src->aspect_mask - 1)) == 0);
         src_aspect_mask = src->aspect_mask;
      }

      create_iview(cmd_buffer, src, &src_view, depth_format, src_aspect_mask);
      create_iview(cmd_buffer, dst, &dst_view, depth_format, dst_aspect_mask);

      itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

      radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

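      /* Six words: source texel offset (x, y) and layer, then destination
       * texel offset (x, y) and layer.
       */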
      unsigned push_constants[6] = {
         rect->src_x, rect->src_y, src->layer, rect->dst_x, rect->dst_y, dst->layer,
      };
      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi.img_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rect->width, rect->height, 1);

      radv_image_view_finish(&src_view);
      radv_image_view_finish(&dst_view);
   }
}

static void
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.cleari_r32g32b32.img_p_layout, 0, 1,
                                 (VkWriteDescriptorSet[]){{
                                    .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                    .dstBinding = 0,
                                    .dstArrayElement = 0,
                                    .descriptorCount = 1,
                                    .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                    .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
                                 }});
}

static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                                   const VkClearColorValue *clear_color)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_buffer_view dst_view;
   VkPipeline pipeline;
   unsigned stride;
   VkBuffer buffer;
   VkResult result;

   result = get_cleari_r32g32b32_pipeline(device, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   /* This special clear path for R32G32B32 formats accesses the linear image
    * as a buffer bound to the same underlying memory. The compute shader
    * clears each component separately using an R32 format.
    */
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, &dst_view);
   cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

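   /* Only three color words are pushed: R32G32B32 has no alpha channel, so
    * the fourth slot carries the stride instead.
    */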
   unsigned push_constants[4] = {
      clear_color->uint32[0],
      clear_color->uint32[1],
      clear_color->uint32[2],
      stride,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);

   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}

static void
cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari.img_p_layout, 0,
                                 1,
                                 (VkWriteDescriptorSet[]){
                                    {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                     .dstBinding = 0,
                                     .dstArrayElement = 0,
                                     .descriptorCount = 1,
                                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                     .pImageInfo =
                                        (VkDescriptorImageInfo[]){
                                           {
                                              .sampler = VK_NULL_HANDLE,
                                              .imageView = radv_image_view_to_handle(dst_iview),
                                              .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                           },
                                        }},
                                 });
}

void
radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                         const VkClearColorValue *clear_color)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view dst_iview;
   VkPipeline pipeline;
   VkResult result;

   if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
       dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
      return;
   }

   result = get_cleari_pipeline(device, dst->image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   cleari_bind_descriptors(cmd_buffer, &dst_iview);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

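   /* Five words: the full RGBA clear color followed by the destination
    * layer. The dispatch below covers the whole image extent.
    */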
   unsigned push_constants[5] = {
      clear_color->uint32[0], clear_color->uint32[1], clear_color->uint32[2], clear_color->uint32[3], dst->layer,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);

   radv_image_view_finish(&dst_iview);
}
1825