1 /*
2 * Copyright © 2021 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/common/v3d_tfu.h"
29 #include "broadcom/common/v3d_util.h"
30 #include "broadcom/cle/v3dx_pack.h"
31 #include "broadcom/compiler/v3d_compiler.h"
32
33 struct rcl_clear_info {
34 const union v3dv_clear_value *clear_value;
35 struct v3dv_image *image;
36 VkImageAspectFlags aspects;
37 uint32_t level;
38 };
39
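/* Emits the fixed RCL prologue: common tile rendering mode configuration,
 * render target setup and, when clear_info is provided, the clear color and
 * depth/stencil clear values. Returns the RCL so callers can keep emitting
 * into it, or NULL on OOM.
 */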
40 static struct v3dv_cl *
41 emit_rcl_prologue(struct v3dv_job *job,
42 struct v3dv_meta_framebuffer *fb,
43 const struct rcl_clear_info *clear_info)
44 {
45 const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
46
47 struct v3dv_cl *rcl = &job->rcl;
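/* Reserve space for the prologue packets plus a conservative estimate of the
 * supertile coordinate packets that will be emitted into this RCL per layer.
 */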
48 v3dv_cl_ensure_space_with_branch(rcl, 200 +
49 tiling->layers * 256 *
50 cl_packet_length(SUPERTILE_COORDINATES));
51 if (job->cmd_buffer->state.oom)
52 return NULL;
53
54 assert(!tiling->msaa || !tiling->double_buffer);
55 cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
56 config.early_z_disable = true;
57 config.image_width_pixels = tiling->width;
58 config.image_height_pixels = tiling->height;
59 config.number_of_render_targets = 1;
60 config.multisample_mode_4x = tiling->msaa;
61 config.double_buffer_in_non_ms_mode = tiling->double_buffer;
62 #if V3D_VERSION == 42
63 config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
64 #endif
65 #if V3D_VERSION >= 71
66 config.log2_tile_width = log2_tile_size(tiling->tile_width);
67 config.log2_tile_height = log2_tile_size(tiling->tile_height);
68 /* FIXME: ideally we would like this assert to be on the packet header (as
69 * it is generic, so it also applies to GL). We would need to expand
70 * gen_pack_header for that.
71 */
72 assert(config.log2_tile_width == config.log2_tile_height ||
73 config.log2_tile_width == config.log2_tile_height + 1);
74 #endif
75 config.internal_depth_type = fb->internal_depth_type;
76 }
77
78 const uint32_t *color = NULL;
79 if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
80 UNUSED uint32_t clear_pad = 0;
81 if (clear_info->image) {
82 const struct v3dv_image *image = clear_info->image;
83
84 /* From vkCmdClearColorImage:
85 * "image must not use any of the formats that require a sampler
86 * YCBCR conversion"
87 */
88 assert(image->plane_count == 1);
89 const struct v3d_resource_slice *slice =
90 &image->planes[0].slices[clear_info->level];
91 if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
92 slice->tiling == V3D_TILING_UIF_XOR) {
93 int uif_block_height = v3d_utile_height(image->planes[0].cpp) * 2;
94
95 uint32_t implicit_padded_height =
96 align(tiling->height, uif_block_height) / uif_block_height;
97
98 if (slice->padded_height_of_output_image_in_uif_blocks -
99 implicit_padded_height >= 15) {
100 clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
101 }
102 }
103 }
104
105 color = &clear_info->clear_value->color[0];
106
107 #if V3D_VERSION == 42
108 cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
109 clear.clear_color_low_32_bits = color[0];
110 clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
111 clear.render_target_number = 0;
112 };
113
114 if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
115 cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
116 clear.clear_color_mid_low_32_bits =
117 ((color[1] >> 24) | (color[2] << 8));
118 clear.clear_color_mid_high_24_bits =
119 ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
120 clear.render_target_number = 0;
121 };
122 }
123
124 if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
125 cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
126 clear.uif_padded_height_in_uif_blocks = clear_pad;
127 clear.clear_color_high_16_bits = color[3] >> 16;
128 clear.render_target_number = 0;
129 };
130 }
131 #endif
132 }
133
134 #if V3D_VERSION == 42
135 cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
136 rt.render_target_0_internal_bpp = tiling->internal_bpp;
137 rt.render_target_0_internal_type = fb->internal_type;
138 rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
139 }
140 #endif
141
142 #if V3D_VERSION >= 71
143 cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART1, rt) {
144 if (color)
145 rt.clear_color_low_bits = color[0];
146 rt.internal_bpp = tiling->internal_bpp;
147 rt.internal_type_and_clamping = v3dX(clamp_for_format_and_type)(fb->internal_type,
148 fb->vk_format);
149 rt.stride =
150 v3d_compute_rt_row_row_stride_128_bits(tiling->tile_width,
151 v3d_internal_bpp_words(rt.internal_bpp));
152 rt.base_address = 0;
153 rt.render_target_number = 0;
154 }
155
156 if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
157 cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART2, rt) {
158 rt.clear_color_mid_bits = /* 40 bits (32 + 8) */
159 ((uint64_t) color[1]) |
160 (((uint64_t) (color[2] & 0xff)) << 32);
161 rt.render_target_number = 0;
162 }
163 }
164
165 if (color && tiling->internal_bpp >= V3D_INTERNAL_BPP_128) {
166 cl_emit(rcl, TILE_RENDERING_MODE_CFG_RENDER_TARGET_PART3, rt) {
167 rt.clear_color_top_bits = /* 56 bits (24 + 32) */
168 (((uint64_t) (color[2] & 0xffffff00)) >> 8) |
169 (((uint64_t) (color[3])) << 24);
170 rt.render_target_number = 0;
171 }
172 }
173 #endif
174
175 cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
176 clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
177 clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
178 };
179
180 cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
181 init.use_auto_chained_tile_lists = true;
182 init.size_of_first_block_in_chained_tile_lists =
183 TILE_ALLOCATION_BLOCK_SIZE_64B;
184 }
185
186 return rcl;
187 }
188
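/* Emits the per-frame RCL setup: tile list base address, supertile
 * configuration and the initial tile state required by the GFXH-1742
 * workaround, including the initial tile clears when a clear value is given.
 */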
189 static void
190 emit_frame_setup(struct v3dv_job *job,
191 uint32_t min_layer,
192 const union v3dv_clear_value *clear_value)
193 {
194 v3dv_return_if_oom(NULL, job);
195
196 const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
197
198 struct v3dv_cl *rcl = &job->rcl;
199
200 const uint32_t tile_alloc_offset =
201 64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
202 cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
203 list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
204 }
205
206 cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
207 config.number_of_bin_tile_lists = 1;
208 config.total_frame_width_in_tiles = tiling->draw_tiles_x;
209 config.total_frame_height_in_tiles = tiling->draw_tiles_y;
210
211 config.supertile_width_in_tiles = tiling->supertile_width;
212 config.supertile_height_in_tiles = tiling->supertile_height;
213
214 config.total_frame_width_in_supertiles =
215 tiling->frame_width_in_supertiles;
216 config.total_frame_height_in_supertiles =
217 tiling->frame_height_in_supertiles;
218 }
219
220 /* Implement the GFXH-1742 workaround. Also, if we are clearing, we have
221 * to do it here.
222 */
223 for (int i = 0; i < 2; i++) {
224 cl_emit(rcl, TILE_COORDINATES, coords);
225 cl_emit(rcl, END_OF_LOADS, end);
226 cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
227 store.buffer_to_store = NONE;
228 }
229 /* When using double-buffering, we need to clear both buffers (unless
230 * we only have a single tile to render).
231 */
232 if (clear_value &&
233 (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
234 #if V3D_VERSION == 42
235 cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
236 clear.clear_z_stencil_buffer = true;
237 clear.clear_all_render_targets = true;
238 }
239 #endif
240 #if V3D_VERSION >= 71
241 cl_emit(rcl, CLEAR_RENDER_TARGETS, clear);
242 #endif
243 }
244 cl_emit(rcl, END_OF_TILE_MARKER, end);
245 }
246
247 cl_emit(rcl, FLUSH_VCD_CACHE, flush);
248 }
249
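/* Emits one SUPERTILE_COORDINATES packet for every supertile covered by the
 * framebuffer so the render control list processes the whole frame.
 */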
250 static void
251 emit_supertile_coordinates(struct v3dv_job *job,
252 struct v3dv_meta_framebuffer *framebuffer)
253 {
254 v3dv_return_if_oom(NULL, job);
255
256 struct v3dv_cl *rcl = &job->rcl;
257
258 const uint32_t min_y = framebuffer->min_y_supertile;
259 const uint32_t max_y = framebuffer->max_y_supertile;
260 const uint32_t min_x = framebuffer->min_x_supertile;
261 const uint32_t max_x = framebuffer->max_x_supertile;
262
263 for (int y = min_y; y <= max_y; y++) {
264 for (int x = min_x; x <= max_x; x++) {
265 cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
266 coords.column_number_in_supertiles = x;
267 coords.row_number_in_supertiles = y;
268 }
269 }
270 }
271 }
272
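/* Emits a TLB load from a linearly laid out (raster) buffer. */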
273 static void
274 emit_linear_load(struct v3dv_cl *cl,
275 uint32_t buffer,
276 struct v3dv_bo *bo,
277 uint32_t offset,
278 uint32_t stride,
279 uint32_t format)
280 {
281 cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
282 load.buffer_to_load = buffer;
283 load.address = v3dv_cl_address(bo, offset);
284 load.input_image_format = format;
285 load.memory_format = V3D_TILING_RASTER;
286 load.height_in_ub_or_stride = stride;
287 load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
288 }
289 }
290
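/* Emits a TLB store to a linearly laid out (raster) buffer, storing all
 * samples when msaa is set.
 */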
291 static void
292 emit_linear_store(struct v3dv_cl *cl,
293 uint32_t buffer,
294 struct v3dv_bo *bo,
295 uint32_t offset,
296 uint32_t stride,
297 bool msaa,
298 uint32_t format)
299 {
300 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
301 store.buffer_to_store = RENDER_TARGET_0;
302 store.address = v3dv_cl_address(bo, offset);
303 store.clear_buffer_being_stored = false;
304 store.output_image_format = format;
305 store.memory_format = V3D_TILING_RASTER;
306 store.height_in_ub_or_stride = stride;
307 store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
308 V3D_DECIMATE_MODE_SAMPLE_0;
309 }
310 }
311
312 /* Chooses a tile buffer format that is appropriate for the copy operation.
313 * Typically this is the image's render target type; however, if we are
314 * copying depth/stencil to/from a buffer, the hardware can't do raster
315 * loads/stores, so we need to load and store to/from a tile color buffer
316 * using a compatible color format.
317 */
318 static uint32_t
319 choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
320 VkImageAspectFlags aspect,
321 bool for_store,
322 bool is_copy_to_buffer,
323 bool is_copy_from_buffer)
324 {
325 /* At this point the framebuffer was already lowered to single-plane */
326 assert(framebuffer->format->plane_count == 1);
327
328 if (is_copy_to_buffer || is_copy_from_buffer) {
329 switch (framebuffer->vk_format) {
330 case VK_FORMAT_D16_UNORM:
331 return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
332 case VK_FORMAT_D32_SFLOAT:
333 return V3D_OUTPUT_IMAGE_FORMAT_R32F;
334 case VK_FORMAT_X8_D24_UNORM_PACK32:
335 return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
336 case VK_FORMAT_D24_UNORM_S8_UINT:
337 /* When storing the stencil aspect of a combined depth/stencil image
338 * to a buffer, the Vulkan spec states that the output buffer must
339 * have packed stencil values, so we choose an R8UI format for our
340 * store outputs. For the load input we still want RGBA8UI since the
341 * source image contains 4 channels (including the 3 channels
342 * containing the 24-bit depth value).
343 *
344 * When loading the stencil aspect of a combined depth/stencil image
345 * from a buffer, we read packed 8-bit stencil values from the buffer
346 * that we need to put into the LSB of the 32-bit format (the R
347 * channel), so we use R8UI. For the store, if we used R8UI then we
348 * would write 8-bit stencil values consecutively over depth channels,
349 * so we need to use RGBA8UI. This will write each stencil value in
350 * its correct position, but will overwrite depth values (channels G,
351 * B, A) with undefined values. To fix this, we will have to restore
352 * the depth aspect from the Z tile buffer, which we should pre-load
353 * from the image before the store.
354 */
355 if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
356 return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
357 } else {
358 assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
359 if (is_copy_to_buffer) {
360 return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
361 V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
362 } else {
363 assert(is_copy_from_buffer);
364 return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
365 V3D_OUTPUT_IMAGE_FORMAT_R8UI;
366 }
367 }
368 default: /* Color formats */
369 return framebuffer->format->planes[0].rt_type;
370 break;
371 }
372 } else {
373 return framebuffer->format->planes[0].rt_type;
374 }
375 }
376
377 static inline bool
378 format_needs_rb_swap(struct v3dv_device *device,
379 VkFormat format)
380 {
381 /* We call these helpers on framebuffer formats, which at this point
382 * should be single-plane.
383 */
384 assert(vk_format_get_plane_count(format) == 1);
385 const uint8_t *swizzle = v3dv_get_format_swizzle(device, format, 0);
386 return v3dv_format_swizzle_needs_rb_swap(swizzle);
387 }
388
389 static inline bool
390 format_needs_reverse(struct v3dv_device *device,
391 VkFormat format)
392 {
393 /* We call these helpers on framebuffer formats, which at this point
394 * should be single-plane.
395 */
396 assert(vk_format_get_plane_count(format) == 1);
397 const uint8_t *swizzle = v3dv_get_format_swizzle(device, format, 0);
398 return v3dv_format_swizzle_needs_reverse(swizzle);
399 }
400
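/* Emits a TLB load from one layer/level of an image, picking the tile buffer
 * (color vs. depth/stencil) and the channel swizzling required by the copy.
 */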
401 static void
402 emit_image_load(struct v3dv_device *device,
403 struct v3dv_cl *cl,
404 struct v3dv_meta_framebuffer *framebuffer,
405 struct v3dv_image *image,
406 VkImageAspectFlags aspect,
407 uint32_t layer,
408 uint32_t mip_level,
409 bool is_copy_to_buffer,
410 bool is_copy_from_buffer)
411 {
412 uint8_t plane = v3dv_plane_from_aspect(aspect);
413 uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer, plane);
414
415 /* For multi-plane formats we copy plane by plane to the color TLB.
416 * The framebuffer format was already selected to be a single-plane
417 * format compatible with the TLB. We still need to use the real plane
418 * to get the address, etc. from the source image.
419 */
420 assert(framebuffer->format->plane_count == 1);
421 /* For image to/from buffer copies we always load to and store from RT0,
422 * even for depth/stencil aspects, because the hardware can't do raster
423 * stores or loads from/to the depth/stencil tile buffers.
424 */
425 bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
426 image->format->plane_count > 1 ||
427 aspect == VK_IMAGE_ASPECT_COLOR_BIT;
428
429 const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
430 cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
431 load.buffer_to_load = load_to_color_tlb ?
432 RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
433
434 load.address = v3dv_cl_address(image->planes[plane].mem->bo, layer_offset);
435 load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
436 is_copy_to_buffer,
437 is_copy_from_buffer);
438 load.memory_format = slice->tiling;
439
440 /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
441 * expects the depth value in the LSB bits of each 32-bit pixel.
442 * Unfortunately, the hardware seems to put the S8/X8 bits there and the
443 * depth bits on the MSB. To work around that we can reverse the channel
444 * order and then swap the R/B channels to get what we want.
445 *
446 * NOTE: reversing and swapping only gets us the behavior we want if the
447 * operations happen in that exact order, which seems to be the case when
448 * done on the tile buffer load operations. On the store, it seems the
449 * order is not the same. The order on the store is probably reversed so
450 * that reversing and swapping on both the load and the store preserves
451 * the original order of the channels in memory.
452 *
453 * Notice that we only need to do this when copying to a buffer, where
454 * depth and stencil aspects are copied as separate regions and
455 * the spec expects them to be tightly packed.
456 */
457 bool needs_rb_swap = false;
458 bool needs_chan_reverse = false;
459 if (is_copy_to_buffer &&
460 (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
461 (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
462 (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
463 needs_rb_swap = true;
464 needs_chan_reverse = true;
465 } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
466 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
467 /* This is not a raw data copy (i.e. we are clearing the image),
468 * so we need to make sure we respect the format swizzle.
469 */
470 needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
471 needs_chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
472 }
473
474 load.r_b_swap = needs_rb_swap;
475 load.channel_reverse = needs_chan_reverse;
476
477 if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
478 slice->tiling == V3D_TILING_UIF_XOR) {
479 load.height_in_ub_or_stride =
480 slice->padded_height_of_output_image_in_uif_blocks;
481 } else if (slice->tiling == V3D_TILING_RASTER) {
482 load.height_in_ub_or_stride = slice->stride;
483 }
484
485 if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
486 load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
487 else
488 load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
489 }
490 }
491
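/* Emits a TLB store to one layer/level of an image, mirroring the swizzle
 * logic in emit_image_load() and selecting the decimate mode for MSAA stores
 * and resolves.
 */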
492 static void
493 emit_image_store(struct v3dv_device *device,
494 struct v3dv_cl *cl,
495 struct v3dv_meta_framebuffer *framebuffer,
496 struct v3dv_image *image,
497 VkImageAspectFlags aspect,
498 uint32_t layer,
499 uint32_t mip_level,
500 bool is_copy_to_buffer,
501 bool is_copy_from_buffer,
502 bool is_multisample_resolve)
503 {
504 uint8_t plane = v3dv_plane_from_aspect(aspect);
505 uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer, plane);
506
507 /*
508 * For multi-plane formats we copy plane by plane to the color TLB.
509 * The framebuffer format was already selected to be a single-plane
510 * format compatible with the TLB. We still need to use the real plane
511 * to get the address, etc.
512 */
513 assert(framebuffer->format->plane_count == 1);
514
515 bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
516 image->format->plane_count > 1 ||
517 aspect == VK_IMAGE_ASPECT_COLOR_BIT;
518
519 const struct v3d_resource_slice *slice = &image->planes[plane].slices[mip_level];
520 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
521 store.buffer_to_store = store_from_color_tlb ?
522 RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
523
524 store.address = v3dv_cl_address(image->planes[plane].mem->bo, layer_offset);
525
526 store.clear_buffer_being_stored = false;
527
528 /* See rationale in emit_image_load() */
529 bool needs_rb_swap = false;
530 bool needs_chan_reverse = false;
531 if (is_copy_from_buffer &&
532 (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
533 (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
534 (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
535 needs_rb_swap = true;
536 needs_chan_reverse = true;
537 } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
538 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
539 needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
540 needs_chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
541 }
542
543 store.r_b_swap = needs_rb_swap;
544 store.channel_reverse = needs_chan_reverse;
545
546 store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
547 is_copy_to_buffer,
548 is_copy_from_buffer);
549 store.memory_format = slice->tiling;
550 if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
551 slice->tiling == V3D_TILING_UIF_XOR) {
552 store.height_in_ub_or_stride =
553 slice->padded_height_of_output_image_in_uif_blocks;
554 } else if (slice->tiling == V3D_TILING_RASTER) {
555 store.height_in_ub_or_stride = slice->stride;
556 }
557
558 if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
559 store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
560 else if (is_multisample_resolve)
561 store.decimate_mode = V3D_DECIMATE_MODE_4X;
562 else
563 store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
564 }
565 }
566
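/* Emits the generic tile list for copying one image layer to a buffer: the
 * layer is loaded into the TLB and then stored out as a linear buffer region.
 */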
567 static void
568 emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
569 struct v3dv_meta_framebuffer *framebuffer,
570 struct v3dv_buffer *buffer,
571 struct v3dv_image *image,
572 uint32_t layer_offset,
573 const VkBufferImageCopy2 *region)
574 {
575 struct v3dv_cl *cl = &job->indirect;
576 v3dv_cl_ensure_space(cl, 200, 1);
577 v3dv_return_if_oom(NULL, job);
578
579 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
580
581 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
582
583 /* Load image to TLB */
584 assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
585 layer_offset < image->vk.array_layers) ||
586 layer_offset < image->vk.extent.depth);
587
588 const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
589 region->imageSubresource.baseArrayLayer + layer_offset :
590 region->imageOffset.z + layer_offset;
591
592 emit_image_load(job->device, cl, framebuffer, image,
593 region->imageSubresource.aspectMask,
594 image_layer,
595 region->imageSubresource.mipLevel,
596 true, false);
597
598 cl_emit(cl, END_OF_LOADS, end);
599
600 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
601
602 /* Store TLB to buffer */
603 uint32_t width, height;
604 if (region->bufferRowLength == 0)
605 width = region->imageExtent.width;
606 else
607 width = region->bufferRowLength;
608
609 if (region->bufferImageHeight == 0)
610 height = region->imageExtent.height;
611 else
612 height = region->bufferImageHeight;
613
614 /* Handle copy from compressed format */
615 width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
616 height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
617
618 /* If we are storing stencil from a combined depth/stencil format, the
619 * Vulkan spec states that the output buffer must have packed stencil
620 * values, where each stencil value is 1 byte.
621 */
622 uint8_t plane = v3dv_plane_from_aspect(region->imageSubresource.aspectMask);
623 uint32_t cpp =
624 region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
625 1 : image->planes[plane].cpp;
626 uint32_t buffer_stride = width * cpp;
627 uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
628 height * buffer_stride * layer_offset;
629
630 uint32_t format = choose_tlb_format(framebuffer,
631 region->imageSubresource.aspectMask,
632 true, true, false);
633 bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;
634
635 emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
636 buffer_offset, buffer_stride, msaa, format);
637
638 cl_emit(cl, END_OF_TILE_MARKER, end);
639
640 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
641
642 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
643 branch.start = tile_list_start;
644 branch.end = v3dv_cl_get_address(cl);
645 }
646 }
647
648 static void
649 emit_copy_layer_to_buffer(struct v3dv_job *job,
650 struct v3dv_buffer *buffer,
651 struct v3dv_image *image,
652 struct v3dv_meta_framebuffer *framebuffer,
653 uint32_t layer,
654 const VkBufferImageCopy2 *region)
655 {
656 emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
657 image, layer, region);
658 emit_supertile_coordinates(job, framebuffer);
659 }
660
661 void
662 v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
663 struct v3dv_buffer *buffer,
664 struct v3dv_image *image,
665 struct v3dv_meta_framebuffer *framebuffer,
666 const VkBufferImageCopy2 *region)
667 {
668 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
669 v3dv_return_if_oom(NULL, job);
670
671 emit_frame_setup(job, 0, NULL);
672 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
673 emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
674 cl_emit(rcl, END_OF_RENDERING, end);
675 }
676
677 static void
678 emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
679 struct v3dv_meta_framebuffer *framebuffer,
680 struct v3dv_image *dst,
681 struct v3dv_image *src,
682 uint32_t layer_offset,
683 const VkImageResolve2 *region)
684 {
685 struct v3dv_cl *cl = &job->indirect;
686 v3dv_cl_ensure_space(cl, 200, 1);
687 v3dv_return_if_oom(NULL, job);
688
689 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
690
691 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
692
693 assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
694 layer_offset < src->vk.array_layers) ||
695 layer_offset < src->vk.extent.depth);
696
697 const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
698 region->srcSubresource.baseArrayLayer + layer_offset :
699 region->srcOffset.z + layer_offset;
700
701 emit_image_load(job->device, cl, framebuffer, src,
702 region->srcSubresource.aspectMask,
703 src_layer,
704 region->srcSubresource.mipLevel,
705 false, false);
706
707 cl_emit(cl, END_OF_LOADS, end);
708
709 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
710
711 assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
712 layer_offset < dst->vk.array_layers) ||
713 layer_offset < dst->vk.extent.depth);
714
715 const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
716 region->dstSubresource.baseArrayLayer + layer_offset :
717 region->dstOffset.z + layer_offset;
718
719 bool is_depth_or_stencil =
720 region->dstSubresource.aspectMask &
721 (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
722 emit_image_store(job->device, cl, framebuffer, dst,
723 region->dstSubresource.aspectMask,
724 dst_layer,
725 region->dstSubresource.mipLevel,
726 false, false, !is_depth_or_stencil);
727
728 cl_emit(cl, END_OF_TILE_MARKER, end);
729
730 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
731
732 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
733 branch.start = tile_list_start;
734 branch.end = v3dv_cl_get_address(cl);
735 }
736 }
737
738 static void
739 emit_resolve_image_layer(struct v3dv_job *job,
740 struct v3dv_image *dst,
741 struct v3dv_image *src,
742 struct v3dv_meta_framebuffer *framebuffer,
743 uint32_t layer,
744 const VkImageResolve2 *region)
745 {
746 emit_resolve_image_layer_per_tile_list(job, framebuffer,
747 dst, src, layer, region);
748 emit_supertile_coordinates(job, framebuffer);
749 }
750
751 void
752 v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
753 struct v3dv_image *dst,
754 struct v3dv_image *src,
755 struct v3dv_meta_framebuffer *framebuffer,
756 const VkImageResolve2 *region)
757 {
758 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
759 v3dv_return_if_oom(NULL, job);
760
761 emit_frame_setup(job, 0, NULL);
762 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
763 emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
764 cl_emit(rcl, END_OF_RENDERING, end);
765 }
766
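/* Emits the generic tile list for a buffer-to-buffer copy using linear TLB
 * loads and stores on render target 0.
 */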
767 static void
768 emit_copy_buffer_per_tile_list(struct v3dv_job *job,
769 struct v3dv_bo *dst,
770 struct v3dv_bo *src,
771 uint32_t dst_offset,
772 uint32_t src_offset,
773 uint32_t stride,
774 uint32_t format)
775 {
776 struct v3dv_cl *cl = &job->indirect;
777 v3dv_cl_ensure_space(cl, 200, 1);
778 v3dv_return_if_oom(NULL, job);
779
780 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
781
782 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
783
784 emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
785
786 cl_emit(cl, END_OF_LOADS, end);
787
788 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
789
790 emit_linear_store(cl, RENDER_TARGET_0,
791 dst, dst_offset, stride, false, format);
792
793 cl_emit(cl, END_OF_TILE_MARKER, end);
794
795 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
796
797 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
798 branch.start = tile_list_start;
799 branch.end = v3dv_cl_get_address(cl);
800 }
801 }
802
803 void
804 v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
805 struct v3dv_bo *dst,
806 struct v3dv_bo *src,
807 uint32_t dst_offset,
808 uint32_t src_offset,
809 struct v3dv_meta_framebuffer *framebuffer,
810 uint32_t format,
811 uint32_t item_size)
812 {
813 const uint32_t stride = job->frame_tiling.width * item_size;
814 emit_copy_buffer_per_tile_list(job, dst, src,
815 dst_offset, src_offset,
816 stride, format);
817 emit_supertile_coordinates(job, framebuffer);
818 }
819
820 void
821 v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
822 struct v3dv_bo *dst,
823 struct v3dv_bo *src,
824 uint32_t dst_offset,
825 uint32_t src_offset,
826 struct v3dv_meta_framebuffer *framebuffer,
827 uint32_t format,
828 uint32_t item_size)
829 {
830 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
831 v3dv_return_if_oom(NULL, job);
832
833 emit_frame_setup(job, 0, NULL);
834
835 v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
836 framebuffer, format, item_size);
837
838 cl_emit(rcl, END_OF_RENDERING, end);
839 }
840
841 static void
842 emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
843 struct v3dv_meta_framebuffer *framebuffer,
844 struct v3dv_image *dst,
845 struct v3dv_image *src,
846 uint32_t layer_offset,
847 const VkImageCopy2 *region)
848 {
849 struct v3dv_cl *cl = &job->indirect;
850 v3dv_cl_ensure_space(cl, 200, 1);
851 v3dv_return_if_oom(NULL, job);
852
853 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
854
855 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
856
857 assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
858 layer_offset < src->vk.array_layers) ||
859 layer_offset < src->vk.extent.depth);
860
861 const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
862 region->srcSubresource.baseArrayLayer + layer_offset :
863 region->srcOffset.z + layer_offset;
864
865 emit_image_load(job->device, cl, framebuffer, src,
866 region->srcSubresource.aspectMask,
867 src_layer,
868 region->srcSubresource.mipLevel,
869 false, false);
870
871 cl_emit(cl, END_OF_LOADS, end);
872
873 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
874
875 assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
876 layer_offset < dst->vk.array_layers) ||
877 layer_offset < dst->vk.extent.depth);
878
879 const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
880 region->dstSubresource.baseArrayLayer + layer_offset :
881 region->dstOffset.z + layer_offset;
882
883 emit_image_store(job->device, cl, framebuffer, dst,
884 region->dstSubresource.aspectMask,
885 dst_layer,
886 region->dstSubresource.mipLevel,
887 false, false, false);
888
889 cl_emit(cl, END_OF_TILE_MARKER, end);
890
891 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
892
893 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
894 branch.start = tile_list_start;
895 branch.end = v3dv_cl_get_address(cl);
896 }
897 }
898
899 static void
900 emit_copy_image_layer(struct v3dv_job *job,
901 struct v3dv_image *dst,
902 struct v3dv_image *src,
903 struct v3dv_meta_framebuffer *framebuffer,
904 uint32_t layer,
905 const VkImageCopy2 *region)
906 {
907 emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
908 emit_supertile_coordinates(job, framebuffer);
909 }
910
911 void
912 v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
913 struct v3dv_image *dst,
914 struct v3dv_image *src,
915 struct v3dv_meta_framebuffer *framebuffer,
916 const VkImageCopy2 *region)
917 {
918 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
919 v3dv_return_if_oom(NULL, job);
920
921 emit_frame_setup(job, 0, NULL);
922 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
923 emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
924 cl_emit(rcl, END_OF_RENDERING, end);
925 }
926
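/* Builds a TFU (Texture Formatting Unit) job that copies/converts between the
 * given source and destination tilings and adds it to the command buffer.
 */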
927 void
928 v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
929 uint32_t dst_bo_handle,
930 uint32_t dst_offset,
931 enum v3d_tiling_mode dst_tiling,
932 uint32_t dst_padded_height_or_stride,
933 uint32_t dst_cpp,
934 uint32_t src_bo_handle,
935 uint32_t src_offset,
936 enum v3d_tiling_mode src_tiling,
937 uint32_t src_padded_height_or_stride,
938 uint32_t src_cpp,
939 uint32_t width,
940 uint32_t height,
941 const struct v3dv_format_plane *format_plane)
942 {
943 struct drm_v3d_submit_tfu tfu = {
944 .ios = (height << 16) | width,
945 .bo_handles = {
946 dst_bo_handle,
947 src_bo_handle != dst_bo_handle ? src_bo_handle : 0
948 },
949 };
950
951 tfu.iia |= src_offset;
952
953 #if V3D_VERSION <= 42
954 if (src_tiling == V3D_TILING_RASTER) {
955 tfu.icfg = V3D33_TFU_ICFG_FORMAT_RASTER << V3D33_TFU_ICFG_FORMAT_SHIFT;
956 } else {
957 tfu.icfg = (V3D33_TFU_ICFG_FORMAT_LINEARTILE +
958 (src_tiling - V3D_TILING_LINEARTILE)) <<
959 V3D33_TFU_ICFG_FORMAT_SHIFT;
960 }
961 tfu.icfg |= format_plane->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT;
962 #endif
963 #if V3D_VERSION >= 71
964 if (src_tiling == V3D_TILING_RASTER) {
965 tfu.icfg = V3D71_TFU_ICFG_FORMAT_RASTER << V3D71_TFU_ICFG_IFORMAT_SHIFT;
966 } else {
967 tfu.icfg = (V3D71_TFU_ICFG_FORMAT_LINEARTILE +
968 (src_tiling - V3D_TILING_LINEARTILE)) <<
969 V3D71_TFU_ICFG_IFORMAT_SHIFT;
970 }
971 tfu.icfg |= format_plane->tex_type << V3D71_TFU_ICFG_OTYPE_SHIFT;
972 #endif
973
974 tfu.ioa = dst_offset;
975
976 #if V3D_VERSION <= 42
977 tfu.ioa |= (V3D33_TFU_IOA_FORMAT_LINEARTILE +
978 (dst_tiling - V3D_TILING_LINEARTILE)) <<
979 V3D33_TFU_IOA_FORMAT_SHIFT;
980 #endif
981
982 #if V3D_VERSION >= 71
983 tfu.v71.ioc = (V3D71_TFU_IOC_FORMAT_LINEARTILE +
984 (dst_tiling - V3D_TILING_LINEARTILE)) <<
985 V3D71_TFU_IOC_FORMAT_SHIFT;
986
987 switch (dst_tiling) {
988 case V3D_TILING_UIF_NO_XOR:
989 case V3D_TILING_UIF_XOR:
990 tfu.v71.ioc |=
991 (dst_padded_height_or_stride / (2 * v3d_utile_height(dst_cpp))) <<
992 V3D71_TFU_IOC_STRIDE_SHIFT;
993 break;
994 case V3D_TILING_RASTER:
995 tfu.v71.ioc |= (dst_padded_height_or_stride / dst_cpp) <<
996 V3D71_TFU_IOC_STRIDE_SHIFT;
997 break;
998 default:
999 break;
1000 }
1001 #endif
1002
1003 switch (src_tiling) {
1004 case V3D_TILING_UIF_NO_XOR:
1005 case V3D_TILING_UIF_XOR:
1006 tfu.iis |= src_padded_height_or_stride / (2 * v3d_utile_height(src_cpp));
1007 break;
1008 case V3D_TILING_RASTER:
1009 tfu.iis |= src_padded_height_or_stride / src_cpp;
1010 break;
1011 default:
1012 break;
1013 }
1014
1015 /* The TFU can handle raster sources but always produces UIF results */
1016 assert(dst_tiling != V3D_TILING_RASTER);
1017
1018 #if V3D_VERSION <= 42
1019 /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
1020 * OPAD field for the destination (how many extra UIF blocks beyond
1021 * those necessary to cover the height).
1022 */
1023 if (dst_tiling == V3D_TILING_UIF_NO_XOR || dst_tiling == V3D_TILING_UIF_XOR) {
1024 uint32_t uif_block_h = 2 * v3d_utile_height(dst_cpp);
1025 uint32_t implicit_padded_height = align(height, uif_block_h);
1026 uint32_t icfg = (dst_padded_height_or_stride - implicit_padded_height) /
1027 uif_block_h;
1028 tfu.icfg |= icfg << V3D33_TFU_ICFG_OPAD_SHIFT;
1029 }
1030 #endif
1031
1032 v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
1033 }
1034
1035 static void
1036 emit_clear_image_layer_per_tile_list(struct v3dv_job *job,
1037 struct v3dv_meta_framebuffer *framebuffer,
1038 struct v3dv_image *image,
1039 VkImageAspectFlags aspects,
1040 uint32_t layer,
1041 uint32_t level)
1042 {
1043 struct v3dv_cl *cl = &job->indirect;
1044 v3dv_cl_ensure_space(cl, 200, 1);
1045 v3dv_return_if_oom(NULL, job);
1046
1047 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1048
1049 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1050
1051 cl_emit(cl, END_OF_LOADS, end);
1052
1053 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1054
1055 emit_image_store(job->device, cl, framebuffer, image, aspects,
1056 layer, level, false, false, false);
1057
1058 cl_emit(cl, END_OF_TILE_MARKER, end);
1059
1060 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1061
1062 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1063 branch.start = tile_list_start;
1064 branch.end = v3dv_cl_get_address(cl);
1065 }
1066 }
1067
1068 static void
1069 emit_clear_image_layers(struct v3dv_job *job,
1070 struct v3dv_image *image,
1071 struct v3dv_meta_framebuffer *framebuffer,
1072 VkImageAspectFlags aspects,
1073 uint32_t min_layer,
1074 uint32_t max_layer,
1075 uint32_t level)
1076 {
1077 for (uint32_t layer = min_layer; layer < max_layer; layer++) {
1078 emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
1079 layer, level);
1080 emit_supertile_coordinates(job, framebuffer);
1081 }
1082 }
1083
1084 void
1085 v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
1086 struct v3dv_image *image,
1087 struct v3dv_meta_framebuffer *framebuffer,
1088 const union v3dv_clear_value *clear_value,
1089 VkImageAspectFlags aspects,
1090 uint32_t min_layer,
1091 uint32_t max_layer,
1092 uint32_t level)
1093 {
1094 const struct rcl_clear_info clear_info = {
1095 .clear_value = clear_value,
1096 .image = image,
1097 .aspects = aspects,
1098 .level = level,
1099 };
1100
1101 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
1102 v3dv_return_if_oom(NULL, job);
1103
1104 emit_frame_setup(job, 0, clear_value);
1105 emit_clear_image_layers(job, image, framebuffer, aspects,
1106 min_layer, max_layer, level);
1107 cl_emit(rcl, END_OF_RENDERING, end);
1108 }
1109
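/* Emits the generic tile list for a buffer fill: nothing is loaded and the
 * tile buffer contents, cleared to the fill value during frame setup, are
 * stored out as RGBA8UI.
 */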
1110 static void
1111 emit_fill_buffer_per_tile_list(struct v3dv_job *job,
1112 struct v3dv_bo *bo,
1113 uint32_t offset,
1114 uint32_t stride)
1115 {
1116 struct v3dv_cl *cl = &job->indirect;
1117 v3dv_cl_ensure_space(cl, 200, 1);
1118 v3dv_return_if_oom(NULL, job);
1119
1120 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1121
1122 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1123
1124 cl_emit(cl, END_OF_LOADS, end);
1125
1126 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1127
1128 emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
1129 V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
1130
1131 cl_emit(cl, END_OF_TILE_MARKER, end);
1132
1133 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1134
1135 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1136 branch.start = tile_list_start;
1137 branch.end = v3dv_cl_get_address(cl);
1138 }
1139 }
1140
1141 static void
1142 emit_fill_buffer(struct v3dv_job *job,
1143 struct v3dv_bo *bo,
1144 uint32_t offset,
1145 struct v3dv_meta_framebuffer *framebuffer)
1146 {
1147 const uint32_t stride = job->frame_tiling.width * 4;
1148 emit_fill_buffer_per_tile_list(job, bo, offset, stride);
1149 emit_supertile_coordinates(job, framebuffer);
1150 }
1151
1152 void
1153 v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
1154 struct v3dv_bo *bo,
1155 uint32_t offset,
1156 struct v3dv_meta_framebuffer *framebuffer,
1157 uint32_t data)
1158 {
1159 const union v3dv_clear_value clear_value = {
1160 .color = { data, 0, 0, 0 },
1161 };
1162
1163 const struct rcl_clear_info clear_info = {
1164 .clear_value = &clear_value,
1165 .image = NULL,
1166 .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
1167 .level = 0,
1168 };
1169
1170 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
1171 v3dv_return_if_oom(NULL, job);
1172
1173 emit_frame_setup(job, 0, &clear_value);
1174 emit_fill_buffer(job, bo, offset, framebuffer);
1175 cl_emit(rcl, END_OF_RENDERING, end);
1176 }
1177
1178
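/* Emits the generic tile list for copying a buffer to one image layer, taking
 * care to preserve the other aspect when writing to a combined depth/stencil
 * image.
 */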
1179 static void
1180 emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
1181 struct v3dv_meta_framebuffer *framebuffer,
1182 struct v3dv_image *image,
1183 struct v3dv_buffer *buffer,
1184 uint32_t layer,
1185 const VkBufferImageCopy2 *region)
1186 {
1187 struct v3dv_cl *cl = &job->indirect;
1188 v3dv_cl_ensure_space(cl, 200, 1);
1189 v3dv_return_if_oom(NULL, job);
1190
1191 struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1192
1193 cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1194
1195 assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < image->vk.array_layers) ||
1196 layer < image->vk.extent.depth);
1197
1198 /* Load TLB from buffer */
1199 uint32_t width, height;
1200 if (region->bufferRowLength == 0)
1201 width = region->imageExtent.width;
1202 else
1203 width = region->bufferRowLength;
1204
1205 if (region->bufferImageHeight == 0)
1206 height = region->imageExtent.height;
1207 else
1208 height = region->bufferImageHeight;
1209
1210 /* Handle copy to compressed format using a compatible format */
1211 width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
1212 height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
1213
1214 const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
1215 uint8_t plane = v3dv_plane_from_aspect(imgrsc->aspectMask);
1216 uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1217 1 : image->planes[plane].cpp;
1218 uint32_t buffer_stride = width * cpp;
1219 uint32_t buffer_offset =
1220 buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
1221
1222 uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
1223 false, false, true);
1224
1225 uint32_t image_layer = layer + (image->vk.image_type != VK_IMAGE_TYPE_3D ?
1226 imgrsc->baseArrayLayer : region->imageOffset.z);
1227
1228 emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
1229 buffer_offset, buffer_stride, format);
1230
1231 /* Because we can't do raster loads/stores of Z/S formats we need to
1232 * use a color tile buffer with a compatible RGBA color format instead.
1233 * However, when we are uploading a single aspect to a combined
1234 * depth/stencil image we have the problem that our tile buffer stores don't
1235 * allow us to mask out the other aspect, so we always write all four RGBA
1236 * channels to the image and we end up overwriting that other aspect with
1237 * undefined values. To work around that, we first load the aspect we are
1238 * not copying from the image memory into a proper Z/S tile buffer. Then we
1239 * do our store from the color buffer for the aspect we are copying, and
1240 * after that, we do another store from the Z/S tile buffer to restore the
1241 * other aspect to its original value.
1242 */
1243 if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
1244 if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1245 emit_image_load(job->device, cl, framebuffer, image,
1246 VK_IMAGE_ASPECT_STENCIL_BIT,
1247 image_layer, imgrsc->mipLevel,
1248 false, false);
1249 } else {
1250 assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
1251 emit_image_load(job->device, cl, framebuffer, image,
1252 VK_IMAGE_ASPECT_DEPTH_BIT,
1253 image_layer, imgrsc->mipLevel,
1254 false, false);
1255 }
1256 }
1257
1258 cl_emit(cl, END_OF_LOADS, end);
1259
1260 cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1261
1262 /* Store TLB to image */
1263 emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
1264 image_layer, imgrsc->mipLevel,
1265 false, true, false);
1266
1267 if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
1268 if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1269 emit_image_store(job->device, cl, framebuffer, image,
1270 VK_IMAGE_ASPECT_STENCIL_BIT,
1271 image_layer, imgrsc->mipLevel,
1272 false, false, false);
1273 } else {
1274 assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
1275 emit_image_store(job->device, cl, framebuffer, image,
1276 VK_IMAGE_ASPECT_DEPTH_BIT,
1277 image_layer, imgrsc->mipLevel,
1278 false, false, false);
1279 }
1280 }
1281
1282 cl_emit(cl, END_OF_TILE_MARKER, end);
1283
1284 cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1285
1286 cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1287 branch.start = tile_list_start;
1288 branch.end = v3dv_cl_get_address(cl);
1289 }
1290 }
1291
1292 static void
1293 emit_copy_buffer_to_layer(struct v3dv_job *job,
1294 struct v3dv_image *image,
1295 struct v3dv_buffer *buffer,
1296 struct v3dv_meta_framebuffer *framebuffer,
1297 uint32_t layer,
1298 const VkBufferImageCopy2 *region)
1299 {
1300 emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
1301 layer, region);
1302 emit_supertile_coordinates(job, framebuffer);
1303 }
1304
1305 void
1306 v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
1307 struct v3dv_image *image,
1308 struct v3dv_buffer *buffer,
1309 struct v3dv_meta_framebuffer *framebuffer,
1310 const VkBufferImageCopy2 *region)
1311 {
1312 struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
1313 v3dv_return_if_oom(NULL, job);
1314
1315 emit_frame_setup(job, 0, NULL);
1316 for (int layer = 0; layer < job->frame_tiling.layers; layer++)
1317 emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
1318 cl_emit(rcl, END_OF_RENDERING, end);
1319 }
1320
1321 /* Figure out a TLB size configuration for a number of pixels to process.
1322 * Beware that we can't "render" more than MAX_DIMxMAX_DIM pixels in a single
1323 * job; if the pixel count is larger than this, the caller might need to split
1324 * the job and call this function multiple times.
1325 */
1326 static void
1327 framebuffer_size_for_pixel_count(uint32_t num_pixels,
1328 uint32_t *width,
1329 uint32_t *height)
1330 {
1331 assert(num_pixels > 0);
1332
1333 const uint32_t max_dim_pixels = V3D_MAX_IMAGE_DIMENSION;
1334 const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;
1335
1336 uint32_t w, h;
1337 if (num_pixels > max_pixels) {
1338 w = max_dim_pixels;
1339 h = max_dim_pixels;
1340 } else {
1341 w = num_pixels;
1342 h = 1;
1343 while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
1344 w >>= 1;
1345 h <<= 1;
1346 }
1347 }
1348 assert(w <= max_dim_pixels && h <= max_dim_pixels);
1349 assert(w * h <= num_pixels);
1350 assert(w > 0 && h > 0);
1351
1352 *width = w;
1353 *height = h;
1354 }
1355
1356 struct v3dv_job *
1357 v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
1358 struct v3dv_bo *dst,
1359 uint32_t dst_offset,
1360 struct v3dv_bo *src,
1361 uint32_t src_offset,
1362 const VkBufferCopy2 *region)
1363 {
1364 const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
1365 const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
1366
1367 /* Select appropriate pixel format for the copy operation based on the
1368 * size to copy and the alignment of the source and destination offsets.
1369 */
1370 src_offset += region->srcOffset;
1371 dst_offset += region->dstOffset;
1372 uint32_t item_size = 4;
1373 while (item_size > 1 &&
1374 (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
1375 item_size /= 2;
1376 }
1377
1378 while (item_size > 1 && region->size % item_size != 0)
1379 item_size /= 2;
1380
1381 assert(region->size % item_size == 0);
1382 uint32_t num_items = region->size / item_size;
1383 assert(num_items > 0);
1384
1385 uint32_t format;
1386 VkFormat vk_format;
1387 switch (item_size) {
1388 case 4:
1389 format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
1390 vk_format = VK_FORMAT_R8G8B8A8_UINT;
1391 break;
1392 case 2:
1393 format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
1394 vk_format = VK_FORMAT_R8G8_UINT;
1395 break;
1396 default:
1397 format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
1398 vk_format = VK_FORMAT_R8_UINT;
1399 break;
1400 }
1401
1402 struct v3dv_job *job = NULL;
1403 while (num_items > 0) {
1404 job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1405 if (!job)
1406 return NULL;
1407
1408 uint32_t width, height;
1409 framebuffer_size_for_pixel_count(num_items, &width, &height);
1410
1411 v3dv_job_start_frame(job, width, height, 1, true, true, 1,
1412 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1413 false);
1414
1415 struct v3dv_meta_framebuffer framebuffer;
1416 v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
1417 &job->frame_tiling);
1418
1419 v3dX(job_emit_binning_flush)(job);
1420
1421 v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
1422 &framebuffer, format, item_size);
1423
1424 v3dv_cmd_buffer_finish_job(cmd_buffer);
1425
1426 const uint32_t items_copied = width * height;
1427 const uint32_t bytes_copied = items_copied * item_size;
1428 num_items -= items_copied;
1429 src_offset += bytes_copied;
1430 dst_offset += bytes_copied;
1431 }
1432
1433 return job;
1434 }
1435
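/* Fills a buffer range with a 32-bit pattern by emitting framebuffer-sized
 * fill jobs until the requested size is covered.
 */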
1436 void
1437 v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
1438 struct v3dv_bo *bo,
1439 uint32_t offset,
1440 uint32_t size,
1441 uint32_t data)
1442 {
1443 assert(size > 0 && size % 4 == 0);
1444 assert(offset + size <= bo->size);
1445
1446 const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
1447 const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
1448 uint32_t num_items = size / 4;
1449
1450 while (num_items > 0) {
1451 struct v3dv_job *job =
1452 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1453 if (!job)
1454 return;
1455
1456 uint32_t width, height;
1457 framebuffer_size_for_pixel_count(num_items, &width, &height);
1458
1459 v3dv_job_start_frame(job, width, height, 1, true, true, 1,
1460 internal_bpp, 4 * v3d_internal_bpp_words(internal_bpp),
1461 false);
1462
1463 struct v3dv_meta_framebuffer framebuffer;
1464 v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
1465 internal_type, &job->frame_tiling);
1466
1467 v3dX(job_emit_binning_flush)(job);
1468
1469 v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);
1470
1471 v3dv_cmd_buffer_finish_job(cmd_buffer);
1472
1473 const uint32_t items_copied = width * height;
1474 const uint32_t bytes_copied = items_copied * 4;
1475 num_items -= items_copied;
1476 offset += bytes_copied;
1477 }
1478 }
1479
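/* Initializes the meta framebuffer state (formats, internal types and the
 * supertile range to render) from the job's frame tiling parameters.
 */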
1480 void
1481 v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
1482 VkFormat vk_format,
1483 uint32_t internal_type,
1484 const struct v3dv_frame_tiling *tiling)
1485 {
1486 fb->internal_type = internal_type;
1487
1488 /* Supertile coverage always starts at 0,0 */
1489 uint32_t supertile_w_in_pixels =
1490 tiling->tile_width * tiling->supertile_width;
1491 uint32_t supertile_h_in_pixels =
1492 tiling->tile_height * tiling->supertile_height;
1493
1494 fb->min_x_supertile = 0;
1495 fb->min_y_supertile = 0;
1496 fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
1497 fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
1498
1499 fb->vk_format = vk_format;
1500 fb->format = v3dX(get_format)(vk_format);
1501
1502 fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
1503 if (vk_format_is_depth_or_stencil(vk_format))
1504 fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
1505 }
1506