/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_formats.h"
#include "pvr_job_common.h"
#include "pvr_job_context.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "util/xxhash.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_sync.h"

#define PVR_TRANSFER_MAX_PASSES 10U
#define PVR_TRANSFER_MAX_CLIP_RECTS 4U
#define PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT 16U
#define PVR_TRANSFER_MAX_CUSTOM_RECTS 3U

/* Number of triangles sent to the TSP per raster. */
#define PVR_TRANSFER_NUM_LAYERS 1U

#define PVR_MAX_WIDTH 16384
#define PVR_MAX_HEIGHT 16384

#define PVR_MAX_CLIP_SIZE(dev_info) \
   (PVR_HAS_FEATURE(dev_info, screen_size8K) ? 8192U : 16384U)

enum pvr_paired_tiles {
   PVR_PAIRED_TILES_NONE,
   PVR_PAIRED_TILES_X,
   PVR_PAIRED_TILES_Y
};

struct pvr_transfer_wa_source {
   uint32_t src_offset;
   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
   bool extend_height;
};

struct pvr_transfer_pass {
   uint32_t dst_offset;

   uint32_t source_count;
   struct pvr_transfer_wa_source sources[PVR_TRANSFER_MAX_SOURCES];

   uint32_t clip_rects_count;
   VkRect2D clip_rects[PVR_TRANSFER_MAX_CLIP_RECTS];
};

struct pvr_transfer_custom_mapping {
   bool double_stride;
   uint32_t texel_unwind_src;
   uint32_t texel_unwind_dst;
   uint32_t texel_extend_src;
   uint32_t texel_extend_dst;
   uint32_t pass_count;
   struct pvr_transfer_pass passes[PVR_TRANSFER_MAX_PASSES];
   uint32_t max_clip_rects;
   int32_t max_clip_size;
};

/* Structure representing a layer iteration. */
struct pvr_transfer_3d_iteration {
   uint32_t texture_coords[12];
};

struct pvr_transfer_3d_state {
   struct pvr_winsys_transfer_regs regs;

   bool empty_dst;
   bool down_scale;
   /* Write all channels present in the dst from the USC even if those are
    * constants.
    */
   bool dont_force_pbe;

   /* The rate of the shader. */
   uint32_t msaa_multiplier;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_x_in_tiles;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_y_in_tiles;
   /* Width of the render in ISP tiles. */
   uint32_t width_in_tiles;
   /* Height of the render in ISP tiles. */
   uint32_t height_in_tiles;

   /* Width of a sample in registers (pixel partition width). */
   uint32_t usc_pixel_width;

   /* Properties of the USC shader. */
   struct pvr_tq_shader_properties shader_props;

   /* TODO: Use pvr_dev_addr_t or an offset type for these. */
   uint32_t pds_shader_task_offset;
   uint32_t tex_state_data_offset;
   uint32_t uni_tex_code_offset;

   uint32_t uniform_data_size;
   uint32_t tex_state_data_size;
   uint32_t usc_coeff_regs;

   /* Pointer into the common store. */
   uint32_t common_ptr;
   /* Pointer into the dynamic constant reg buffer. */
   uint32_t dynamic_const_reg_ptr;
   /* Pointer into the USC constant reg buffer. */
   uint32_t usc_const_reg_ptr;

   uint32_t pds_coeff_task_offset;
   uint32_t coeff_data_size;

   /* Number of temporary 32-bit registers used by PDS. */
   uint32_t pds_temps;

   struct pvr_transfer_custom_mapping custom_mapping;
   uint32_t pass_idx;

   enum pvr_filter filter[PVR_TRANSFER_MAX_SOURCES];
   bool custom_filter;

   enum pvr_paired_tiles pair_tiles;
};

struct pvr_transfer_prep_data {
   struct pvr_winsys_transfer_cmd_flags flags;
   struct pvr_transfer_3d_state state;
};

struct pvr_transfer_submit {
   uint32_t prep_count;
   struct pvr_transfer_prep_data
      prep_array[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT];
};

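/* Picks a raw (bit-exact copy) PBE pixel format just wide enough for the
 * given Vulkan format's texel size.
 */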
static enum pvr_transfer_pbe_pixel_src pvr_pbe_src_format_raw(VkFormat format)
{
   uint32_t bpp = vk_format_get_blocksizebits(format);

   if (bpp <= 32U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
   else if (bpp <= 64U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;

   return PVR_TRANSFER_PBE_PIXEL_SRC_RAW128;
}

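/* Selects the pixel source format for a depth/stencil merge that takes the
 * depth channel from the source. Only a D24S8 destination is supported.
 */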
static VkResult pvr_pbe_src_format_pick_depth(
   const VkFormat src_format,
   const VkFormat dst_format,
   enum pvr_transfer_pbe_pixel_src *const src_format_out)
{
   if (dst_format != VK_FORMAT_D24_UNORM_S8_UINT)
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   switch (src_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8;
      break;

   case VK_FORMAT_D32_SFLOAT:
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8;
      break;

   default:
      return VK_ERROR_FORMAT_NOT_SUPPORTED;
   }

   return VK_SUCCESS;
}

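/* Selects the pixel source format for a depth/stencil merge that takes the
 * stencil channel from the source. Only S8 and D24S8 sources merging into a
 * D24S8 destination are supported.
 */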
static VkResult pvr_pbe_src_format_pick_stencil(
   const VkFormat src_format,
   const VkFormat dst_format,
   enum pvr_transfer_pbe_pixel_src *const src_format_out)
{
   if ((src_format != VK_FORMAT_D24_UNORM_S8_UINT &&
        src_format != VK_FORMAT_S8_UINT) ||
       dst_format != VK_FORMAT_D24_UNORM_S8_UINT) {
      return VK_ERROR_FORMAT_NOT_SUPPORTED;
   }

   if (src_format == VK_FORMAT_S8_UINT)
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8;
   else
      *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8;

   return VK_SUCCESS;
}

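/* Picks the PBE pixel source format for depth/stencil transfers, covering
 * both depth/stencil merges and plain depth/stencil format conversions.
 */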
static VkResult
pvr_pbe_src_format_ds(const struct pvr_transfer_cmd_surface *src,
                      const enum pvr_filter filter,
                      const VkFormat dst_format,
                      const uint32_t flags,
                      const bool down_scale,
                      enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   const VkFormat src_format = src->vk_format;

   const bool src_depth = vk_format_has_depth(src_format);
   const bool dst_depth = vk_format_has_depth(dst_format);
   const bool src_stencil = vk_format_has_stencil(src_format);
   const bool dst_stencil = vk_format_has_stencil(dst_format);

   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      /* Merging, so destination should always have both. */
      if (!dst_depth || !dst_stencil)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      if (flags & PVR_TRANSFER_CMD_FLAGS_PICKD) {
         return pvr_pbe_src_format_pick_depth(src_format,
                                              dst_format,
                                              src_format_out);
      } else {
         return pvr_pbe_src_format_pick_stencil(src_format,
                                                dst_format,
                                                src_format_out);
      }
   }

   /* We can't invent channels out of nowhere. */
   if ((dst_depth && !src_depth) || (dst_stencil && !src_stencil))
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   switch (dst_format) {
   case VK_FORMAT_D16_UNORM:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      if (!down_scale)
         *src_format_out = pvr_pbe_src_format_raw(dst_format);
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;

      break;

   case VK_FORMAT_D24_UNORM_S8_UINT:
      switch (src_format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D24S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;

         break;

      /* D16_UNORM results in a 0.0->1.0 float from the TPU, the same as
       * D32.
       */
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D32_SFLOAT:
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8;
         break;

      default:
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D32S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
      }

      break;

   case VK_FORMAT_D32_SFLOAT:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;

      break;

   default:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
   }

   return VK_SUCCESS;
}

/**
 * How the PBE expects the output buffer for an RGBA space conversion.
 */
static VkResult
pvr_pbe_src_format_normal(VkFormat src_format,
                          VkFormat dst_format,
                          bool down_scale,
                          bool dont_force_pbe,
                          enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   bool dst_signed = vk_format_is_sint(dst_format) ||
                     vk_format_is_snorm(dst_format);

   if (vk_format_is_int(dst_format)) {
      uint32_t red_width;
      bool src_signed;
      uint32_t count;

      if (!vk_format_is_int(src_format))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      src_signed = vk_format_is_sint(src_format);

      red_width = vk_format_get_component_bits(dst_format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               0);

      switch (red_width) {
      case 8:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU8888;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU8888;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US8888;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS8888;

         break;

      case 10:
         switch (dst_format) {
         case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            *src_format_out = src_signed ? PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102
                                         : PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102;
            break;

         case VK_FORMAT_A2R10G10B10_UINT_PACK32:
            *src_format_out = src_signed
                                 ? PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102
                                 : PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102;
            break;

         default:
            return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
         }
         break;

      case 16:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US16S16;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16;

         break;

      case 32:
         if (dont_force_pbe) {
            count = vk_format_get_blocksizebits(dst_format) / 32U;
         } else {
            count =
               pvr_vk_format_get_common_color_channel_count(src_format,
                                                            dst_format);
         }

         if (!src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         } else if (src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32;
         } else if (!src_signed && dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_US32S32;
         } else {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         }
         break;

      default:
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

   } else if (vk_format_is_float(dst_format) ||
              pvr_vk_format_is_fully_normalized(dst_format)) {
      bool is_float = true;

      if (!vk_format_is_float(src_format) &&
          !pvr_vk_format_is_fully_normalized(src_format) &&
          !vk_format_is_block_compressed(src_format)) {
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

      if (pvr_vk_format_is_fully_normalized(dst_format)) {
         uint32_t chan_width;

         is_float = false;

         /* Alpha only. */
         switch (dst_format) {
         case VK_FORMAT_D16_UNORM:
            chan_width = 16;
            break;

         default:
            chan_width =
               vk_format_get_component_bits(dst_format,
                                            UTIL_FORMAT_COLORSPACE_RGB,
                                            0U);
            break;
         }

         if (src_format == dst_format) {
            switch (chan_width) {
            case 16U:
               if (down_scale) {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               } else {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
               }
               break;

            case 32U:
               *src_format_out = pvr_pbe_src_format_raw(dst_format);
               break;
            default:
               is_float = true;
               break;
            }
         } else {
            switch (chan_width) {
            case 16U:
               *src_format_out = dst_signed
                                    ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                    : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               break;
            default:
               is_float = true;
               break;
            }
         }
      }

      if (is_float) {
         if (pvr_vk_format_has_32bit_component(dst_format)) {
            uint32_t count;

            if (dont_force_pbe) {
               count = vk_format_get_blocksizebits(dst_format) / 32U;
            } else {
               count = pvr_vk_format_get_common_color_channel_count(src_format,
                                                                    dst_format);
            }

            switch (count) {
            case 1U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;
               break;
            case 2U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X2;
               break;
            default:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X4;
               break;
            }
         } else {
            if (dst_format == VK_FORMAT_B8G8R8A8_UNORM ||
                dst_format == VK_FORMAT_R8G8B8A8_UNORM ||
                dst_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32) {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8;
            } else {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16F16;
            }
         }
      }
   } else {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}

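/* Returns the effective blit flags; Fast 2D blits ignore the command flags
 * entirely.
 */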
static inline uint32_t
pvr_get_blit_flags(const struct pvr_transfer_cmd *transfer_cmd)
{
   return transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D
             ? 0
             : transfer_cmd->flags;
}

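/* Dispatches to the depth/stencil or color path to pick the PBE pixel source
 * format for the first layer of the transfer.
 */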
static VkResult pvr_pbe_src_format(struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_3d_state *state,
                                   struct pvr_tq_shader_properties *prop)
{
   struct pvr_tq_layer_properties *layer = &prop->layer_props;
   const enum pvr_filter filter = transfer_cmd->source_count
                                     ? transfer_cmd->sources[0].filter
                                     : PVR_FILTER_POINT;
   const uint32_t flags = transfer_cmd->flags;
   VkFormat dst_format = transfer_cmd->dst.vk_format;
   const struct pvr_transfer_cmd_surface *src;
   VkFormat src_format;
   bool down_scale;

   if (transfer_cmd->source_count > 0) {
      src = &transfer_cmd->sources[0].surface;
      down_scale = transfer_cmd->sources[0].resolve_op == PVR_RESOLVE_BLEND &&
                   transfer_cmd->sources[0].surface.sample_count > 1U &&
                   transfer_cmd->dst.sample_count <= 1U;
   } else {
      src = &transfer_cmd->dst;
      down_scale = false;
   }

   src_format = src->vk_format;

   /* This has to come before the rest as S8 for instance is integer and
    * the signedness check fails on D24S8.
    */
   if (vk_format_is_depth_or_stencil(src_format) ||
       vk_format_is_depth_or_stencil(dst_format) ||
       flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      return pvr_pbe_src_format_ds(src,
                                   filter,
                                   dst_format,
                                   flags,
                                   down_scale,
                                   &layer->pbe_format);
   }

   return pvr_pbe_src_format_normal(src_format,
                                    dst_format,
                                    down_scale,
                                    state->dont_force_pbe,
                                    &layer->pbe_format);
}

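/* Packs the background object PDS control registers (program addresses and
 * data sizes) used to load the transfer fragment state.
 */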
static inline void pvr_setup_hwbg_object(const struct pvr_device_info *dev_info,
                                         struct pvr_transfer_3d_state *state)
{
   struct pvr_winsys_transfer_regs *regs = &state->regs;

   pvr_csb_pack (&regs->pds_bgnd0_base, CR_PDS_BGRND0_BASE, reg) {
      reg.shader_addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.shader_addr,
         PVRX(CR_PDS_BGRND0_BASE_SHADER_ADDR_ALIGNMENT)));
      reg.texunicode_addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texunicode_addr,
         PVRX(CR_PDS_BGRND0_BASE_TEXUNICODE_ADDR_ALIGNMENT)));
   }

   pvr_csb_pack (&regs->pds_bgnd1_base, CR_PDS_BGRND1_BASE, reg) {
      reg.texturedata_addr = PVR_DEV_ADDR(state->tex_state_data_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texturedata_addr,
         PVRX(CR_PDS_BGRND1_BASE_TEXTUREDATA_ADDR_ALIGNMENT)));
   }

   /* BGRND 2 not needed, background object PDS doesn't use uniform program. */

   pvr_csb_pack (&regs->pds_bgnd3_sizeinfo, CR_PDS_BGRND3_SIZEINFO, reg) {
      reg.usc_sharedsize =
         DIV_ROUND_UP(state->common_ptr,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));

      assert(!(state->uniform_data_size &
               (PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE) - 1)));
      reg.pds_uniformsize =
         state->uniform_data_size /
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE);

      assert(
         !(state->tex_state_data_size &
           (PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE) - 1)));
      reg.pds_texturestatesize =
         state->tex_state_data_size /
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE);

      reg.pds_tempsize =
         DIV_ROUND_UP(state->pds_temps,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
   }
}

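/* Checks that a surface address satisfies the TPU (input) or PBE (output)
 * alignment requirements for the given bits-per-pixel.
 */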
static inline bool
pvr_is_surface_aligned(pvr_dev_addr_t dev_addr, bool is_input, uint32_t bpp)
{
   /* 96 bpp is 32 bit granular. */
   if (bpp == 64U || bpp == 128U) {
      uint64_t mask = (uint64_t)((bpp >> 3U) - 1U);

      if ((dev_addr.addr & mask) != 0ULL)
         return false;
   }

   if (is_input) {
      if ((dev_addr.addr &
           (PVRX(TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT) - 1U)) !=
          0ULL) {
         return false;
      }
   } else {
      if ((dev_addr.addr &
           (PVRX(PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT) - 1U)) != 0ULL) {
         return false;
      }
   }

   return true;
}

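/* Validates a surface's memory layout and returns the effective width,
 * height, stride, layout and device address to use for it.
 */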
static inline VkResult
pvr_mem_layout_spec(const struct pvr_transfer_cmd_surface *surface,
                    uint32_t load,
                    bool is_input,
                    uint32_t *width_out,
                    uint32_t *height_out,
                    uint32_t *stride_out,
                    enum pvr_memlayout *mem_layout_out,
                    pvr_dev_addr_t *dev_addr_out)
{
   const uint32_t bpp = vk_format_get_blocksizebits(surface->vk_format);
   uint32_t unsigned_stride;

   *mem_layout_out = surface->mem_layout;
   *height_out = surface->height;
   *width_out = surface->width;
   *stride_out = surface->stride;
   *dev_addr_out = surface->dev_addr;

   if (surface->mem_layout != PVR_MEMLAYOUT_LINEAR &&
       !pvr_is_surface_aligned(*dev_addr_out, is_input, bpp)) {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   switch (surface->mem_layout) {
   case PVR_MEMLAYOUT_LINEAR:
      if (surface->stride == 0U)
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      unsigned_stride = *stride_out;

      if (!pvr_is_surface_aligned(*dev_addr_out, is_input, bpp))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (unsigned_stride < *width_out)
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      if (!is_input) {
         if (unsigned_stride == 1U) {
            /* Change the setup to twiddling as that doesn't hit the stride
             * limit and twiddled == strided when 1px stride.
             */
            *mem_layout_out = PVR_MEMLAYOUT_TWIDDLED;
         }
      }

      *stride_out = unsigned_stride;
      break;

   case PVR_MEMLAYOUT_TWIDDLED:
   case PVR_MEMLAYOUT_3DTWIDDLED:
      /* Ignoring stride value for twiddled/tiled surface. */
      *stride_out = *width_out;
      break;

   default:
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}

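/* Fills in the PBE surface and render parameters for the first (or only)
 * render target, and derives the tile-aligned render origin and extents
 * from the scissor or the current custom-mapping pass.
 */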
static VkResult
pvr_pbe_setup_codegen_defaults(const struct pvr_device_info *dev_info,
                               const struct pvr_transfer_cmd *transfer_cmd,
                               struct pvr_transfer_3d_state *state,
                               struct pvr_pbe_surf_params *surface_params,
                               struct pvr_pbe_render_params *render_params)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const uint8_t *swizzle;
   VkFormat format;
   VkResult result;

   switch (dst->vk_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      format = VK_FORMAT_R32_UINT;
      break;

   default:
      format = dst->vk_format;
      break;
   }

   swizzle = pvr_get_format_swizzle(format);
   memcpy(surface_params->swizzle, swizzle, sizeof(surface_params->swizzle));

   pvr_pbe_get_src_format_and_gamma(format,
                                    PVR_PBE_GAMMA_NONE,
                                    false,
                                    &surface_params->source_format,
                                    &surface_params->gamma);

   surface_params->is_normalized = pvr_vk_format_is_fully_normalized(format);
   surface_params->pbe_packmode = pvr_get_pbe_packmode(format);
   surface_params->nr_components = vk_format_get_nr_components(format);

   result = pvr_mem_layout_spec(dst,
                                0U,
                                false,
                                &surface_params->width,
                                &surface_params->height,
                                &surface_params->stride,
                                &surface_params->mem_layout,
                                &surface_params->addr);
   if (result != VK_SUCCESS)
      return result;

   surface_params->z_only_render = false;
   surface_params->depth = dst->depth;
   surface_params->down_scale = state->down_scale;

   if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      render_params->slice = (uint32_t)MAX2(dst->z_position, 0.0f);
   else
      render_params->slice = 0U;

   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);

   /* If the rectangle happens to be empty / off-screen we clip away
    * everything.
    */
   if (state->empty_dst) {
      render_params->min_x_clip = 2U * tile_size_x;
      render_params->max_x_clip = 3U * tile_size_x;
      render_params->min_y_clip = 2U * tile_size_y;
      render_params->max_y_clip = 3U * tile_size_y;
      state->origin_x_in_tiles = 0U;
      state->origin_y_in_tiles = 0U;
      state->height_in_tiles = 1U;
      state->width_in_tiles = 1U;
   } else {
      const VkRect2D *scissor = &transfer_cmd->scissor;

      /* Clamp */
      render_params->min_x_clip =
         MAX2(MIN2(scissor->offset.x, (int32_t)surface_params->width), 0U);
      render_params->max_x_clip =
         MAX2(MIN2(scissor->offset.x + scissor->extent.width,
                   (int32_t)surface_params->width),
              0U) -
         1U;

      render_params->min_y_clip =
         MAX2(MIN2(scissor->offset.y, surface_params->height), 0U);
      render_params->max_y_clip =
         MAX2(MIN2(scissor->offset.y + scissor->extent.height,
                   surface_params->height),
              0U) -
         1U;

      if (state->custom_mapping.pass_count > 0U) {
         struct pvr_transfer_pass *pass =
            &state->custom_mapping.passes[state->pass_idx];

         render_params->min_x_clip = (uint32_t)pass->clip_rects[0U].offset.x;
         render_params->max_x_clip =
            (uint32_t)(pass->clip_rects[0U].offset.x +
                       pass->clip_rects[0U].extent.width) -
            1U;
         render_params->min_y_clip = (uint32_t)pass->clip_rects[0U].offset.y;
         render_params->max_y_clip =
            (uint32_t)(pass->clip_rects[0U].offset.y +
                       pass->clip_rects[0U].extent.height) -
            1U;
      }

      state->origin_x_in_tiles = render_params->min_x_clip / tile_size_x;
      state->origin_y_in_tiles = render_params->min_y_clip / tile_size_y;
      state->width_in_tiles =
         (render_params->max_x_clip + tile_size_x) / tile_size_x;
      state->height_in_tiles =
         (render_params->max_y_clip + tile_size_y) / tile_size_y;

      /* Be careful here as this isn't the same as ((max_x_clip -
       * min_x_clip) + tile_size_x) >> tile_size_x.
       */
      state->width_in_tiles -= state->origin_x_in_tiles;
      state->height_in_tiles -= state->origin_y_in_tiles;
   }

   render_params->source_start = PVR_PBE_STARTPOS_BIT0;
   render_params->mrt_index = 0U;

   return VK_SUCCESS;
}

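/* Overrides the clip rectangle and MRT index for render targets beyond the
 * first; these only exist when a custom mapping splits the blit into
 * multiple rectangles.
 */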
static VkResult
pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface *dst,
                              struct pvr_transfer_3d_state *state,
                              uint32_t rt_idx,
                              struct pvr_pbe_surf_params *surf_params,
                              struct pvr_pbe_render_params *render_params)
{
   struct pvr_transfer_pass *pass;
   VkRect2D *clip_rect;

   render_params->mrt_index = rt_idx;

   assert(rt_idx > 0 && rt_idx <= PVR_TRANSFER_MAX_RENDER_TARGETS);

   if (state->custom_mapping.pass_count == 0)
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

   pass = &state->custom_mapping.passes[state->pass_idx];

   assert(rt_idx < PVR_TRANSFER_MAX_CUSTOM_RECTS);

   clip_rect = &pass->clip_rects[rt_idx];

   render_params->min_x_clip = (uint32_t)clip_rect->offset.x;
   render_params->max_x_clip =
      (uint32_t)clip_rect->offset.x + clip_rect->extent.width - 1U;
   render_params->min_y_clip = (uint32_t)clip_rect->offset.y;
   render_params->max_y_clip =
      (uint32_t)clip_rect->offset.y + clip_rect->extent.height - 1U;

   return VK_SUCCESS;
}

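/* Returns the per-pixel USC output size in 32-bit registers for a PBE pixel
 * source format (0 for invalid formats).
 */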
static uint32_t
pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)
{
   switch (pixel_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
      return 1U;

   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
      return 2U;

   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
      return 4U;

   case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
   default:
      break;
   }

   return 0U;
}

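/* Adjusts the PBE swizzle and source format for the destination, covering
 * the depth/stencil aliases, alpha-only destinations and 32-bit channel
 * counts, and records the USC pixel partition width.
 */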
static void pvr_pbe_setup_swizzle(const struct pvr_transfer_cmd *transfer_cmd,
                                  struct pvr_transfer_3d_state *state,
                                  struct pvr_pbe_surf_params *surf_params)
{
   bool color_fill = !!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL);
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;

   const uint32_t pixel_size =
      pvr_pbe_get_pixel_size(state->shader_props.layer_props.pbe_format);

   state->usc_pixel_width = MAX2(pixel_size, 1U);

   switch (dst->vk_format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_S8_UINT:
      surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
      surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[3U] = PIPE_SWIZZLE_0;
      break;

   default: {
      const uint32_t red_width =
         vk_format_get_component_bits(dst->vk_format,
                                      UTIL_FORMAT_COLORSPACE_RGB,
                                      0U);

      if (transfer_cmd->source_count > 0 &&
          vk_format_is_alpha(dst->vk_format)) {
         if (vk_format_has_alpha(transfer_cmd->sources[0].surface.vk_format)) {
            /* Modify the destination format swizzle to always source from
             * src0.
             */
            surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;
         }

         /* Source format having no alpha channel still allocates 4 output
          * buffer registers.
          */
      }

      if (pvr_vk_format_is_fully_normalized(dst->vk_format)) {
         if (color_fill &&
             (dst->vk_format == VK_FORMAT_B8G8R8A8_UNORM ||
              dst->vk_format == VK_FORMAT_R8G8B8A8_UNORM ||
              dst->vk_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32)) {
            surf_params->source_format =
               PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
         } else if (state->shader_props.layer_props.pbe_format ==
                    PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8) {
            surf_params->source_format =
               PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
         } else if (red_width <= 8U) {
            surf_params->source_format =
               PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
         }
      } else if (red_width == 32U && !state->dont_force_pbe) {
         uint32_t count = 0U;

         for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
            VkFormat src_format = transfer_cmd->sources[i].surface.vk_format;
            uint32_t tmp;

            tmp = pvr_vk_format_get_common_color_channel_count(src_format,
                                                               dst->vk_format);

            count = MAX2(count, tmp);
         }

         switch (count) {
         case 1U:
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 2U:
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 3U:
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;

         case 4U:
         default:
            break;
         }
      }
      break;
   }
   }
}

/**
 * Calculates the required PBE byte mask based on the incoming transfer
 * command.
 *
 * @param dev_info the device information
 * @param transfer_cmd the transfer command
 * @return the bytemask (active high disable mask)
 */
static uint64_t pvr_pbe_byte_mask(const struct pvr_device_info *dev_info,
                                  const struct pvr_transfer_cmd *transfer_cmd)
{
   uint32_t flags = pvr_get_blit_flags(transfer_cmd);

   assert(PVR_HAS_ERN(dev_info, 42064));

   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      uint32_t mask = 0U;

      switch (transfer_cmd->dst.vk_format) {
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         mask = 0xF0F0F0F0U;
         break;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         mask = 0x88888888U;
         break;
      default:
         break;
      }

      if ((flags & PVR_TRANSFER_CMD_FLAGS_PICKD) == 0U)
         mask = ~mask;

      return mask;
   }

   /* Return the mask as if the feature were inactive, matching cores without
    * the ERN. This keeps the firmware agnostic to the feature.
    */
   return 0U;
}

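/* Uploads the end-of-tile PDS pixel event program that emits the packed PBE
 * state words, and packs the matching event pixel registers.
 */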
static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_3d_state *state,
                                   uint32_t rt_count,
                                   uint32_t *pbe_setup_words)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   struct pvr_winsys_transfer_regs *regs = &state->regs;
   struct pvr_pds_event_program program = {
      .emit_words = pbe_setup_words,
      .num_emit_word_pairs = rt_count,
   };
   struct pvr_pds_upload pds_upload;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   pvr_dev_addr_t addr;
   VkResult result;

   /* Precondition, make sure to use a valid index for ctx->usc_eot_bos. */
   assert(rt_count <= ARRAY_SIZE(ctx->usc_eot_bos));
   assert(rt_count > 0U);

   addr.addr = ctx->usc_eot_bos[rt_count - 1U]->dev_addr.addr -
               device->heaps.usc_heap->base_addr.addr;

   pvr_pds_setup_doutu(&program.task_control,
                       addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_event_data_segment(&program,
                                             staging_buffer,
                                             dev_info);

   /* TODO: We can save some memory by generating a code segment for each
    * rt_count, which at the time of writing is a maximum of 3, in
    * pvr_setup_transfer_eot_shaders() when we set up the corresponding EOT
    * USC programs.
    */
   pvr_pds_generate_pixel_event_code_segment(&program,
                                             staging_buffer + program.data_size,
                                             dev_info);

   result =
      pvr_cmd_buffer_upload_pds(transfer_cmd->cmd_buffer,
                                staging_buffer,
                                program.data_size,
                                PVRX(CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT),
                                staging_buffer + program.data_size,
                                program.code_size,
                                PVRX(CR_EVENT_PIXEL_PDS_CODE_ADDR_ALIGNMENT),
                                PVRX(CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT),
                                &pds_upload);
   vk_free(&device->vk.alloc, staging_buffer);
   if (result != VK_SUCCESS)
      return result;

   pvr_csb_pack (&regs->event_pixel_pds_info, CR_EVENT_PIXEL_PDS_INFO, reg) {
      reg.temp_stride = 0U;
      reg.const_size =
         DIV_ROUND_UP(program.data_size,
                      PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
      reg.usc_sr_size =
         DIV_ROUND_UP(rt_count * PVR_STATE_PBE_DWORDS,
                      PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
   }

   pvr_csb_pack (&regs->event_pixel_pds_data, CR_EVENT_PIXEL_PDS_DATA, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.data_offset);
   }

   pvr_csb_pack (&regs->event_pixel_pds_code, CR_EVENT_PIXEL_PDS_CODE, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.code_offset);
   }

   return VK_SUCCESS;
}

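/* Packs the PBE state for every render target of the prepare, emits the
 * matching pixel event program and widens the tile extents to cover all
 * emits when a custom mapping is active.
 */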
static VkResult pvr_pbe_setup(const struct pvr_transfer_cmd *transfer_cmd,
                              struct pvr_transfer_ctx *ctx,
                              struct pvr_transfer_3d_state *state)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   uint32_t num_rts = vk_format_get_plane_count(dst->vk_format);
   uint32_t pbe_setup_words[PVR_TRANSFER_MAX_RENDER_TARGETS *
                            ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_pbe_render_params render_params;
   struct pvr_pbe_surf_params surf_params;
   VkResult result;

   if (state->custom_mapping.pass_count > 0U)
      num_rts = state->custom_mapping.passes[state->pass_idx].clip_rects_count;

   if (PVR_HAS_FEATURE(dev_info, paired_tiles))
      state->pair_tiles = PVR_PAIRED_TILES_NONE;

   for (uint32_t i = 0U; i < num_rts; i++) {
      uint64_t *pbe_regs;
      uint32_t *pbe_words;

      /* Ensure the access into the pbe_wordx_mrty is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER <
             ARRAY_SIZE(state->regs.pbe_wordx_mrty));
      /* Ensure the access into pbe_setup_words is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_STATE_WORDS < ARRAY_SIZE(pbe_setup_words));

      pbe_regs =
         &state->regs
             .pbe_wordx_mrty[i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER];
      pbe_words = &pbe_setup_words[i * ROGUE_NUM_PBESTATE_STATE_WORDS];

      if (PVR_HAS_ERN(dev_info, 42064))
         pbe_regs[2U] = 0UL;

      if (i == 0U) {
         result = pvr_pbe_setup_codegen_defaults(dev_info,
                                                 transfer_cmd,
                                                 state,
                                                 &surf_params,
                                                 &render_params);
         if (result != VK_SUCCESS)
            return result;
      } else {
         result = pvr_pbe_setup_modify_defaults(dst,
                                                state,
                                                i,
                                                &surf_params,
                                                &render_params);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_pbe_setup_swizzle(transfer_cmd, state, &surf_params);

      pvr_pbe_pack_state(dev_info,
                         &surf_params,
                         &render_params,
                         pbe_words,
                         pbe_regs);

      if (PVR_HAS_ERN(dev_info, 42064)) {
         uint64_t temp_reg;

         pvr_csb_pack (&temp_reg, PBESTATE_REG_WORD2, reg) {
            reg.sw_bytemask = pvr_pbe_byte_mask(dev_info, transfer_cmd);
         }

         pbe_regs[2U] |= temp_reg;
      }

      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (pbe_regs[2U] &
             (1ULL << PVRX(PBESTATE_REG_WORD2_PAIR_TILES_SHIFT))) {
            if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED)
               state->pair_tiles = PVR_PAIRED_TILES_Y;
            else
               state->pair_tiles = PVR_PAIRED_TILES_X;
         }
      }
   }

   result =
      pvr_pbe_setup_emit(transfer_cmd, ctx, state, num_rts, pbe_setup_words);
   if (result != VK_SUCCESS)
      return result;

   /* Adjust tile origin and width to include all emits. */
   if (state->custom_mapping.pass_count > 0U) {
      const uint32_t tile_size_x =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
      const uint32_t tile_size_y =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
      struct pvr_transfer_pass *pass =
         &state->custom_mapping.passes[state->pass_idx];
      VkOffset2D offset = { 0U, 0U };
      VkOffset2D end = { 0U, 0U };

      for (uint32_t i = 0U; i < pass->clip_rects_count; i++) {
         VkRect2D *rect = &pass->clip_rects[i];

         offset.x = MIN2(offset.x, rect->offset.x);
         offset.y = MIN2(offset.y, rect->offset.y);
         end.x = MAX2(end.x, rect->offset.x + rect->extent.width);
         end.y = MAX2(end.y, rect->offset.y + rect->extent.height);
      }

      state->origin_x_in_tiles = (uint32_t)offset.x / tile_size_x;
      state->origin_y_in_tiles = (uint32_t)offset.y / tile_size_y;
      state->width_in_tiles =
         DIV_ROUND_UP((uint32_t)end.x, tile_size_x) - state->origin_x_in_tiles;
      state->height_in_tiles =
         DIV_ROUND_UP((uint32_t)end.y, tile_size_y) - state->origin_y_in_tiles;
   }

   return VK_SUCCESS;
}

/**
 * Writes the ISP tile registers according to the MSAA state. Sets up the USC
 * pixel partition allocations and the number of tiles in flight.
 */
static VkResult pvr_isp_tiles(const struct pvr_device *device,
                              struct pvr_transfer_3d_state *state)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t isp_samples =
      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1U);
   uint32_t origin_x = state->origin_x_in_tiles;
   uint32_t origin_y = state->origin_y_in_tiles;
   uint32_t width = state->width_in_tiles;
   uint32_t height = state->height_in_tiles;
   uint32_t isp_tiles_in_flight;

   /* msaa_multiplier is calculated by sample_count & ~1U. Given sample
    * count is always in powers of two, we can get the sample count from
    * msaa_multiplier using the following logic.
    */
   const uint32_t samples = MAX2(state->msaa_multiplier, 1U);

   /* The isp_samples_per_pixel feature is also known as "2x/4x for free";
    * when it is present SAMPLES_PER_PIXEL is 2/4, otherwise 1. The following
    * logic should end up with these numbers:
    *
    *    |---------------------------------|
    *    |      4 SAMPLES / ISP PIXEL      |
    *    |-----------------------+----+----|
    *    | MSAA                  | X* | Y* |
    *    | 2X                    | 1  | 1  |
    *    | 4X                    | 1  | 1  |
    *    |---------------------------------|
    *    |      2 SAMPLES / ISP PIXEL      |
    *    |-----------------------+----+----|
    *    | MSAA                  | X* | Y* |
    *    | 2X                    | 1  | 1  |
    *    | 4X                    | 1  | 2  |
    *    | 8X                    | 2  | 2  |
    *    |-----------------------+----+----|
    *    |      1 SAMPLE / ISP PIXEL       |
    *    |-----------------------+----+----|
    *    | MSAA                  | X* | Y* |
    *    | 2X                    | 1  | 2  |
    *    | 4X                    | 2  | 2  |
    *    |-----------------------+----+----|
    */

   origin_x <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   origin_y <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
                 (state->msaa_multiplier >> isp_samples)) &
                1U;
   width <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   height <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
               (state->msaa_multiplier >> isp_samples)) &
              1U;

   if (PVR_HAS_FEATURE(dev_info, paired_tiles) &&
       state->pair_tiles != PVR_PAIRED_TILES_NONE) {
      width = ALIGN_POT(width, 2U);
      height = ALIGN_POT(height, 2U);
   }

   pvr_csb_pack (&state->regs.isp_mtile_size, CR_ISP_MTILE_SIZE, reg) {
      reg.x = width;
      reg.y = height;
   }

   pvr_csb_pack (&state->regs.isp_render_origin, CR_ISP_RENDER_ORIGIN, reg) {
      reg.x = origin_x;
      reg.y = origin_y;
   }

   pvr_setup_tiles_in_flight(dev_info,
                             dev_runtime_info,
                             pvr_cr_isp_aa_mode_type(samples),
                             state->usc_pixel_width,
                             state->pair_tiles != PVR_PAIRED_TILES_NONE,
                             0,
                             &isp_tiles_in_flight,
                             &state->regs.usc_pixel_output_ctrl);

   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, reg) {
      reg.process_empty_tiles = true;

      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (state->pair_tiles == PVR_PAIRED_TILES_X) {
            reg.pair_tiles = true;
         } else if (state->pair_tiles == PVR_PAIRED_TILES_Y) {
            reg.pair_tiles = true;
            reg.pair_tiles_vert = true;
         }
      }
   }

   state->regs.isp_ctl |= isp_tiles_in_flight;

   return VK_SUCCESS;
}

static bool
pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info *dev_info,
                                   enum pvr_transfer_pbe_pixel_src pbe_format)
{
   /* We don't emulate rate change from the USC with the pbe_yuv feature. */
   if (!PVR_HAS_FEATURE(dev_info, pbe_yuv) &&
       (pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED ||
        pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V)) {
      return true;
   }

   return false;
}

/**
 * Number of DWORDs from the unified store that floating texture coefficients
 * take up.
 */
static void pvr_uv_space(const struct pvr_device_info *dev_info,
                         const struct pvr_transfer_cmd *transfer_cmd,
                         struct pvr_transfer_3d_state *state)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const VkRect2D *dst_rect = &transfer_cmd->scissor;

   /* This also avoids division by 0 in pvr_dma_texture_floats(). */
   if (state->custom_mapping.pass_count == 0U &&
       (dst_rect->extent.width == 0U || dst_rect->extent.height == 0U ||
        MAX2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) <
           0U ||
        MIN2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) >
           (int32_t)dst->width ||
        MAX2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) <
           0U ||
        MIN2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) >
           (int32_t)dst->height)) {
      state->empty_dst = true;
   } else {
      state->empty_dst = false;

      if (transfer_cmd->source_count > 0) {
         struct pvr_tq_layer_properties *layer =
            &state->shader_props.layer_props;

         const VkRect2D *src_rect =
            &transfer_cmd->sources[0U].mappings[0U].src_rect;
         const VkRect2D *dst_rect =
            &transfer_cmd->sources[0U].mappings[0U].dst_rect;
         int32_t dst_x1 = dst_rect->offset.x + dst_rect->extent.width;
         int32_t dst_y1 = dst_rect->offset.y + dst_rect->extent.height;
         int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
         int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;

         assert(transfer_cmd->source_count == 1);

         if (state->filter[0U] > PVR_FILTER_POINT) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else if (src_rect->extent.width == 0U ||
                    src_rect->extent.height == 0U) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         } else if ((src_rect->offset.x * dst_x1 !=
                     src_x1 * dst_rect->offset.x) ||
                    (src_rect->offset.y * dst_y1 !=
                     src_y1 * dst_rect->offset.y) ||
                    (src_rect->extent.width != dst_rect->extent.width) ||
                    (src_rect->extent.height != dst_rect->extent.height) ||
                    transfer_cmd->sources[0U].mappings[0U].flip_x ||
                    transfer_cmd->sources[0U].mappings[0U].flip_y) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         }

         /* We have to adjust the rate. */
         if (layer->layer_floats != PVR_INT_COORD_SET_FLOATS_0 &&
             pvr_int_pbe_pixel_changes_dst_rate(dev_info, layer->pbe_format)) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_6;
         }
      }
   }
}

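/* YUV formats use a single combined image/sampler state even when they need
 * multiple loads; every other format needs one state per load.
 */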
static uint32_t pvr_int_pbe_pixel_num_sampler_and_image_states(
   enum pvr_transfer_pbe_pixel_src pbe_format)
{
   switch (pbe_format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
      return 1U;
   default:
      return pvr_pbe_pixel_num_loads(pbe_format);
   }
}

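/* Packs the TPU sampler state for a source surface into the shared register
 * buffer at the slot reserved for the given combined image/sampler.
 */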
static VkResult pvr_sampler_state_for_surface(
   const struct pvr_device_info *dev_info,
   const struct pvr_transfer_cmd_surface *surface,
   enum pvr_filter filter,
   const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
   uint32_t sampler,
   uint32_t *mem_ptr)
{
   uint64_t sampler_state[2U] = { 0UL, 0UL };

   pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
      reg.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      reg.minlod = PVRX(TEXSTATE_CLAMP_MIN);
      reg.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
      reg.dadjust = PVRX(TEXSTATE_DADJUST_MIN_UINT);

      if (filter == PVR_FILTER_DONTCARE || filter == PVR_FILTER_POINT) {
         reg.minfilter = PVRX(TEXSTATE_FILTER_POINT);
         reg.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      } else if (filter == PVR_FILTER_LINEAR) {
         reg.minfilter = PVRX(TEXSTATE_FILTER_LINEAR);
         reg.magfilter = PVRX(TEXSTATE_FILTER_LINEAR);
      } else {
         assert(PVR_HAS_FEATURE(dev_info, tf_bicubic_filter));
         reg.minfilter = PVRX(TEXSTATE_FILTER_BICUBIC);
         reg.magfilter = PVRX(TEXSTATE_FILTER_BICUBIC);
      }

      reg.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      reg.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);

      if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
         reg.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   assert(sampler < PVR_TRANSFER_MAX_IMAGES);

   assert(sampler <= sh_reg_layout->combined_image_samplers.count);
   mem_ptr += sh_reg_layout->combined_image_samplers.offsets[sampler].sampler;

   memcpy(mem_ptr, sampler_state, sizeof(sampler_state));

   return VK_SUCCESS;
}

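/* Builds the default texture state for a surface, remapping depth/stencil
 * formats to integer aliases where the shader reads them as raw data.
 */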
static inline VkResult pvr_image_state_set_codegen_defaults(
   struct pvr_device *device,
   struct pvr_transfer_3d_state *state,
   const struct pvr_transfer_cmd_surface *surface,
   uint32_t load,
   uint64_t *mem_ptr)
{
   struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
   struct pvr_texture_state_info info = { 0U };
   VkResult result;

   switch (surface->vk_format) {
   /* ERN 46863 */
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      switch (layer->pbe_format) {
      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
      case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
         info.format = VK_FORMAT_R32G32_UINT;
         break;
      default:
         break;
      }
      break;

   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      info.format = VK_FORMAT_R32_UINT;
      break;

   default:
      info.format = surface->vk_format;
      break;
   }

   info.flags = 0U;
   info.base_level = 0U;
   info.mip_levels = 1U;
   info.mipmaps_present = false;
   info.sample_count = MAX2(surface->sample_count, 1U);

   if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      info.extent.depth = surface->depth;
   else
      info.extent.depth = 0U;

   if (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
      info.array_size = 0U;

   result = pvr_mem_layout_spec(surface,
                                load,
                                true,
                                &info.extent.width,
                                &info.extent.height,
                                &info.stride,
                                &info.mem_layout,
                                &info.addr);
   if (result != VK_SUCCESS)
      return result;

   if (state->custom_mapping.texel_extend_dst > 1U) {
      info.extent.width /= state->custom_mapping.texel_extend_dst;
      info.stride /= state->custom_mapping.texel_extend_dst;
   }

   info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
   memcpy(info.swizzle,
          pvr_get_format_swizzle(info.format),
          sizeof(info.swizzle));

   if (surface->vk_format == VK_FORMAT_S8_UINT) {
      info.swizzle[0U] = PIPE_SWIZZLE_X;
      info.swizzle[1U] = PIPE_SWIZZLE_0;
      info.swizzle[2U] = PIPE_SWIZZLE_0;
      info.swizzle[3U] = PIPE_SWIZZLE_0;
   }

   if (info.extent.depth > 0U)
      info.type = VK_IMAGE_VIEW_TYPE_3D;
   else if (info.extent.height > 1U)
      info.type = VK_IMAGE_VIEW_TYPE_2D;
   else
      info.type = VK_IMAGE_VIEW_TYPE_1D;

   result = pvr_pack_tex_state(device, &info, mem_ptr);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

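/* Packs the hardware image state for a source surface into the shared
 * register buffer at the slot reserved for the given image.
 */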
static VkResult pvr_image_state_for_surface(
   const struct pvr_transfer_ctx *ctx,
   const struct pvr_transfer_cmd *transfer_cmd,
   const struct pvr_transfer_cmd_surface *surface,
   uint32_t load,
   uint32_t source,
   const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
   struct pvr_transfer_3d_state *state,
   uint32_t uf_image,
   uint32_t *mem_ptr)
{
   uint32_t tex_state[ROGUE_MAXIMUM_IMAGE_STATE_SIZE] = { 0U };
   VkResult result;
   uint8_t offset;

   result = pvr_image_state_set_codegen_defaults(ctx->device,
                                                 state,
                                                 surface,
                                                 load,
                                                 (uint64_t *)tex_state);
   if (result != VK_SUCCESS)
      return result;

   assert(uf_image < PVR_TRANSFER_MAX_IMAGES);

   /* Offset of the shared registers containing the hardware image state. */
   assert(uf_image < sh_reg_layout->combined_image_samplers.count);
   offset = sh_reg_layout->combined_image_samplers.offsets[uf_image].image;

   /* Copy the image state to the buffer which is loaded into the shared
    * registers.
    */
   memcpy(mem_ptr + offset, tex_state, sizeof(tex_state));

   return VK_SUCCESS;
}

1653 /* Writes the texture state/sampler state into DMAed memory. */
1654 static VkResult
pvr_sampler_image_state(struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,struct pvr_transfer_3d_state * state,uint32_t * mem_ptr)1655 pvr_sampler_image_state(struct pvr_transfer_ctx *ctx,
1656 const struct pvr_transfer_cmd *transfer_cmd,
1657 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1658 struct pvr_transfer_3d_state *state,
1659 uint32_t *mem_ptr)
1660 {
1661 if (!state->empty_dst) {
1662 uint32_t uf_sampler = 0U;
1663 uint32_t uf_image = 0U;
1664
1665 for (uint32_t source = 0; source < transfer_cmd->source_count; source++) {
1666 struct pvr_tq_layer_properties *layer =
1667 &state->shader_props.layer_props;
1668 uint32_t max_load = pvr_pbe_pixel_num_loads(layer->pbe_format);
1669
1670 for (uint32_t load = 0U; load < max_load; load++) {
1671 const struct pvr_transfer_cmd_surface *surface;
1672 enum pvr_filter filter;
1673 VkResult result;
1674
1675 switch (layer->pbe_format) {
1676 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
1677 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
1678 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
1679 case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
1680 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
1681 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
1682 case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
1683 case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
1684 case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
1685 case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
1686 if (load > 0U) {
1687 surface = &transfer_cmd->dst;
1688 filter = transfer_cmd->sources[source].filter;
1689 } else {
1690 surface = &transfer_cmd->sources[source].surface;
1691 filter = state->filter[source];
1692 }
1693 break;
1694
1695 case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
1696 case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
1697 surface = &transfer_cmd->sources[source].surface;
1698 filter = state->filter[source];
1699 break;
1700
1701 default:
1702 surface = &transfer_cmd->sources[source + load].surface;
1703 filter = state->filter[source + load];
1704 break;
1705 }
1706
1707 if (load < pvr_int_pbe_pixel_num_sampler_and_image_states(
1708 layer->pbe_format)) {
1709 const struct pvr_device_info *dev_info =
1710 &transfer_cmd->cmd_buffer->device->pdevice->dev_info;
1711
1712 result = pvr_sampler_state_for_surface(dev_info,
1713 surface,
1714 filter,
1715 sh_reg_layout,
1716 uf_sampler,
1717 mem_ptr);
1718 if (result != VK_SUCCESS)
1719 return result;
1720
1721 uf_sampler++;
1722
1723 result = pvr_image_state_for_surface(ctx,
1724 transfer_cmd,
1725 surface,
1726 load,
1727 source,
1728 sh_reg_layout,
1729 state,
1730 uf_image,
1731 mem_ptr);
1732 if (result != VK_SUCCESS)
1733 return result;
1734
1735 uf_image++;
1736 }
1737 }
1738 }
1739 }
1740
1741 return VK_SUCCESS;
1742 }
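
/* Note on the load loop above (summary, not new behavior): for the
 * stencil/depth merge formats (SMRG/DMRG) the first load samples the source
 * and every further load samples the destination, so the USC can merge the
 * incoming channel with the channels already present in the dst.
 */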
1743
1744 /* The returned offset is in dwords. */
1745 static inline uint32_t pvr_dynamic_const_reg_advance(
1746 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1747 struct pvr_transfer_3d_state *state)
1748 {
1749 const uint32_t offset = sh_reg_layout->dynamic_consts.offset;
1750
1751 assert(state->dynamic_const_reg_ptr < sh_reg_layout->dynamic_consts.count);
1752
1753 return offset + state->dynamic_const_reg_ptr++;
1754 }
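
/* Illustrative usage sketch (not from the original source): with
 * dynamic_consts.offset == 8 and dynamic_const_reg_ptr starting at 0, two
 * consecutive calls claim dwords 8 and 9:
 *
 *    mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] = fui(a);
 *    mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] = fui(b);
 */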
1755
1756 /** Scales the coefficients used for (non-normalized) sampling. */
1757 static inline void
1758 pvr_dma_texture_floats(const struct pvr_transfer_cmd *transfer_cmd,
1759 struct pvr_transfer_3d_state *state,
1760 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1761 uint32_t *mem_ptr)
1762
1763 {
1764 if (transfer_cmd->source_count > 0) {
1765 struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
1766 const struct pvr_rect_mapping *mapping =
1767 &transfer_cmd->sources[0].mappings[0U];
1768 VkRect2D src_rect = mapping->src_rect;
1769 VkRect2D dst_rect = mapping->dst_rect;
1770
1771 switch (layer->layer_floats) {
1772 case PVR_INT_COORD_SET_FLOATS_0:
1773 break;
1774
1775 case PVR_INT_COORD_SET_FLOATS_6:
1776 case PVR_INT_COORD_SET_FLOATS_4: {
1777 int32_t consts[2U] = { 0U, 0U };
1778 int32_t denom[2U] = { 0U, 0U };
1779 int32_t nums[2U] = { 0U, 0U };
1780 int32_t src_x, dst_x;
1781 int32_t src_y, dst_y;
1782 float offset = 0.0f;
1783 float tmp;
1784
1785 dst_x = mapping->flip_x ? -(int32_t)dst_rect.extent.width
1786 : dst_rect.extent.width;
1787 dst_y = mapping->flip_y ? -(int32_t)dst_rect.extent.height
1788 : dst_rect.extent.height;
1789 src_x = src_rect.extent.width;
1790 src_y = src_rect.extent.height;
1791
1792 nums[0U] = src_x;
1793 denom[0U] = dst_x;
1794 consts[0U] =
1795 mapping->flip_x
1796 ? src_rect.offset.x * dst_x -
1797 src_x * (dst_rect.offset.x + dst_rect.extent.width)
1798 : src_rect.offset.x * dst_x - src_x * dst_rect.offset.x;
1799 nums[1U] = src_y;
1800 denom[1U] = dst_y;
1801 consts[1U] =
1802 mapping->flip_y
1803 ? src_rect.offset.y * dst_y -
1804 src_y * (dst_rect.offset.y + dst_rect.extent.height)
1805 : src_rect.offset.y * dst_y - src_y * dst_rect.offset.y;
1806
1807 for (uint32_t i = 0U; i < 2U; i++) {
1808 tmp = (float)(nums[i]) / (float)(denom[i]);
1809 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1810 fui(tmp);
1811
1812 tmp = ((float)(consts[i]) + (i == 1U ? offset : 0.0f)) /
1813 (float)(denom[i]);
1814 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1815 fui(tmp);
1816 }
1817
1818 if (layer->layer_floats == PVR_INT_COORD_SET_FLOATS_6) {
1819 tmp = (float)MIN2(dst_rect.offset.x, dst_rect.offset.x + dst_x);
1820 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1821 fui(tmp);
1822
1823 tmp = (float)MIN2(dst_rect.offset.y, dst_rect.offset.y + dst_y);
1824 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
1825 fui(tmp);
1826 }
1827 break;
1828 }
1829
1830 default:
1831 unreachable("Unknown COORD_SET_FLOATS.");
1832 break;
1833 }
1834 }
1835 }
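
/* Worked example (illustrative) for the coefficients above: the shader
 * evaluates u(x) = (nums[0] / denom[0]) * x + consts[0] / denom[0], i.e.
 * u(x) = src.offset.x + (src_w / dst_w) * (x - dst.offset.x). For a
 * non-flipped blit of src rect { offset 0, width 64 } onto dst rect
 * { offset 16, width 32 }:
 *
 *    nums[0] = 64, denom[0] = 32            -> scale  =  2.0
 *    consts[0] = 0 * 32 - 64 * 16 = -1024   -> offset = -32.0
 *
 * so u(16) = 0 and u(48) = 64, mapping the dst span back onto the src span.
 */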
1836
1837 static bool pvr_int_pbe_pixel_requires_usc_filter(
1838 const struct pvr_device_info *dev_info,
1839 enum pvr_transfer_pbe_pixel_src pixel_format)
1840 {
1841 switch (pixel_format) {
1842 case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
1843 case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
1844 case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
1845 case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
1846 case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
1847 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
1848 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
1849 return true;
1850 case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
1851 return !PVR_HAS_FEATURE(dev_info, pbe_filterable_f16);
1852 default:
1853 return false;
1854 }
1855 }
1856
1857 /**
1858 * Sets up the MSAA related bits in the operation
1859 *
1860 * The TPU sample count is read directly from transfer_cmd in the TPU code. An
1861 * MSAA src can be read from sample-rate or instance-rate shaders as long as
1862 * the sample count is set on the TPU. If a layer is single sample we expect
1863 * the same sample to be replicated in full-rate shaders. If the layer is
1864 * multisample, instance-rate shaders are used to emulate the filter or to
1865 * select the specified sample. The sample number is static in the programs.
1866 */
1867 static VkResult pvr_msaa_state(const struct pvr_device_info *dev_info,
1868 const struct pvr_transfer_cmd *transfer_cmd,
1869 struct pvr_transfer_3d_state *state,
1870 uint32_t source)
1871 {
1872 struct pvr_tq_shader_properties *shader_props = &state->shader_props;
1873 struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
1874 struct pvr_winsys_transfer_regs *const regs = &state->regs;
1875 uint32_t src_sample_count =
1876 transfer_cmd->sources[source].surface.sample_count & ~1U;
1877 uint32_t dst_sample_count = transfer_cmd->dst.sample_count & ~1U;
1878 uint32_t bsample_count = 0U;
1879
1880 shader_props->full_rate = false;
1881 state->msaa_multiplier = 1U;
1882 state->down_scale = false;
1883
1884 /* clang-format off */
1885 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
1886 /* clang-format on */
1887
1888 layer->sample_count = 1U;
1889 layer->resolve_op = PVR_RESOLVE_BLEND;
1890
1891 bsample_count |= src_sample_count | dst_sample_count;
1892
1893 if (bsample_count > PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 0U))
1894 return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);
1895
1896 /* We shouldn't get two distinct bits set, as that would imply different
1897 * sample counts, and the rate at which the shader runs has to match.
1898 */
1899 if ((bsample_count & (bsample_count - 1U)) != 0U)
1900 return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);
1901
1902 if (src_sample_count == 0U && dst_sample_count == 0U) {
1903 /* S -> S (no MSAA involved). */
1904 layer->msaa = false;
1905 } else if (src_sample_count != 0U && dst_sample_count == 0U) {
1906 /* M -> S (resolve). */
1907 layer->resolve_op = transfer_cmd->sources[source].resolve_op;
1908
1909 if ((uint32_t)layer->resolve_op >=
1910 (src_sample_count + (uint32_t)PVR_RESOLVE_SAMPLE0)) {
1911 return vk_error(transfer_cmd->cmd_buffer,
1912 VK_ERROR_FORMAT_NOT_SUPPORTED);
1913 }
1914
1915 layer->msaa = true;
1916
1917 switch (layer->resolve_op) {
1918 case PVR_RESOLVE_MIN:
1919 case PVR_RESOLVE_MAX:
1920 switch (transfer_cmd->sources[source].surface.vk_format) {
1921 case VK_FORMAT_D32_SFLOAT:
1922 case VK_FORMAT_D16_UNORM:
1923 case VK_FORMAT_S8_UINT:
1924 case VK_FORMAT_D24_UNORM_S8_UINT:
1925 case VK_FORMAT_X8_D24_UNORM_PACK32:
1926 if (transfer_cmd->sources[source].surface.vk_format !=
1927 transfer_cmd->dst.vk_format) {
1928 return vk_error(transfer_cmd->cmd_buffer,
1929 VK_ERROR_FORMAT_NOT_SUPPORTED);
1930 }
1931 break;
1932
1933 default:
1934 return vk_error(transfer_cmd->cmd_buffer,
1935 VK_ERROR_FORMAT_NOT_SUPPORTED);
1936 }
1937
1938 /* Instance rate. */
1939 layer->sample_count = src_sample_count;
1940 state->shader_props.full_rate = false;
1941 break;
1942
1943 case PVR_RESOLVE_BLEND:
1944 if (pvr_int_pbe_pixel_requires_usc_filter(dev_info,
1945 layer->pbe_format)) {
1946 /* Instance rate. */
1947 layer->sample_count = src_sample_count;
1948 state->shader_props.full_rate = false;
1949 } else {
1950 /* Sample rate. */
1951 state->shader_props.full_rate = true;
1952 state->msaa_multiplier = src_sample_count;
1953 state->down_scale = true;
1954
1955 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
1956 reg.mode = pvr_cr_isp_aa_mode_type(src_sample_count);
1957 }
1958 }
1959 break;
1960
1961 default:
1962 /* Shader doesn't have to know the number of samples. It's enough
1963 * if the TPU knows, and the shader sets the right sno (given to the
1964 * shader in resolve_op).
1965 */
1966 state->shader_props.full_rate = false;
1967 break;
1968 }
1969 } else {
1970 state->msaa_multiplier = dst_sample_count;
1971
1972 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
1973 reg.mode = pvr_cr_isp_aa_mode_type(dst_sample_count);
1974 }
1975
1976 if (src_sample_count == 0U && dst_sample_count != 0U) {
1977 /* S -> M (replicate samples) */
1978 layer->msaa = false;
1979 state->shader_props.full_rate = !state->shader_props.iterated;
1980 } else {
1981 /* M -> M (sample to sample) */
1982 layer->msaa = true;
1983 state->shader_props.full_rate = true;
1984 }
1985 }
1986
1987 return VK_SUCCESS;
1988 }
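
/* Arithmetic notes on the checks above (illustrative): `count & ~1U` clears
 * bit 0, so a sample count of 1 becomes 0 and is treated like "no MSAA",
 * while 2/4/8 pass through unchanged. The `(b & (b - 1U)) != 0U` test then
 * rejects any mix of two different non-zero counts, e.g.
 * b = 4 | 2 = 6 and 6 & 5 = 4 != 0 -> VK_ERROR_FORMAT_NOT_SUPPORTED.
 */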
1989
1990 static bool pvr_requires_usc_linear_filter(VkFormat format)
1991 {
1992 switch (format) {
1993 case VK_FORMAT_R32_SFLOAT:
1994 case VK_FORMAT_R32G32_SFLOAT:
1995 case VK_FORMAT_R32G32B32_SFLOAT:
1996 case VK_FORMAT_R32G32B32A32_SFLOAT:
1997 case VK_FORMAT_D32_SFLOAT:
1998 case VK_FORMAT_D24_UNORM_S8_UINT:
1999 case VK_FORMAT_X8_D24_UNORM_PACK32:
2000 return true;
2001 default:
2002 return false;
2003 }
2004 }
2005
2006 static inline bool
2007 pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,
2008 bool sample,
2009 bool msaa,
2010 bool full_rate)
2011 {
2012 if (sample || msaa || full_rate)
2013 return false;
2014
2015 switch (pbe_format) {
2016 case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
2017 case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
2018 case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
2019 case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
2020 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
2021 case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
2022 return true;
2023 default:
2024 return false;
2025 }
2026 }
2027
2028 static inline bool pvr_pick_component_needed(
2029 const struct pvr_transfer_custom_mapping *custom_mapping)
2030 {
2031 return custom_mapping->pass_count > 0U &&
2032 custom_mapping->texel_extend_dst > 1U &&
2033 custom_mapping->texel_extend_src <= 1U;
2034 }
2035
2036 /** Writes the shader related constants into the DMA space. */
2037 static void
2038 pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2039 uint32_t *dma_space)
2040 {
2041 const uint32_t reg = sh_reg_layout->driver_total;
2042 const uint32_t consts_count =
2043 sh_reg_layout->compiler_out.usc_constants.count;
2044
2045 /* If this doesn't hold, we likely need to write more consts. */
2046 assert(consts_count == sh_reg_layout->compiler_out_total);
2047
2048 /* Append the usc consts after the driver allocated regs. */
2049 for (uint32_t i = 0U; i < consts_count; i++)
2050 dma_space[reg + i] = sh_reg_layout->compiler_out.usc_constants.values[i];
2051 }
2052
2053 static inline void
2054 pvr_dma_texel_unwind(struct pvr_transfer_3d_state *state,
2055 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2056 uint32_t *mem_ptr)
2057
2058 {
2059 const uint32_t coord_sample_mask =
2060 state->custom_mapping.texel_extend_dst - 1U;
2061
2062 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2063 coord_sample_mask;
2064 mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2065 state->custom_mapping.texel_unwind_dst;
2066 }
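
/* Illustrative values (not from the original source): with
 * texel_extend_dst == 4 and texel_unwind_dst == 2, the shader receives
 * coord_sample_mask == 3 (0b11) to pick one of the four extended texels,
 * followed by the unwind value 2, in two consecutive dynamic const dwords.
 */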
2067
2068 /** Writes the Uniform/Texture state data segments + the UniTex code. */
2069 static inline VkResult
2070 pvr_pds_unitex(const struct pvr_device_info *dev_info,
2071 struct pvr_transfer_ctx *ctx,
2072 const struct pvr_transfer_cmd *transfer_cmd,
2073 struct pvr_pds_pixel_shader_sa_program *program,
2074 struct pvr_transfer_prep_data *prep_data)
2075 {
2076 struct pvr_pds_upload *unitex_code =
2077 &ctx->pds_unitex_code[program->num_texture_dma_kicks]
2078 [program->num_uniform_dma_kicks];
2079 struct pvr_transfer_3d_state *state = &prep_data->state;
2080 struct pvr_suballoc_bo *pvr_bo;
2081 VkResult result;
2082 void *map;
2083
2084 /* Uniform program is not used. */
2085 assert(program->num_uniform_dma_kicks == 0U);
2086
2087 if (program->num_texture_dma_kicks == 0U) {
2088 state->uniform_data_size = 0U;
2089 state->tex_state_data_size = 0U;
2090 state->tex_state_data_offset = 0U;
2091 state->uni_tex_code_offset = 0U;
2092
2093 return VK_SUCCESS;
2094 }
2095
2096 pvr_pds_set_sizes_pixel_shader_sa_uniform_data(program, dev_info);
2097 assert(program->data_size == 0U);
2098 state->uniform_data_size = 0U;
2099
2100 pvr_pds_set_sizes_pixel_shader_sa_texture_data(program, dev_info);
2101 state->tex_state_data_size =
2102 ALIGN_POT(program->data_size,
2103 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE));
2104
2105 result =
2106 pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
2107 ctx->device->heaps.pds_heap,
2108 PVR_DW_TO_BYTES(state->tex_state_data_size),
2109 &pvr_bo);
2110 if (result != VK_SUCCESS)
2111 return result;
2112
2113 state->tex_state_data_offset =
2114 pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
2115
2116 map = pvr_bo_suballoc_get_map_addr(pvr_bo);
2117 pvr_pds_generate_pixel_shader_sa_texture_state_data(program, map, dev_info);
2118
2119 /* Save the dev_addr and size in the 3D state. */
2120 state->uni_tex_code_offset = unitex_code->code_offset;
2121 state->pds_temps = program->temps_used;
2122
2123 return VK_SUCCESS;
2124 }
2125
2126 /** Converts a float in range 0 to 1 to an N-bit fixed-point integer. */
2127 static uint32_t pvr_float_to_ufixed(float value, uint32_t bits)
2128 {
2129 uint32_t max = (1U << bits) - 1U;
2130
2131 /* NaN and Inf and overflow. */
2132 if (util_is_inf_or_nan(value) || value >= 1.0f)
2133 return max;
2134 else if (value < 0.0f)
2135 return 0U;
2136
2137 /* Normalise. */
2138 value = value * (float)max;
2139
2140 /* Cast to double so that we can accurately represent the sum for bits > 23. */
2141 return (uint32_t)floor((double)value + 0.5f);
2142 }
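
/* Worked examples (illustrative): pvr_float_to_ufixed(0.5f, 8) scales by
 * max = 255 to 127.5 and rounds to 128 (0x80); pvr_float_to_ufixed(1.0f, 24)
 * takes the `value >= 1.0f` early-out and returns 0xFFFFFF.
 */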
2143
2144 /** Converts a float in range -1 to 1 to a signed N-bit fixed-point integer. */
2145 static uint32_t pvr_float_to_sfixed(float value, uint32_t N)
2146 {
2147 int32_t max = (1 << (N - 1)) - 1;
2148 int32_t min = 0 - (1 << (N - 1));
2149 union fi x;
2150
2151 /* NaN and Inf and overflow. */
2152 if (util_is_inf_or_nan(value) || value >= 1.0f)
2153 return (uint32_t)max;
2154 else if (value == 0.0f)
2155 return 0U;
2156 else if (value <= -1.0f)
2157 return (uint32_t)min;
2158
2159 /* Normalise. */
2160 value *= (float)max;
2161
2162 /* Cast to double so that we can accurately represent the sum for N > 23. */
2163 if (value > 0.0f)
2164 x.i = (int32_t)floor((double)value + 0.5f);
2165 else
2166 x.i = (int32_t)floor((double)value - 0.5f);
2167
2168 return x.ui;
2169 }
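
/* Worked example (illustrative): pvr_float_to_sfixed(-0.5f, 16) scales by
 * max = 32767 to -16383.5, rounds away from zero to -16384, and returns the
 * 32-bit pattern 0xFFFFC000.
 */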
2170
2171 /** Converts a value in IEEE single-precision format to 16-bit floating-point
2172 * format.
2173 */
2174 /* TODO: See if we can use _mesa_float_to_float16_rtz_slow() instead. */
2175 static uint16_t pvr_float_to_f16(float value, bool round_to_even)
2176 {
2177 uint32_t input_value;
2178 uint32_t exponent;
2179 uint32_t mantissa;
2180 uint16_t output;
2181
2182 /* 0.0f can be exactly expressed in binary using IEEE float format. */
2183 if (value == 0.0f)
2184 return 0U;
2185
2186 if (value < 0.0f) {
2187 output = 0x8000;
2188 value = -value;
2189 } else {
2190 output = 0U;
2191 }
2192
2193 /* 2^16 * (2 - 1/1024) = highest f16 representable value. */
2194 value = MIN2(value, 131008);
2195 input_value = fui(value);
2196
2197 /* Extract the exponent and mantissa. */
2198 exponent = util_get_float32_exponent(value) + 15;
2199 mantissa = input_value & ((1 << 23) - 1);
2200
2201 /* If the exponent is outside the supported range then denormalise the
2202 * mantissa.
2203 */
2204 if ((int32_t)exponent <= 0) {
2205 uint32_t shift;
2206
2207 mantissa |= (1 << 23);
2208 exponent = input_value >> 23;
2209 shift = -14 + 127 - exponent;
2210
2211 if (shift < 24)
2212 mantissa >>= shift;
2213 else
2214 mantissa = 0;
2215 } else {
2216 output = (uint16_t)(output | ((exponent << 10) & 0x7C00));
2217 }
2218
2219 output = (uint16_t)(output | (((mantissa >> 13) << 0) & 0x03FF));
2220
2221 if (round_to_even) {
2222 /* Round to nearest even. */
2223 if ((((int)value) % 2 != 0) && (((1 << 13) - 1) & mantissa))
2224 output++;
2225 } else {
2226 /* Round to nearest. */
2227 if (mantissa & (1 << 12))
2228 output++;
2229 }
2230
2231 return output;
2232 }
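
/* Worked example (illustrative): for value == 1.0f the biased exponent is
 * 0 + 15 = 15 and the mantissa is 0, so the packed result is 15 << 10 =
 * 0x3C00, the IEEE half-precision encoding of 1.0. No rounding increment
 * applies since the discarded mantissa bits are all zero.
 */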
2233
2234 static VkResult pvr_pack_clear_color(VkFormat format,
2235 const union fi color[static 4],
2236 uint32_t pkd_color[static 4])
2237 {
2238 const uint32_t red_width =
2239 vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0U);
2240 uint32_t pbe_pack_mode = pvr_get_pbe_packmode(format);
2241 const bool pbe_norm = pvr_vk_format_is_fully_normalized(format);
2242
2243 if (pbe_pack_mode == PVRX(PBESTATE_PACKMODE_INVALID))
2244 return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2245
2246 /* Set packed color based on PBE pack mode and PBE norm. */
2247 switch (pbe_pack_mode) {
2248 case PVRX(PBESTATE_PACKMODE_U8U8U8U8):
2249 case PVRX(PBESTATE_PACKMODE_A8R3G3B2):
2250 if (pbe_norm) {
2251 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2252 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2253 pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2254 pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 8) & 0xFFU) << 24;
2255 } else {
2256 pkd_color[0] = color[0].ui & 0xFFU;
2257 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2258 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2259 pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2260 }
2261 break;
2262
2263 case PVRX(PBESTATE_PACKMODE_S8S8S8S8):
2264 case PVRX(PBESTATE_PACKMODE_X8U8S8S8):
2265 case PVRX(PBESTATE_PACKMODE_X8S8S8U8):
2266 if (pbe_norm) {
2267 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2268 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2269 pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2270 pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2271 } else {
2272 pkd_color[0] = color[0].ui & 0xFFU;
2273 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2274 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2275 pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2276 }
2277 break;
2278
2279 case PVRX(PBESTATE_PACKMODE_U16U16U16U16):
2280 if (pbe_norm) {
2281 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2282 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2283 pkd_color[1] = pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU;
2284 pkd_color[1] |= (pvr_float_to_ufixed(color[3].f, 16) & 0xFFFFU) << 16;
2285 } else {
2286 pkd_color[0] = color[0].ui & 0xFFFFU;
2287 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2288 pkd_color[1] = color[2].ui & 0xFFFFU;
2289 pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2290 }
2291 break;
2292
2293 case PVRX(PBESTATE_PACKMODE_S16S16S16S16):
2294 if (pbe_norm) {
2295 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2296 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2297 pkd_color[1] = (pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU);
2298 pkd_color[1] |= (pvr_float_to_sfixed(color[3].f, 16) & 0xFFFFU) << 16;
2299 } else {
2300 pkd_color[0] = color[0].ui & 0xFFFFU;
2301 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2302 pkd_color[1] = color[2].ui & 0xFFFFU;
2303 pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2304 }
2305 break;
2306
2307 case PVRX(PBESTATE_PACKMODE_A2_XRBIAS_U10U10U10):
2308 case PVRX(PBESTATE_PACKMODE_ARGBV16_XR10):
2309 case PVRX(PBESTATE_PACKMODE_F16F16F16F16):
2310 case PVRX(PBESTATE_PACKMODE_A2R10B10G10):
2311 case PVRX(PBESTATE_PACKMODE_A4R4G4B4):
2312 case PVRX(PBESTATE_PACKMODE_A1R5G5B5):
2313 case PVRX(PBESTATE_PACKMODE_R5G5B5A1):
2314 case PVRX(PBESTATE_PACKMODE_R5G6B5):
2315 if (red_width > 0) {
2316 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2317 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2318 pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2319 pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2320 } else {
2321 /* Swizzle only uses first channel for alpha formats. */
2322 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[3].f, false);
2323 }
2324 break;
2325
2326 case PVRX(PBESTATE_PACKMODE_U32U32U32U32):
2327 pkd_color[0] = color[0].ui;
2328 pkd_color[1] = color[1].ui;
2329 pkd_color[2] = color[2].ui;
2330 pkd_color[3] = color[3].ui;
2331 break;
2332
2333 case PVRX(PBESTATE_PACKMODE_S32S32S32S32):
2334 pkd_color[0] = (uint32_t)color[0].i;
2335 pkd_color[1] = (uint32_t)color[1].i;
2336 pkd_color[2] = (uint32_t)color[2].i;
2337 pkd_color[3] = (uint32_t)color[3].i;
2338 break;
2339
2340 case PVRX(PBESTATE_PACKMODE_F32F32F32F32):
2341 memcpy(pkd_color, &color[0].f, 4U * sizeof(float));
2342 break;
2343
2344 case PVRX(PBESTATE_PACKMODE_R10B10G10A2):
2345 if (pbe_norm) {
2346 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 10) & 0x3FFU;
2347 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 10) & 0x3FFU) << 10;
2348 pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 10) & 0x3FFU) << 20;
2349 pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 2) & 0x3U) << 30;
2350 } else if (format == VK_FORMAT_A2R10G10B10_UINT_PACK32) {
2351 pkd_color[0] = color[2].ui & 0x3FFU;
2352 pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2353 pkd_color[0] |= (color[0].ui & 0x3FFU) << 20;
2354 pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2355 } else {
2356 pkd_color[0] = color[0].ui & 0x3FFU;
2357 pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2358 pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2359 pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2360 }
2361
2362 break;
2363
2364 case PVRX(PBESTATE_PACKMODE_A2F10F10F10):
2365 case PVRX(PBESTATE_PACKMODE_F10F10F10A2):
2366 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 10) & 0x3FFU;
2367 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 10) & 0x3FFU) << 10;
2368 pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 10) & 0x3FFU) << 20;
2369 pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 2) & 0x3U) << 30;
2370 break;
2371
2372 case PVRX(PBESTATE_PACKMODE_U8U8U8):
2373 case PVRX(PBESTATE_PACKMODE_R5SG5SB6):
2374 if (pbe_norm) {
2375 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2376 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2377 pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2378 } else {
2379 pkd_color[0] = color[0].ui & 0xFFU;
2380 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2381 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2382 }
2383 break;
2384
2385 case PVRX(PBESTATE_PACKMODE_S8S8S8):
2386 case PVRX(PBESTATE_PACKMODE_B6G5SR5S):
2387 if (pbe_norm) {
2388 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
2389 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
2390 pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
2391 } else {
2392 pkd_color[0] = color[0].ui & 0xFFU;
2393 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2394 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2395 }
2396 break;
2397
2398 case PVRX(PBESTATE_PACKMODE_U16U16U16):
2399 if (pbe_norm) {
2400 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2401 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2402 pkd_color[1] = (pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU);
2403 } else {
2404 pkd_color[0] = color[0].ui & 0xFFFFU;
2405 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2406 pkd_color[1] = color[2].ui & 0xFFFFU;
2407 }
2408 break;
2409
2410 case PVRX(PBESTATE_PACKMODE_S16S16S16):
2411 if (pbe_norm) {
2412 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2413 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2414 pkd_color[1] = pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU;
2415 } else {
2416 pkd_color[0] = color[0].ui & 0xFFFFU;
2417 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2418 pkd_color[1] = color[2].ui & 0xFFFFU;
2419 }
2420 break;
2421
2422 case PVRX(PBESTATE_PACKMODE_F16F16F16):
2423 case PVRX(PBESTATE_PACKMODE_F11F11F10):
2424 case PVRX(PBESTATE_PACKMODE_F10F11F11):
2425 case PVRX(PBESTATE_PACKMODE_SE9995):
2426 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2427 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2428 pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, true);
2429 break;
2430
2431 case PVRX(PBESTATE_PACKMODE_U32U32U32):
2432 pkd_color[0] = color[0].ui;
2433 pkd_color[1] = color[1].ui;
2434 pkd_color[2] = color[2].ui;
2435 break;
2436
2437 case PVRX(PBESTATE_PACKMODE_S32S32S32):
2438 pkd_color[0] = (uint32_t)color[0].i;
2439 pkd_color[1] = (uint32_t)color[1].i;
2440 pkd_color[2] = (uint32_t)color[2].i;
2441 break;
2442
2443 case PVRX(PBESTATE_PACKMODE_X24G8X32):
2444 case PVRX(PBESTATE_PACKMODE_U8X24):
2445 pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2446 break;
2447
2448 case PVRX(PBESTATE_PACKMODE_F32F32F32):
2449 memcpy(pkd_color, &color[0].f, 3U * sizeof(float));
2450 break;
2451
2452 case PVRX(PBESTATE_PACKMODE_U8U8):
2453 if (pbe_norm) {
2454 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2455 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2456 } else {
2457 pkd_color[0] = color[0].ui & 0xFFU;
2458 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2459 }
2460 break;
2461
2462 case PVRX(PBESTATE_PACKMODE_S8S8):
2463 if (pbe_norm) {
2464 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2465 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2466 } else {
2467 pkd_color[0] = color[0].ui & 0xFFU;
2468 pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2469 pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2470 pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2471 }
2472 break;
2473
2474 case PVRX(PBESTATE_PACKMODE_U16U16):
2475 if (pbe_norm) {
2476 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2477 pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2478 } else {
2479 pkd_color[0] = color[0].ui & 0xFFFFU;
2480 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2481 }
2482 break;
2483
2484 case PVRX(PBESTATE_PACKMODE_S16S16):
2485 if (pbe_norm) {
2486 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2487 pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2488 } else {
2489 pkd_color[0] = color[0].ui & 0xFFFFU;
2490 pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2491 }
2492 break;
2493
2494 case PVRX(PBESTATE_PACKMODE_F16F16):
2495 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2496 pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2497 break;
2498
2499 case PVRX(PBESTATE_PACKMODE_U32U32):
2500 pkd_color[0] = color[0].ui;
2501 pkd_color[1] = color[1].ui;
2502 break;
2503
2504 case PVRX(PBESTATE_PACKMODE_S32S32):
2505 pkd_color[0] = (uint32_t)color[0].i;
2506 pkd_color[1] = (uint32_t)color[1].i;
2507 break;
2508
2509 case PVRX(PBESTATE_PACKMODE_X24U8F32):
2510 case PVRX(PBESTATE_PACKMODE_X24X8F32):
2511 memcpy(pkd_color, &color[0].f, 1U * sizeof(float));
2512 pkd_color[1] = color[1].ui & 0xFFU;
2513 break;
2514
2515 case PVRX(PBESTATE_PACKMODE_F32F32):
2516 memcpy(pkd_color, &color[0].f, 2U * sizeof(float));
2517 break;
2518
2519 case PVRX(PBESTATE_PACKMODE_ST8U24):
2520 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2521 pkd_color[0] |= color[1].ui << 24;
2522 break;
2523
2524 case PVRX(PBESTATE_PACKMODE_U8):
2525 if (format == VK_FORMAT_S8_UINT)
2526 pkd_color[0] = color[1].ui & 0xFFU;
2527 else if (pbe_norm)
2528 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2529 else
2530 pkd_color[0] = color[0].ui & 0xFFU;
2531
2532 break;
2533
2534 case PVRX(PBESTATE_PACKMODE_S8):
2535 if (pbe_norm)
2536 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2537 else
2538 pkd_color[0] = color[0].ui & 0xFFU;
2539 break;
2540
2541 case PVRX(PBESTATE_PACKMODE_U16):
2542 if (pbe_norm)
2543 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2544 else
2545 pkd_color[0] = color[0].ui & 0xFFFFU;
2546 break;
2547
2548 case PVRX(PBESTATE_PACKMODE_S16):
2549 if (pbe_norm)
2550 pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2551 else
2552 pkd_color[0] = color[0].ui & 0xFFFFU;
2553 break;
2554
2555 case PVRX(PBESTATE_PACKMODE_F16):
2556 pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2557 break;
2558
2559 /* U32 */
2560 case PVRX(PBESTATE_PACKMODE_U32):
2561 if (format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2562 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2563 } else if (format == VK_FORMAT_D24_UNORM_S8_UINT) {
2564 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2565 pkd_color[0] |= (color[1].ui & 0xFFU) << 24;
2566 } else if (format == VK_FORMAT_A2B10G10R10_UINT_PACK32) {
2567 pkd_color[0] = color[0].ui & 0x3FFU;
2568 pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2569 pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2570 pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2571 } else {
2572 pkd_color[0] = color[0].ui;
2573 }
2574 break;
2575
2576 /* U24ST8 */
2577 case PVRX(PBESTATE_PACKMODE_U24ST8):
2578 pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2579 pkd_color[1] |= pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2580 break;
2581
2582 /* S32 */
2583 case PVRX(PBESTATE_PACKMODE_S32):
2584 pkd_color[0] = (uint32_t)color[0].i;
2585 break;
2586
2587 /* F32 */
2588 case PVRX(PBESTATE_PACKMODE_F32):
2589 memcpy(pkd_color, &color[0].f, sizeof(float));
2590 break;
2591
2592 /* X8U24 */
2593 case PVRX(PBESTATE_PACKMODE_X8U24):
2594 pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2595 break;
2596
2597 default:
2598 break;
2599 }
2600
2601 return VK_SUCCESS;
2602 }
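
/* Worked example (illustrative): clearing a normalized 8888 target
 * (PBESTATE_PACKMODE_U8U8U8U8 with pbe_norm) using the color
 * (1.0, 0.0, 0.0, 1.0) packs as 0xFF | (0x00 << 8) | (0x00 << 16) |
 * (0xFF << 24), i.e. pkd_color[0] == 0xFF0000FF.
 */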
2603
2604 static VkResult
2605 pvr_isp_scan_direction(struct pvr_transfer_cmd *transfer_cmd,
2606 bool custom_mapping,
2607 enum PVRX(CR_DIR_TYPE) *const dir_type_out)
2608 {
2609 pvr_dev_addr_t dst_dev_addr = transfer_cmd->dst.dev_addr;
2610 bool backwards_in_x = false;
2611 bool backwards_in_y = false;
2612 bool done_dest_rect = false;
2613 VkRect2D dst_rect;
2614 int32_t dst_x1;
2615 int32_t dst_y1;
2616
2617 for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
2618 struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];
2619 pvr_dev_addr_t src_dev_addr = src->surface.dev_addr;
2620
2621 if (src_dev_addr.addr == dst_dev_addr.addr && !custom_mapping) {
2622 VkRect2D *src_rect = &src->mappings[0].src_rect;
2623 int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
2624 int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;
2625
2626 if (!done_dest_rect) {
2627 dst_rect = src->mappings[0].dst_rect;
2628
2629 dst_x1 = dst_rect.offset.x + dst_rect.extent.width;
2630 dst_y1 = dst_rect.offset.y + dst_rect.extent.height;
2631
2632 done_dest_rect = true;
2633 }
2634
2635 if ((dst_rect.offset.x < src_x1 && dst_x1 > src_rect->offset.x) &&
2636 (dst_rect.offset.y < src_y1 && dst_y1 > src_rect->offset.y)) {
2637 if (src_rect->extent.width != dst_rect.extent.width ||
2638 src_rect->extent.height != dst_rect.extent.height) {
2639 /* Scaling is not possible. */
2640 return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2641 }
2642
2643 /* Direction is to the right. */
2644 backwards_in_x = dst_rect.offset.x > src_rect->offset.x;
2645
2646 /* Direction is to the bottom. */
2647 backwards_in_y = dst_rect.offset.y > src_rect->offset.y;
2648 }
2649 }
2650 }
2651
2652 if (backwards_in_x) {
2653 if (backwards_in_y)
2654 *dir_type_out = PVRX(CR_DIR_TYPE_BR2TL);
2655 else
2656 *dir_type_out = PVRX(CR_DIR_TYPE_TR2BL);
2657 } else {
2658 if (backwards_in_y)
2659 *dir_type_out = PVRX(CR_DIR_TYPE_BL2TR);
2660 else
2661 *dir_type_out = PVRX(CR_DIR_TYPE_TL2BR);
2662 }
2663
2664 return VK_SUCCESS;
2665 }
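
/* Illustrative case: an overlapping same-surface copy whose dst rect sits
 * right of and below its src rect sets both backwards_in_x and
 * backwards_in_y, so the ISP scans bottom-right to top-left
 * (CR_DIR_TYPE_BR2TL) and never reads a texel it has already written.
 */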
2666
2667 static VkResult pvr_3d_copy_blit_core(struct pvr_transfer_ctx *ctx,
2668 struct pvr_transfer_cmd *transfer_cmd,
2669 struct pvr_transfer_prep_data *prep_data,
2670 uint32_t pass_idx,
2671 bool *finished_out)
2672 {
2673 struct pvr_transfer_3d_state *const state = &prep_data->state;
2674 struct pvr_winsys_transfer_regs *const regs = &state->regs;
2675 struct pvr_device *const device = ctx->device;
2676 const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
2677
2678 VkResult result;
2679
2680 *finished_out = true;
2681
2682 state->common_ptr = 0U;
2683 state->dynamic_const_reg_ptr = 0U;
2684 state->usc_const_reg_ptr = 0U;
2685
2686 if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U) {
2687 uint32_t packed_color[4U] = { 0U };
2688
2689 if (transfer_cmd->source_count != 0U)
2690 return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
2691
2692 if (vk_format_is_compressed(transfer_cmd->dst.vk_format))
2693 return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
2694
2695 /* No shader. */
2696 state->pds_temps = 0U;
2697 state->uniform_data_size = 0U;
2698 state->tex_state_data_size = 0U;
2699
2700 /* No background enabled. */
2701 /* clang-format off */
2702 pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
2703 /* clang-format on */
2704 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
2705 reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
2706 }
2707
2708 result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
2709 transfer_cmd->clear_color,
2710 packed_color);
2711 if (result != VK_SUCCESS)
2712 return result;
2713
2714 pvr_csb_pack (&regs->usc_clear_register0, CR_USC_CLEAR_REGISTER, reg) {
2715 reg.val = packed_color[0U];
2716 }
2717
2718 pvr_csb_pack (&regs->usc_clear_register1, CR_USC_CLEAR_REGISTER, reg) {
2719 reg.val = packed_color[1U];
2720 }
2721
2722 pvr_csb_pack (&regs->usc_clear_register2, CR_USC_CLEAR_REGISTER, reg) {
2723 reg.val = packed_color[2U];
2724 }
2725
2726 pvr_csb_pack (&regs->usc_clear_register3, CR_USC_CLEAR_REGISTER, reg) {
2727 reg.val = packed_color[3U];
2728 }
2729
2730 state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
2731 state->pds_shader_task_offset = 0U;
2732 state->uni_tex_code_offset = 0U;
2733 state->tex_state_data_offset = 0U;
2734 } else if (transfer_cmd->source_count > 0U) {
2735 const struct pvr_tq_frag_sh_reg_layout nop_sh_reg_layout = {
2736 /* TODO: Setting this to 1 so that we don't try to pvr_bo_alloc() with
2737 * zero size. The device will ignore the PDS program if USC_SHAREDSIZE
2738 * is zero and in the case of the nop shader we're expecting it to be
2739 * zero. See if we can safely pass PVR_DEV_ADDR_INVALID for the unitex
2740 * program.
2741 */
2742 .driver_total = 1,
2743 };
2744 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
2745 struct pvr_pds_pixel_shader_sa_program unitex_prog = { 0U };
2746 uint32_t tex_state_dma_size_dw;
2747 struct pvr_suballoc_bo *pvr_bo;
2748 uint32_t *dma_space;
2749
2750 result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
2751 if (result != VK_SUCCESS)
2752 return result;
2753
2754 pvr_uv_space(dev_info, transfer_cmd, state);
2755
2756 state->shader_props.iterated = false;
2757
2758 state->shader_props.layer_props.sample =
2759 transfer_cmd->sources[0].surface.mem_layout ==
2760 PVR_MEMLAYOUT_3DTWIDDLED;
2761
2762 result = pvr_msaa_state(dev_info, transfer_cmd, state, 0);
2763 if (result != VK_SUCCESS)
2764 return result;
2765
2766 state->shader_props.pick_component =
2767 pvr_pick_component_needed(&state->custom_mapping);
2768
2769 if (state->filter[0] == PVR_FILTER_LINEAR &&
2770 pvr_requires_usc_linear_filter(
2771 transfer_cmd->sources[0].surface.vk_format)) {
2772 if (pvr_int_pbe_usc_linear_filter(
2773 state->shader_props.layer_props.pbe_format,
2774 state->shader_props.layer_props.sample,
2775 state->shader_props.layer_props.msaa,
2776 state->shader_props.full_rate)) {
2777 state->shader_props.layer_props.linear = true;
2778 } else {
2779 mesa_logw("Transfer: F32 linear filter not supported.");
2780 }
2781 }
2782
2783 if (state->empty_dst) {
2784 sh_reg_layout = &nop_sh_reg_layout;
2785 state->pds_shader_task_offset = device->nop_program.pds.data_offset;
2786 } else {
2787 pvr_dev_addr_t kick_usc_pds_dev_addr;
2788
2789 result =
2790 pvr_transfer_frag_store_get_shader_info(device,
2791 &ctx->frag_store,
2792 &state->shader_props,
2793 &kick_usc_pds_dev_addr,
2794 &sh_reg_layout);
2795 if (result != VK_SUCCESS)
2796 return result;
2797
2798 assert(kick_usc_pds_dev_addr.addr <= UINT32_MAX);
2799 state->pds_shader_task_offset = (uint32_t)kick_usc_pds_dev_addr.addr;
2800 }
2801
2802 unitex_prog.kick_usc = false;
2803 unitex_prog.clear = false;
2804
2805 tex_state_dma_size_dw =
2806 sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;
2807
2808 unitex_prog.num_texture_dma_kicks = 1U;
2809 unitex_prog.num_uniform_dma_kicks = 0U;
2810
2811 result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
2812 device->heaps.general_heap,
2813 PVR_DW_TO_BYTES(tex_state_dma_size_dw),
2814 &pvr_bo);
2815 if (result != VK_SUCCESS)
2816 return result;
2817
2818 dma_space = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
2819
2820 result = pvr_sampler_image_state(ctx,
2821 transfer_cmd,
2822 sh_reg_layout,
2823 state,
2824 dma_space);
2825 if (result != VK_SUCCESS)
2826 return result;
2827
2828 pvr_dma_texture_floats(transfer_cmd, state, sh_reg_layout, dma_space);
2829
2830 if (transfer_cmd->sources[0].surface.mem_layout ==
2831 PVR_MEMLAYOUT_3DTWIDDLED) {
2832 dma_space[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2833 fui(transfer_cmd->sources[0].surface.z_position);
2834 }
2835
2836 pvr_write_usc_constants(sh_reg_layout, dma_space);
2837
2838 if (pvr_pick_component_needed(&state->custom_mapping))
2839 pvr_dma_texel_unwind(state, sh_reg_layout, dma_space);
2840
2841 pvr_pds_encode_dma_burst(unitex_prog.texture_dma_control,
2842 unitex_prog.texture_dma_address,
2843 state->common_ptr,
2844 tex_state_dma_size_dw,
2845 pvr_bo->dev_addr.addr,
2846 true,
2847 dev_info);
2848
2849 state->common_ptr += tex_state_dma_size_dw;
2850
2851 result =
2852 pvr_pds_unitex(dev_info, ctx, transfer_cmd, &unitex_prog, prep_data);
2853 if (result != VK_SUCCESS)
2854 return result;
2855
2856 pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg) {
2857 reg.enablebgtag = true;
2858 }
2859
2860 /* clang-format off */
2861 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
2862 /* clang-format on */
2863 } else {
2864 /* No shader. */
2865 state->pds_temps = 0U;
2866 state->uniform_data_size = 0U;
2867 state->tex_state_data_size = 0U;
2868
2869 /* No background enabled. */
2870 /* clang-format off */
2871 pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
2872 /* clang-format on */
2873 pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
2874 reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
2875 }
2876 state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
2877 state->pds_shader_task_offset = 0U;
2878 state->uni_tex_code_offset = 0U;
2879 state->tex_state_data_offset = 0U;
2880
2881 result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
2882 if (result != VK_SUCCESS)
2883 return result;
2884 }
2885
2886 pvr_setup_hwbg_object(dev_info, state);
2887
2888 pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
2889 reg.mode_type = PVRX(CR_ISP_RENDER_MODE_TYPE_FAST_SCALE);
2890
2891 result = pvr_isp_scan_direction(transfer_cmd,
2892 state->custom_mapping.pass_count,
2893 &reg.dir_type);
2894 if (result != VK_SUCCESS)
2895 return result;
2896 }
2897
2898 /* Set up pixel event handling. */
2899 result = pvr_pbe_setup(transfer_cmd, ctx, state);
2900 if (result != VK_SUCCESS)
2901 return result;
2902
2903 result = pvr_isp_tiles(device, state);
2904 if (result != VK_SUCCESS)
2905 return result;
2906
2907 if (PVR_HAS_FEATURE(&device->pdevice->dev_info, gpu_multicore_support)) {
2908 pvr_csb_pack (&regs->frag_screen, CR_FRAG_SCREEN, reg) {
2909 reg.xmax = transfer_cmd->dst.width - 1;
2910 reg.ymax = transfer_cmd->dst.height - 1;
2911 }
2912 }
2913
2914 if ((pass_idx + 1U) < state->custom_mapping.pass_count)
2915 *finished_out = false;
2916
2917 return VK_SUCCESS;
2918 }
2919
2920 static VkResult
2921 pvr_pbe_src_format_f2d(uint32_t merge_flags,
2922 struct pvr_transfer_cmd_source *src,
2923 VkFormat dst_format,
2924 bool down_scale,
2925 bool dont_force_pbe,
2926 enum pvr_transfer_pbe_pixel_src *pixel_format_out)
2927 {
2928 VkFormat src_format = src->surface.vk_format;
2929
2930 /* This has to come before the rest as S8, for instance, is integer and
2931 * the signedness check fails on D24S8.
2932 */
2933 if (vk_format_is_depth_or_stencil(src_format) ||
2934 vk_format_is_depth_or_stencil(dst_format) ||
2935 merge_flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
2936 return pvr_pbe_src_format_ds(&src->surface,
2937 src->filter,
2938 dst_format,
2939 merge_flags,
2940 down_scale,
2941 pixel_format_out);
2942 }
2943
2944 return pvr_pbe_src_format_normal(src_format,
2945 dst_format,
2946 down_scale,
2947 dont_force_pbe,
2948 pixel_format_out);
2949 }
2950
2951 /** Writes the coefficient loading PDS task. */
2952 static inline VkResult
2953 pvr_pds_coeff_task(struct pvr_transfer_ctx *ctx,
2954 const struct pvr_transfer_cmd *transfer_cmd,
2955 const bool sample_3d,
2956 struct pvr_transfer_prep_data *prep_data)
2957 {
2958 struct pvr_transfer_3d_state *state = &prep_data->state;
2959 struct pvr_pds_coeff_loading_program program = { 0U };
2960 struct pvr_suballoc_bo *pvr_bo;
2961 VkResult result;
2962
2963 program.num_fpu_iterators = 1U;
2964
2965 pvr_csb_pack (&program.FPU_iterators[0U],
2966 PDSINST_DOUT_FIELDS_DOUTI_SRC,
2967 reg) {
2968 if (sample_3d)
2969 reg.size = PVRX(PDSINST_DOUTI_SIZE_3D);
2970 else
2971 reg.size = PVRX(PDSINST_DOUTI_SIZE_2D);
2972
2973 reg.perspective = false;
2974
2975 /* Varying wrap on the TSP means that the TSP chooses the shorter path
2976 * out of the normal and the wrapping path, i.e. it chooses between u0->u1
2977 * and u1->1.0 == 0.0 -> u0. We don't need this behavior.
2978 */
2979 /*
2980 * If RHW is ever needed, offset SRC_F32 to the first U in 16-bit units:
2981 * l0 U <= offs 0
2982 * l0 V
2983 * l1 U <= offs 4
2984 * ...
2985 */
2986 reg.shademodel = PVRX(PDSINST_DOUTI_SHADEMODEL_GOURUAD);
2987 reg.f32_offset = 0U;
2988 }
2989
2990 if (sample_3d)
2991 state->usc_coeff_regs = 12U;
2992 else
2993 state->usc_coeff_regs = 8U;
2994
2995 pvr_pds_set_sizes_coeff_loading(&program);
2996
2997 result = pvr_cmd_buffer_alloc_mem(
2998 transfer_cmd->cmd_buffer,
2999 ctx->device->heaps.pds_heap,
3000 PVR_DW_TO_BYTES(program.data_size + program.code_size),
3001 &pvr_bo);
3002 if (result != VK_SUCCESS)
3003 return result;
3004
3005 state->pds_coeff_task_offset =
3006 pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
3007
3008 pvr_pds_generate_coeff_loading_program(&program,
3009 pvr_bo_suballoc_get_map_addr(pvr_bo));
3010
3011 state->coeff_data_size = program.data_size;
3012 state->pds_temps = program.temps_used;
3013
3014 return VK_SUCCESS;
3015 }
3016
3017 #define X 0U
3018 #define Y 1U
3019 #define Z 2U
3020
3021 static void pvr_tsp_floats(const struct pvr_device_info *dev_info,
3022 VkRect2D *rect,
3023 const float recips[3U],
3024 bool custom_filter,
3025 bool z_present,
3026 float z_value,
3027 struct pvr_transfer_3d_iteration *layer)
3028 {
3029 #define U0 0U
3030 #define U1 1U
3031 #define V0 2U
3032 #define V1 3U
3033
3034 const uint32_t indices[8U] = { U0, V0, U0, V1, U1, V1, U1, V0 };
3035 float delta[2U] = { 0.0f, 0.0f };
3036 int32_t non_normalized[4U];
3037 uint32_t src_flipped[2U];
3038 uint32_t normalized[4U];
3039 int32_t src_span[2U];
3040
3041 non_normalized[U0] = rect->offset.x;
3042 non_normalized[U1] = rect->offset.x + rect->extent.width;
3043 non_normalized[V0] = rect->offset.y;
3044 non_normalized[V1] = rect->offset.y + rect->extent.height;
3045
3046 /* Filter adjust. */
3047 src_span[X] = rect->extent.width;
3048 src_flipped[X] = src_span[X] > 0U ? 0U : 1U;
3049 src_span[Y] = rect->extent.height;
3050 src_flipped[Y] = src_span[Y] > 0U ? 0U : 1U;
3051 /*
3052 * | X | Y | srcFlipX | srcFlipY |
3053 * +----+----+----------+----------|
3054 * | X | Y | 0 | 0 |
3055 * | -X | Y | 1 | 0 |
3056 * | X | -Y | 0 | 1 |
3057 * | -X | -Y | 1 | 1 |
3058 */
3059 for (uint32_t i = X; i <= Y; i++) {
3060 if (custom_filter) {
3061 if (src_flipped[i] != 0U)
3062 delta[i] += 0.25;
3063 else
3064 delta[i] -= 0.25;
3065 }
3066 }
3067
3068 /* Normalize. */
3069 for (uint32_t i = 0U; i < ARRAY_SIZE(normalized); i++) {
3070 uint32_t tmp;
3071 float ftmp;
3072
3073 ftmp = (float)non_normalized[i] + delta[i >> 1U];
3074 ftmp *= recips[i >> 1U];
3075
3076 tmp = fui(ftmp);
3077 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3078 tmp = XXH_rotl32(tmp, 1U);
3079
3080 normalized[i] = tmp;
3081 }
3082
3083 /* Apply indices. */
3084 for (uint32_t i = 0U; i < 8U; i++)
3085 layer->texture_coords[i] = normalized[indices[i]];
3086
3087 if (z_present) {
3088 uint32_t tmp = fui(z_value * recips[2U]);
3089
3090 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3091 tmp = XXH_rotl32(tmp, 1U);
3092
3093 for (uint32_t i = 8U; i < 12U; i++)
3094 layer->texture_coords[i] = tmp;
3095 }
3096
3097 #undef U0
3098 #undef U1
3099 #undef V0
3100 #undef V1
3101 }
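
/* Worked example (illustrative): a src rect spanning x = [0, 64) on a
 * 256-texel-wide surface normalizes to u0 = 0 * (1/256) = 0.0f and
 * u1 = 64 * (1/256) = 0.25f. On cores without
 * simple_internal_parameter_format each packed float is additionally
 * rotated left by one bit (XXH_rotl32) before being handed to the TSP.
 */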
3102
3103 static void
3104 pvr_isp_prim_block_tsp_vertex_block(const struct pvr_device_info *dev_info,
3105 const struct pvr_transfer_cmd_source *src,
3106 struct pvr_rect_mapping *mappings,
3107 bool custom_filter,
3108 uint32_t num_mappings,
3109 uint32_t mapping_offset,
3110 uint32_t tsp_comp_format_in_dw,
3111 uint32_t **const cs_ptr_out)
3112 {
3113 struct pvr_transfer_3d_iteration layer;
3114 uint32_t *cs_ptr = *cs_ptr_out;
3115
3116 /* |<-32b->|
3117 * +-------+-----
3118 * | RHW | | X num_isp_vertices
3119 * +-------+-- |
3120 * | U | | |
3121 * | V | | X PVR_TRANSFER_NUM_LAYERS
3122 * +-------+-----
3123 *
3124 * RHW is no longer present in the transfer path. The diagram still shows
3125 * where it would go if it were ever needed.
3126 */
3127 for (uint32_t i = mapping_offset; i < mapping_offset + num_mappings; i++) {
3128 bool z_present = src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED;
3129 const float recips[3U] = {
3130 [X] = 1.0f / (float)src->surface.width,
3131 [Y] = 1.0f / (float)src->surface.height,
3132 [Z] = z_present ? 1.0f / (float)src->surface.depth : 0.0f,
3133 };
3134 float z_pos = (src->filter < PVR_FILTER_LINEAR)
3135 ? floor(src->surface.z_position + 0.5f)
3136 : src->surface.z_position;
3137
3138 pvr_tsp_floats(dev_info,
3139 &mappings[i].src_rect,
3140 recips,
3141 custom_filter,
3142 z_present,
3143 z_pos,
3144 &layer);
3145
3146 /* We request UVs from TSP for ISP triangle:
3147 * 0 u 1
3148 * +---,
3149 * v| /|
3150 * | / |
3151 * 2'/--'3
3152 */
3153 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3154 *cs_ptr++ = layer.texture_coords[0U];
3155 *cs_ptr++ = layer.texture_coords[1U];
3156 }
3157
3158 if (z_present) {
3159 *cs_ptr++ = layer.texture_coords[8U];
3160 *cs_ptr++ = 0U;
3161 }
3162
3163 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3164 *cs_ptr++ = layer.texture_coords[6U];
3165 *cs_ptr++ = layer.texture_coords[7U];
3166 }
3167
3168 if (z_present) {
3169 *cs_ptr++ = layer.texture_coords[11U];
3170 *cs_ptr++ = 0U;
3171 }
3172
3173 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3174 *cs_ptr++ = layer.texture_coords[2U];
3175 *cs_ptr++ = layer.texture_coords[3U];
3176 }
3177
3178 if (z_present) {
3179 *cs_ptr++ = layer.texture_coords[9U];
3180 *cs_ptr++ = 0U;
3181 }
3182
3183 for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3184 *cs_ptr++ = layer.texture_coords[4U];
3185 *cs_ptr++ = layer.texture_coords[5U];
3186 }
3187
3188 if (z_present) {
3189 *cs_ptr++ = layer.texture_coords[10U];
3190 *cs_ptr++ = 0U;
3191 }
3192 }
3193
3194 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3195 /* Skipped optional primitive id. */
3196 for (uint32_t i = 0U; i < tsp_comp_format_in_dw; i++)
3197 *cs_ptr++ = 0x88888888U;
3198 } else {
3199 /* Align back to 64 bits. */
3200 if (((uintptr_t)cs_ptr & 7U) != 0U)
3201 cs_ptr++;
3202 }
3203
3204 *cs_ptr_out = cs_ptr;
3205 }
3206
3207 #undef X
3208 #undef Y
3209 #undef Z
3210
3211 static void pvr_isp_prim_block_pds_state(const struct pvr_device_info *dev_info,
3212 struct pvr_transfer_ctx *ctx,
3213 struct pvr_transfer_3d_state *state,
3214 uint32_t **const cs_ptr_out)
3215 {
3216 uint32_t *cs_ptr = *cs_ptr_out;
3217
3218 pvr_csb_pack (cs_ptr, TA_STATE_PDS_SHADERBASE, shader_base) {
3219 shader_base.addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
3220 }
3221 cs_ptr++;
3222
3223 pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXUNICODEBASE, tex_base) {
3224 tex_base.addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
3225 }
3226 cs_ptr++;
3227
3228 pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO1, info1) {
3229 info1.pds_uniformsize =
3230 state->uniform_data_size /
3231 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE);
3232
3233 info1.pds_texturestatesize =
3234 state->tex_state_data_size /
3235 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);
3236
3237 info1.pds_varyingsize =
3238 state->coeff_data_size /
3239 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE);
3240
3241 info1.usc_varyingsize =
3242 ALIGN_POT(state->usc_coeff_regs,
3243 PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE)) /
3244 PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE);
3245
3246 info1.pds_tempsize =
3247 ALIGN_POT(state->pds_temps,
3248 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE)) /
3249 PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE);
3250 }
3251 cs_ptr++;
3252
3253 pvr_csb_pack (cs_ptr, TA_STATE_PDS_VARYINGBASE, base) {
3254 base.addr = PVR_DEV_ADDR(state->pds_coeff_task_offset);
3255 }
3256 cs_ptr++;
3257
3258 pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXTUREDATABASE, base) {
3259 base.addr = PVR_DEV_ADDR(state->tex_state_data_offset);
3260 }
3261 cs_ptr++;
3262
3263 /* PDS uniform program not used. */
3264 pvr_csb_pack (cs_ptr, TA_STATE_PDS_UNIFORMDATABASE, base) {
3265 base.addr = PVR_DEV_ADDR(0U);
3266 }
3267 cs_ptr++;
3268
3269 pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO2, info) {
3270 info.usc_sharedsize =
3271 ALIGN_POT(state->common_ptr,
3272 PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE)) /
3273 PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
3274 info.pds_tri_merge_disable = !PVR_HAS_ERN(dev_info, 42307);
3275 info.pds_batchnum = 0U;
3276 }
3277 cs_ptr++;
3278
3279 /* Get back to a 64-bit boundary. */
3280 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3281 cs_ptr++;
3282
3283 *cs_ptr_out = cs_ptr;
3284 }
3285
3286 static void pvr_isp_prim_block_isp_state(const struct pvr_device_info *dev_info,
3287 UNUSED uint32_t tsp_comp_format_in_dw,
3288 uint32_t tsp_data_size_in_bytes,
3289 uint32_t num_isp_vertices,
3290 bool read_bgnd,
3291 uint32_t **const cs_ptr_out)
3292 {
3293 const bool has_simple_internal_parameter_format_v2 =
3294 PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2);
3295 uint32_t *cs_ptr = *cs_ptr_out;
3296
3297 if (has_simple_internal_parameter_format_v2) {
3298 const uint32_t tsp_data_per_vrx_in_bytes =
3299 tsp_data_size_in_bytes / num_isp_vertices;
3300
3301 pvr_csb_pack ((uint64_t *)cs_ptr,
3302 IPF_VERTEX_FORMAT_WORD_SIPF2,
3303 vert_fmt) {
3304 vert_fmt.vf_isp_state_size =
3305 pvr_cmd_length(TA_STATE_ISPCTL) + pvr_cmd_length(TA_STATE_ISPA);
3306
3307 vert_fmt.vf_tsp_vtx_raw = true;
3308 vert_fmt.vf_isp_vtx_raw = true;
3309
3310 vert_fmt.vf_varying_vertex_bits = tsp_data_per_vrx_in_bytes * 8U;
3311 vert_fmt.vf_primitive_total = (num_isp_vertices / 2U) - 1U;
3312 vert_fmt.vf_vertex_total = num_isp_vertices - 1U;
3313 }
3314 cs_ptr += pvr_cmd_length(IPF_VERTEX_FORMAT_WORD_SIPF2);
3315 }
3316
3317 /* ISP state words. */
3318
3319 /* clang-format off */
3320 pvr_csb_pack (cs_ptr, TA_STATE_ISPCTL, ispctl);
3321 /* clang-format on */
3322 cs_ptr += pvr_cmd_length(TA_STATE_ISPCTL);
3323
3324 pvr_csb_pack (cs_ptr, TA_STATE_ISPA, ispa) {
3325 ispa.objtype = PVRX(TA_OBJTYPE_TRIANGLE);
3326 ispa.passtype = read_bgnd ? PVRX(TA_PASSTYPE_TRANSLUCENT)
3327 : PVRX(TA_PASSTYPE_OPAQUE);
3328 ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
3329 ispa.dwritedisable = true;
3330 }
3331 cs_ptr += pvr_cmd_length(TA_STATE_ISPA);
3332
3333 if (has_simple_internal_parameter_format_v2) {
3334 *cs_ptr_out = cs_ptr;
3335 return;
3336 }
3337
3338 /* How many bytes does the TSP compression format need? */
3339 pvr_csb_pack (cs_ptr, IPF_COMPRESSION_SIZE_WORD, word) {
3340 word.cs_isp_comp_table_size = 0U;
3341 word.cs_tsp_comp_format_size = tsp_comp_format_in_dw;
3342 word.cs_tsp_comp_table_size = 0U;
3343 word.cs_tsp_comp_vertex_size = tsp_data_size_in_bytes / num_isp_vertices;
3344 }
3345 cs_ptr += pvr_cmd_length(IPF_COMPRESSION_SIZE_WORD);
3346
3347 /* ISP vertex compression. */
3348 pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_0, word0) {
3349 word0.cf_isp_comp_fmt_x0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3350 word0.cf_isp_comp_fmt_x1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3351 word0.cf_isp_comp_fmt_x2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3352 word0.cf_isp_comp_fmt_y0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3353 word0.cf_isp_comp_fmt_y1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3354 word0.cf_isp_comp_fmt_y2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3355 word0.cf_isp_comp_fmt_z0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3356 word0.cf_isp_comp_fmt_z1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3357 }
3358 cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_0);
3359
3360 pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_1, word1) {
3361 word1.vf_prim_msaa = 0U;
3362 word1.vf_prim_id_pres = 0U;
3363 word1.vf_vertex_clipped = 0U;
3364 word1.vf_vertex_total = num_isp_vertices - 1U;
3365 word1.cf_isp_comp_fmt_z3 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3366 word1.cf_isp_comp_fmt_z2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3367 }
3368 cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_1);
3369
3370 *cs_ptr_out = cs_ptr;
3371 }
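/* Illustrative SIPF2 numbers: a single quad contributes
 * num_isp_vertices == 4, so the vertex format word above packs
 * vf_vertex_total == 3 and vf_primitive_total == (4 / 2) - 1 == 1,
 * i.e. two triangles per quad, with both fields counted minus one.
 */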
3372
3373 static void
3374 pvr_isp_prim_block_index_block(const struct pvr_device_info *dev_info,
3375 uint32_t num_mappings,
3376 uint32_t **const cs_ptr_out)
3377 {
3378 uint32_t *cs_ptr = *cs_ptr_out;
3379
3380 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3381 for (uint32_t i = 0U; i < DIV_ROUND_UP(num_mappings, 2U); i++) {
3382 const uint32_t idx = i * 8U;
3383
3384 pvr_csb_pack ((uint64_t *)cs_ptr,
3385 IPF_INDEX_DATA_WORDS_SIPF,
3386 idx_data_word) {
3387 idx_data_word.ix_triangle3_index_2 = idx + 5U;
3388 idx_data_word.ix_triangle3_index_1 = idx + 6U;
3389 idx_data_word.ix_triangle3_index_0 = idx + 7U;
3390
3391 idx_data_word.ix_triangle2_index_2 = idx + 6U;
3392 idx_data_word.ix_triangle2_index_1 = idx + 5U;
3393 idx_data_word.ix_triangle2_index_0 = idx + 4U;
3394
3395 idx_data_word.ix_triangle1_index_2 = idx + 1U;
3396 idx_data_word.ix_triangle1_index_1 = idx + 2U;
3397 idx_data_word.ix_triangle1_index_0 = idx + 3U;
3398
3399 idx_data_word.ix_triangle0_index_2 = idx + 2U;
3400 idx_data_word.ix_triangle0_index_1 = idx + 1U;
3401 idx_data_word.ix_triangle0_index_0 = idx + 0U;
3402 }
3403 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA_WORDS_SIPF);
3404 }
3405
3406 *cs_ptr_out = cs_ptr;
3407 return;
3408 }
3409
3410 for (uint32_t i = 0U, j = 0U; i < num_mappings; i++, j += 4U) {
3411 if ((i & 1U) == 0U) {
3412 pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
3413 word.ix_index0_0 = j;
3414 word.ix_index0_1 = j + 1U;
3415 word.ix_index0_2 = j + 2U;
3416 word.ix_index1_0 = j + 3U;
3417 }
3418 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
3419
3420 /* Don't increment cs_ptr here. IPF_INDEX_DATA is patched in the
3421 * else part and then cs_ptr is incremented.
3422 */
3423 pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
3424 word.ix_index0_0 = j + 2U;
3425 word.ix_index0_1 = j + 1U;
3426 }
3427 } else {
3428 uint32_t tmp;
3429
3430 pvr_csb_pack (&tmp, IPF_INDEX_DATA, word) {
3431 word.ix_index0_2 = j;
3432 word.ix_index1_0 = j + 1U;
3433 }
3434 *cs_ptr |= tmp;
3435 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
3436
3437 pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
3438 word.ix_index0_0 = j + 2U;
3439 word.ix_index0_1 = j + 3U;
3440 word.ix_index0_2 = j + 2U;
3441 word.ix_index1_0 = j + 1U;
3442 }
3443 cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
3444 }
3445 }
3446
3447 /* With an odd mapping count the last iteration didn't advance cs_ptr. */
3448 if ((num_mappings & 1U) != 0U)
3449 cs_ptr++;
3450
3451 *cs_ptr_out = cs_ptr;
3452 }
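/* Index layout sketch: on SIPF cores each loop iteration emits indices for
 * two quads (8 vertices); for i == 0 the packed triangles are (0,1,2),
 * (3,2,1), (4,5,6) and (7,6,5). The non-SIPF path encodes the same two
 * triangles per quad, packed 4 triangles per 3 dwords as
 * t0t0t0t1_t1t1t2t2_t2t3t3t3.
 */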
3453
3454 /* Calculates a 24 bit fixed point (biased) representation of a signed integer.
3455 */
3456 static inline VkResult
3457 pvr_int32_to_isp_xy_vtx(const struct pvr_device_info *dev_info,
3458 int32_t val,
3459 bool bias,
3460 uint32_t *word_out)
3461 {
3462 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3463 const uint32_t max_fractional = PVRX(IPF_ISP_VERTEX_XY_SIPF_FRAC_MAX_VAL);
3464 const uint32_t max_integer = PVRX(IPF_ISP_VERTEX_XY_SIPF_INTEGER_MAX_VAL);
3465
3466 uint32_t fractional;
3467 uint32_t integer;
3468
3469 if (bias)
3470 val += PVRX(IPF_ISP_VERTEX_XY_BIAS_VALUE_SIPF);
3471
3472 if (val < 0 || val > max_integer + 1) {
3473 mesa_loge("ISP vertex xy value out of range.");
3474 return vk_error(NULL, VK_ERROR_UNKNOWN);
3475 }
3476
3477 if (val <= max_integer) {
3478 integer = val;
3479 fractional = 0;
3480 } else if (val == max_integer + 1) {
3481 /* The integer field is 13 bits long so the max value is
3482 * 2 ^ 13 - 1 = 8191. For 8k support we need to handle 8192 so we set
3483 * all fractional bits to get as close as possible. The best we can do
3484 * is: 0x1FFF.F = 8191.9375 ≈ 8192.
3485 */
3486 integer = max_integer;
3487 fractional = max_fractional;
3488 }
3489
3490 pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY_SIPF, word) {
3491 word.integer = integer;
3492 word.frac = fractional;
3493 }
3494
3495 return VK_SUCCESS;
3496 }
3497
3498 val += PVRX(IPF_ISP_VERTEX_XY_BIAS_VALUE);
3499
3500 if (((uint32_t)val & 0x7fff8000U) != 0U)
3501 return vk_error(NULL, VK_ERROR_UNKNOWN);
3502
3503 pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY, word) {
3504 word.sign = val < 0;
3505 word.integer = val;
3506 }
3507
3508 return VK_SUCCESS;
3509 }
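/* Encoding sketch (SIPF): the word is an unsigned 13.4 fixed point value,
 * so e.g. val == 100 packs as integer == 100, frac == 0, while val == 8192
 * saturates to 8191 + 15/16 == 8191.9375 as noted above. On non-SIPF cores
 * the bias keeps the signed input in range before the check and pack.
 */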
3510
3511 static VkResult
3512 pvr_isp_prim_block_isp_vertices(const struct pvr_device_info *dev_info,
3513 struct pvr_transfer_3d_state *state,
3514 struct pvr_rect_mapping *mappings,
3515 uint32_t num_mappings,
3516 uint32_t mapping_offset,
3517 uint32_t **const cs_ptr_out)
3518 {
3519 uint32_t *cs_ptr = *cs_ptr_out;
3520 bool bias = true;
3521 uint32_t i;
3522
3523 if (PVR_HAS_FEATURE(dev_info, screen_size8K))
3524 bias = state->width_in_tiles <= 256U && state->height_in_tiles <= 256U;
3525
3526 for (i = mapping_offset; i < mapping_offset + num_mappings; i++) {
3527 uint32_t bottom = 0U;
3528 uint32_t right = 0U;
3529 uint32_t left = 0U;
3530 uint32_t top = 0U;
3531 VkResult result;
3532
3533 /* ISP vertex data (X, Y, Z). */
3534 result = pvr_int32_to_isp_xy_vtx(dev_info,
3535 mappings[i].dst_rect.offset.y,
3536 bias,
3537 &top);
3538 if (result != VK_SUCCESS)
3539 return result;
3540
3541 result = pvr_int32_to_isp_xy_vtx(dev_info,
3542 mappings[i].dst_rect.offset.y +
3543 mappings[i].dst_rect.extent.height,
3544 bias,
3545 &bottom);
3546 if (result != VK_SUCCESS)
3547 return result;
3548
3549 result = pvr_int32_to_isp_xy_vtx(dev_info,
3550 mappings[i].dst_rect.offset.x,
3551 bias,
3552 &left);
3553 if (result != VK_SUCCESS)
3554 return result;
3555
3556 result = pvr_int32_to_isp_xy_vtx(dev_info,
3557 mappings[i].dst_rect.offset.x +
3558 mappings[i].dst_rect.extent.width,
3559 bias,
3560 &right);
3561 if (result != VK_SUCCESS)
3562 return result;
3563
3564 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3565 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3566 word.y = top;
3567 word.x = left;
3568 }
3569 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3570
3571 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3572 word.y = top;
3573 word.x = right;
3574 }
3575 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3576
3577 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3578 word.y = bottom;
3579 word.x = left;
3580 }
3581 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3582
3583 pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
3584 word.y = bottom;
3585 word.x = right;
3586 }
3587 cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);
3588
3589 continue;
3590 }
3591
3592 /* ISP vertices 0 and 1. */
3593 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
3594 word0.x0 = left;
3595 word0.y0 = top & 0xFF;
3596 }
3597 cs_ptr++;
3598
3599 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
3600 word1.y0 = top >> PVRX(IPF_ISP_VERTEX_WORD_1_Y0_SHIFT);
3601 }
3602 cs_ptr++;
3603
3604 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
3605 word2.x1 = right & 0xFFFF;
3606 word2.z0 = 0U;
3607 }
3608 cs_ptr++;
3609
3610 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
3611 word3.x1 = right >> PVRX(IPF_ISP_VERTEX_WORD_3_X1_SHIFT);
3612 word3.y1 = top;
3613 }
3614 cs_ptr++;
3615
3616 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
3617 word4.z1 = 0U;
3618 }
3619 cs_ptr++;
3620
3621 /* ISP vertices 2 and 3. */
3622 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
3623 word0.x0 = left;
3624 word0.y0 = bottom & 0xFF;
3625 }
3626 cs_ptr++;
3627
3628 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
3629 word1.y0 = bottom >> PVRX(IPF_ISP_VERTEX_WORD_1_Y0_SHIFT);
3630 }
3631 cs_ptr++;
3632
3633 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
3634 word2.x1 = right & 0xFFFF;
3635 word2.z0 = 0U;
3636 }
3637 cs_ptr++;
3638
3639 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
3640 word3.x1 = right >> PVRX(IPF_ISP_VERTEX_WORD_3_X1_SHIFT);
3641 word3.y1 = bottom;
3642 }
3643 cs_ptr++;
3644
3645 pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
3646 word4.z1 = 0U;
3647 }
3648 cs_ptr++;
3649 }
3650 *cs_ptr_out = cs_ptr;
3651
3652 return VK_SUCCESS;
3653 }
3654
3655 static uint32_t
3656 pvr_isp_primitive_block_size(const struct pvr_device_info *dev_info,
3657 const struct pvr_transfer_cmd_source *src,
3658 uint32_t num_mappings)
3659 {
3660 uint32_t num_isp_vertices = num_mappings * 4U;
3661 uint32_t num_tsp_vertices_per_isp_vertex;
3662 uint32_t isp_vertex_data_size_dw;
3663 bool color_fill = (src == NULL);
3664 uint32_t tsp_comp_format_dw;
3665 uint32_t isp_state_size_dw;
3666 uint32_t pds_state_size_dw;
3667 uint32_t idx_data_size_dw;
3668 uint32_t tsp_data_size;
3669 uint32_t stream_size;
3670
3671 if (color_fill) {
3672 num_tsp_vertices_per_isp_vertex = 0U;
3673 } else {
3674 num_tsp_vertices_per_isp_vertex =
3675 src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
3676 }
3677
3678 tsp_data_size = PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
3679 num_tsp_vertices_per_isp_vertex);
3680
3681 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3682 /* An XYZ vertex is 16/16/32 bits => 8 bytes. */
3683 isp_vertex_data_size_dw = num_isp_vertices * 2U;
3684
3685 /* Round to even for 64 bit boundary. */
3686 idx_data_size_dw = ALIGN_POT(num_mappings, 2U);
3687 tsp_comp_format_dw = 0U;
3688 isp_state_size_dw = 4U;
3689 pds_state_size_dw = 8U;
3690 } else {
3691 tsp_comp_format_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
3692
3693 if (!color_fill) {
3694 if (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
3695 tsp_comp_format_dw *= 2U;
3696 }
3697
3698 /* An XYZ vertex is 24/24/32 bits => 10 bytes, with the last one padded
3699 * to 4 byte burst alignment.
3700 */
3701 isp_vertex_data_size_dw = DIV_ROUND_UP(num_isp_vertices * 10U, 4U);
3702
3703 /* 4 triangles fit in 3 dw: t0t0t0t1_t1t1t2t2_t2t3t3t3. */
3704 idx_data_size_dw = num_mappings + DIV_ROUND_UP(num_mappings, 2U);
3705 isp_state_size_dw = 5U;
3706 pds_state_size_dw = 7U;
3707 }
3708
3709 stream_size =
3710 tsp_data_size + PVR_DW_TO_BYTES(idx_data_size_dw + tsp_comp_format_dw +
3711 isp_vertex_data_size_dw +
3712 isp_state_size_dw + pds_state_size_dw);
3713
3714 return stream_size;
3715 }
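/* Worked example (non-SIPF, 2D source, num_mappings == 4):
 * num_isp_vertices == 16 and 2 TSP vertices per ISP vertex, so
 * tsp_data_size == PVR_DW_TO_BYTES(16 * 1 * 2) == 128 bytes;
 * isp_vertex_data_size_dw == DIV_ROUND_UP(16 * 10, 4) == 40;
 * idx_data_size_dw == 4 + 2 == 6; tsp_comp_format_dw == 1; giving
 * stream_size == 128 + 4 * (6 + 1 + 40 + 5 + 7) == 364 bytes.
 */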
3716
3717 static VkResult
3718 pvr_isp_primitive_block(const struct pvr_device_info *dev_info,
3719 struct pvr_transfer_ctx *ctx,
3720 const struct pvr_transfer_cmd *transfer_cmd,
3721 struct pvr_transfer_prep_data *prep_data,
3722 const struct pvr_transfer_cmd_source *src,
3723 bool custom_filter,
3724 struct pvr_rect_mapping *mappings,
3725 uint32_t num_mappings,
3726 uint32_t mapping_offset,
3727 bool read_bgnd,
3728 uint32_t *cs_start_offset,
3729 uint32_t **cs_ptr_out)
3730 {
3731 struct pvr_transfer_3d_state *state = &prep_data->state;
3732 uint32_t num_isp_vertices = num_mappings * 4U;
3733 uint32_t num_tsp_vertices_per_isp_vert;
3734 uint32_t tsp_data_size_in_bytes;
3735 uint32_t tsp_comp_format_in_dw;
3736 bool color_fill = src == NULL;
3737 uint32_t stream_size_in_bytes;
3738 uint32_t *cs_ptr_start;
3739 VkResult result;
3740
3741 if (color_fill) {
3742 num_tsp_vertices_per_isp_vert = 0U;
3743 } else {
3744 num_tsp_vertices_per_isp_vert =
3745 src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
3746 }
3747
3748 tsp_data_size_in_bytes =
3749 PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
3750 num_tsp_vertices_per_isp_vert);
3751
3752 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3753 tsp_comp_format_in_dw = 0U;
3754 } else {
3755 tsp_comp_format_in_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
3756
3757 if (!color_fill && src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
3758 tsp_comp_format_in_dw *= 2U;
3759 }
3760
3761 stream_size_in_bytes =
3762 pvr_isp_primitive_block_size(dev_info, src, num_mappings);
3763
3764 cs_ptr_start = *cs_ptr_out;
3765
3766 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3767 /* This includes:
3768 * Vertex formats.
3769 * ISP state words.
3770 */
3771 pvr_isp_prim_block_isp_state(dev_info,
3772 tsp_comp_format_in_dw,
3773 tsp_data_size_in_bytes,
3774 num_isp_vertices,
3775 read_bgnd,
3776 cs_ptr_out);
3777
3778 /* This includes:
3779 * Index data / point pitch.
3780 */
3781 pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);
3782
3783 result = pvr_isp_prim_block_isp_vertices(dev_info,
3784 state,
3785 mappings,
3786 num_mappings,
3787 mapping_offset,
3788 cs_ptr_out);
3789 if (result != VK_SUCCESS)
3790 return result;
3791
3792 pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);
3793
3794 if (!color_fill) {
3795 /* This includes:
3796 * TSP vertex formats.
3797 */
3798 pvr_isp_prim_block_tsp_vertex_block(dev_info,
3799 src,
3800 mappings,
3801 custom_filter,
3802 num_mappings,
3803 mapping_offset,
3804 tsp_comp_format_in_dw,
3805 cs_ptr_out);
3806 }
3807
3808 *cs_start_offset = 0;
3809 } else {
3810 if (!color_fill) {
3811 /* This includes:
3812 * Compressed TSP vertex data & tables.
3813 * Primitive id.
3814 * TSP compression formats.
3815 */
3816 pvr_isp_prim_block_tsp_vertex_block(dev_info,
3817 src,
3818 mappings,
3819 custom_filter,
3820 num_mappings,
3821 mapping_offset,
3822 tsp_comp_format_in_dw,
3823 cs_ptr_out);
3824 }
3825
3826 pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);
3827
3828 /* Point the CS_PRIM_BASE here. */
3829 *cs_start_offset = (*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]);
3830
3831 /* This includes:
3832 * ISP state words.
3833 * Compression size word.
3834 * ISP compression and vertex formats.
3835 */
3836 pvr_isp_prim_block_isp_state(dev_info,
3837 tsp_comp_format_in_dw,
3838 tsp_data_size_in_bytes,
3839 num_isp_vertices,
3840 read_bgnd,
3841 cs_ptr_out);
3842
3843 pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);
3844
3845 result = pvr_isp_prim_block_isp_vertices(dev_info,
3846 state,
3847 mappings,
3848 num_mappings,
3849 mapping_offset,
3850 cs_ptr_out);
3851 if (result != VK_SUCCESS)
3852 return result;
3853 }
3854
3855 assert((*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]) ==
3856 stream_size_in_bytes);
3857
3858 return VK_SUCCESS;
3859 }
3860
3861 static inline uint32_t
3862 pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info *dev_info)
3863 {
3864 uint32_t ret = PVR_DW_TO_BYTES(PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS));
3865
3866 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3867 return ret / sizeof(uint64_t) / 2U;
3868
3869 return ret / sizeof(uint32_t) / 2U - 1U;
3870 }
3871
3872 static inline uint32_t
3873 pvr_transfer_max_quads_per_pb(const struct pvr_device_info *dev_info)
3874 {
3875 return PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U
3876 : 16U;
3877 }
3878
3879 static inline uint8_t *pvr_isp_ctrl_stream_sipf_write_aligned(uint8_t *stream,
3880 uint32_t data,
3881 uint32_t size)
3882 {
3883 const uint32_t offset = (uintptr_t)stream & 0x3U;
3884 uint32_t *aligned_stream = (uint32_t *)(stream - offset);
3885 const uint32_t current_data = *aligned_stream & ((1U << (offset * 8U)) - 1U);
3886
3887 assert(size > 0 && size <= 4U);
3888
3889 *aligned_stream = current_data | data << (offset * 8U);
3890
3891 if (offset + size > 4U) {
3892 aligned_stream++;
3893 *aligned_stream = data >> ((4U - offset) * 8);
3894 }
3895
3896 return stream + size;
3897 }
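/* Example: writing 4 bytes to a stream address with byte offset 1 keeps the
 * low byte already present in the containing dword, ORs the low three bytes
 * of `data` above it, and spills the top byte into the next dword (since
 * offset + size > 4). The returned pointer stays byte granular for the next
 * write.
 */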
3898
3899 /**
3900 * Writes ISP ctrl stream.
3901 *
3902 * We change sampler/texture state when we process a new TQ source. The
3903 * primitive block contains the shader pointers, but we supply the primitive
3904 * blocks with shaders from here.
3905 */
3906 static VkResult pvr_isp_ctrl_stream(const struct pvr_device_info *dev_info,
3907 struct pvr_transfer_ctx *ctx,
3908 struct pvr_transfer_cmd *transfer_cmd,
3909 struct pvr_transfer_prep_data *prep_data)
3910 {
3911 const uint32_t max_mappings_per_pb = pvr_transfer_max_quads_per_pb(dev_info);
3912 bool fill_blit = (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U;
3913 uint32_t free_ctrl_stream_words = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS);
3914 struct pvr_transfer_3d_state *const state = &prep_data->state;
3915 struct pvr_winsys_transfer_regs *const regs = &state->regs;
3916 struct pvr_transfer_pass *pass = NULL;
3917 uint32_t flags = transfer_cmd->flags;
3918 struct pvr_suballoc_bo *pvr_cs_bo;
3919 pvr_dev_addr_t stream_base_vaddr;
3920 uint32_t num_prim_blks = 0U;
3921 uint32_t prim_blk_size = 0U;
3922 uint32_t region_arrays_size;
3923 uint32_t num_region_arrays;
3924 uint32_t total_stream_size;
3925 bool was_linked = false;
3926 uint32_t rem_mappings;
3927 uint32_t num_sources;
3928 uint32_t *blk_cs_ptr;
3929 uint32_t *cs_ptr;
3930 uint32_t source;
3931 VkResult result;
3932
3933 if (state->custom_mapping.pass_count > 0U) {
3934 pass = &state->custom_mapping.passes[state->pass_idx];
3935
3936 num_sources = pass->source_count;
3937
3938 for (source = 0; source < num_sources; source++) {
3939 uint32_t num_mappings = pass->sources[source].mapping_count;
3940
3941 while (num_mappings > 0U) {
3942 if (fill_blit) {
3943 prim_blk_size += pvr_isp_primitive_block_size(
3944 dev_info,
3945 NULL,
3946 MIN2(max_mappings_per_pb, num_mappings));
3947 }
3948
3949 if (transfer_cmd->source_count > 0) {
3950 prim_blk_size += pvr_isp_primitive_block_size(
3951 dev_info,
3952 &transfer_cmd->sources[source],
3953 MIN2(max_mappings_per_pb, num_mappings));
3954 }
3955
3956 num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
3957 num_prim_blks++;
3958 }
3959 }
3960 } else {
3961 num_sources = fill_blit ? 1U : transfer_cmd->source_count;
3962
3963 if (fill_blit) {
3964 num_prim_blks = 1U;
3965 prim_blk_size +=
3966 pvr_isp_primitive_block_size(dev_info,
3967 NULL,
3968 MIN2(max_mappings_per_pb, 1U));
3969
3970 /* Fill blits can also have a source; fallthrough to handle. */
3971 }
3972
3973 for (source = 0; source < transfer_cmd->source_count; source++) {
3974 uint32_t num_mappings = transfer_cmd->sources[source].mapping_count;
3975
3976 while (num_mappings > 0U) {
3977 prim_blk_size += pvr_isp_primitive_block_size(
3978 dev_info,
3979 &transfer_cmd->sources[source],
3980 MIN2(max_mappings_per_pb, num_mappings));
3981
3982 num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
3983 num_prim_blks++;
3984 }
3985 }
3986 }
3987
3988 num_region_arrays =
3989 (num_prim_blks + (pvr_transfer_prim_blocks_per_alloc(dev_info) - 1U)) /
3990 pvr_transfer_prim_blocks_per_alloc(dev_info);
3991 region_arrays_size = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS) *
3992 sizeof(uint32_t) * num_region_arrays;
3993 total_stream_size = region_arrays_size + prim_blk_size;
3994
3995 /* Allocate space for IPF control stream. */
3996 result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
3997 ctx->device->heaps.transfer_frag_heap,
3998 total_stream_size,
3999 &pvr_cs_bo);
4000 if (result != VK_SUCCESS)
4001 return result;
4002
4003 stream_base_vaddr =
4004 PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
4005 ctx->device->heaps.transfer_frag_heap->base_addr.addr);
4006
4007 cs_ptr = pvr_bo_suballoc_get_map_addr(pvr_cs_bo);
4008 blk_cs_ptr = cs_ptr + region_arrays_size / sizeof(uint32_t);
4009
4010 source = 0;
4011 while (source < num_sources) {
4012 if (fill_blit)
4013 rem_mappings = pass ? pass->sources[source].mapping_count : 1U;
4014 else
4015 rem_mappings = transfer_cmd->sources[source].mapping_count;
4016
4017 if ((transfer_cmd->source_count > 0 || fill_blit) && rem_mappings != 0U) {
4018 struct pvr_pds_pixel_shader_sa_program unitex_pds_prog = { 0U };
4019 struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[source];
4020 struct pvr_rect_mapping fill_mapping;
4021 uint32_t mapping_offset = 0U;
4022 bool read_bgnd = false;
4023
4024 if (fill_blit) {
4025 uint32_t packed_color[4U] = { 0U };
4026
4027 if (vk_format_is_compressed(transfer_cmd->dst.vk_format)) {
4028 return vk_error(transfer_cmd->cmd_buffer,
4029 VK_ERROR_FORMAT_NOT_SUPPORTED);
4030 }
4031
4032 state->pds_shader_task_offset = 0U;
4033 state->uni_tex_code_offset = 0U;
4034 state->tex_state_data_offset = 0U;
4035 state->common_ptr = 0U;
4036
4037 result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
4038 transfer_cmd->clear_color,
4039 packed_color);
4040 if (result != VK_SUCCESS)
4041 return result;
4042
4043 fill_mapping.dst_rect = transfer_cmd->scissor;
4044
4045 pvr_csb_pack (&regs->usc_clear_register0,
4046 CR_USC_CLEAR_REGISTER,
4047 reg) {
4048 reg.val = packed_color[0U];
4049 }
4050
4051 pvr_csb_pack (&regs->usc_clear_register1,
4052 CR_USC_CLEAR_REGISTER,
4053 reg) {
4054 reg.val = packed_color[1U];
4055 }
4056
4057 pvr_csb_pack (&regs->usc_clear_register2,
4058 CR_USC_CLEAR_REGISTER,
4059 reg) {
4060 reg.val = packed_color[2U];
4061 }
4062
4063 pvr_csb_pack (&regs->usc_clear_register3,
4064 CR_USC_CLEAR_REGISTER,
4065 reg) {
4066 reg.val = packed_color[3U];
4067 }
4068
4069 state->pds_shader_task_offset =
4070 transfer_cmd->cmd_buffer->device->nop_program.pds.data_offset;
4071
4072 unitex_pds_prog.kick_usc = false;
4073 unitex_pds_prog.clear = false;
4074 } else {
4075 const bool down_scale = transfer_cmd->sources[source].resolve_op ==
4076 PVR_RESOLVE_BLEND &&
4077 src->surface.sample_count > 1U &&
4078 transfer_cmd->dst.sample_count <= 1U;
4079 struct pvr_tq_shader_properties *shader_props =
4080 &state->shader_props;
4081 struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
4082 const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
4083 enum pvr_transfer_pbe_pixel_src pbe_src_format;
4084 struct pvr_suballoc_bo *pvr_bo;
4085 uint32_t tex_state_dma_size;
4086 pvr_dev_addr_t dev_offset;
4087
4088 /* Reset the shared register bank ptrs; each src implies new texture
4089 * state. (Note that we don't change texture state per prim block.)
4090 */
4091 state->common_ptr = 0U;
4092 state->usc_const_reg_ptr = 0U;
4093 /* We don't use state->dynamic_const_reg_ptr here. */
4094
4095 if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE)
4096 read_bgnd = true;
4097
4098 result = pvr_pbe_src_format_f2d(flags,
4099 src,
4100 transfer_cmd->dst.vk_format,
4101 down_scale,
4102 state->dont_force_pbe,
4103 &pbe_src_format);
4104 if (result != VK_SUCCESS)
4105 return result;
4106
4107 memset(shader_props, 0U, sizeof(*shader_props));
4108
4109 layer->pbe_format = pbe_src_format;
4110 layer->sample =
4111 (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED);
4112 shader_props->iterated = true;
4113
4114 shader_props->pick_component =
4115 pvr_pick_component_needed(&state->custom_mapping);
4116
4117 result = pvr_msaa_state(dev_info, transfer_cmd, state, source);
4118 if (result != VK_SUCCESS)
4119 return result;
4120
4121 if (state->filter[source] == PVR_FILTER_LINEAR &&
4122 pvr_requires_usc_linear_filter(src->surface.vk_format)) {
4123 if (pvr_int_pbe_usc_linear_filter(layer->pbe_format,
4124 layer->sample,
4125 layer->msaa,
4126 shader_props->full_rate)) {
4127 layer->linear = true;
4128 } else {
4129 mesa_logw("Transfer: F32 linear filter not supported.");
4130 }
4131 }
4132
4133 result = pvr_transfer_frag_store_get_shader_info(
4134 transfer_cmd->cmd_buffer->device,
4135 &ctx->frag_store,
4136 shader_props,
4137 &dev_offset,
4138 &sh_reg_layout);
4139 if (result != VK_SUCCESS)
4140 return result;
4141
4142 assert(dev_offset.addr <= UINT32_MAX);
4143 prep_data->state.pds_shader_task_offset = (uint32_t)dev_offset.addr;
4144
4145 result =
4146 pvr_pds_coeff_task(ctx, transfer_cmd, layer->sample, prep_data);
4147 if (result != VK_SUCCESS)
4148 return result;
4149
4150 unitex_pds_prog.kick_usc = false;
4151 unitex_pds_prog.clear = false;
4152
4153 tex_state_dma_size =
4154 sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;
4155
4156 unitex_pds_prog.num_texture_dma_kicks = 1U;
4157 unitex_pds_prog.num_uniform_dma_kicks = 0U;
4158
4159 /* Allocate memory for DMA. */
4160 result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
4161 ctx->device->heaps.general_heap,
4162 tex_state_dma_size << 2U,
4163 &pvr_bo);
4164 if (result != VK_SUCCESS)
4165 return result;
4166
4167 result = pvr_sampler_state_for_surface(
4168 dev_info,
4169 &transfer_cmd->sources[source].surface,
4170 state->filter[source],
4171 sh_reg_layout,
4172 0U,
4173 pvr_bo_suballoc_get_map_addr(pvr_bo));
4174 if (result != VK_SUCCESS)
4175 return result;
4176
4177 result = pvr_image_state_for_surface(
4178 ctx,
4179 transfer_cmd,
4180 &transfer_cmd->sources[source].surface,
4181 0U,
4182 source,
4183 sh_reg_layout,
4184 state,
4185 0U,
4186 pvr_bo_suballoc_get_map_addr(pvr_bo));
4187 if (result != VK_SUCCESS)
4188 return result;
4189
4190 pvr_pds_encode_dma_burst(unitex_pds_prog.texture_dma_control,
4191 unitex_pds_prog.texture_dma_address,
4192 state->common_ptr,
4193 tex_state_dma_size,
4194 pvr_bo->dev_addr.addr,
4195 true,
4196 dev_info);
4197
4198 state->common_ptr += tex_state_dma_size;
4199
4200 pvr_write_usc_constants(sh_reg_layout,
4201 pvr_bo_suballoc_get_map_addr(pvr_bo));
4202
4203 if (pvr_pick_component_needed(&state->custom_mapping)) {
4204 pvr_dma_texel_unwind(state,
4205 sh_reg_layout,
4206 pvr_bo_suballoc_get_map_addr(pvr_bo));
4207 }
4208 }
4209
4210 result = pvr_pds_unitex(dev_info,
4211 ctx,
4212 transfer_cmd,
4213 &unitex_pds_prog,
4214 prep_data);
4215 if (result != VK_SUCCESS)
4216 return result;
4217
4218 while (rem_mappings > 0U) {
4219 const uint32_t min_free_ctrl_stream_words =
4220 PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 2
4221 : 3;
4222 const uint32_t num_mappings =
4223 MIN2(max_mappings_per_pb, rem_mappings);
4224 struct pvr_rect_mapping *mappings = NULL;
4225 uint32_t stream_start_offset = 0U;
4226 pvr_dev_addr_t prim_blk_addr;
4227
4228 if (free_ctrl_stream_words < min_free_ctrl_stream_words) {
4229 pvr_dev_addr_t next_region_array_vaddr = stream_base_vaddr;
4230
4231 num_region_arrays++;
4232 next_region_array_vaddr.addr +=
4233 num_region_arrays *
4234 PVR_DW_TO_BYTES(PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS));
4235
4236 if (PVR_HAS_FEATURE(dev_info,
4237 simple_internal_parameter_format_v2)) {
4238 uint32_t link_addr;
4239
4240 pvr_csb_pack (&link_addr,
4241 IPF_CONTROL_STREAM_LINK_SIPF2,
4242 control_stream) {
4243 control_stream.cs_ctrl_type =
4244 PVRX(IPF_CS_CTRL_TYPE_SIPF2_LINK);
4245 control_stream.cs_link.addr = next_region_array_vaddr.addr;
4246 }
4247
4248 pvr_isp_ctrl_stream_sipf_write_aligned(
4249 (uint8_t *)cs_ptr,
4250 link_addr,
4251 PVR_DW_TO_BYTES(
4252 pvr_cmd_length(IPF_CONTROL_STREAM_LINK_SIPF2)));
4253 } else {
4254 pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, control_stream) {
4255 control_stream.cs_type = PVRX(IPF_CS_TYPE_LINK);
4256 control_stream.cs_link.addr = next_region_array_vaddr.addr;
4257 }
4258 }
4259
4260 cs_ptr =
4261 (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_cs_bo) +
4262 num_region_arrays * PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS);
4263 free_ctrl_stream_words = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS);
4264
4265 was_linked = PVR_HAS_FEATURE(dev_info, ipf_creq_pf);
4266 }
4267
4268 if (fill_blit)
4269 mappings = pass ? pass->sources[source].mappings : &fill_mapping;
4270 else
4271 mappings = transfer_cmd->sources[source].mappings;
4272
4273 prim_blk_addr = stream_base_vaddr;
4274 prim_blk_addr.addr +=
4275 (uintptr_t)blk_cs_ptr -
4276 (uintptr_t)pvr_bo_suballoc_get_map_addr(pvr_cs_bo);
4277
4278 result = pvr_isp_primitive_block(dev_info,
4279 ctx,
4280 transfer_cmd,
4281 prep_data,
4282 fill_blit ? NULL : src,
4283 state->custom_filter,
4284 mappings,
4285 num_mappings,
4286 mapping_offset,
4287 read_bgnd,
4288 &stream_start_offset,
4289 &blk_cs_ptr);
4290 if (result != VK_SUCCESS)
4291 return result;
4292
4293 prim_blk_addr.addr += stream_start_offset;
4294
4295 if (PVR_HAS_FEATURE(dev_info,
4296 simple_internal_parameter_format_v2)) {
4297 uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
4298 uint32_t tmp;
4299
4300 /* This part of the control stream is byte granular. */
4301
4302 pvr_csb_pack (&tmp, IPF_PRIMITIVE_HEADER_SIPF2, prim_header) {
4303 prim_header.cs_prim_base_size = 1;
4304 prim_header.cs_mask_num_bytes = 1;
4305 prim_header.cs_valid_tile0 = true;
4306 }
4307 cs_byte_ptr =
4308 pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);
4309
4310 pvr_csb_pack (&tmp, IPF_PRIMITIVE_BASE_SIPF2, word) {
4311 word.cs_prim_base = prim_blk_addr;
4312 }
4313 cs_byte_ptr =
4314 pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 4);
4315
4316 /* IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 since
4317 * IPF_PRIMITIVE_HEADER_SIPF2.cs_mask_num_bytes == 1.
4318 */
4319 pvr_csb_pack (&tmp,
4320 IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2,
4321 mask) {
4322 switch (num_mappings) {
4323 case 4:
4324 mask.cs_mask_one_byte_tile0_7 = true;
4325 mask.cs_mask_one_byte_tile0_6 = true;
4326 FALLTHROUGH;
4327 case 3:
4328 mask.cs_mask_one_byte_tile0_5 = true;
4329 mask.cs_mask_one_byte_tile0_4 = true;
4330 FALLTHROUGH;
4331 case 2:
4332 mask.cs_mask_one_byte_tile0_3 = true;
4333 mask.cs_mask_one_byte_tile0_2 = true;
4334 FALLTHROUGH;
4335 case 1:
4336 mask.cs_mask_one_byte_tile0_1 = true;
4337 mask.cs_mask_one_byte_tile0_0 = true;
4338 break;
4339 default:
4340 /* Unreachable since we clamped the value earlier so
4341 * reaching this is an implementation error.
4342 */
4343 unreachable("num_mapping exceeded max_mappings_per_pb");
4344 break;
4345 }
4346 }
4347 /* Only 1 byte since there's only 1 valid tile within the single
4348 * IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 mask.
4349 * ROGUE_IPF_PRIMITIVE_HEADER_SIPF2.cs_valid_tile0 == true.
4350 */
4351 cs_byte_ptr =
4352 pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);
4353
4354 cs_ptr = (uint32_t *)cs_byte_ptr;
4355
4356 free_ctrl_stream_words -= 2;
4357 } else {
4358 pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_FORMAT, word) {
4359 word.cs_type = PVRX(IPF_CS_TYPE_PRIM);
4360 word.cs_isp_state_read = true;
4361 word.cs_isp_state_size = 2U;
4362 word.cs_prim_total = 2U * num_mappings - 1U;
4363 word.cs_mask_fmt = PVRX(IPF_CS_MASK_FMT_FULL);
4364 word.cs_prim_base_pres = true;
4365 }
4366 cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_FORMAT);
4367
4368 pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_BASE, word) {
4369 word.cs_prim_base = prim_blk_addr;
4370 }
4371 cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_BASE);
4372
4373 free_ctrl_stream_words -= 2;
4374 }
4375
4376 rem_mappings -= num_mappings;
4377 mapping_offset += num_mappings;
4378 }
4379 }
4380
4381 source++;
4382
4383 /* A fill blit may also have sources for normal blits. */
4384 if (fill_blit && transfer_cmd->source_count > 0) {
4385 /* For custom mappings the fill blit count equals the source blit
4386 * count, while normal blits use only one fill blit.
4387 */
4388 if (state->custom_mapping.pass_count == 0 && source > num_sources) {
4389 fill_blit = false;
4390 source = 0;
4391 }
4392 }
4393 }
4394
4395 if (PVR_HAS_FEATURE(dev_info, ipf_creq_pf))
4396 assert((num_region_arrays > 1) == was_linked);
4397
4398 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2)) {
4399 uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
4400 uint32_t tmp;
4401
4402 /* clang-format off */
4403 pvr_csb_pack (&tmp, IPF_CONTROL_STREAM_TERMINATE_SIPF2, term);
4404 /* clang-format on */
4405
4406 cs_byte_ptr = pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);
4407
4408 cs_ptr = (uint32_t *)cs_byte_ptr;
4409 } else {
4410 pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, word) {
4411 word.cs_type = PVRX(IPF_CS_TYPE_TERM);
4412 }
4413 cs_ptr += pvr_cmd_length(IPF_CONTROL_STREAM);
4414 }
4415
4416 pvr_csb_pack (&regs->isp_mtile_base, CR_ISP_MTILE_BASE, reg) {
4417 reg.addr =
4418 PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
4419 ctx->device->heaps.transfer_frag_heap->base_addr.addr);
4420 }
4421
4422 pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
4423 reg.mode_type = PVRX(CR_ISP_RENDER_MODE_TYPE_FAST_2D);
4424 }
4425
4426 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2) &&
4427 PVR_HAS_FEATURE(dev_info, ipf_creq_pf)) {
4428 pvr_csb_pack (&regs->isp_rgn, CR_ISP_RGN_SIPF, isp_rgn) {
4429 /* Bit 0 in CR_ISP_RGN.cs_size_ipf_creq_pf is used to indicate the
4430 * presence of a link.
4431 */
4432 isp_rgn.cs_size_ipf_creq_pf = was_linked;
4433 }
4434 } else {
4435 /* clang-format off */
4436 pvr_csb_pack(&regs->isp_rgn, CR_ISP_RGN, isp_rgn);
4437 /* clang-format on */
4438 }
4439
4440 return VK_SUCCESS;
4441 }
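/* Control stream layout note: each region array holds
 * IPF_CONTROL_STREAM_SIZE_DWORDS words. When fewer than the minimum free
 * words remain (2 on SIPF, 3 otherwise) a LINK entry chains to the next
 * region array, and the stream always ends in a terminate word; the assert
 * on was_linked above cross-checks this against the region array count.
 */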
4442
4443 static void pvr_transfer_set_filter(struct pvr_transfer_cmd *transfer_cmd,
4444 struct pvr_transfer_3d_state *state)
4445 {
4446 for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
4447 VkRect2D *src = &transfer_cmd->sources[i].mappings[0U].src_rect;
4448 VkRect2D *dst = &transfer_cmd->sources[i].mappings[0U].dst_rect;
4449
4450 /* If no scaling is applied to the copy region, we can use point
4451 * filtering.
4452 */
4453 if (!state->custom_filter && (src->extent.width == dst->extent.width) &&
4454 (src->extent.height == dst->extent.height))
4455 state->filter[i] = PVR_FILTER_POINT;
4456 else
4457 state->filter[i] = transfer_cmd->sources[i].filter;
4458 }
4459 }
4460
4461 /** Generates hw resources to kick a 3D clip blit. */
4462 static VkResult pvr_3d_clip_blit(struct pvr_transfer_ctx *ctx,
4463 struct pvr_transfer_cmd *transfer_cmd,
4464 struct pvr_transfer_prep_data *prep_data,
4465 uint32_t pass_idx,
4466 bool *finished_out)
4467 {
4468 struct pvr_transfer_3d_state *state = &prep_data->state;
4469 uint32_t texel_unwind_src = state->custom_mapping.texel_unwind_src;
4470 struct pvr_transfer_cmd bg_cmd = { 0U };
4471 uint32_t control_reg;
4472 VkResult result;
4473
4474 state->dont_force_pbe = false;
4475 bg_cmd.scissor = transfer_cmd->scissor;
4476 bg_cmd.cmd_buffer = transfer_cmd->cmd_buffer;
4477 bg_cmd.flags = transfer_cmd->flags;
4478 bg_cmd.flags &=
4479 ~(PVR_TRANSFER_CMD_FLAGS_FAST2D | PVR_TRANSFER_CMD_FLAGS_FILL |
4480 PVR_TRANSFER_CMD_FLAGS_DSMERGE | PVR_TRANSFER_CMD_FLAGS_PICKD);
4481
4482 bg_cmd.source_count = state->custom_mapping.pass_count > 0U ? 0 : 1;
4483 if (bg_cmd.source_count > 0) {
4484 struct pvr_transfer_cmd_source *src = &bg_cmd.sources[0];
4485
4486 src->mappings[0U].src_rect = transfer_cmd->scissor;
4487 src->mappings[0U].dst_rect = transfer_cmd->scissor;
4488 src->resolve_op = PVR_RESOLVE_BLEND;
4489 src->surface = transfer_cmd->dst;
4490 }
4491
4492 state->filter[0] = PVR_FILTER_DONTCARE;
4493 bg_cmd.dst = transfer_cmd->dst;
4494 state->custom_mapping.texel_unwind_src =
4495 state->custom_mapping.texel_unwind_dst;
4496
4497 result =
4498 pvr_3d_copy_blit_core(ctx, &bg_cmd, prep_data, pass_idx, finished_out);
4499 if (result != VK_SUCCESS)
4500 return result;
4501
4502 /* If the destination has 4 channels and the source has at most 2, we still
4503 * need all 4 channels from the USC into the PBE.
4504 */
4505 state->dont_force_pbe = true;
4506 state->custom_mapping.texel_unwind_src = texel_unwind_src;
4507
4508 /* We need the viewport mask, otherwise all pixels would be disabled. */
4509 pvr_csb_pack (&control_reg, CR_ISP_BGOBJVALS, reg) {
4510 reg.mask = true;
4511 }
4512 state->regs.isp_bgobjvals |= control_reg;
4513
4514 pvr_transfer_set_filter(transfer_cmd, state);
4515 result = pvr_isp_ctrl_stream(&ctx->device->pdevice->dev_info,
4516 ctx,
4517 transfer_cmd,
4518 prep_data);
4519 if (result != VK_SUCCESS)
4520 return result;
4521
4522 /* In the case of an M -> S resolve, the accumulation is read from and
4523 * written to a single-sampled surface. Make sure that we are resolving
4524 * and that we have the right number of tiles.
4525 */
4526 if (state->down_scale) {
4527 uint64_t tmp;
4528
4529 pvr_csb_pack (&tmp, CR_PBE_WORD0_MRT0, reg) {
4530 reg.downscale = true;
4531 }
4532 state->regs.pbe_wordx_mrty[0U] |= tmp;
4533
4534 result = pvr_isp_tiles(ctx->device, state);
4535 if (result != VK_SUCCESS)
4536 return result;
4537 }
4538
4539 return VK_SUCCESS;
4540 }
4541
4542 static bool pvr_texel_unwind(uint32_t bpp,
4543 pvr_dev_addr_t dev_addr,
4544 bool is_input,
4545 uint32_t texel_extend,
4546 uint32_t *texel_unwind_out)
4547 {
4548 uint32_t texel_unwind = 0U;
4549
4550 for (uint32_t i = 0U; i < 16U; i++) {
4551 if (pvr_is_surface_aligned(dev_addr, is_input, bpp)) {
4552 break;
4553 } else {
4554 if (i == 15U) {
4555 return false;
4556 } else {
4557 dev_addr.addr -= (bpp / texel_extend) / 8U;
4558 texel_unwind++;
4559 }
4560 }
4561 }
4562
4563 *texel_unwind_out = texel_unwind;
4564
4565 return true;
4566 }
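/* Illustrative: with bpp == 32 and texel_extend == 1 each retry steps the
 * address back by 4 bytes, so an address 12 bytes past an aligned boundary
 * yields texel_unwind == 3. Surfaces more than 15 texels off alignment are
 * rejected.
 */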
4567
4568 static bool pvr_is_identity_mapping(const struct pvr_rect_mapping *mapping)
4569 {
4570 return (mapping->src_rect.offset.x == mapping->dst_rect.offset.x &&
4571 mapping->src_rect.offset.y == mapping->dst_rect.offset.y &&
4572 mapping->src_rect.extent.width == mapping->dst_rect.extent.width &&
4573 mapping->src_rect.extent.height == mapping->dst_rect.extent.height);
4574 }
4575
4576 static inline bool pvr_is_pbe_stride_aligned(const uint32_t stride)
4577 {
4578 if (stride == 1U)
4579 return true;
4580
4581 return ((stride & (PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE) - 1U)) ==
4582 0x0U);
4583 }
4584
4585 static struct pvr_transfer_pass *
4586 pvr_create_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4587 uint32_t dst_offset)
4588 {
4589 struct pvr_transfer_pass *pass;
4590
4591 assert(custom_mapping->pass_count < PVR_TRANSFER_MAX_PASSES);
4592
4593 pass = &custom_mapping->passes[custom_mapping->pass_count];
4594 pass->clip_rects_count = 0U;
4595 pass->dst_offset = dst_offset;
4596 pass->source_count = 0U;
4597
4598 custom_mapping->pass_count++;
4599
4600 return pass;
4601 }
4602
4603 /* Acquire the pass with the given offset. If one doesn't exist, create it. */
4604 static struct pvr_transfer_pass *
4605 pvr_acquire_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4606 uint32_t dst_offset)
4607 {
4608 for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
4609 if (custom_mapping->passes[i].dst_offset == dst_offset)
4610 return &custom_mapping->passes[i];
4611 }
4612
4613 return pvr_create_pass(custom_mapping, dst_offset);
4614 }
4615
4616 static struct pvr_transfer_wa_source *
4617 pvr_create_source(struct pvr_transfer_pass *pass,
4618 uint32_t src_offset,
4619 bool extend_height)
4620 {
4621 struct pvr_transfer_wa_source *src;
4622
4623 assert(pass->source_count < ARRAY_SIZE(pass->sources));
4624
4625 src = &pass->sources[pass->source_count];
4626 src->mapping_count = 0U;
4627 src->extend_height = extend_height;
4628
4629 pass->source_count++;
4630
4631 return src;
4632 }
4633
4634 /* Acquire the source with the given offset. If one doesn't exist, create it. */
4635 static struct pvr_transfer_wa_source *
4636 pvr_acquire_source(struct pvr_transfer_pass *pass,
4637 uint32_t src_offset,
4638 bool extend_height)
4639 {
4640 for (uint32_t i = 0U; i < pass->source_count; i++) {
4641 if (pass->sources[i].src_offset == src_offset &&
4642 pass->sources[i].extend_height == extend_height)
4643 return &pass->sources[i];
4644 }
4645
4646 return pvr_create_source(pass, src_offset, extend_height);
4647 }
4648
4649 static void pvr_remove_source(struct pvr_transfer_pass *pass, uint32_t idx)
4650 {
4651 assert(idx < pass->source_count);
4652
4653 for (uint32_t i = idx; i < (pass->source_count - 1U); i++)
4654 pass->sources[i] = pass->sources[i + 1U];
4655
4656 pass->source_count--;
4657 }
4658
4659 static void pvr_remove_mapping(struct pvr_transfer_wa_source *src, uint32_t idx)
4660 {
4661 assert(idx < src->mapping_count);
4662
4663 for (uint32_t i = idx; i < (src->mapping_count - 1U); i++)
4664 src->mappings[i] = src->mappings[i + 1U];
4665
4666 src->mapping_count--;
4667 }
4668
4669 static struct pvr_rect_mapping *
4670 pvr_create_mapping(struct pvr_transfer_wa_source *src)
4671 {
4672 assert(src->mapping_count < ARRAY_SIZE(src->mappings));
4673
4674 return &src->mappings[src->mapping_count++];
4675 }
4676
4677 /**
4678 * If PBE can't write to surfaces with odd stride, the stride of
4679 * destination surface is doubled to make it even. Height of the surface is
4680 * halved. The source surface is not resized. Each half of the modified
4681 * destination surface samples every second row from the source surface. This
4682 * only works with nearest filtering.
4683 */
4684 static bool pvr_double_stride(struct pvr_transfer_pass *pass, uint32_t stride)
4685 {
4686 struct pvr_rect_mapping *mappings = pass->sources[0].mappings;
4687 uint32_t new_mapping = 0;
4688
4689 if (stride == 1U)
4690 return false;
4691
4692 if (mappings[0U].dst_rect.extent.height == 1U &&
4693 pass->sources[0].mapping_count == 1U) {
4694 /* Only one mapping required if height is 1. */
4695 if ((mappings[0U].dst_rect.offset.y & 1U) != 0U) {
4696 mappings[0U].dst_rect.offset.x += (int32_t)stride;
4697 mappings[0U].dst_rect.offset.y /= 2U;
4698 mappings[0U].dst_rect.extent.height =
4699 (mappings[0U].dst_rect.extent.height + 1U) / 2U;
4700 } else {
4701 mappings[0U].dst_rect.extent.height =
4702 (mappings[0U].dst_rect.offset.y +
4703 mappings[0U].dst_rect.extent.height + 1U) /
4704 2U -
4705 mappings[0U].dst_rect.offset.y;
4706 mappings[0U].dst_rect.offset.y /= 2U;
4707 }
4708
4709 return true;
4710 }
4711
4712 for (uint32_t i = 0; i < pass->sources[0].mapping_count; i++) {
4713 struct pvr_rect_mapping *mapping_a = &mappings[i];
4714 struct pvr_rect_mapping *mapping_b =
4715 &mappings[pass->sources[0].mapping_count + new_mapping];
4716 int32_t mapping_a_src_rect_y1 =
4717 mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height;
4718 int32_t mapping_b_src_rect_y1 = mapping_a_src_rect_y1;
4719 const bool dst_starts_odd_row = !!(mapping_a->dst_rect.offset.y & 1);
4720 const bool dst_ends_odd_row =
4721 !!((mapping_a->dst_rect.offset.y + mapping_a->dst_rect.extent.height) &
4722 1);
4723 const bool src_starts_odd_row = !!(mapping_a->src_rect.offset.y & 1);
4724 const bool src_ends_odd_row =
4725 !!((mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height) &
4726 1);
4727
4728 assert(pass->sources[0].mapping_count + new_mapping <
4729 ARRAY_SIZE(pass->sources[0].mappings));
4730 *mapping_b = *mapping_a;
4731
4732 mapping_a->src_rect.offset.y = ALIGN_POT(mapping_a->src_rect.offset.y, 2);
4733 if (dst_starts_odd_row && !src_starts_odd_row)
4734 mapping_a->src_rect.offset.y++;
4735 else if (!dst_starts_odd_row && src_starts_odd_row)
4736 mapping_a->src_rect.offset.y--;
4737
4738 mapping_a_src_rect_y1 = ALIGN_POT(mapping_a_src_rect_y1, 2);
4739 if (dst_ends_odd_row && !src_ends_odd_row)
4740 mapping_a_src_rect_y1++;
4741 else if (!dst_ends_odd_row && src_ends_odd_row)
4742 mapping_a_src_rect_y1--;
4743
4744 mapping_a->src_rect.extent.height =
4745 mapping_a_src_rect_y1 - mapping_a->src_rect.offset.y;
4746
4747 mapping_b->src_rect.offset.y = ALIGN_POT(mapping_b->src_rect.offset.y, 2);
4748 if (dst_starts_odd_row && src_starts_odd_row)
4749 mapping_b->src_rect.offset.y--;
4750 else if (!dst_starts_odd_row && !src_starts_odd_row)
4751 mapping_b->src_rect.offset.y++;
4752
4753 mapping_b_src_rect_y1 = ALIGN_POT(mapping_b_src_rect_y1, 2);
4754 if (dst_ends_odd_row && src_ends_odd_row)
4755 mapping_b_src_rect_y1--;
4756 else if (!dst_ends_odd_row && !src_ends_odd_row)
4757 mapping_b_src_rect_y1++;
4758
4759 mapping_b->src_rect.extent.height =
4760 mapping_b_src_rect_y1 - mapping_b->src_rect.offset.y;
4761
4762 /* Destination rectangles. */
4763 mapping_a->dst_rect.offset.y = mapping_a->dst_rect.offset.y / 2;
4764
4765 if (dst_starts_odd_row)
4766 mapping_a->dst_rect.offset.y++;
4767
4768 mapping_b->dst_rect.offset.x += stride;
4769 mapping_b->dst_rect.offset.y /= 2;
4770 mapping_b->dst_rect.extent.height /= 2;
4771 mapping_a->dst_rect.extent.height -= mapping_b->dst_rect.extent.height;
4772
4773 if (!mapping_a->src_rect.extent.width ||
4774 !mapping_a->src_rect.extent.height) {
4775 *mapping_a = *mapping_b;
4776 } else if (mapping_b->src_rect.extent.width &&
4777 mapping_b->src_rect.extent.height) {
4778 new_mapping++;
4779 }
4780 }
4781
4782 pass->sources[0].mapping_count++;
4783
4784 return true;
4785 }
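/* Stride-doubling sketch (illustrative): with a dst stride of 5 doubled to
 * 10, even destination rows y land in the left half at row y / 2 and odd
 * rows land in the right half (offset.x += 5) at row y / 2, halving the
 * surface height while keeping every texel addressable.
 */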
4786
4787 static void pvr_split_rect(uint32_t stride,
4788 uint32_t height,
4789 uint32_t texel_unwind,
4790 VkRect2D *rect_a,
4791 VkRect2D *rect_b)
4792 {
4793 rect_a->offset.x = 0;
4794 rect_a->extent.width = stride - texel_unwind;
4795 rect_a->offset.y = 0;
4796 rect_a->extent.height = height;
4797
4798 rect_b->offset.x = (int32_t)stride - texel_unwind;
4799 rect_b->extent.width = texel_unwind;
4800 rect_b->offset.y = 0;
4801 rect_b->extent.height = height;
4802 }
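/* E.g. stride == 8, texel_unwind == 3: rect_a covers columns [0, 5) and
 * rect_b covers columns [5, 8), i.e. the tail of each row that wraps to the
 * start of the next row once the unwind is applied.
 */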
4803
4804 static bool pvr_rect_width_covered_by(const VkRect2D *rect_a,
4805 const VkRect2D *rect_b)
4806 {
4807 return (rect_b->offset.x <= rect_a->offset.x &&
4808 (rect_b->offset.x + rect_b->extent.width) >=
4809 (rect_a->offset.x + rect_a->extent.width));
4810 }
4811
4812 static void pvr_unwind_rects(uint32_t width,
4813 uint32_t height,
4814 uint32_t texel_unwind,
4815 bool input,
4816 struct pvr_transfer_pass *pass)
4817 {
4818 struct pvr_transfer_wa_source *const source = &pass->sources[0];
4819 struct pvr_rect_mapping *const mappings = source->mappings;
4820 const uint32_t num_mappings = source->mapping_count;
4821 VkRect2D rect_a, rect_b;
4822
4823 if (texel_unwind == 0)
4824 return;
4825
4826 pvr_split_rect(width, height, texel_unwind, &rect_a, &rect_b);
4827
4828 for (uint32_t i = 0; i < num_mappings; i++) {
4829 VkRect2D *const old_rect = input ? &mappings[i].src_rect
4830 : &mappings[i].dst_rect;
4831
4832 if (height == 1) {
4833 old_rect->offset.x += texel_unwind;
4834 } else if (width == 1) {
4835 old_rect->offset.y += texel_unwind;
4836 } else if (pvr_rect_width_covered_by(old_rect, &rect_a)) {
4837 old_rect->offset.x += texel_unwind;
4838 } else if (pvr_rect_width_covered_by(old_rect, &rect_b)) {
4839 old_rect->offset.x = texel_unwind - width + old_rect->offset.x;
4840 old_rect->offset.y++;
4841 } else {
4842 /* Mapping requires split. */
4843 const uint32_t new_mapping = source->mapping_count++;
4844
4845 VkRect2D *const new_rect = input ? &mappings[new_mapping].src_rect
4846 : &mappings[new_mapping].dst_rect;
4847
4848 VkRect2D *const new_rect_opp = input ? &mappings[new_mapping].dst_rect
4849 : &mappings[new_mapping].src_rect;
4850 VkRect2D *const old_rect_opp = input ? &mappings[i].dst_rect
4851 : &mappings[i].src_rect;
4852
4853 const uint32_t split_point = width - texel_unwind;
4854 const uint32_t split_width =
4855 old_rect->offset.x + old_rect->extent.width - split_point;
4856
4857 assert(new_mapping < ARRAY_SIZE(source->mappings));
4858 mappings[new_mapping] = mappings[i];
4859
4860 old_rect_opp->extent.width -= split_width;
4861 new_rect_opp->extent.width = split_width;
4862 new_rect_opp->offset.x =
4863 old_rect_opp->offset.x + old_rect_opp->extent.width;
4864
4865 old_rect->offset.x += texel_unwind;
4866 old_rect->extent.width = width - old_rect->offset.x;
4867
4868 new_rect->offset.x = 0;
4869 new_rect->offset.y++;
4870 new_rect->extent.width = split_width;
4871 }
4872 }
4873 }
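/* Unwind sketch (illustrative): width == 8, texel_unwind == 3. A rect
 * wholly inside columns [0, 5) shifts right by 3; a rect wholly inside
 * [5, 8) wraps to the next row at x - 5; a rect straddling column 5 is
 * split into one mapping per side, as in the final branch above.
 */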
4874
4875 /**
4876 * Assign clip rects to rectangle mappings. TDM can only do two PBE clip
4877 * rects per screen.
4878 */
4879 static void
4880 pvr_map_clip_rects(struct pvr_transfer_custom_mapping *custom_mapping)
4881 {
4882 for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
4883 struct pvr_transfer_pass *pass = &custom_mapping->passes[i];
4884
4885 pass->clip_rects_count = 0U;
4886
4887 for (uint32_t s = 0U; s < pass->source_count; s++) {
4888 struct pvr_transfer_wa_source *src = &pass->sources[s];
4889
4890 for (uint32_t j = 0U; j < src->mapping_count; j++) {
4891 struct pvr_rect_mapping *mappings = src->mappings;
4892 VkRect2D *clip_rects = pass->clip_rects;
4893 bool merged = false;
4894
4895 /* Try merge adjacent clip rects. */
4896 for (uint32_t k = 0U; k < pass->clip_rects_count; k++) {
4897 if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
4898 clip_rects[k].extent.height ==
4899 mappings[j].dst_rect.extent.height &&
4900 clip_rects[k].offset.x + clip_rects[k].extent.width ==
4901 mappings[j].dst_rect.offset.x) {
4902 clip_rects[k].extent.width +=
4903 mappings[j].dst_rect.extent.width;
4904 merged = true;
4905 break;
4906 }
4907
4908 if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
4909 clip_rects[k].extent.height ==
4910 mappings[j].dst_rect.extent.height &&
4911 clip_rects[k].offset.x ==
4912 mappings[j].dst_rect.offset.x +
4913 mappings[j].dst_rect.extent.width) {
4914 clip_rects[k].offset.x = mappings[j].dst_rect.offset.x;
4915 clip_rects[k].extent.width +=
4916 mappings[j].dst_rect.extent.width;
4917 merged = true;
4918 break;
4919 }
4920
4921 if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
4922 clip_rects[k].extent.width ==
4923 mappings[j].dst_rect.extent.width &&
4924 clip_rects[k].offset.y + clip_rects[k].extent.height ==
4925 mappings[j].dst_rect.offset.y) {
4926 clip_rects[k].extent.height +=
4927 mappings[j].dst_rect.extent.height;
4928 merged = true;
4929 break;
4930 }
4931
4932 if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
4933 clip_rects[k].extent.width ==
4934 mappings[j].dst_rect.extent.width &&
4935 clip_rects[k].offset.y ==
4936 mappings[j].dst_rect.offset.y +
4937 mappings[j].dst_rect.extent.height) {
4938 clip_rects[k].extent.height +=
4939 mappings[j].dst_rect.extent.height;
4940 clip_rects[k].offset.y = mappings[j].dst_rect.offset.y;
4941 merged = true;
4942 break;
4943 }
4944 }
4945
4946 if (merged)
4947 continue;
4948
4949 /* Create a new pass if needed; TDM can only have 2 clip rects. */
4950 if (pass->clip_rects_count >= custom_mapping->max_clip_rects) {
4951 struct pvr_transfer_pass *new_pass =
4952 pvr_create_pass(custom_mapping, pass->dst_offset);
4953 struct pvr_transfer_wa_source *new_source =
4954 pvr_create_source(new_pass,
4955 src->src_offset,
4956 src->extend_height);
4957 struct pvr_rect_mapping *new_mapping =
4958 pvr_create_mapping(new_source);
4959
4960 new_pass->clip_rects_count = 1U;
4961 *new_mapping = src->mappings[j];
4962
4963 pvr_remove_mapping(src, j);
4964
4965 if (src->mapping_count == 0) {
4966 pvr_remove_source(pass, s);
4967 s--;
4968 } else {
4969 /* Redo - mapping was replaced. */
4970 j--;
4971 }
4972 } else {
4973 pass->clip_rects[pass->clip_rects_count] =
4974 src->mappings[j].dst_rect;
4975
4976 pass->clip_rects_count++;
4977
4978 assert(pass->clip_rects_count <= ARRAY_SIZE(pass->clip_rects));
4979 }
4980 }
4981 }
4982 }
4983 }
4984
4985 static bool pvr_extend_height(const VkRect2D *rect,
4986 const uint32_t height,
4987 const uint32_t unwind_src)
4988 {
4989 if (rect->offset.x >= (int32_t)unwind_src)
4990 return false;
4991
4992 return (rect->offset.y > (int32_t)height) ||
4993 ((rect->offset.y + rect->extent.height) > (int32_t)height);
4994 }
4995
4996 static void
4997 pvr_generate_custom_mapping(uint32_t src_stride,
4998 uint32_t src_width,
4999 uint32_t src_height,
5000 uint32_t dst_stride,
5001 uint32_t dst_width,
5002 uint32_t dst_height,
5003 enum pvr_memlayout dst_mem_layout,
5004 struct pvr_transfer_custom_mapping *custom_mapping)
5005 {
5006 src_stride *= custom_mapping->texel_extend_src;
5007 src_width *= custom_mapping->texel_extend_src;
5008 dst_stride *= custom_mapping->texel_extend_dst;
5009 dst_width *= custom_mapping->texel_extend_dst;
5010
5011 if (custom_mapping->texel_unwind_src > 0U) {
5012 pvr_unwind_rects(src_stride,
5013 src_height,
5014 custom_mapping->texel_unwind_src,
5015 true,
5016 &custom_mapping->passes[0U]);
5017 }
5018
5019 if (custom_mapping->double_stride) {
5020 custom_mapping->double_stride =
5021 pvr_double_stride(&custom_mapping->passes[0U], dst_stride);
5022
5023 dst_stride *= 2U;
5024 }
5025
5026 pvr_unwind_rects(dst_stride,
5027 dst_height,
5028 custom_mapping->texel_unwind_dst,
5029 false,
5030 &custom_mapping->passes[0U]);
5031
5032 pvr_map_clip_rects(custom_mapping);
5033
5034 /* If the last row of the source mapping is sampled, the height of the
5035 * surface can only be increased if the new area contains a valid region.
5036 * Some blits are split into two sources.
5037 */
5038 if (custom_mapping->texel_unwind_src > 0U) {
5039 for (uint32_t i = 0; i < custom_mapping->pass_count; i++) {
5040 struct pvr_transfer_pass *pass = &custom_mapping->passes[i];
5041
5042 for (uint32_t j = 0; j < pass->source_count; j++) {
5043 struct pvr_transfer_wa_source *src = &pass->sources[j];
5044
5045 for (uint32_t k = 0; k < src->mapping_count; k++) {
5046 VkRect2D *src_rect = &src->mappings[k].src_rect;
5047 bool extend_height =
5048 pvr_extend_height(src_rect,
5049 src_height,
5050 custom_mapping->texel_unwind_src);
5051
5052 if (src->mapping_count == 1) {
5053 src->extend_height = extend_height;
5054 } else if (!src->extend_height && extend_height) {
5055 struct pvr_transfer_wa_source *new_src =
5056 pvr_acquire_source(pass, src->src_offset, extend_height);
5057
5058 new_src->mappings[new_src->mapping_count] = src->mappings[k];
5059 new_src->src_offset = src->src_offset;
5060
5061 for (uint32_t l = k + 1; l < src->mapping_count; l++)
5062 src->mappings[l - 1] = src->mappings[l];
5063
5064 new_src->mapping_count++;
5065 src->mapping_count--;
5066 k--;
5067 }
5068 }
5069 }
5070 }
5071 }
5072 }
5073
5074 static bool
5075 pvr_get_custom_mapping(const struct pvr_device_info *dev_info,
5076 const struct pvr_transfer_cmd *transfer_cmd,
5077 uint32_t max_clip_rects,
5078 struct pvr_transfer_custom_mapping *custom_mapping)
5079 {
5080 const uint32_t dst_bpp =
5081 vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
5082 const struct pvr_transfer_cmd_source *src = NULL;
5083 struct pvr_transfer_pass *pass;
5084 bool ret;
5085
5086 custom_mapping->max_clip_rects = max_clip_rects;
5087 custom_mapping->texel_unwind_src = 0U;
5088 custom_mapping->texel_unwind_dst = 0U;
5089 custom_mapping->texel_extend_src = 1U;
5090 custom_mapping->texel_extend_dst = 1U;
5091 custom_mapping->pass_count = 0U;
5092
5093 if (transfer_cmd->source_count > 1)
5094 return false;
5095
5096 custom_mapping->max_clip_size = PVR_MAX_CLIP_SIZE(dev_info);
5097
5098 ret = pvr_texel_unwind(dst_bpp,
5099 transfer_cmd->dst.dev_addr,
5100 false,
5101 1U,
5102 &custom_mapping->texel_unwind_dst);
5103 if (!ret) {
5104 custom_mapping->texel_extend_dst = dst_bpp / 8U;
5105 if (transfer_cmd->source_count > 0) {
5106 if (transfer_cmd->sources[0].surface.mem_layout ==
5107 PVR_MEMLAYOUT_LINEAR) {
5108 custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
5109 } else if (transfer_cmd->sources[0].surface.mem_layout ==
5110 PVR_MEMLAYOUT_TWIDDLED &&
5111 transfer_cmd->sources[0].surface.height == 1U) {
5112 custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
5113 }
5114 }
5115
5116 ret = pvr_texel_unwind(dst_bpp,
5117 transfer_cmd->dst.dev_addr,
5118 false,
5119 custom_mapping->texel_extend_dst,
5120 &custom_mapping->texel_unwind_dst);
5121 if (!ret)
5122 return false;
5123 }
5124
5125 if (transfer_cmd->source_count > 0) {
5126 src = &transfer_cmd->sources[0];
5127 const uint32_t src_bpp =
5128 vk_format_get_blocksizebits(src->surface.vk_format);
5129
5130 ret = pvr_is_surface_aligned(src->surface.dev_addr, true, src_bpp);
5131
5132 if (!ret && (src->surface.mem_layout == PVR_MEMLAYOUT_LINEAR ||
5133 src->surface.height == 1U)) {
5134 ret = pvr_texel_unwind(src_bpp,
5135 src->surface.dev_addr,
5136 true,
5137 custom_mapping->texel_extend_src,
5138 &custom_mapping->texel_unwind_src);
5139 }
5140
5141 if (!ret) {
5142 custom_mapping->texel_extend_src = dst_bpp / 8U;
5143 custom_mapping->texel_extend_dst = custom_mapping->texel_extend_src;
5144
5145 ret = pvr_texel_unwind(src_bpp,
5146 src->surface.dev_addr,
5147 true,
5148 custom_mapping->texel_extend_src,
5149 &custom_mapping->texel_unwind_src);
5150 }
5151
5152 if (!ret)
5153 return false;
5154 }
5155
5156 VkRect2D rect = transfer_cmd->scissor;
5157 assert(
5158 (rect.offset.x + rect.extent.width) <= custom_mapping->max_clip_size &&
5159 (rect.offset.y + rect.extent.height) <= custom_mapping->max_clip_size);
5160
   /* Texel extend only works with the strided memory layout, because the
    * pixel width is changed. Texel unwind likewise only works with the
    * strided memory layout. 1D blits are allowed in either case.
    */
   if (src && src->surface.height > 1U &&
       (custom_mapping->texel_extend_src > 1U ||
        custom_mapping->texel_unwind_src > 0U) &&
       src->surface.mem_layout != PVR_MEMLAYOUT_LINEAR) {
      return false;
   }

   /* The same restriction applies to the destination: texel extend and texel
    * unwind require the strided memory layout, except for 1D blits.
    */
   if ((custom_mapping->texel_extend_dst > 1U ||
        custom_mapping->texel_unwind_dst > 0U) &&
       transfer_cmd->dst.mem_layout != PVR_MEMLAYOUT_LINEAR &&
       transfer_cmd->dst.height > 1U) {
      return false;
   }

   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_LINEAR) {
      custom_mapping->double_stride = !pvr_is_pbe_stride_aligned(
         transfer_cmd->dst.stride * custom_mapping->texel_extend_dst);
   }

   if (custom_mapping->texel_unwind_src > 0U ||
       custom_mapping->texel_unwind_dst > 0U || custom_mapping->double_stride) {
      struct pvr_transfer_wa_source *wa_src;
      struct pvr_rect_mapping *mapping;

      pass = pvr_acquire_pass(custom_mapping, 0U);
      wa_src = pvr_create_source(pass, 0U, false);
      mapping = pvr_create_mapping(wa_src);

      if (transfer_cmd->source_count > 0) {
         *mapping = src->mappings[0U];
      } else {
         mapping->src_rect = transfer_cmd->scissor;
         mapping->dst_rect = transfer_cmd->scissor;
      }
   } else {
      return false;
   }

   if (custom_mapping->texel_extend_src > 1U ||
       custom_mapping->texel_extend_dst > 1U) {
      pass->sources[0].mappings[0U].src_rect.offset.x *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->sources[0].mappings[0U].src_rect.extent.width *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->sources[0].mappings[0U].dst_rect.offset.x *=
         (int32_t)custom_mapping->texel_extend_dst;
      pass->sources[0].mappings[0U].dst_rect.extent.width *=
         (int32_t)custom_mapping->texel_extend_dst;
   }

   if (transfer_cmd->source_count > 0) {
      pvr_generate_custom_mapping(transfer_cmd->sources[0].surface.stride,
                                  transfer_cmd->sources[0].surface.width,
                                  transfer_cmd->sources[0].surface.height,
                                  transfer_cmd->dst.stride,
                                  transfer_cmd->dst.width,
                                  transfer_cmd->dst.height,
                                  transfer_cmd->dst.mem_layout,
                                  custom_mapping);
   } else {
      pvr_generate_custom_mapping(0U,
                                  0U,
                                  0U,
                                  transfer_cmd->dst.stride,
                                  transfer_cmd->dst.width,
                                  transfer_cmd->dst.height,
                                  transfer_cmd->dst.mem_layout,
                                  custom_mapping);
   }

   return true;
}
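
/* Editorial summary (not from the original source): pvr_get_custom_mapping()
 * only produces a pass when at least one workaround is required - a source or
 * destination base address that is not texel aligned (texel unwind), a texel
 * size that has to be re-expressed as byte-sized channels so the unwind can
 * work at byte granularity (texel extend), or a destination stride the PBE
 * cannot emit directly (double stride). When none of these apply, it returns
 * false and the regular blit path is used unmodified.
 */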

static void pvr_pbe_extend_rect(uint32_t texel_extend, VkRect2D *rect)
{
   rect->offset.x *= texel_extend;
   rect->extent.width *= texel_extend;
}

static void pvr_pbe_rect_intersect(VkRect2D *rect_a, VkRect2D *rect_b)
{
   rect_a->extent.width = MIN2(rect_a->offset.x + rect_a->extent.width,
                               rect_b->offset.x + rect_b->extent.width) -
                          MAX2(rect_a->offset.x, rect_b->offset.x);
   rect_a->offset.x = MAX2(rect_a->offset.x, rect_b->offset.x);
   rect_a->extent.height = MIN2(rect_a->offset.y + rect_a->extent.height,
                                rect_b->offset.y + rect_b->extent.height) -
                           MAX2(rect_a->offset.y, rect_b->offset.y);
   rect_a->offset.y = MAX2(rect_a->offset.y, rect_b->offset.y);
}
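
/* A worked example for pvr_pbe_rect_intersect() (illustrative values, not
 * from the original source): intersecting
 *    a = { .offset = { 2, 0 }, .extent = { 6, 4 } }
 *    b = { .offset = { 4, 1 }, .extent = { 8, 8 } }
 * yields a = { .offset = { 4, 1 }, .extent = { 4, 3 } }:
 *
 *    width  = MIN2(2 + 6, 4 + 8) - MAX2(2, 4) = 8 - 4 = 4
 *    height = MIN2(0 + 4, 1 + 8) - MAX2(0, 1) = 4 - 1 = 3
 *
 * Note that the new extents are computed from the old offsets before the
 * offsets themselves are clamped, so the statement order in the function
 * matters.
 */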

static VkFormat pvr_texel_extend_src_format(VkFormat vk_format)
{
   uint32_t bpp = vk_format_get_blocksizebits(vk_format);
   VkFormat ext_format;

   switch (bpp) {
   case 16:
      ext_format = VK_FORMAT_R8G8_UINT;
      break;
   case 32:
      ext_format = VK_FORMAT_R8G8B8A8_UINT;
      break;
   case 48:
      ext_format = VK_FORMAT_R16G16B16_UINT;
      break;
   default:
      ext_format = VK_FORMAT_R8_UINT;
      break;
   }

   return ext_format;
}
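
/* Editorial note: each case above picks a raw UINT format with the same total
 * bits per texel as the original (e.g. a 16 bpp source is read back as
 * VK_FORMAT_R8G8_UINT, a 48 bpp source as VK_FORMAT_R16G16B16_UINT), so data
 * is moved bit-for-bit without interpreting the source's real channel layout.
 * The destination side is correspondingly rewritten as VK_FORMAT_R8_UINT with
 * a texel_extend-scaled width in pvr_modify_command() below.
 */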

static void
pvr_modify_command(struct pvr_transfer_custom_mapping *custom_mapping,
                   uint32_t pass_idx,
                   struct pvr_transfer_cmd *transfer_cmd)
{
   struct pvr_transfer_pass *pass = &custom_mapping->passes[pass_idx];
   uint32_t bpp;

   if (custom_mapping->texel_extend_src > 1U) {
      struct pvr_rect_mapping *mapping = &transfer_cmd->sources[0].mappings[0];

      pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->dst_rect);
      pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->src_rect);

      transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->dst.width *= custom_mapping->texel_extend_src;
      transfer_cmd->dst.stride *= custom_mapping->texel_extend_src;
      transfer_cmd->sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->sources[0].surface.width *=
         custom_mapping->texel_extend_src;
      transfer_cmd->sources[0].surface.stride *=
         custom_mapping->texel_extend_src;
   } else if (custom_mapping->texel_extend_dst > 1U) {
      VkRect2D max_clip = {
         .offset = { 0, 0 },
         .extent = { custom_mapping->max_clip_size,
                     custom_mapping->max_clip_size },
      };

      pvr_pbe_extend_rect(custom_mapping->texel_extend_dst,
                          &transfer_cmd->scissor);

      pvr_pbe_rect_intersect(&transfer_cmd->scissor, &max_clip);

      if (transfer_cmd->source_count > 0) {
         transfer_cmd->sources[0].surface.width *=
            custom_mapping->texel_extend_dst;
         transfer_cmd->sources[0].surface.stride *=
            custom_mapping->texel_extend_dst;

         transfer_cmd->sources[0].surface.vk_format =
            pvr_texel_extend_src_format(
               transfer_cmd->sources[0].surface.vk_format);
      }

      transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->dst.width *= custom_mapping->texel_extend_dst;
      transfer_cmd->dst.stride *= custom_mapping->texel_extend_dst;
   }

   if (custom_mapping->double_stride) {
      transfer_cmd->dst.width *= 2U;
      transfer_cmd->dst.stride *= 2U;
   }

   if (custom_mapping->texel_unwind_src > 0U) {
      if (transfer_cmd->sources[0].surface.height == 1U) {
         transfer_cmd->sources[0].surface.width +=
            custom_mapping->texel_unwind_src;
         transfer_cmd->sources[0].surface.stride +=
            custom_mapping->texel_unwind_src;
      } else if (transfer_cmd->sources[0].surface.stride == 1U) {
         transfer_cmd->sources[0].surface.height +=
            custom_mapping->texel_unwind_src;
      } else {
         /* Increase the source width by the texel unwind. If the texel unwind
          * is less than the distance between width and stride, the blit can
          * be done with one rectangle mapping, but the width of the surface
          * needs to be increased in case we sample from the area between
          * width and stride.
          */
         transfer_cmd->sources[0].surface.width =
            MIN2(transfer_cmd->sources[0].surface.width +
                    custom_mapping->texel_unwind_src,
                 transfer_cmd->sources[0].surface.stride);
      }
   }

   for (uint32_t i = 0U; i < pass->source_count; i++) {
      struct pvr_transfer_wa_source *src = &pass->sources[i];

      if (i > 0)
         transfer_cmd->sources[i] = transfer_cmd->sources[0];

      transfer_cmd->sources[i].mapping_count = src->mapping_count;
      for (uint32_t j = 0U; j < transfer_cmd->sources[i].mapping_count; j++)
         transfer_cmd->sources[i].mappings[j] = src->mappings[j];

      if (src->extend_height)
         transfer_cmd->sources[i].surface.height += 1U;

      transfer_cmd->sources[i].surface.width =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.width);
      transfer_cmd->sources[i].surface.height =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.height);
      transfer_cmd->sources[i].surface.stride =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.stride);
   }

   if (transfer_cmd->dst.height == 1U) {
      transfer_cmd->dst.width =
         transfer_cmd->dst.stride + custom_mapping->texel_unwind_dst;
      transfer_cmd->dst.mem_layout = PVR_MEMLAYOUT_TWIDDLED;
   }

   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED) {
      transfer_cmd->dst.width =
         MIN2((uint32_t)custom_mapping->max_clip_size, transfer_cmd->dst.width);
      transfer_cmd->dst.height = MIN2((uint32_t)custom_mapping->max_clip_size,
                                      transfer_cmd->dst.height);
   } else {
      transfer_cmd->dst.width = MIN2(PVR_MAX_WIDTH, transfer_cmd->dst.width);
   }

   if (transfer_cmd->source_count > 0) {
      for (uint32_t i = 0; i < pass->source_count; i++) {
         struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];

         bpp = vk_format_get_blocksizebits(src->surface.vk_format);

         src->surface.dev_addr.addr -=
            custom_mapping->texel_unwind_src * bpp / 8U;
         src->surface.dev_addr.addr += MAX2(src->surface.sample_count, 1U) *
                                       pass->sources[i].src_offset * bpp / 8U;
      }
   }

   bpp = vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
   transfer_cmd->dst.dev_addr.addr -=
      custom_mapping->texel_unwind_dst * bpp / 8U;
   transfer_cmd->dst.dev_addr.addr +=
      MAX2(transfer_cmd->dst.sample_count, 1U) * pass->dst_offset * bpp / 8U;

   if (transfer_cmd->source_count > 0)
      transfer_cmd->source_count = pass->source_count;
}
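
/* Worked example for the address adjustment above (hypothetical values, not
 * from the original source): with texel_unwind_dst == 3 and a 32 bpp
 * destination, the base address is rewound by 3 * 32 / 8 = 12 bytes. The
 * rectangle mappings generated earlier were shifted forward by the same
 * number of texels, so each mapping still addresses the caller's original
 * texels while the surface base address presumably ends up suitably aligned
 * for the hardware.
 */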

/* Routes a copy_blit (FastScale HW) to a clip_blit (Fast2D HW).
 * The destination rectangle can be overridden via dst_rect; pass NULL to keep
 * the existing scissor.
 */
static VkResult pvr_reroute_to_clip(struct pvr_transfer_ctx *ctx,
                                    const struct pvr_transfer_cmd *transfer_cmd,
                                    const struct VkRect2D *dst_rect,
                                    struct pvr_transfer_prep_data *prep_data,
                                    uint32_t pass_idx,
                                    bool *finished_out)
{
   struct pvr_transfer_cmd clip_transfer_cmd;

   clip_transfer_cmd = *transfer_cmd;
   clip_transfer_cmd.flags |= PVR_TRANSFER_CMD_FLAGS_FAST2D;

   if (transfer_cmd->source_count <= 1U) {
      if (dst_rect)
         clip_transfer_cmd.scissor = *dst_rect;

      return pvr_3d_clip_blit(ctx,
                              &clip_transfer_cmd,
                              prep_data,
                              pass_idx,
                              finished_out);
   }

   return vk_error(ctx->device, VK_ERROR_FORMAT_NOT_SUPPORTED);
}

static VkResult pvr_3d_copy_blit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_transfer_cmd *transfer_cmd,
                                 struct pvr_transfer_prep_data *prep_data,
                                 uint32_t pass_idx,
                                 bool *finished_out)
{
   const struct pvr_device_info *const dev_info =
      &ctx->device->pdevice->dev_info;

   struct pvr_transfer_3d_state *state = &prep_data->state;
   struct pvr_transfer_cmd *active_cmd = transfer_cmd;
   struct pvr_transfer_cmd int_cmd;
   VkResult result;

   state->dont_force_pbe = false;
   state->pass_idx = pass_idx;

   pvr_transfer_set_filter(transfer_cmd, state);

   if (transfer_cmd->source_count == 1U) {
      struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[0];

      /* Map the pixel formats to RAW where possible. This is only possible if
       * we don't perform any kind of 2D operation on the blit, since we don't
       * know the actual pixel values - i.e. the blit has to be point sampled.
       * Scaling doesn't matter as long as it stays point sampled.
       */
      if (src->surface.vk_format == transfer_cmd->dst.vk_format &&
          state->filter[0] == PVR_FILTER_POINT &&
          src->surface.sample_count <= transfer_cmd->dst.sample_count &&
          (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) == 0U) {
         uint32_t bpp;

         int_cmd = *transfer_cmd;
         active_cmd = &int_cmd;
         bpp = vk_format_get_blocksizebits(int_cmd.dst.vk_format);

         if (bpp > 0U) {
            switch (bpp) {
            case 8U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
               break;
            case 16U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8_UINT;
               break;
            case 24U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8B8_UINT;
               break;
            case 32U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32_UINT;
               break;
            case 48U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R16G16B16_UINT;
               break;
            case 64U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32_UINT;
               break;
            case 96U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32B32_UINT;
               break;
            case 128U:
               int_cmd.sources[0].surface.vk_format =
                  VK_FORMAT_R32G32B32A32_UINT;
               break;
            default:
               active_cmd = transfer_cmd;
               break;
            }
         }

         int_cmd.dst.vk_format = int_cmd.sources[0].surface.vk_format;
      }
   }

   if (pass_idx == 0U) {
      pvr_get_custom_mapping(dev_info, active_cmd, 3U, &state->custom_mapping);

      if (state->custom_mapping.texel_extend_src > 1U)
         state->custom_mapping.texel_extend_dst = 1U;
   }

   if (state->custom_mapping.pass_count > 0U) {
      struct pvr_transfer_pass *pass = &state->custom_mapping.passes[pass_idx];

      if (active_cmd != &int_cmd) {
         int_cmd = *active_cmd;
         active_cmd = &int_cmd;
      }

      state->custom_filter = true;

      pvr_modify_command(&state->custom_mapping, pass_idx, active_cmd);

      if (state->custom_mapping.double_stride ||
          pass->sources[0].mapping_count > 1U || pass->source_count > 1U) {
         result =
            pvr_3d_clip_blit(ctx, active_cmd, prep_data, pass_idx, finished_out);
      } else {
         struct pvr_rect_mapping *mappings = &pass->sources[0].mappings[0U];

         mappings[0U].src_rect.offset.x /=
            MAX2(1U, state->custom_mapping.texel_extend_dst);
         mappings[0U].src_rect.extent.width /=
            MAX2(1U, state->custom_mapping.texel_extend_dst);

         if (int_cmd.source_count > 0) {
            for (uint32_t i = 0U; i < pass->sources[0].mapping_count; i++)
               active_cmd->sources[0].mappings[i] = mappings[i];
         }

         active_cmd->scissor = mappings[0U].dst_rect;

         result = pvr_3d_copy_blit_core(ctx,
                                        active_cmd,
                                        prep_data,
                                        pass_idx,
                                        finished_out);
      }

      return result;
   }

   /* Route DS merge blits to the Clip blit path. A background object is used
    * to preserve the unmerged channel.
    */
   if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) != 0U) {
      /* The PBE byte mask could be used for a DS merge with FastScale, but
       * clearing the other channel on a DS merge requires a Clip blit.
       */
      if (!PVR_HAS_ERN(dev_info, 42064) ||
          ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U)) {
         return pvr_reroute_to_clip(ctx,
                                    active_cmd,
                                    &active_cmd->scissor,
                                    prep_data,
                                    pass_idx,
                                    finished_out);
      }
   }

   return pvr_3d_copy_blit_core(ctx,
                                active_cmd,
                                prep_data,
                                pass_idx,
                                finished_out);
}

/* TODO: This should be generated in csbgen. */
#define TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK \
   BITFIELD64_RANGE(2, (53 - 16) + 1)
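
/* Editorial note: BITFIELD64_RANGE(start, count) builds a mask of `count`
 * consecutive bits starting at bit `start`, so the mask above covers bits
 * [39:2] - (53 - 16) + 1 = 38 bits - presumably mirroring the TEXADDR field
 * that occupies bits [53:16] of the texture state word. An address is
 * accepted by pvr_validate_source_addr() below only if it is suitably aligned
 * and has no bits set outside this range.
 */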

static bool pvr_validate_source_addr(pvr_dev_addr_t addr)
{
   if (!pvr_dev_addr_is_aligned(
          addr,
          PVRX(TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT))) {
      return false;
   }

   if (addr.addr & ~TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK)
      return false;

   return true;
}

static bool pvr_supports_texel_unwind(struct pvr_transfer_cmd *transfer_cmd)
{
   struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;

   if (transfer_cmd->source_count > 1)
      return false;

   if (transfer_cmd->source_count) {
      struct pvr_transfer_cmd_surface *src = &transfer_cmd->sources[0].surface;

      if (src->height == 1) {
         if (src->mem_layout != PVR_MEMLAYOUT_LINEAR &&
             src->mem_layout != PVR_MEMLAYOUT_TWIDDLED &&
             src->mem_layout != PVR_MEMLAYOUT_3DTWIDDLED) {
            return false;
         }
      } else if (src->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
                 src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
         if (!pvr_validate_source_addr(src->dev_addr))
            return false;
      } else {
         if (src->mem_layout != PVR_MEMLAYOUT_LINEAR)
            return false;
      }
   }

   if (dst->mem_layout != PVR_MEMLAYOUT_LINEAR &&
       dst->mem_layout != PVR_MEMLAYOUT_TWIDDLED) {
      return false;
   }

   return true;
}

static bool pvr_3d_validate_addr(struct pvr_transfer_cmd *transfer_cmd)
{
   if (!pvr_supports_texel_unwind(transfer_cmd)) {
      return pvr_dev_addr_is_aligned(
         transfer_cmd->dst.dev_addr,
         PVRX(PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT));
   }

   return true;
}

static void
pvr_submit_info_stream_init(struct pvr_transfer_ctx *ctx,
                            struct pvr_transfer_prep_data *prep_data,
                            struct pvr_winsys_transfer_cmd *cmd)
{
   const struct pvr_winsys_transfer_regs *const regs = &prep_data->state.regs;
   const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;

   uint32_t *stream_ptr = (uint32_t *)cmd->fw_stream;
   uint32_t *stream_len_ptr = stream_ptr;

   /* Leave space for stream header. */
   stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);

   *(uint64_t *)stream_ptr = regs->pds_bgnd0_base;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND0_BASE);

   *(uint64_t *)stream_ptr = regs->pds_bgnd1_base;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND1_BASE);

   *(uint64_t *)stream_ptr = regs->pds_bgnd3_sizeinfo;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND3_SIZEINFO);

   *(uint64_t *)stream_ptr = regs->isp_mtile_base;
   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_BASE);

   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_wordx_mrty) == 9U);
   STATIC_ASSERT(sizeof(regs->pbe_wordx_mrty[0]) == sizeof(uint64_t));
   memcpy(stream_ptr, regs->pbe_wordx_mrty, sizeof(regs->pbe_wordx_mrty));
   stream_ptr += 9U * 2U;

   *stream_ptr = regs->isp_bgobjvals;
   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);

   *stream_ptr = regs->usc_pixel_output_ctrl;
   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);

   *stream_ptr = regs->usc_clear_register0;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register1;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register2;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register3;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->isp_mtile_size;
   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_SIZE);

   *stream_ptr = regs->isp_render_origin;
   stream_ptr += pvr_cmd_length(CR_ISP_RENDER_ORIGIN);

   *stream_ptr = regs->isp_ctl;
   stream_ptr += pvr_cmd_length(CR_ISP_CTL);

   *stream_ptr = regs->isp_aa;
   stream_ptr += pvr_cmd_length(CR_ISP_AA);

   *stream_ptr = regs->event_pixel_pds_info;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);

   *stream_ptr = regs->event_pixel_pds_code;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_CODE);

   *stream_ptr = regs->event_pixel_pds_data;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);

   *stream_ptr = regs->isp_render;
   stream_ptr += pvr_cmd_length(CR_ISP_RENDER);

   *stream_ptr = regs->isp_rgn;
   stream_ptr++;

   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
      *stream_ptr = regs->frag_screen;
      stream_ptr++;
   }

   cmd->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)cmd->fw_stream;
   assert(cmd->fw_stream_len <= ARRAY_SIZE(cmd->fw_stream));

   pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
      value.length = cmd->fw_stream_len;
   }
}
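
/* Editorial note on the stream layout above: stream_len_ptr records where the
 * KMD_STREAM_HDR lives before any register values are appended, and once
 * every value has been written the total byte length is packed back into that
 * header. This two-pass pattern lets the consumer find the end of the
 * variable-length stream (e.g. the trailing frag_screen word only exists on
 * multicore devices).
 */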

static void
pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info,
                           const struct pvr_transfer_prep_data *const prep_data,
                           struct pvr_winsys_transfer_cmd_flags *flags)
{
   *flags = prep_data->flags;
   flags->use_single_core = PVR_HAS_FEATURE(dev_info, gpu_multicore_support);
}

static void pvr_transfer_job_ws_submit_info_init(
   struct pvr_transfer_ctx *ctx,
   struct pvr_transfer_submit *submit,
   struct vk_sync *wait,
   struct pvr_winsys_transfer_submit_info *submit_info)
{
   const struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   submit_info->frame_num = device->global_queue_present_count;
   submit_info->job_num = device->global_cmd_buffer_submit_count;
   submit_info->wait = wait;
   submit_info->cmd_count = submit->prep_count;

   for (uint32_t i = 0U; i < submit->prep_count; i++) {
      struct pvr_winsys_transfer_cmd *const cmd = &submit_info->cmds[i];
      struct pvr_transfer_prep_data *prep_data = &submit->prep_array[i];

      pvr_submit_info_stream_init(ctx, prep_data, cmd);
      pvr_submit_info_flags_init(dev_info, prep_data, &cmd->flags);
   }
}

static VkResult pvr_submit_transfer(struct pvr_transfer_ctx *ctx,
                                    struct pvr_transfer_submit *submit,
                                    struct vk_sync *wait,
                                    struct vk_sync *signal_sync)
{
   struct pvr_winsys_transfer_submit_info submit_info;

   pvr_transfer_job_ws_submit_info_init(ctx, submit, wait, &submit_info);

   return ctx->device->ws->ops->transfer_submit(ctx->ws_ctx,
                                                &submit_info,
                                                &ctx->device->pdevice->dev_info,
                                                signal_sync);
}

static VkResult pvr_queue_transfer(struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_cmd *transfer_cmd,
                                   struct vk_sync *wait,
                                   struct vk_sync *signal_sync)
{
   struct pvr_transfer_prep_data *prep_data = NULL;
   struct pvr_transfer_prep_data *prev_prep_data;
   struct pvr_transfer_submit submit = { 0U };
   bool finished = false;
   uint32_t pass = 0U;
   VkResult result;

   /* The transfer queue might decide to do a blit in multiple passes. When a
    * prepare doesn't set the finished flag, this code keeps calling the
    * prepare with an increasing pass index. If queued transfers are submitted
    * from here, we submit them straight away; that's why we only need a
    * single prepare for the blit rather than one for each pass. Otherwise we
    * insert each prepare into the prepare array. When the client batches
    * blits and we split a blit into multiple passes, each pass in each queued
    * transfer adds one more prepare. Thus the prepare array after 2
    * pvr_queue_transfer calls might look like:
    *
    *    +------+------++-------+-------+-------+
    *    |B0/P0 |B0/P1 || B1/P0 | B1/P1 | B1/P2 |
    *    +------+------++-------+-------+-------+
    *    F          S/U F                    S/U
    *
    *    Bn/Pm : nth blit (queue transfer call) / mth prepare
    *    F     : fence point
    *    S/U   : update / server sync update point
    */
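
   /* Editorial note: submit.prep_array is a fixed-size array (presumably
    * sized by PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT), so the loop below also
    * flushes a submit early whenever the array fills up before the blit
    * reports that it is finished, and then reuses the array for the remaining
    * passes.
    */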

   while (!finished) {
      prev_prep_data = prep_data;
      prep_data = &submit.prep_array[submit.prep_count++];

      /* Clear down the memory before we write to this prep. */
      memset(prep_data, 0U, sizeof(*prep_data));

      if (pass == 0U) {
         if (!pvr_3d_validate_addr(transfer_cmd))
            return vk_error(ctx->device, VK_ERROR_FEATURE_NOT_PRESENT);
      } else {
         /* Transfer queue workarounds could use more than one pass with 3D
          * path.
          */
         prep_data->state = prev_prep_data->state;
      }

      if (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D) {
         result =
            pvr_3d_clip_blit(ctx, transfer_cmd, prep_data, pass, &finished);
      } else {
         result =
            pvr_3d_copy_blit(ctx, transfer_cmd, prep_data, pass, &finished);
      }
      if (result != VK_SUCCESS)
         return result;

      /* Submit if we have finished the blit or if we are out of prepares. */
      if (finished || submit.prep_count == ARRAY_SIZE(submit.prep_array)) {
         result = pvr_submit_transfer(ctx,
                                      &submit,
                                      wait,
                                      finished ? signal_sync : NULL);
         if (result != VK_SUCCESS)
            return result;

         /* Check if we need to reset prep_count. */
         if (submit.prep_count == ARRAY_SIZE(submit.prep_array))
            submit.prep_count = 0U;
      }

      pass++;
   }

   return VK_SUCCESS;
}

VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_sub_cmd_transfer *sub_cmd,
                                 struct vk_sync *wait_sync,
                                 struct vk_sync *signal_sync)
{
   list_for_each_entry_safe (struct pvr_transfer_cmd,
                             transfer_cmd,
                             sub_cmd->transfer_cmds,
                             link) {
      /* The fw guarantees that any kick on the same context will be
       * synchronized in submission order. This means only the first kick must
       * wait, and only the last kick needs to signal.
       */
      struct vk_sync *first_cmd_wait_sync = NULL;
      struct vk_sync *last_cmd_signal_sync = NULL;
      VkResult result;

      if (list_first_entry(sub_cmd->transfer_cmds,
                           struct pvr_transfer_cmd,
                           link) == transfer_cmd) {
         first_cmd_wait_sync = wait_sync;
      }

      if (list_last_entry(sub_cmd->transfer_cmds,
                          struct pvr_transfer_cmd,
                          link) == transfer_cmd) {
         last_cmd_signal_sync = signal_sync;
      }

      result = pvr_queue_transfer(ctx,
                                  transfer_cmd,
                                  first_cmd_wait_sync,
                                  last_cmd_signal_sync);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}
