xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_job_transfer.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <vulkan/vulkan.h>
30 
31 #include "pvr_csb.h"
32 #include "pvr_csb_enum_helpers.h"
33 #include "pvr_formats.h"
34 #include "pvr_job_common.h"
35 #include "pvr_job_context.h"
36 #include "pvr_job_transfer.h"
37 #include "pvr_private.h"
38 #include "pvr_tex_state.h"
39 #include "pvr_transfer_frag_store.h"
40 #include "pvr_types.h"
41 #include "pvr_uscgen.h"
42 #include "pvr_util.h"
43 #include "pvr_winsys.h"
44 #include "util/bitscan.h"
45 #include "util/list.h"
46 #include "util/macros.h"
47 #include "util/u_math.h"
48 #include "util/xxhash.h"
49 #include "vk_format.h"
50 #include "vk_log.h"
51 #include "vk_sync.h"
52 
53 #define PVR_TRANSFER_MAX_PASSES 10U
54 #define PVR_TRANSFER_MAX_CLIP_RECTS 4U
55 #define PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT 16U
56 #define PVR_TRANSFER_MAX_CUSTOM_RECTS 3U
57 
58 /* Number of triangles sent to the TSP per raster. */
59 #define PVR_TRANSFER_NUM_LAYERS 1U
60 
61 #define PVR_MAX_WIDTH 16384
62 #define PVR_MAX_HEIGHT 16384
63 
64 #define PVR_MAX_CLIP_SIZE(dev_info) \
65    (PVR_HAS_FEATURE(dev_info, screen_size8K) ? 8192U : 16384U)
66 
/* Tile pairing mode for the ISP render.
 * NOTE(review): exact pairing semantics (which axis tiles are fetched in
 * pairs along) inferred from the enumerator names — confirm against usage.
 */
enum pvr_paired_tiles {
   PVR_PAIRED_TILES_NONE,
   PVR_PAIRED_TILES_X,
   PVR_PAIRED_TILES_Y
};
72 
/* One source of a custom-mapping (workaround) transfer pass. */
struct pvr_transfer_wa_source {
   /* Offset applied to the source surface for this pass.
    * NOTE(review): units (texels vs. bytes) not visible here — confirm.
    */
   uint32_t src_offset;
   /* Number of valid entries in mappings[]. */
   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];
   /* Whether the source height is extended as part of the workaround. */
   bool extend_height;
};
79 
/* A single pass of a custom-mapped transfer: a destination offset, the
 * sources feeding it and the clip rectangles limiting the render area.
 */
struct pvr_transfer_pass {
   /* Offset applied to the destination surface for this pass.
    * NOTE(review): units not visible here — confirm.
    */
   uint32_t dst_offset;

   uint32_t source_count;
   struct pvr_transfer_wa_source sources[PVR_TRANSFER_MAX_SOURCES];

   uint32_t clip_rects_count;
   VkRect2D clip_rects[PVR_TRANSFER_MAX_CLIP_RECTS];
};
89 
/* Description of a transfer that must be split into multiple passes with
 * per-pass rectangle remapping (texel unwind/extend workarounds).
 * NOTE(review): the original comment here said "layer iteration", which
 * seems to describe pvr_transfer_3d_iteration below — confirm.
 */
struct pvr_transfer_custom_mapping {
   bool double_stride;
   uint32_t texel_unwind_src;
   uint32_t texel_unwind_dst;
   uint32_t texel_extend_src;
   uint32_t texel_extend_dst;
   /* Number of valid entries in passes[]. */
   uint32_t pass_count;
   struct pvr_transfer_pass passes[PVR_TRANSFER_MAX_PASSES];
   uint32_t max_clip_rects;
   int32_t max_clip_size;
};
102 
/* Per-layer iteration state: texture coordinates fed to the TSP.
 * NOTE(review): layout of the 12 coordinate words not visible here.
 */
struct pvr_transfer_3d_iteration {
   uint32_t texture_coords[12];
};
106 
/* Accumulated state for preparing one 3D-path transfer (blit) submission:
 * hardware register values, render geometry in tiles, shader/PDS layout
 * offsets and the optional custom mapping.
 */
struct pvr_transfer_3d_state {
   /* Register values handed to the winsys for the transfer kick. */
   struct pvr_winsys_transfer_regs regs;

   /* Destination rect is empty / fully clipped away. */
   bool empty_dst;
   /* Multisample resolve via blend (sample-rate down scale). */
   bool down_scale;
   /* Write all channels present in the dst from the USC even if those are
    * constants.
    */
   bool dont_force_pbe;

   /* The rate of the shader. */
   uint32_t msaa_multiplier;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_x_in_tiles;
   /* Top left corner of the render in ISP tiles. */
   uint32_t origin_y_in_tiles;
   /* Width of the render in ISP tiles. */
   uint32_t width_in_tiles;
   /* Height of the render in ISP tiles. */
   uint32_t height_in_tiles;

   /* Width of a sample in registers (pixel partition width). */
   uint32_t usc_pixel_width;

   /* Properties of the USC shader. */
   struct pvr_tq_shader_properties shader_props;

   /* TODO: Use pvr_dev_addr_t of an offset type for these. */
   uint32_t pds_shader_task_offset;
   uint32_t tex_state_data_offset;
   uint32_t uni_tex_code_offset;

   /* Sizes of the PDS uniform / texture-state data segments. */
   uint32_t uniform_data_size;
   uint32_t tex_state_data_size;
   /* USC coefficient registers used by the shader. */
   uint32_t usc_coeff_regs;

   /* Pointer into the common store. */
   uint32_t common_ptr;
   /* Pointer into the dynamic constant reg buffer. */
   uint32_t dynamic_const_reg_ptr;
   /* Pointer into the USC constant reg buffer. */
   uint32_t usc_const_reg_ptr;

   uint32_t pds_coeff_task_offset;
   uint32_t coeff_data_size;

   /* Number of temporary 32bit registers used by PDS. */
   uint32_t pds_temps;

   /* Multi-pass rectangle remapping, if any (pass_count > 0). */
   struct pvr_transfer_custom_mapping custom_mapping;
   /* Index of the custom-mapping pass currently being prepared. */
   uint32_t pass_idx;

   /* Per-source sampling filter. */
   enum pvr_filter filter[PVR_TRANSFER_MAX_SOURCES];
   bool custom_filter;

   enum pvr_paired_tiles pair_tiles;
};
164 
/* Everything prepared for one transfer command: submit flags plus the full
 * 3D-path state.
 */
struct pvr_transfer_prep_data {
   struct pvr_winsys_transfer_cmd_flags flags;
   struct pvr_transfer_3d_state state;
};
169 
/* A batch of prepared transfers submitted together (at most
 * PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT).
 */
struct pvr_transfer_submit {
   /* Number of valid entries in prep_array[]. */
   uint32_t prep_count;
   struct pvr_transfer_prep_data
      prep_array[PVR_TRANSFER_MAX_PREPARES_PER_SUBMIT];
};
175 
/* Pick the narrowest raw PBE pixel source wide enough for the format's
 * texel size (32, 64 or 128 bits).
 */
static enum pvr_transfer_pbe_pixel_src pvr_pbe_src_format_raw(VkFormat format)
{
   const uint32_t bits_per_texel = vk_format_get_blocksizebits(format);

   if (bits_per_texel > 64U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW128;

   if (bits_per_texel > 32U)
      return PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;

   return PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
}
187 
pvr_pbe_src_format_pick_depth(const VkFormat src_format,const VkFormat dst_format,enum pvr_transfer_pbe_pixel_src * const src_format_out)188 static VkResult pvr_pbe_src_format_pick_depth(
189    const VkFormat src_format,
190    const VkFormat dst_format,
191    enum pvr_transfer_pbe_pixel_src *const src_format_out)
192 {
193    if (dst_format != VK_FORMAT_D24_UNORM_S8_UINT)
194       return VK_ERROR_FORMAT_NOT_SUPPORTED;
195 
196    switch (src_format) {
197    case VK_FORMAT_D24_UNORM_S8_UINT:
198    case VK_FORMAT_X8_D24_UNORM_PACK32:
199       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8;
200       break;
201 
202    case VK_FORMAT_D32_SFLOAT:
203       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8;
204       break;
205 
206    default:
207       return VK_ERROR_FORMAT_NOT_SUPPORTED;
208    }
209 
210    return VK_SUCCESS;
211 }
212 
pvr_pbe_src_format_pick_stencil(const VkFormat src_format,const VkFormat dst_format,enum pvr_transfer_pbe_pixel_src * const src_format_out)213 static VkResult pvr_pbe_src_format_pick_stencil(
214    const VkFormat src_format,
215    const VkFormat dst_format,
216    enum pvr_transfer_pbe_pixel_src *const src_format_out)
217 {
218    if ((src_format != VK_FORMAT_D24_UNORM_S8_UINT &&
219         src_format != VK_FORMAT_S8_UINT) ||
220        dst_format != VK_FORMAT_D24_UNORM_S8_UINT) {
221       return VK_ERROR_FORMAT_NOT_SUPPORTED;
222    }
223 
224    if (src_format == VK_FORMAT_S8_UINT)
225       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8;
226    else
227       *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8;
228 
229    return VK_SUCCESS;
230 }
231 
/* Choose the PBE pixel source format for depth/stencil transfers.
 *
 * DS-merge requests (PVR_TRANSFER_CMD_FLAGS_DSMERGE) are delegated to the
 * pick-depth / pick-stencil helpers; plain DS copies are keyed off the
 * destination format. Returns VK_ERROR_FORMAT_NOT_SUPPORTED for
 * combinations the hardware cannot produce.
 */
static VkResult
pvr_pbe_src_format_ds(const struct pvr_transfer_cmd_surface *src,
                      const enum pvr_filter filter,
                      const VkFormat dst_format,
                      const uint32_t flags,
                      const bool down_scale,
                      enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   const VkFormat src_format = src->vk_format;

   const bool src_depth = vk_format_has_depth(src_format);
   const bool dst_depth = vk_format_has_depth(dst_format);
   const bool src_stencil = vk_format_has_stencil(src_format);
   const bool dst_stencil = vk_format_has_stencil(dst_format);

   if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
      /* Merging, so destination should always have both. */
      if (!dst_depth || !dst_stencil)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      /* PICKD selects which aspect the source contributes. */
      if (flags & PVR_TRANSFER_CMD_FLAGS_PICKD) {
         return pvr_pbe_src_format_pick_depth(src_format,
                                              dst_format,
                                              src_format_out);
      } else {
         return pvr_pbe_src_format_pick_stencil(src_format,
                                                dst_format,
                                                src_format_out);
      }
   }

   /* We can't invent channels out of nowhere. */
   if ((dst_depth && !src_depth) || (dst_stencil && !src_stencil))
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   switch (dst_format) {
   case VK_FORMAT_D16_UNORM:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;

      /* Raw copy unless a resolve needs normalized arithmetic. */
      if (!down_scale)
         *src_format_out = pvr_pbe_src_format_raw(dst_format);
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;

      break;
   case VK_FORMAT_D24_UNORM_S8_UINT:
      switch (src_format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* Linear filtering needs the unpacked path; point can copy raw. */
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D24S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;

         break;

      /* D16_UNORM results in a 0.0->1.0 float from the TPU, the same as D32 */
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D32_SFLOAT:
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8;
         break;

      default:
         if (filter == PVR_FILTER_LINEAR)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_D32S8;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
      }

      break;

   case VK_FORMAT_D32_SFLOAT:
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;

      break;

   default:
      /* Stencil-only and remaining destinations. */
      if (src_format == VK_FORMAT_D24_UNORM_S8_UINT)
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB;
      else
         *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_RAW32;
   }

   return VK_SUCCESS;
}
320 
/**
 * How the PBE expects the output buffer for an RGBA space conversion.
 *
 * Selects the PBE pixel source format for color (non-depth/stencil) blits.
 * The choice is keyed off the destination's class (integer vs.
 * float/normalized), its channel width and the signedness of source and
 * destination. Returns VK_ERROR_FORMAT_NOT_SUPPORTED when the src/dst
 * class combination cannot be converted.
 */
static VkResult
pvr_pbe_src_format_normal(VkFormat src_format,
                          VkFormat dst_format,
                          bool down_scale,
                          bool dont_force_pbe,
                          enum pvr_transfer_pbe_pixel_src *src_format_out)
{
   bool dst_signed = vk_format_is_sint(dst_format) ||
                     vk_format_is_snorm(dst_format);

   if (vk_format_is_int(dst_format)) {
      uint32_t red_width;
      bool src_signed;
      uint32_t count;

      /* Integer destinations require integer sources. */
      if (!vk_format_is_int(src_format))
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);

      src_signed = vk_format_is_sint(src_format);

      /* Channel width of the destination's red channel drives the choice. */
      red_width = vk_format_get_component_bits(dst_format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               0);

      switch (red_width) {
      case 8:
         /* Pick by (src, dst) signedness pair. */
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU8888;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU8888;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US8888;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS8888;

         break;

      case 10:
         /* 10-bit packed formats additionally differ by channel order. */
         switch (dst_format) {
         case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            *src_format_out = src_signed ? PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102
                                         : PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102;
            break;

         case VK_FORMAT_A2R10G10B10_UINT_PACK32:
            *src_format_out = src_signed
                                 ? PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102
                                 : PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102;
            break;

         default:
            return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
         }
         break;

      case 16:
         if (!src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
         else if (src_signed && !dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16;
         else if (!src_signed && dst_signed)
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_US16S16;
         else
            *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16;

         break;

      case 32:
         /* Channel count decides between the 64- and 128-bit wide paths. */
         if (dont_force_pbe) {
            count = vk_format_get_blocksizebits(dst_format) / 32U;
         } else {
            count =
               pvr_vk_format_get_common_color_channel_count(src_format, dst_format);
         }

         if (!src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         } else if (src_signed && !dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32;
         } else if (!src_signed && dst_signed) {
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_US32S32;
         } else {
            /* Same signedness: a raw copy preserves the bits. */
            *src_format_out = (count > 2U) ? PVR_TRANSFER_PBE_PIXEL_SRC_RAW128
                                           : PVR_TRANSFER_PBE_PIXEL_SRC_RAW64;
         }
         break;

      default:
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

   } else if (vk_format_is_float(dst_format) ||
              pvr_vk_format_is_fully_normalized(dst_format)) {
      bool is_float = true;

      /* Block-compressed sources decode to float/normalized values. */
      if (!vk_format_is_float(src_format) &&
          !pvr_vk_format_is_fully_normalized(src_format) &&
          !vk_format_is_block_compressed(src_format)) {
         return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

      if (pvr_vk_format_is_fully_normalized(dst_format)) {
         uint32_t chan_width;

         is_float = false;

         /* Alpha only. */
         switch (dst_format) {
         case VK_FORMAT_D16_UNORM:
            chan_width = 16;
            break;

         default:
            chan_width =
               vk_format_get_component_bits(dst_format,
                                            UTIL_FORMAT_COLORSPACE_RGB,
                                            0U);
            break;
         }

         if (src_format == dst_format) {
            switch (chan_width) {
            case 16U:
               /* Resolves need normalized math; plain copies stay integer. */
               if (down_scale) {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               } else {
                  *src_format_out = dst_signed
                                       ? PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16
                                       : PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16;
               }
               break;

            case 32U:
               *src_format_out = pvr_pbe_src_format_raw(dst_format);
               break;
            default:
               /* Narrow channels fall through to the float path below. */
               is_float = true;
               break;
            }
         } else {
            switch (chan_width) {
            case 16U:
               *src_format_out = dst_signed
                                    ? PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM
                                    : PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM;
               break;
            default:
               is_float = true;
               break;
            }
         }
      }

      if (is_float) {
         if (pvr_vk_format_has_32bit_component(dst_format)) {
            uint32_t count;

            if (dont_force_pbe) {
               count = vk_format_get_blocksizebits(dst_format) / 32U;
            } else {
               count = pvr_vk_format_get_common_color_channel_count(src_format,
                                                                dst_format);
            }

            switch (count) {
            case 1U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32;
               break;
            case 2U:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X2;
               break;
            default:
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F32X4;
               break;
            }
         } else {
            /* 8-bit UNORM RGBA destinations take the F16->U8 path. */
            if (dst_format == VK_FORMAT_B8G8R8A8_UNORM ||
                dst_format == VK_FORMAT_R8G8B8A8_UNORM ||
                dst_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32) {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8;
            } else {
               *src_format_out = PVR_TRANSFER_PBE_PIXEL_SRC_F16F16;
            }
         }
      }
   } else {
      return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
   }

   return VK_SUCCESS;
}
520 
521 static inline uint32_t
pvr_get_blit_flags(const struct pvr_transfer_cmd * transfer_cmd)522 pvr_get_blit_flags(const struct pvr_transfer_cmd *transfer_cmd)
523 {
524    return transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D
525              ? 0
526              : transfer_cmd->flags;
527 }
528 
pvr_pbe_src_format(struct pvr_transfer_cmd * transfer_cmd,struct pvr_transfer_3d_state * state,struct pvr_tq_shader_properties * prop)529 static VkResult pvr_pbe_src_format(struct pvr_transfer_cmd *transfer_cmd,
530                                    struct pvr_transfer_3d_state *state,
531                                    struct pvr_tq_shader_properties *prop)
532 {
533    struct pvr_tq_layer_properties *layer = &prop->layer_props;
534    const enum pvr_filter filter = transfer_cmd->source_count
535                                      ? transfer_cmd->sources[0].filter
536                                      : PVR_FILTER_POINT;
537    const uint32_t flags = transfer_cmd->flags;
538    VkFormat dst_format = transfer_cmd->dst.vk_format;
539    const struct pvr_transfer_cmd_surface *src;
540    VkFormat src_format;
541    bool down_scale;
542 
543    if (transfer_cmd->source_count > 0) {
544       src = &transfer_cmd->sources[0].surface;
545       down_scale = transfer_cmd->sources[0].resolve_op == PVR_RESOLVE_BLEND &&
546                    transfer_cmd->sources[0].surface.sample_count > 1U &&
547                    transfer_cmd->dst.sample_count <= 1U;
548    } else {
549       src = &transfer_cmd->dst;
550       down_scale = false;
551    }
552 
553    src_format = src->vk_format;
554 
555    /* This has to come before the rest as S8 for instance is integer and
556     * signedness check fails on D24S8.
557     */
558    if (vk_format_is_depth_or_stencil(src_format) ||
559        vk_format_is_depth_or_stencil(dst_format) ||
560        flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
561       return pvr_pbe_src_format_ds(src,
562                                    filter,
563                                    dst_format,
564                                    flags,
565                                    down_scale,
566                                    &layer->pbe_format);
567    }
568 
569    return pvr_pbe_src_format_normal(src_format,
570                                     dst_format,
571                                     down_scale,
572                                     state->dont_force_pbe,
573                                     &layer->pbe_format);
574 }
575 
/* Pack the background-object PDS registers (CR_PDS_BGRND0/1/3) from the
 * prepared transfer state. BGRND2 is intentionally skipped — the background
 * object's PDS does not use a uniform program.
 */
static inline void pvr_setup_hwbg_object(const struct pvr_device_info *dev_info,
                                         struct pvr_transfer_3d_state *state)
{
   struct pvr_winsys_transfer_regs *regs = &state->regs;

   /* BGRND0: PDS shader + texture-unicode program addresses. */
   pvr_csb_pack (&regs->pds_bgnd0_base, CR_PDS_BGRND0_BASE, reg) {
      reg.shader_addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.shader_addr,
         PVRX(CR_PDS_BGRND0_BASE_SHADER_ADDR_ALIGNMENT)));
      reg.texunicode_addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texunicode_addr,
         PVRX(CR_PDS_BGRND0_BASE_TEXUNICODE_ADDR_ALIGNMENT)));
   }

   /* BGRND1: PDS texture-state data address. */
   pvr_csb_pack (&regs->pds_bgnd1_base, CR_PDS_BGRND1_BASE, reg) {
      reg.texturedata_addr = PVR_DEV_ADDR(state->tex_state_data_offset);
      assert(pvr_dev_addr_is_aligned(
         reg.texturedata_addr,
         PVRX(CR_PDS_BGRND1_BASE_TEXTUREDATA_ADDR_ALIGNMENT)));
   }

   /* BGRND 2 not needed, background object PDS doesn't use uniform program. */

   /* BGRND3: data-segment sizes, converted to hardware allocation units.
    * Uniform and texture-state sizes must already be unit-aligned (asserted);
    * shared and temp sizes are rounded up.
    */
   pvr_csb_pack (&regs->pds_bgnd3_sizeinfo, CR_PDS_BGRND3_SIZEINFO, reg) {
      reg.usc_sharedsize =
         DIV_ROUND_UP(state->common_ptr,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));

      assert(!(state->uniform_data_size &
               (PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE) - 1)));
      reg.pds_uniformsize =
         state->uniform_data_size /
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_UNIFORMSIZE_UNIT_SIZE);

      assert(
         !(state->tex_state_data_size &
           (PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE) - 1)));
      reg.pds_texturestatesize =
         state->tex_state_data_size /
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE);

      reg.pds_tempsize =
         DIV_ROUND_UP(state->pds_temps,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
   }
}
624 
625 static inline bool
pvr_is_surface_aligned(pvr_dev_addr_t dev_addr,bool is_input,uint32_t bpp)626 pvr_is_surface_aligned(pvr_dev_addr_t dev_addr, bool is_input, uint32_t bpp)
627 {
628    /* 96 bpp is 32 bit granular. */
629    if (bpp == 64U || bpp == 128U) {
630       uint64_t mask = (uint64_t)((bpp >> 3U) - 1U);
631 
632       if ((dev_addr.addr & mask) != 0ULL)
633          return false;
634    }
635 
636    if (is_input) {
637       if ((dev_addr.addr &
638            (PVRX(TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT) - 1U)) !=
639           0ULL) {
640          return false;
641       }
642    } else {
643       if ((dev_addr.addr &
644            (PVRX(PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT) - 1U)) != 0ULL) {
645          return false;
646       }
647    }
648 
649    return true;
650 }
651 
652 static inline VkResult
pvr_mem_layout_spec(const struct pvr_transfer_cmd_surface * surface,uint32_t load,bool is_input,uint32_t * width_out,uint32_t * height_out,uint32_t * stride_out,enum pvr_memlayout * mem_layout_out,pvr_dev_addr_t * dev_addr_out)653 pvr_mem_layout_spec(const struct pvr_transfer_cmd_surface *surface,
654                     uint32_t load,
655                     bool is_input,
656                     uint32_t *width_out,
657                     uint32_t *height_out,
658                     uint32_t *stride_out,
659                     enum pvr_memlayout *mem_layout_out,
660                     pvr_dev_addr_t *dev_addr_out)
661 {
662    const uint32_t bpp = vk_format_get_blocksizebits(surface->vk_format);
663    uint32_t unsigned_stride;
664 
665    *mem_layout_out = surface->mem_layout;
666    *height_out = surface->height;
667    *width_out = surface->width;
668    *stride_out = surface->stride;
669    *dev_addr_out = surface->dev_addr;
670 
671    if (surface->mem_layout != PVR_MEMLAYOUT_LINEAR &&
672        !pvr_is_surface_aligned(*dev_addr_out, is_input, bpp)) {
673       return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
674    }
675 
676    switch (surface->mem_layout) {
677    case PVR_MEMLAYOUT_LINEAR:
678       if (surface->stride == 0U)
679          return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
680 
681       unsigned_stride = *stride_out;
682 
683       if (!pvr_is_surface_aligned(*dev_addr_out, is_input, bpp))
684          return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
685 
686       if (unsigned_stride < *width_out)
687          return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
688 
689       if (!is_input) {
690          if (unsigned_stride == 1U) {
691             /* Change the setup to twiddling as that doesn't hit the stride
692              * limit and twiddled == strided when 1px stride.
693              */
694             *mem_layout_out = PVR_MEMLAYOUT_TWIDDLED;
695          }
696       }
697 
698       *stride_out = unsigned_stride;
699       break;
700 
701    case PVR_MEMLAYOUT_TWIDDLED:
702    case PVR_MEMLAYOUT_3DTWIDDLED:
703       /* Ignoring stride value for twiddled/tiled surface. */
704       *stride_out = *width_out;
705       break;
706 
707    default:
708       return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
709    }
710 
711    return VK_SUCCESS;
712 }
713 
/* Fill in default PBE surface and render parameters for the destination of
 * a transfer: format/swizzle/packmode, memory layout, clip rectangle and
 * the render extent in ISP tiles.
 */
static VkResult
pvr_pbe_setup_codegen_defaults(const struct pvr_device_info *dev_info,
                               const struct pvr_transfer_cmd *transfer_cmd,
                               struct pvr_transfer_3d_state *state,
                               struct pvr_pbe_surf_params *surface_params,
                               struct pvr_pbe_render_params *render_params)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const uint8_t *swizzle;
   VkFormat format;
   VkResult result;

   /* D24S8-style destinations are written through the PBE as raw R32. */
   switch (dst->vk_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      format = VK_FORMAT_R32_UINT;
      break;

   default:
      format = dst->vk_format;
      break;
   }

   swizzle = pvr_get_format_swizzle(format);
   memcpy(surface_params->swizzle, swizzle, sizeof(surface_params->swizzle));

   pvr_pbe_get_src_format_and_gamma(format,
                                    PVR_PBE_GAMMA_NONE,
                                    false,
                                    &surface_params->source_format,
                                    &surface_params->gamma);

   surface_params->is_normalized = pvr_vk_format_is_fully_normalized(format);
   surface_params->pbe_packmode = pvr_get_pbe_packmode(format);
   surface_params->nr_components = vk_format_get_nr_components(format);

   /* Validate the destination layout and fetch width/height/stride/addr. */
   result = pvr_mem_layout_spec(dst,
                                0U,
                                false,
                                &surface_params->width,
                                &surface_params->height,
                                &surface_params->stride,
                                &surface_params->mem_layout,
                                &surface_params->addr);
   if (result != VK_SUCCESS)
      return result;

   surface_params->z_only_render = false;
   surface_params->depth = dst->depth;
   surface_params->down_scale = state->down_scale;

   /* 3D-twiddled destinations render into the slice given by z_position. */
   if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      render_params->slice = (uint32_t)MAX2(dst->z_position, 0.0f);
   else
      render_params->slice = 0U;

   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);

   /* If the rectangle happens to be empty / off-screen we clip away
    * everything.
    */
   if (state->empty_dst) {
      /* Clip window placed wholly outside the 1x1-tile render. */
      render_params->min_x_clip = 2U * tile_size_x;
      render_params->max_x_clip = 3U * tile_size_x;
      render_params->min_y_clip = 2U * tile_size_y;
      render_params->max_y_clip = 3U * tile_size_y;
      state->origin_x_in_tiles = 0U;
      state->origin_y_in_tiles = 0U;
      state->height_in_tiles = 1U;
      state->width_in_tiles = 1U;
   } else {
      const VkRect2D *scissor = &transfer_cmd->scissor;

      /* Clamp */
      render_params->min_x_clip =
         MAX2(MIN2(scissor->offset.x, (int32_t)surface_params->width), 0U);
      render_params->max_x_clip =
         MAX2(MIN2(scissor->offset.x + scissor->extent.width,
                   (int32_t)surface_params->width),
              0U) -
         1U;

      render_params->min_y_clip =
         MAX2(MIN2(scissor->offset.y, surface_params->height), 0U);
      render_params->max_y_clip =
         MAX2(MIN2(scissor->offset.y + scissor->extent.height,
                   surface_params->height),
              0U) -
         1U;

      /* Custom mapping overrides the clip with the pass's first rect. */
      if (state->custom_mapping.pass_count > 0U) {
         struct pvr_transfer_pass *pass =
            &state->custom_mapping.passes[state->pass_idx];

         render_params->min_x_clip = (uint32_t)pass->clip_rects[0U].offset.x;
         render_params->max_x_clip =
            (uint32_t)(pass->clip_rects[0U].offset.x +
                       pass->clip_rects[0U].extent.width) -
            1U;
         render_params->min_y_clip = (uint32_t)pass->clip_rects[0U].offset.y;
         render_params->max_y_clip =
            (uint32_t)(pass->clip_rects[0U].offset.y +
                       pass->clip_rects[0U].extent.height) -
            1U;
      }

      state->origin_x_in_tiles = render_params->min_x_clip / tile_size_x;
      state->origin_y_in_tiles = render_params->min_y_clip / tile_size_y;
      state->width_in_tiles =
         (render_params->max_x_clip + tile_size_x) / tile_size_x;
      state->height_in_tiles =
         (render_params->max_y_clip + tile_size_y) / tile_size_y;

      /* Be careful here as this isn't the same as ((max_x_clip -
       * min_x_clip) + tile_size_x) >> tile_size_x.
       */
      state->width_in_tiles -= state->origin_x_in_tiles;
      state->height_in_tiles -= state->origin_y_in_tiles;
   }

   render_params->source_start = PVR_PBE_STARTPOS_BIT0;
   render_params->mrt_index = 0U;

   return VK_SUCCESS;
}
840 
841 static VkResult
pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface * dst,struct pvr_transfer_3d_state * state,uint32_t rt_idx,struct pvr_pbe_surf_params * surf_params,struct pvr_pbe_render_params * render_params)842 pvr_pbe_setup_modify_defaults(const struct pvr_transfer_cmd_surface *dst,
843                               struct pvr_transfer_3d_state *state,
844                               uint32_t rt_idx,
845                               struct pvr_pbe_surf_params *surf_params,
846                               struct pvr_pbe_render_params *render_params)
847 {
848    struct pvr_transfer_pass *pass;
849    VkRect2D *clip_rect;
850 
851    render_params->mrt_index = rt_idx;
852 
853    assert(rt_idx > 0 && rt_idx <= PVR_TRANSFER_MAX_RENDER_TARGETS);
854 
855    if (state->custom_mapping.pass_count == 0)
856       return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
857 
858    pass = &state->custom_mapping.passes[state->pass_idx];
859 
860    assert(rt_idx < PVR_TRANSFER_MAX_CUSTOM_RECTS);
861 
862    clip_rect = &pass->clip_rects[rt_idx];
863 
864    render_params->min_x_clip = (uint32_t)clip_rect->offset.x;
865    render_params->max_x_clip =
866       (uint32_t)clip_rect->offset.x + clip_rect->extent.width - 1U;
867    render_params->min_y_clip = (uint32_t)clip_rect->offset.y;
868    render_params->max_y_clip =
869       (uint32_t)clip_rect->offset.y + clip_rect->extent.height - 1U;
870 
871    return VK_SUCCESS;
872 }
873 
874 static uint32_t
pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)875 pvr_pbe_get_pixel_size(enum pvr_transfer_pbe_pixel_src pixel_format)
876 {
877    switch (pixel_format) {
878    case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
879    case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
880    case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
881    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
882    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
883    case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
884    case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
885    case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
886    case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
887    case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
888    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
889    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
890    case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
891    case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
892    case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
893    case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
894    case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
895    case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
896    case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
897       return 1U;
898 
899    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
900    case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
901    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
902    case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
903    case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
904    case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
905    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
906    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
907    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
908    case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
909    case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
910    case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
911    case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
912    case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
913    case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
914    case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
915       return 2U;
916 
917    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
918    case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
919    case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
920    case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
921       return 4U;
922 
923    case PVR_TRANSFER_PBE_PIXEL_SRC_NUM:
924    default:
925       break;
926    }
927 
928    return 0U;
929 }
930 
/* Sets up the PBE output swizzle and source format for the destination
 * surface, and records the USC pixel width implied by the layer's PBE pixel
 * format in state->usc_pixel_width.
 */
static void pvr_pbe_setup_swizzle(const struct pvr_transfer_cmd *transfer_cmd,
                                  struct pvr_transfer_3d_state *state,
                                  struct pvr_pbe_surf_params *surf_params)
{
   bool color_fill = !!(transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL);
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;

   const uint32_t pixel_size =
      pvr_pbe_get_pixel_size(state->shader_props.layer_props.pbe_format);

   /* pvr_pbe_get_pixel_size() returns 0 for unknown formats; clamp to 1. */
   state->usc_pixel_width = MAX2(pixel_size, 1U);

   switch (dst->vk_format) {
   /* Depth/stencil-style destinations: only the first channel carries data,
    * so source the remaining channels from constant 0.
    */
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_S8_UINT:
      surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
      surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
      surf_params->swizzle[3U] = PIPE_SWIZZLE_0;
      break;

   default: {
      /* Bit width of the destination's first (red) color channel. */
      const uint32_t red_width =
         vk_format_get_component_bits(dst->vk_format,
                                      UTIL_FORMAT_COLORSPACE_RGB,
                                      0U);

      if (transfer_cmd->source_count > 0 &&
          vk_format_is_alpha(dst->vk_format)) {
         if (vk_format_has_alpha(transfer_cmd->sources[0].surface.vk_format)) {
            /* Modify the destination format swizzle to always source from
             * src0.
             */
            surf_params->swizzle[0U] = PIPE_SWIZZLE_X;
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;
         }

         /* Source format having no alpha channel still allocates 4 output
          * buffer registers.
          */
      }

      if (pvr_vk_format_is_fully_normalized(dst->vk_format)) {
         /* 8 bpc RGBA fills and F16->U8 layers emit 8 bits per channel;
          * other normalized destinations with narrow (<= 8 bit) channels go
          * through F16 per channel.
          */
         if (color_fill &&
             (dst->vk_format == VK_FORMAT_B8G8R8A8_UNORM ||
              dst->vk_format == VK_FORMAT_R8G8B8A8_UNORM ||
              dst->vk_format == VK_FORMAT_A8B8G8R8_UNORM_PACK32)) {
            surf_params->source_format =
               PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
         } else if (state->shader_props.layer_props.pbe_format ==
                    PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8) {
            surf_params->source_format =
               PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
         } else if (red_width <= 8U) {
            surf_params->source_format =
               PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
         }
      } else if (red_width == 32U && !state->dont_force_pbe) {
         /* 32-bit channels: find the widest channel overlap any source has
          * with the destination, then constant-fill the channels beyond it.
          * NOTE(review): dont_force_pbe semantics come from elsewhere in the
          * file — confirm it means "leave the PBE swizzle untouched".
          */
         uint32_t count = 0U;

         for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
            VkFormat src_format = transfer_cmd->sources[i].surface.vk_format;
            uint32_t tmp;

            tmp = pvr_vk_format_get_common_color_channel_count(src_format,
                                                           dst->vk_format);

            count = MAX2(count, tmp);
         }

         /* Intentional cascade: with N common channels, channels N..2 are
          * zeroed and alpha is forced to 1.
          */
         switch (count) {
         case 1U:
            surf_params->swizzle[1U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 2U:
            surf_params->swizzle[2U] = PIPE_SWIZZLE_0;
            FALLTHROUGH;
         case 3U:
            surf_params->swizzle[3U] = PIPE_SWIZZLE_1;
            break;

         case 4U:
         default:
            break;
         }
      }
      break;
   }
   }
}
1025 
1026 /**
1027  * Calculates the required PBE byte mask based on the incoming transfer command.
1028  *
1029  * @param transfer_cmd  the transfer command
1030  * @return the bytemask (active high disable mask)
1031  */
1032 
pvr_pbe_byte_mask(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd * transfer_cmd)1033 static uint64_t pvr_pbe_byte_mask(const struct pvr_device_info *dev_info,
1034                                   const struct pvr_transfer_cmd *transfer_cmd)
1035 {
1036    uint32_t flags = pvr_get_blit_flags(transfer_cmd);
1037 
1038    assert(PVR_HAS_ERN(dev_info, 42064));
1039 
1040    if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
1041       uint32_t mask = 0U;
1042 
1043       switch (transfer_cmd->dst.vk_format) {
1044       case VK_FORMAT_D32_SFLOAT_S8_UINT:
1045          mask = 0xF0F0F0F0U;
1046          break;
1047       case VK_FORMAT_D24_UNORM_S8_UINT:
1048          mask = 0x88888888U;
1049          break;
1050       default:
1051          break;
1052       }
1053 
1054       if ((flags & PVR_TRANSFER_CMD_FLAGS_PICKD) == 0U)
1055          mask = ~mask;
1056 
1057       return mask;
1058    }
1059 
1060    /* The mask is as it was inactive on cores without the ERN. This keeps the
1061     * firmware agnostic to the feature.
1062     */
1063    return 0U;
1064 }
1065 
/* Builds and uploads the PDS pixel event program that hands the PBE setup
 * (emit) words to the pre-built EOT USC shader for 'rt_count' render
 * targets, then packs the EVENT_PIXEL_PDS_{INFO,DATA,CODE} registers.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the staging allocation fails, or
 * any error from the PDS upload.
 */
static VkResult pvr_pbe_setup_emit(const struct pvr_transfer_cmd *transfer_cmd,
                                   struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_3d_state *state,
                                   uint32_t rt_count,
                                   uint32_t *pbe_setup_words)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   struct pvr_winsys_transfer_regs *regs = &state->regs;
   struct pvr_pds_event_program program = {
      .emit_words = pbe_setup_words,
      .num_emit_word_pairs = rt_count,
   };
   struct pvr_pds_upload pds_upload;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   pvr_dev_addr_t addr;
   VkResult result;

   /* Precondition, make sure to use a valid index for ctx->usc_eot_bos. */
   assert(rt_count <= ARRAY_SIZE(ctx->usc_eot_bos));
   assert(rt_count > 0U);

   /* USC-heap-relative address of the EOT shader matching this rt_count. */
   addr.addr = ctx->usc_eot_bos[rt_count - 1U]->dev_addr.addr -
               device->heaps.usc_heap->base_addr.addr;

   pvr_pds_setup_doutu(&program.task_control,
                       addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   /* Fills in program.code_size / program.data_size (in dwords). */
   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Staging layout: data segment first, code segment right after it. */
   pvr_pds_generate_pixel_event_data_segment(&program,
                                             staging_buffer,
                                             dev_info);

   /* TODO: We can save some memory by generating a code segment for each
    * rt_count, which at the time of writing is a maximum of 3, in
    * pvr_setup_transfer_eot_shaders() when we setup the corresponding EOT
    * USC programs.
    */
   pvr_pds_generate_pixel_event_code_segment(&program,
                                             staging_buffer + program.data_size,
                                             dev_info);

   result =
      pvr_cmd_buffer_upload_pds(transfer_cmd->cmd_buffer,
                                staging_buffer,
                                program.data_size,
                                PVRX(CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT),
                                staging_buffer + program.data_size,
                                program.code_size,
                                PVRX(CR_EVENT_PIXEL_PDS_CODE_ADDR_ALIGNMENT),
                                PVRX(CR_EVENT_PIXEL_PDS_DATA_ADDR_ALIGNMENT),
                                &pds_upload);
   /* Staging copy is no longer needed whether the upload succeeded or not. */
   vk_free(&device->vk.alloc, staging_buffer);
   if (result != VK_SUCCESS)
      return result;

   pvr_csb_pack (&regs->event_pixel_pds_info, CR_EVENT_PIXEL_PDS_INFO, reg) {
      reg.temp_stride = 0U;
      reg.const_size =
         DIV_ROUND_UP(program.data_size,
                      PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
      /* Shared registers consumed by the PBE emit words. */
      reg.usc_sr_size =
         DIV_ROUND_UP(rt_count * PVR_STATE_PBE_DWORDS,
                      PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
   }

   pvr_csb_pack (&regs->event_pixel_pds_data, CR_EVENT_PIXEL_PDS_DATA, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.data_offset);
   }

   pvr_csb_pack (&regs->event_pixel_pds_code, CR_EVENT_PIXEL_PDS_CODE, reg) {
      reg.addr = PVR_DEV_ADDR(pds_upload.code_offset);
   }

   return VK_SUCCESS;
}
1157 
/* Packs PBE state (control-stream registers and EOT emit words) for every
 * render target of the destination — one per format plane, or one per clip
 * rect when a custom mapping is active — then uploads the matching pixel
 * event program via pvr_pbe_setup_emit(). For custom mappings the tile
 * origin/extent in 'state' is widened to cover all emits.
 */
static VkResult pvr_pbe_setup(const struct pvr_transfer_cmd *transfer_cmd,
                              struct pvr_transfer_ctx *ctx,
                              struct pvr_transfer_3d_state *state)
{
   struct pvr_device *const device = ctx->device;
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   uint32_t num_rts = vk_format_get_plane_count(dst->vk_format);
   uint32_t pbe_setup_words[PVR_TRANSFER_MAX_RENDER_TARGETS *
                            ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_pbe_render_params render_params;
   struct pvr_pbe_surf_params surf_params;
   VkResult result;

   /* Custom mappings emit one render target per clip rect instead. */
   if (state->custom_mapping.pass_count > 0U)
      num_rts = state->custom_mapping.passes[state->pass_idx].clip_rects_count;

   if (PVR_HAS_FEATURE(dev_info, paired_tiles))
      state->pair_tiles = PVR_PAIRED_TILES_NONE;

   for (uint32_t i = 0U; i < num_rts; i++) {
      uint64_t *pbe_regs;
      uint32_t *pbe_words;

      /* Ensure the access into the pbe_wordx_mrty is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER <
             ARRAY_SIZE(state->regs.pbe_wordx_mrty));
      /* Ensure the access into pbe_setup_words is made within its bounds. */
      assert(i * ROGUE_NUM_PBESTATE_STATE_WORDS < ARRAY_SIZE(pbe_setup_words));

      pbe_regs =
         &state->regs
             .pbe_wordx_mrty[i * ROGUE_NUM_PBESTATE_REG_WORDS_FOR_TRANSFER];
      pbe_words = &pbe_setup_words[i * ROGUE_NUM_PBESTATE_STATE_WORDS];

      /* Word 2 is OR'd into below (sw_bytemask), so clear it first. */
      if (PVR_HAS_ERN(dev_info, 42064))
         pbe_regs[2U] = 0UL;

      /* RT 0 derives full defaults; later RTs only override the MRT index
       * and clip rect.
       */
      if (i == 0U) {
         result = pvr_pbe_setup_codegen_defaults(dev_info,
                                                 transfer_cmd,
                                                 state,
                                                 &surf_params,
                                                 &render_params);
         if (result != VK_SUCCESS)
            return result;
      } else {
         result = pvr_pbe_setup_modify_defaults(dst,
                                                state,
                                                i,
                                                &surf_params,
                                                &render_params);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_pbe_setup_swizzle(transfer_cmd, state, &surf_params);

      pvr_pbe_pack_state(dev_info,
                         &surf_params,
                         &render_params,
                         pbe_words,
                         pbe_regs);

      /* ERN 42064: merge the software bytemask into PBE word 2. */
      if (PVR_HAS_ERN(dev_info, 42064)) {
         uint64_t temp_reg;

         pvr_csb_pack (&temp_reg, PBESTATE_REG_WORD2, reg) {
            reg.sw_bytemask = pvr_pbe_byte_mask(dev_info, transfer_cmd);
         }

         pbe_regs[2U] |= temp_reg;
      }

      /* Record the pairing direction the packed state selected so the ISP
       * setup can match it.
       */
      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (pbe_regs[2U] &
             (1ULL << PVRX(PBESTATE_REG_WORD2_PAIR_TILES_SHIFT))) {
            if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED)
               state->pair_tiles = PVR_PAIRED_TILES_Y;
            else
               state->pair_tiles = PVR_PAIRED_TILES_X;
         }
      }
   }

   result =
      pvr_pbe_setup_emit(transfer_cmd, ctx, state, num_rts, pbe_setup_words);
   if (result != VK_SUCCESS)
      return result;

   /* Adjust tile origin and width to include all emits. */
   if (state->custom_mapping.pass_count > 0U) {
      const uint32_t tile_size_x =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
      const uint32_t tile_size_y =
         PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
      struct pvr_transfer_pass *pass =
         &state->custom_mapping.passes[state->pass_idx];
      VkOffset2D offset = { 0U, 0U };
      VkOffset2D end = { 0U, 0U };

      /* Bounding box of every clip rect in the pass. */
      for (uint32_t i = 0U; i < pass->clip_rects_count; i++) {
         VkRect2D *rect = &pass->clip_rects[i];

         offset.x = MIN2(offset.x, rect->offset.x);
         offset.y = MIN2(offset.y, rect->offset.y);
         end.x = MAX2(end.x, rect->offset.x + rect->extent.width);
         end.y = MAX2(end.y, rect->offset.y + rect->extent.height);
      }

      state->origin_x_in_tiles = (uint32_t)offset.x / tile_size_x;
      state->origin_y_in_tiles = (uint32_t)offset.y / tile_size_y;
      state->width_in_tiles =
         DIV_ROUND_UP((uint32_t)end.x, tile_size_x) - state->origin_x_in_tiles;
      state->height_in_tiles =
         DIV_ROUND_UP((uint32_t)end.y, tile_size_y) - state->origin_y_in_tiles;
   }

   return VK_SUCCESS;
}
1279 
1280 /**
1281  * Writes the ISP tile registers according to the MSAA state. Sets up the USC
1282  * pixel partition allocations and the number of tiles in flight.
1283  */
static VkResult pvr_isp_tiles(const struct pvr_device *device,
                              struct pvr_transfer_3d_state *state)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t isp_samples =
      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1U);
   uint32_t origin_x = state->origin_x_in_tiles;
   uint32_t origin_y = state->origin_y_in_tiles;
   uint32_t width = state->width_in_tiles;
   uint32_t height = state->height_in_tiles;
   uint32_t isp_tiles_in_flight;

   /* msaa_multiplier is calculated by sample_count & ~1U. Given sample
    * count is always in powers of two, we can get the sample count from
    * msaa_multiplier using the following logic.
    */
   const uint32_t samples = MAX2(state->msaa_multiplier, 1U);

   /* isp_samples_per_pixel feature is also know as "2x/4x for free", when
    * this is present SAMPLES_PER_PIXEL is 2/4, otherwise 1. The following
    * logic should end up with these numbers:
    *
    * |---------------------------------|
    * | 4 SAMPLES / ISP PIXEL           |
    * |-----------------------+----+----|
    * |                  MSAA | X* | Y* |
    * |                    2X |  1 |  1 |
    * |                    4X |  1 |  1 |
    * |---------------------------------|
    * | 2 SAMPLES / ISP PIXEL           |
    * |-----------------------+----+----|
    * |                  MSAA | X* | Y* |
    * |                    2X |  1 |  1 |
    * |                    4X |  1 |  2 |
    * |                    8X |  2 |  2 |
    * |-----------------------+----+----|
    * |  1 SAMPLE / ISP PIXEL           |
    * |-----------------------+----+----|
    * |                  MSAA | X* | Y* |
    * |                    2X |  1 |  2 |
    * |                    4X |  2 |  2 |
    * |-----------------------+----+----|
    */

   /* The shifts below select the X*/Y* doubling factors from the table
    * above out of the msaa_multiplier bit pattern.
    */
   origin_x <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   origin_y <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
                 (state->msaa_multiplier >> isp_samples)) &
                1U;
   width <<= (state->msaa_multiplier >> (isp_samples + 1U)) & 1U;
   height <<= ((state->msaa_multiplier >> (isp_samples + 1U)) |
               (state->msaa_multiplier >> isp_samples)) &
              1U;

   /* Tile pairing needs an even tile count in both directions. */
   if (PVR_HAS_FEATURE(dev_info, paired_tiles) &&
       state->pair_tiles != PVR_PAIRED_TILES_NONE) {
      width = ALIGN_POT(width, 2U);
      height = ALIGN_POT(height, 2U);
   }

   pvr_csb_pack (&state->regs.isp_mtile_size, CR_ISP_MTILE_SIZE, reg) {
      reg.x = width;
      reg.y = height;
   }

   pvr_csb_pack (&state->regs.isp_render_origin, CR_ISP_RENDER_ORIGIN, reg) {
      reg.x = origin_x;
      reg.y = origin_y;
   }

   /* Derives the USC pixel output control value and the tiles-in-flight
    * field that is merged into ISP_CTL below.
    */
   pvr_setup_tiles_in_flight(dev_info,
                             dev_runtime_info,
                             pvr_cr_isp_aa_mode_type(samples),
                             state->usc_pixel_width,
                             state->pair_tiles != PVR_PAIRED_TILES_NONE,
                             0,
                             &isp_tiles_in_flight,
                             &state->regs.usc_pixel_output_ctrl);

   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, reg) {
      reg.process_empty_tiles = true;

      if (PVR_HAS_FEATURE(dev_info, paired_tiles)) {
         if (state->pair_tiles == PVR_PAIRED_TILES_X) {
            reg.pair_tiles = true;
         } else if (state->pair_tiles == PVR_PAIRED_TILES_Y) {
            reg.pair_tiles = true;
            reg.pair_tiles_vert = true;
         }
      }
   }

   /* isp_tiles_in_flight is pre-shifted into ISP_CTL field position. */
   state->regs.isp_ctl |= isp_tiles_in_flight;

   return VK_SUCCESS;
}
1381 
1382 static bool
pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info * dev_info,enum pvr_transfer_pbe_pixel_src pbe_format)1383 pvr_int_pbe_pixel_changes_dst_rate(const struct pvr_device_info *dev_info,
1384                                    enum pvr_transfer_pbe_pixel_src pbe_format)
1385 {
1386    /* We don't emulate rate change from the USC with the pbe_yuv feature. */
1387    if (!PVR_HAS_FEATURE(dev_info, pbe_yuv) &&
1388        (pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED ||
1389         pbe_format == PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V)) {
1390       return true;
1391    }
1392 
1393    return false;
1394 }
1395 
/**
 * Determines whether the destination rect is empty (nothing to write) and,
 * if not, how many DWORDs of floating-point texture coefficients from the
 * unified store the first source layer needs (layer_floats).
 */
static void pvr_uv_space(const struct pvr_device_info *dev_info,
                         const struct pvr_transfer_cmd *transfer_cmd,
                         struct pvr_transfer_3d_state *state)
{
   const struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
   const VkRect2D *dst_rect = &transfer_cmd->scissor;

   /* This also avoids division by 0 in pvr_dma_texture_floats(). */
   if (state->custom_mapping.pass_count == 0U &&
       (dst_rect->extent.width == 0U || dst_rect->extent.height == 0U ||
        MAX2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) <
           0U ||
        MIN2(dst_rect->offset.x, dst_rect->offset.x + dst_rect->extent.width) >
           (int32_t)dst->width ||
        MAX2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) <
           0U ||
        MIN2(dst_rect->offset.y, dst_rect->offset.y + dst_rect->extent.height) >
           (int32_t)dst->height)) {
      /* Degenerate or fully off-surface scissor: nothing will be written. */
      state->empty_dst = true;
   } else {
      state->empty_dst = false;

      if (transfer_cmd->source_count > 0) {
         struct pvr_tq_layer_properties *layer =
            &state->shader_props.layer_props;

         /* NOTE(review): these shadow the scissor-based dst_rect above;
          * from here on src_rect/dst_rect are the first mapping's rects.
          */
         const VkRect2D *src_rect =
            &transfer_cmd->sources[0U].mappings[0U].src_rect;
         const VkRect2D *dst_rect =
            &transfer_cmd->sources[0U].mappings[0U].dst_rect;
         int32_t dst_x1 = dst_rect->offset.x + dst_rect->extent.width;
         int32_t dst_y1 = dst_rect->offset.y + dst_rect->extent.height;
         int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
         int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;

         assert(transfer_cmd->source_count == 1);

         if (state->filter[0U] > PVR_FILTER_POINT) {
            /* Filtered sampling always needs the full coordinate set. */
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else if (src_rect->extent.width == 0U ||
                    src_rect->extent.height == 0U) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         } else if ((src_rect->offset.x * dst_x1 !=
                     src_x1 * dst_rect->offset.x) ||
                    (src_rect->offset.y * dst_y1 !=
                     src_y1 * dst_rect->offset.y) ||
                    (src_rect->extent.width != dst_rect->extent.width) ||
                    (src_rect->extent.height != dst_rect->extent.height) ||
                    transfer_cmd->sources[0U].mappings[0U].flip_x ||
                    transfer_cmd->sources[0U].mappings[0U].flip_y) {
            /* With equal non-zero extents the cross-multiplication reduces
             * to comparing offsets, so FLOATS_0 is only kept when src and
             * dst rects coincide exactly and no flip is requested.
             */
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_4;
         } else {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_0;
         }

         /* We have to adjust the rate. */
         if (layer->layer_floats != PVR_INT_COORD_SET_FLOATS_0 &&
             pvr_int_pbe_pixel_changes_dst_rate(dev_info, layer->pbe_format)) {
            layer->layer_floats = PVR_INT_COORD_SET_FLOATS_6;
         }
      }
   }
}
1463 
pvr_int_pbe_pixel_num_sampler_and_image_states(enum pvr_transfer_pbe_pixel_src pbe_format)1464 static uint32_t pvr_int_pbe_pixel_num_sampler_and_image_states(
1465    enum pvr_transfer_pbe_pixel_src pbe_format)
1466 {
1467    switch (pbe_format) {
1468    case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
1469    case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
1470       return 1U;
1471    default:
1472       return pvr_pbe_pixel_num_loads(pbe_format);
1473    }
1474 }
1475 
pvr_sampler_state_for_surface(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd_surface * surface,enum pvr_filter filter,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,uint32_t sampler,uint32_t * mem_ptr)1476 static VkResult pvr_sampler_state_for_surface(
1477    const struct pvr_device_info *dev_info,
1478    const struct pvr_transfer_cmd_surface *surface,
1479    enum pvr_filter filter,
1480    const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1481    uint32_t sampler,
1482    uint32_t *mem_ptr)
1483 {
1484    uint64_t sampler_state[2U] = { 0UL, 0UL };
1485 
1486    pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
1487       reg.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
1488       reg.minlod = PVRX(TEXSTATE_CLAMP_MIN);
1489       reg.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
1490       reg.dadjust = PVRX(TEXSTATE_DADJUST_MIN_UINT);
1491 
1492       if (filter == PVR_FILTER_DONTCARE || filter == PVR_FILTER_POINT) {
1493          reg.minfilter = PVRX(TEXSTATE_FILTER_POINT);
1494          reg.magfilter = PVRX(TEXSTATE_FILTER_POINT);
1495       } else if (filter == PVR_FILTER_LINEAR) {
1496          reg.minfilter = PVRX(TEXSTATE_FILTER_LINEAR);
1497          reg.magfilter = PVRX(TEXSTATE_FILTER_LINEAR);
1498       } else {
1499          assert(PVR_HAS_FEATURE(dev_info, tf_bicubic_filter));
1500          reg.minfilter = PVRX(TEXSTATE_FILTER_BICUBIC);
1501          reg.magfilter = PVRX(TEXSTATE_FILTER_BICUBIC);
1502       }
1503 
1504       reg.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1505       reg.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1506 
1507       if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
1508          reg.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1509    }
1510 
1511    assert(sampler < PVR_TRANSFER_MAX_IMAGES);
1512 
1513    assert(sampler <= sh_reg_layout->combined_image_samplers.count);
1514    mem_ptr += sh_reg_layout->combined_image_samplers.offsets[sampler].sampler;
1515 
1516    memcpy(mem_ptr, sampler_state, sizeof(sampler_state));
1517 
1518    return VK_SUCCESS;
1519 }
1520 
/* Packs default texture (TPU) state words for sampling \p surface into
 * \p mem_ptr.
 *
 * Selects an override texel format for depth/stencil surfaces the TPU can't
 * sample natively, fills a pvr_texture_state_info from the surface layout and
 * the current transfer state, then packs the hardware words with
 * pvr_pack_tex_state().
 *
 * \param device  Device owning the transfer context.
 * \param state   Per-transfer 3D state (shader layer props, custom mapping).
 * \param surface Source surface being sampled.
 * \param load    Index of the load within the pixel program (plane/pass).
 * \param mem_ptr Destination for the packed 64-bit image state words.
 * \return VK_SUCCESS or an error from layout resolution / state packing.
 */
static inline VkResult pvr_image_state_set_codegen_defaults(
   struct pvr_device *device,
   struct pvr_transfer_3d_state *state,
   const struct pvr_transfer_cmd_surface *surface,
   uint32_t load,
   uint64_t *mem_ptr)
{
   struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
   struct pvr_texture_state_info info = { 0U };
   VkResult result;

   switch (surface->vk_format) {
   /* ERN 46863 */
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      /* Pixel programs that consume both channels sample D32_S8 as raw
       * 64-bit texels. For other pbe_formats info.format stays at its
       * zero-initialised value (VK_FORMAT_UNDEFINED) — presumably handled
       * downstream by pvr_pack_tex_state(); TODO confirm.
       */
      switch (layer->pbe_format) {
      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
      case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
      case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
      case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
         info.format = VK_FORMAT_R32G32_UINT;
         break;
      default:
         break;
      }
      break;

   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      /* Sample packed 24-bit depth (+stencil) as a single raw 32-bit word. */
      info.format = VK_FORMAT_R32_UINT;
      break;

   default:
      info.format = surface->vk_format;
      break;
   }

   /* Transfers always sample a single, non-mipmapped level. */
   info.flags = 0U;
   info.base_level = 0U;
   info.mip_levels = 1U;
   info.mipmaps_present = false;
   info.sample_count = MAX2(surface->sample_count, 1U);

   /* Only 3D-twiddled surfaces carry a real depth; 0 marks a 2D view. */
   if (surface->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
      info.extent.depth = surface->depth;
   else
      info.extent.depth = 0U;

   if (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
      info.array_size = 0U;

   /* Resolve width/height/stride/layout/address for this load. */
   result = pvr_mem_layout_spec(surface,
                                load,
                                true,
                                &info.extent.width,
                                &info.extent.height,
                                &info.stride,
                                &info.mem_layout,
                                &info.addr);
   if (result != VK_SUCCESS)
      return result;

   /* When the custom mapping extends destination texels, shrink the apparent
    * width/stride back so the sampled extent matches the logical one.
    */
   if (state->custom_mapping.texel_extend_dst > 1U) {
      info.extent.width /= state->custom_mapping.texel_extend_dst;
      info.stride /= state->custom_mapping.texel_extend_dst;
   }

   info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
   memcpy(info.swizzle,
          pvr_get_format_swizzle(info.format),
          sizeof(info.swizzle));

   /* Stencil-only reads: route stencil through the red channel, zero the
    * remaining channels.
    */
   if (surface->vk_format == VK_FORMAT_S8_UINT) {
      info.swizzle[0U] = PIPE_SWIZZLE_X;
      info.swizzle[1U] = PIPE_SWIZZLE_0;
      info.swizzle[2U] = PIPE_SWIZZLE_0;
      info.swizzle[3U] = PIPE_SWIZZLE_0;
   }

   /* Derive the view dimensionality from the resolved extent. */
   if (info.extent.depth > 0U)
      info.type = VK_IMAGE_VIEW_TYPE_3D;
   else if (info.extent.height > 1U)
      info.type = VK_IMAGE_VIEW_TYPE_2D;
   else
      info.type = VK_IMAGE_VIEW_TYPE_1D;

   result = pvr_pack_tex_state(device, &info, mem_ptr);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}
1615 
pvr_image_state_for_surface(const struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const struct pvr_transfer_cmd_surface * surface,uint32_t load,uint32_t source,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,struct pvr_transfer_3d_state * state,uint32_t uf_image,uint32_t * mem_ptr)1616 static VkResult pvr_image_state_for_surface(
1617    const struct pvr_transfer_ctx *ctx,
1618    const struct pvr_transfer_cmd *transfer_cmd,
1619    const struct pvr_transfer_cmd_surface *surface,
1620    uint32_t load,
1621    uint32_t source,
1622    const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1623    struct pvr_transfer_3d_state *state,
1624    uint32_t uf_image,
1625    uint32_t *mem_ptr)
1626 {
1627    uint32_t tex_state[ROGUE_MAXIMUM_IMAGE_STATE_SIZE] = { 0U };
1628    VkResult result;
1629    uint8_t offset;
1630 
1631    result = pvr_image_state_set_codegen_defaults(ctx->device,
1632                                                  state,
1633                                                  surface,
1634                                                  load,
1635                                                  (uint64_t *)tex_state);
1636    if (result != VK_SUCCESS)
1637       return result;
1638 
1639    assert(uf_image < PVR_TRANSFER_MAX_IMAGES);
1640 
1641    /* Offset of the shared registers containing the hardware image state. */
1642    assert(uf_image < sh_reg_layout->combined_image_samplers.count);
1643    offset = sh_reg_layout->combined_image_samplers.offsets[uf_image].image;
1644 
1645    /* Copy the image state to the buffer which is loaded into the shared
1646     * registers.
1647     */
1648    memcpy(mem_ptr + offset, tex_state, sizeof(tex_state));
1649 
1650    return VK_SUCCESS;
1651 }
1652 
/* Writes the texture state/sampler state into DMAed memory.
 *
 * Walks every source of the transfer and, for each load its pixel program
 * performs, picks the surface/filter to sample and emits one sampler-state
 * and one image-state entry into \p mem_ptr. uf_sampler/uf_image index the
 * combined image-sampler slots in \p sh_reg_layout and advance in lock-step
 * with the emitted states.
 */
static VkResult
pvr_sampler_image_state(struct pvr_transfer_ctx *ctx,
                        const struct pvr_transfer_cmd *transfer_cmd,
                        const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                        struct pvr_transfer_3d_state *state,
                        uint32_t *mem_ptr)
{
   /* Nothing is sampled when the destination is empty. */
   if (!state->empty_dst) {
      uint32_t uf_sampler = 0U;
      uint32_t uf_image = 0U;

      for (uint32_t source = 0; source < transfer_cmd->source_count; source++) {
         struct pvr_tq_layer_properties *layer =
            &state->shader_props.layer_props;
         uint32_t max_load = pvr_pbe_pixel_num_loads(layer->pbe_format);

         for (uint32_t load = 0U; load < max_load; load++) {
            const struct pvr_transfer_cmd_surface *surface;
            enum pvr_filter filter;
            VkResult result;

            switch (layer->pbe_format) {
            /* Merge/conversion formats: load 0 reads the source surface,
             * any further load re-reads the destination.
             */
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
            case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
            case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
               if (load > 0U) {
                  surface = &transfer_cmd->dst;
                  filter = transfer_cmd->sources[source].filter;
               } else {
                  surface = &transfer_cmd->sources[source].surface;
                  filter = state->filter[source];
               }
               break;

            /* YUV formats: every load reads the same source surface. */
            case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
            case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
               surface = &transfer_cmd->sources[source].surface;
               filter = state->filter[source];
               break;

            /* Otherwise consecutive loads read consecutive sources. */
            default:
               surface = &transfer_cmd->sources[source + load].surface;
               filter = state->filter[source + load];
               break;
            }

            /* Some formats need fewer sampler/image states than loads; only
             * emit states for the first N loads.
             */
            if (load < pvr_int_pbe_pixel_num_sampler_and_image_states(
                          layer->pbe_format)) {
               const struct pvr_device_info *dev_info =
                  &transfer_cmd->cmd_buffer->device->pdevice->dev_info;

               result = pvr_sampler_state_for_surface(dev_info,
                                                      surface,
                                                      filter,
                                                      sh_reg_layout,
                                                      uf_sampler,
                                                      mem_ptr);
               if (result != VK_SUCCESS)
                  return result;

               uf_sampler++;

               result = pvr_image_state_for_surface(ctx,
                                                    transfer_cmd,
                                                    surface,
                                                    load,
                                                    source,
                                                    sh_reg_layout,
                                                    state,
                                                    uf_image,
                                                    mem_ptr);
               if (result != VK_SUCCESS)
                  return result;

               uf_image++;
            }
         }
      }
   }

   return VK_SUCCESS;
}
1743 
1744 /* The returned offset is in dwords. */
pvr_dynamic_const_reg_advance(const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,struct pvr_transfer_3d_state * state)1745 static inline uint32_t pvr_dynamic_const_reg_advance(
1746    const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
1747    struct pvr_transfer_3d_state *state)
1748 {
1749    const uint32_t offset = sh_reg_layout->dynamic_consts.offset;
1750 
1751    assert(state->dynamic_const_reg_ptr < sh_reg_layout->dynamic_consts.count);
1752 
1753    return offset + state->dynamic_const_reg_ptr++;
1754 }
1755 
/** Scales coefficients for sampling. (non normalized).
 *
 * Emits the per-axis linear mapping (scale and offset) from destination
 * pixel coordinates to source texel coordinates into consecutive dynamic
 * const registers, based on the first source's first rect mapping. With
 * FLOATS_6 two extra clamp values (the smaller dst edge per axis) are also
 * emitted. The register write order must match what the generated shader
 * expects.
 */
static inline void
pvr_dma_texture_floats(const struct pvr_transfer_cmd *transfer_cmd,
                       struct pvr_transfer_3d_state *state,
                       const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                       uint32_t *mem_ptr)

{
   if (transfer_cmd->source_count > 0) {
      struct pvr_tq_layer_properties *layer = &state->shader_props.layer_props;
      const struct pvr_rect_mapping *mapping =
         &transfer_cmd->sources[0].mappings[0U];
      VkRect2D src_rect = mapping->src_rect;
      VkRect2D dst_rect = mapping->dst_rect;

      switch (layer->layer_floats) {
      case PVR_INT_COORD_SET_FLOATS_0:
         break;

      case PVR_INT_COORD_SET_FLOATS_6:
      case PVR_INT_COORD_SET_FLOATS_4: {
         int32_t consts[2U] = { 0U, 0U };
         int32_t denom[2U] = { 0U, 0U };
         int32_t nums[2U] = { 0U, 0U };
         int32_t src_x, dst_x;
         int32_t src_y, dst_y;
         float offset = 0.0f;
         float tmp;

         /* Flips are expressed as a negative destination extent. */
         dst_x = mapping->flip_x ? -(int32_t)dst_rect.extent.width
                                 : dst_rect.extent.width;
         dst_y = mapping->flip_y ? -(int32_t)dst_rect.extent.height
                                 : dst_rect.extent.height;
         src_x = src_rect.extent.width;
         src_y = src_rect.extent.height;

         /* Per axis: scale = src_extent / dst_extent, and a constant term
          * so that the dst rect origin maps onto the src rect origin.
          */
         nums[0U] = src_x;
         denom[0U] = dst_x;
         consts[0U] =
            mapping->flip_x
               ? src_rect.offset.x * dst_x -
                    src_x * (dst_rect.offset.x + dst_rect.extent.width)
               : src_rect.offset.x * dst_x - src_x * dst_rect.offset.x;
         nums[1U] = src_y;
         denom[1U] = dst_y;
         consts[1U] =
            mapping->flip_y
               ? src_rect.offset.y * dst_y -
                    src_y * (dst_rect.offset.y + dst_rect.extent.height)
               : src_rect.offset.y * dst_y - src_y * dst_rect.offset.y;

         /* Emit scale then offset for X, then for Y. */
         for (uint32_t i = 0U; i < 2U; i++) {
            tmp = (float)(nums[i]) / (float)(denom[i]);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);

            tmp = ((float)(consts[i]) + (i == 1U ? offset : 0.0f)) /
                  (float)(denom[i]);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);
         }

         /* FLOATS_6 additionally supplies the lower dst edge per axis
          * (MIN handles the flipped, negative-extent case).
          */
         if (layer->layer_floats == PVR_INT_COORD_SET_FLOATS_6) {
            tmp = (float)MIN2(dst_rect.offset.x, dst_rect.offset.x + dst_x);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);

            tmp = (float)MIN2(dst_rect.offset.y, dst_rect.offset.y + dst_y);
            mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
               fui(tmp);
         }
         break;
      }

      default:
         unreachable("Unknown COORD_SET_FLOATS.");
         break;
      }
   }
}
1836 
pvr_int_pbe_pixel_requires_usc_filter(const struct pvr_device_info * dev_info,enum pvr_transfer_pbe_pixel_src pixel_format)1837 static bool pvr_int_pbe_pixel_requires_usc_filter(
1838    const struct pvr_device_info *dev_info,
1839    enum pvr_transfer_pbe_pixel_src pixel_format)
1840 {
1841    switch (pixel_format) {
1842    case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
1843    case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
1844    case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
1845    case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
1846    case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
1847    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
1848    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
1849       return true;
1850    case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
1851       return !PVR_HAS_FEATURE(dev_info, pbe_filterable_f16);
1852    default:
1853       return false;
1854    }
1855 }
1856 
/**
 * Sets up the MSAA related bits in the operation
 *
 * TPU sample count is read directly from transfer_cmd in the TPU code. An MSAA
 * src can be read from sample rate or instance rate shaders as long as the
 * sample count is set on the TPU. If a layer is single sample we expect the
 * same sample replicated in full rate shaders. If the layer is multi sample,
 * instance rate shaders are used to emulate the filter or to select the
 * specified sample. The sample number is static in the programs.
 */
static VkResult pvr_msaa_state(const struct pvr_device_info *dev_info,
                               const struct pvr_transfer_cmd *transfer_cmd,
                               struct pvr_transfer_3d_state *state,
                               uint32_t source)
{
   struct pvr_tq_shader_properties *shader_props = &state->shader_props;
   struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
   struct pvr_winsys_transfer_regs *const regs = &state->regs;
   /* Masking off bit 0 turns a sample count of 1 into 0, so "0" below means
    * single-sampled on both source and destination.
    */
   uint32_t src_sample_count =
      transfer_cmd->sources[source].surface.sample_count & ~1U;
   uint32_t dst_sample_count = transfer_cmd->dst.sample_count & ~1U;
   uint32_t bsample_count = 0U;

   /* Defaults: single-sample, instance rate, no downscale. */
   shader_props->full_rate = false;
   state->msaa_multiplier = 1U;
   state->down_scale = false;

   /* Pack CR_ISP_AA with no fields set, i.e. its default (no AA). */
   /* clang-format off */
   pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
   /* clang-format on */

   layer->sample_count = 1U;
   layer->resolve_op = PVR_RESOLVE_BLEND;

   bsample_count |= src_sample_count | dst_sample_count;

   if (bsample_count > PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 0U))
      return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);

   /* Shouldn't get two distinct bits set (implies different sample counts).
    * The reason being the rate at which the shader runs has to match.
    */
   if ((bsample_count & (bsample_count - 1U)) != 0U)
      return vk_error(transfer_cmd->cmd_buffer, VK_ERROR_FORMAT_NOT_SUPPORTED);

   if (src_sample_count == 0U && dst_sample_count == 0U) {
      /* S -> S (no MSAA involved). */
      layer->msaa = false;
   } else if (src_sample_count != 0U && dst_sample_count == 0U) {
      /* M -> S (resolve). */
      layer->resolve_op = transfer_cmd->sources[source].resolve_op;

      /* PVR_RESOLVE_SAMPLE<n> ops beyond the source's sample count would
       * select a non-existent sample.
       */
      if ((uint32_t)layer->resolve_op >=
          (src_sample_count + (uint32_t)PVR_RESOLVE_SAMPLE0)) {
         return vk_error(transfer_cmd->cmd_buffer,
                         VK_ERROR_FORMAT_NOT_SUPPORTED);
      }

      layer->msaa = true;

      switch (layer->resolve_op) {
      case PVR_RESOLVE_MIN:
      case PVR_RESOLVE_MAX:
         /* MIN/MAX resolves are only supported for depth/stencil formats,
          * and only when src and dst formats match exactly.
          */
         switch (transfer_cmd->sources[source].surface.vk_format) {
         case VK_FORMAT_D32_SFLOAT:
         case VK_FORMAT_D16_UNORM:
         case VK_FORMAT_S8_UINT:
         case VK_FORMAT_D24_UNORM_S8_UINT:
         case VK_FORMAT_X8_D24_UNORM_PACK32:
            if (transfer_cmd->sources[source].surface.vk_format !=
                transfer_cmd->dst.vk_format) {
               return vk_error(transfer_cmd->cmd_buffer,
                               VK_ERROR_FORMAT_NOT_SUPPORTED);
            }
            break;

         default:
            return vk_error(transfer_cmd->cmd_buffer,
                            VK_ERROR_FORMAT_NOT_SUPPORTED);
         }

         /* Instance rate. */
         layer->sample_count = src_sample_count;
         state->shader_props.full_rate = false;
         break;

      case PVR_RESOLVE_BLEND:
         if (pvr_int_pbe_pixel_requires_usc_filter(dev_info,
                                                   layer->pbe_format)) {
            /* Instance rate. */
            layer->sample_count = src_sample_count;
            state->shader_props.full_rate = false;
         } else {
            /* Sample rate. */
            state->shader_props.full_rate = true;
            state->msaa_multiplier = src_sample_count;
            state->down_scale = true;

            pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
               reg.mode = pvr_cr_isp_aa_mode_type(src_sample_count);
            }
         }
         break;

      default:
         /* Shader doesn't have to know the number of samples. It's enough
          * if the TPU knows, and the shader sets the right sno (given to the
          * shader in resolve_op).
          */
         state->shader_props.full_rate = false;
         break;
      }
   } else {
      /* Destination is multisampled: rasterize at dst sample rate. */
      state->msaa_multiplier = dst_sample_count;

      pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
         reg.mode = pvr_cr_isp_aa_mode_type(dst_sample_count);
      }

      if (src_sample_count == 0U && dst_sample_count != 0U) {
         /* S -> M (replicate samples) */
         layer->msaa = false;
         state->shader_props.full_rate = !state->shader_props.iterated;
      } else {
         /* M -> M (sample to sample) */
         layer->msaa = true;
         state->shader_props.full_rate = true;
      }
   }

   return VK_SUCCESS;
}
1989 
/* Reports whether linear filtering of this format must be emulated in the
 * USC shader (32-bit float color and packed depth/stencil formats).
 */
static bool pvr_requires_usc_linear_filter(VkFormat format)
{
   bool requires_usc_filter;

   switch (format) {
   case VK_FORMAT_R32_SFLOAT:
   case VK_FORMAT_R32G32_SFLOAT:
   case VK_FORMAT_R32G32B32_SFLOAT:
   case VK_FORMAT_R32G32B32A32_SFLOAT:
   case VK_FORMAT_D32_SFLOAT:
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      requires_usc_filter = true;
      break;

   default:
      requires_usc_filter = false;
      break;
   }

   return requires_usc_filter;
}
2005 
2006 static inline bool
pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,bool sample,bool msaa,bool full_rate)2007 pvr_int_pbe_usc_linear_filter(enum pvr_transfer_pbe_pixel_src pbe_format,
2008                               bool sample,
2009                               bool msaa,
2010                               bool full_rate)
2011 {
2012    if (sample || msaa || full_rate)
2013       return false;
2014 
2015    switch (pbe_format) {
2016    case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
2017    case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
2018    case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
2019    case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
2020    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
2021    case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
2022       return true;
2023    default:
2024       return false;
2025    }
2026 }
2027 
pvr_pick_component_needed(const struct pvr_transfer_custom_mapping * custom_mapping)2028 static inline bool pvr_pick_component_needed(
2029    const struct pvr_transfer_custom_mapping *custom_mapping)
2030 {
2031    return custom_mapping->pass_count > 0U &&
2032           custom_mapping->texel_extend_dst > 1U &&
2033           custom_mapping->texel_extend_src <= 1U;
2034 }
2035 
2036 /** Writes the shader related constants into the DMA space. */
2037 static void
pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,uint32_t * dma_space)2038 pvr_write_usc_constants(const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2039                         uint32_t *dma_space)
2040 {
2041    const uint32_t reg = sh_reg_layout->driver_total;
2042    const uint32_t consts_count =
2043       sh_reg_layout->compiler_out.usc_constants.count;
2044 
2045    /* If not we likely need to write more consts. */
2046    assert(consts_count == sh_reg_layout->compiler_out_total);
2047 
2048    /* Append the usc consts after the driver allocated regs. */
2049    for (uint32_t i = 0U; i < consts_count; i++)
2050       dma_space[reg + i] = sh_reg_layout->compiler_out.usc_constants.values[i];
2051 }
2052 
2053 static inline void
pvr_dma_texel_unwind(struct pvr_transfer_3d_state * state,const struct pvr_tq_frag_sh_reg_layout * sh_reg_layout,uint32_t * mem_ptr)2054 pvr_dma_texel_unwind(struct pvr_transfer_3d_state *state,
2055                      const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
2056                      uint32_t *mem_ptr)
2057 
2058 {
2059    const uint32_t coord_sample_mask =
2060       state->custom_mapping.texel_extend_dst - 1U;
2061 
2062    mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2063       coord_sample_mask;
2064    mem_ptr[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2065       state->custom_mapping.texel_unwind_dst;
2066 }
2067 
/** Writes the Uniform/Texture state data segments + the UniTex code.
 *
 * Sizes and generates the PDS texture-state data segment for \p program,
 * allocates it from the PDS heap via the command buffer, and records the
 * resulting offsets/sizes in prep_data->state. The code segment itself is
 * pre-uploaded per (texture kicks, uniform kicks) pair in
 * ctx->pds_unitex_code; only its offset is recorded here.
 */
static inline VkResult
pvr_pds_unitex(const struct pvr_device_info *dev_info,
               struct pvr_transfer_ctx *ctx,
               const struct pvr_transfer_cmd *transfer_cmd,
               struct pvr_pds_pixel_shader_sa_program *program,
               struct pvr_transfer_prep_data *prep_data)
{
   struct pvr_pds_upload *unitex_code =
      &ctx->pds_unitex_code[program->num_texture_dma_kicks]
                           [program->num_uniform_dma_kicks];
   struct pvr_transfer_3d_state *state = &prep_data->state;
   struct pvr_suballoc_bo *pvr_bo;
   VkResult result;
   void *map;

   /* Uniform program is not used. */
   assert(program->num_uniform_dma_kicks == 0U);

   /* No texture DMAs: nothing to allocate or generate. */
   if (program->num_texture_dma_kicks == 0U) {
      state->uniform_data_size = 0U;
      state->tex_state_data_size = 0U;
      state->tex_state_data_offset = 0U;
      state->uni_tex_code_offset = 0U;

      return VK_SUCCESS;
   }

   pvr_pds_set_sizes_pixel_shader_sa_uniform_data(program, dev_info);
   assert(program->data_size == 0U);
   state->uniform_data_size = 0U;

   /* Size the texture-state data segment, aligned to the hardware's
    * texture-state size unit.
    */
   pvr_pds_set_sizes_pixel_shader_sa_texture_data(program, dev_info);
   state->tex_state_data_size =
      ALIGN_POT(program->data_size,
                PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE));

   result =
      pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                               ctx->device->heaps.pds_heap,
                               PVR_DW_TO_BYTES(state->tex_state_data_size),
                               &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Offsets are relative to the PDS heap base. */
   state->tex_state_data_offset =
      pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;

   map = pvr_bo_suballoc_get_map_addr(pvr_bo);
   pvr_pds_generate_pixel_shader_sa_texture_state_data(program, map, dev_info);

   /* Save the dev_addr and size in the 3D state. */
   state->uni_tex_code_offset = unitex_code->code_offset;
   state->pds_temps = program->temps_used;

   return VK_SUCCESS;
}
2125 
/** Converts a float in range 0 to 1 to an N-bit fixed-point integer.
 *
 * \param value Value to convert; clamped to [0, 1]. NaN/Inf map to the max.
 * \param bits  Width of the fixed-point result; must be 1..32.
 * \return Rounded (half-up) unsigned fixed-point encoding of \p value.
 */
static uint32_t pvr_float_to_ufixed(float value, uint32_t bits)
{
   uint32_t max;

   assert(bits >= 1U && bits <= 32U);

   /* Compute the mask in 64 bits: "1U << bits" would be undefined behaviour
    * for bits == 32 (shift count equal to the type width).
    */
   max = (uint32_t)((UINT64_C(1) << bits) - 1U);

   /* NaN and Inf and overflow. */
   if (util_is_inf_or_nan(value) || value >= 1.0f)
      return max;
   else if (value < 0.0f)
      return 0U;

   /* Normalise. */
   value = value * (float)max;

   /* Cast to double so that we can accurately represent the sum for N > 23. */
   return (uint32_t)floor((double)value + 0.5f);
}
2143 
2144 /** Converts a float in range -1 to 1 to a signed N-bit fixed-point integer. */
pvr_float_to_sfixed(float value,uint32_t N)2145 static uint32_t pvr_float_to_sfixed(float value, uint32_t N)
2146 {
2147    int32_t max = (1 << (N - 1)) - 1;
2148    int32_t min = 0 - (1 << (N - 1));
2149    union fi x;
2150 
2151    /* NaN and Inf and overflow. */
2152    if (util_is_inf_or_nan(value) || value >= 1.0f)
2153       return (uint32_t)max;
2154    else if (value == 0.0f)
2155       return 0U;
2156    else if (value <= -1.0f)
2157       return (uint32_t)min;
2158 
2159    /* Normalise. */
2160    value *= (float)max;
2161 
2162    /* Cast to double so that we can accurately represent the sum for N > 23. */
2163    if (value > 0.0f)
2164       x.i = (int32_t)floor((double)value + 0.5f);
2165    else
2166       x.i = (int32_t)floor((double)value - 0.5f);
2167 
2168    return x.ui;
2169 }
2170 
/** Convert a value in IEEE single precision format to 16-bit floating point
 * format.
 *
 * Handles sign, clamps to the largest representable f16 magnitude, and
 * denormalises the mantissa when the rebiased exponent underflows.
 */
/* TODO: See if we can use _mesa_float_to_float16_rtz_slow() instead. */
static uint16_t pvr_float_to_f16(float value, bool round_to_even)
{
   uint32_t input_value;
   uint32_t exponent;
   uint32_t mantissa;
   uint16_t output;

   /* 0.0f can be exactly expressed in binary using IEEE float format. */
   if (value == 0.0f)
      return 0U;

   /* Record the sign bit and continue with the magnitude. */
   if (value < 0U) {
      output = 0x8000;
      value = -value;
   } else {
      output = 0U;
   }

   /* 2^16 * (2 - 1/1024) = highest f16 representable value. */
   value = MIN2(value, 131008);
   input_value = fui(value);

   /* Extract the exponent and mantissa. */
   exponent = util_get_float32_exponent(value) + 15;
   mantissa = input_value & ((1 << 23) - 1);

   /* If the exponent is outside the supported range then denormalise the
    * mantissa.
    */
   if ((int32_t)exponent <= 0) {
      uint32_t shift;

      /* Make the leading 1 explicit before shifting it into the denormal. */
      mantissa |= (1 << 23);
      exponent = input_value >> 23;
      shift = -14 + 127 - exponent;

      if (shift < 24)
         mantissa >>= shift;
      else
         mantissa = 0;
   } else {
      output = (uint16_t)(output | ((exponent << 10) & 0x7C00));
   }

   /* Keep the top 10 mantissa bits. */
   output = (uint16_t)(output | (((mantissa >> 13) << 0) & 0x03FF));

   if (round_to_even) {
      /* Round to nearest even. */
      /* NOTE(review): this tests the parity of the float's integer part
       * rather than the LSB of the truncated mantissa, which doesn't match
       * the usual round-to-nearest-even formulation — confirm intent.
       */
      if ((((int)value) % 2 != 0) && (((1 << 13) - 1) & mantissa))
         output++;
   } else {
      /* Round to nearest. */
      if (mantissa & (1 << 12))
         output++;
   }

   return output;
}
2233 
pvr_pack_clear_color(VkFormat format,const union fi color[static4],uint32_t pkd_color[static4])2234 static VkResult pvr_pack_clear_color(VkFormat format,
2235                                      const union fi color[static 4],
2236                                      uint32_t pkd_color[static 4])
2237 {
2238    const uint32_t red_width =
2239       vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0U);
2240    uint32_t pbe_pack_mode = pvr_get_pbe_packmode(format);
2241    const bool pbe_norm = pvr_vk_format_is_fully_normalized(format);
2242 
2243    if (pbe_pack_mode == PVRX(PBESTATE_PACKMODE_INVALID))
2244       return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2245 
2246    /* Set packed color based on PBE pack mode and PBE norm. */
2247    switch (pbe_pack_mode) {
2248    case PVRX(PBESTATE_PACKMODE_U8U8U8U8):
2249    case PVRX(PBESTATE_PACKMODE_A8R3G3B2):
2250       if (pbe_norm) {
2251          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2252          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2253          pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2254          pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 8) & 0xFFU) << 24;
2255       } else {
2256          pkd_color[0] = color[0].ui & 0xFFU;
2257          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2258          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2259          pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2260       }
2261       break;
2262 
2263    case PVRX(PBESTATE_PACKMODE_S8S8S8S8):
2264    case PVRX(PBESTATE_PACKMODE_X8U8S8S8):
2265    case PVRX(PBESTATE_PACKMODE_X8S8S8U8):
2266       if (pbe_norm) {
2267          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2268          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2269          pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2270          pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2271       } else {
2272          pkd_color[0] = color[0].ui & 0xFFU;
2273          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2274          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2275          pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2276       }
2277       break;
2278 
2279    case PVRX(PBESTATE_PACKMODE_U16U16U16U16):
2280       if (pbe_norm) {
2281          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2282          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2283          pkd_color[1] = pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU;
2284          pkd_color[1] |= (pvr_float_to_ufixed(color[3].f, 16) & 0xFFFFU) << 16;
2285       } else {
2286          pkd_color[0] = color[0].ui & 0xFFFFU;
2287          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2288          pkd_color[1] = color[2].ui & 0xFFFFU;
2289          pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2290       }
2291       break;
2292 
2293    case PVRX(PBESTATE_PACKMODE_S16S16S16S16):
2294       if (pbe_norm) {
2295          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2296          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2297          pkd_color[1] = (pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU);
2298          pkd_color[1] |= (pvr_float_to_sfixed(color[3].f, 16) & 0xFFFFU) << 16;
2299       } else {
2300          pkd_color[0] = color[0].ui & 0xFFFFU;
2301          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2302          pkd_color[1] = color[2].ui & 0xFFFFU;
2303          pkd_color[1] |= (color[3].ui & 0xFFFFU) << 16;
2304       }
2305       break;
2306 
2307    case PVRX(PBESTATE_PACKMODE_A2_XRBIAS_U10U10U10):
2308    case PVRX(PBESTATE_PACKMODE_ARGBV16_XR10):
2309    case PVRX(PBESTATE_PACKMODE_F16F16F16F16):
2310    case PVRX(PBESTATE_PACKMODE_A2R10B10G10):
2311    case PVRX(PBESTATE_PACKMODE_A4R4G4B4):
2312    case PVRX(PBESTATE_PACKMODE_A1R5G5B5):
2313    case PVRX(PBESTATE_PACKMODE_R5G5B5A1):
2314    case PVRX(PBESTATE_PACKMODE_R5G6B5):
2315       if (red_width > 0) {
2316          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2317          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2318          pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, false);
2319          pkd_color[1] |= (uint32_t)pvr_float_to_f16(color[3].f, false) << 16;
2320       } else {
2321          /* Swizzle only uses first channel for alpha formats. */
2322          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[3].f, false);
2323       }
2324       break;
2325 
2326    case PVRX(PBESTATE_PACKMODE_U32U32U32U32):
2327       pkd_color[0] = color[0].ui;
2328       pkd_color[1] = color[1].ui;
2329       pkd_color[2] = color[2].ui;
2330       pkd_color[3] = color[3].ui;
2331       break;
2332 
2333    case PVRX(PBESTATE_PACKMODE_S32S32S32S32):
2334       pkd_color[0] = (uint32_t)color[0].i;
2335       pkd_color[1] = (uint32_t)color[1].i;
2336       pkd_color[2] = (uint32_t)color[2].i;
2337       pkd_color[3] = (uint32_t)color[3].i;
2338       break;
2339 
2340    case PVRX(PBESTATE_PACKMODE_F32F32F32F32):
2341       memcpy(pkd_color, &color[0].f, 4U * sizeof(float));
2342       break;
2343 
2344    case PVRX(PBESTATE_PACKMODE_R10B10G10A2):
2345       if (pbe_norm) {
2346          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 10) & 0xFFU;
2347          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 10) & 0xFFU) << 10;
2348          pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 10) & 0xFFU) << 20;
2349          pkd_color[0] |= (pvr_float_to_ufixed(color[3].f, 2) & 0xFFU) << 30;
2350       } else if (format == VK_FORMAT_A2R10G10B10_UINT_PACK32) {
2351          pkd_color[0] = color[2].ui & 0x3FFU;
2352          pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2353          pkd_color[0] |= (color[0].ui & 0x3FFU) << 20;
2354          pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2355       } else {
2356          pkd_color[0] = color[0].ui & 0x3FFU;
2357          pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2358          pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2359          pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2360       }
2361 
2362       break;
2363 
2364    case PVRX(PBESTATE_PACKMODE_A2F10F10F10):
2365    case PVRX(PBESTATE_PACKMODE_F10F10F10A2):
2366       pkd_color[0] = pvr_float_to_sfixed(color[0].f, 10) & 0xFFU;
2367       pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 10) & 0xFFU) << 10;
2368       pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 10) & 0xFFU) << 20;
2369       pkd_color[0] |= (pvr_float_to_sfixed(color[3].f, 2) & 0xFFU) << 30;
2370       break;
2371 
2372    case PVRX(PBESTATE_PACKMODE_U8U8U8):
2373    case PVRX(PBESTATE_PACKMODE_R5SG5SB6):
2374       if (pbe_norm) {
2375          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 8) & 0xFFU;
2376          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 8) & 0xFFU) << 8;
2377          pkd_color[0] |= (pvr_float_to_ufixed(color[2].f, 8) & 0xFFU) << 16;
2378       } else {
2379          pkd_color[0] = color[0].ui & 0xFFU;
2380          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2381          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2382       }
2383       break;
2384 
2385    case PVRX(PBESTATE_PACKMODE_S8S8S8):
2386    case PVRX(PBESTATE_PACKMODE_B6G5SR5S):
2387       if (pbe_norm) {
2388          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 8) & 0xFFU;
2389          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 8) & 0xFFU) << 8;
2390          pkd_color[0] |= (pvr_float_to_sfixed(color[2].f, 8) & 0xFFU) << 16;
2391       } else {
2392          pkd_color[0] = color[0].ui & 0xFFU;
2393          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2394          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2395       }
2396       break;
2397 
2398    case PVRX(PBESTATE_PACKMODE_U16U16U16):
2399       if (pbe_norm) {
2400          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2401          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2402          pkd_color[1] = (pvr_float_to_ufixed(color[2].f, 16) & 0xFFFFU);
2403       } else {
2404          pkd_color[0] = color[0].ui & 0xFFFFU;
2405          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2406          pkd_color[1] = color[2].ui & 0xFFFFU;
2407       }
2408       break;
2409 
2410    case PVRX(PBESTATE_PACKMODE_S16S16S16):
2411       if (pbe_norm) {
2412          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2413          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2414          pkd_color[1] = pvr_float_to_sfixed(color[2].f, 16) & 0xFFFFU;
2415       } else {
2416          pkd_color[0] = color[0].ui & 0xFFFFU;
2417          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2418          pkd_color[1] = color[2].ui & 0xFFFFU;
2419       }
2420       break;
2421 
2422    case PVRX(PBESTATE_PACKMODE_F16F16F16):
2423    case PVRX(PBESTATE_PACKMODE_F11F11F10):
2424    case PVRX(PBESTATE_PACKMODE_F10F11F11):
2425    case PVRX(PBESTATE_PACKMODE_SE9995):
2426       pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2427       pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2428       pkd_color[1] = (uint32_t)pvr_float_to_f16(color[2].f, true);
2429       break;
2430 
2431    case PVRX(PBESTATE_PACKMODE_U32U32U32):
2432       pkd_color[0] = color[0].ui;
2433       pkd_color[1] = color[1].ui;
2434       pkd_color[2] = color[2].ui;
2435       break;
2436 
2437    case PVRX(PBESTATE_PACKMODE_S32S32S32):
2438       pkd_color[0] = (uint32_t)color[0].i;
2439       pkd_color[1] = (uint32_t)color[1].i;
2440       pkd_color[2] = (uint32_t)color[2].i;
2441       break;
2442 
2443    case PVRX(PBESTATE_PACKMODE_X24G8X32):
2444    case PVRX(PBESTATE_PACKMODE_U8X24):
2445       pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2446       break;
2447 
2448    case PVRX(PBESTATE_PACKMODE_F32F32F32):
2449       memcpy(pkd_color, &color[0].f, 3U * sizeof(float));
2450       break;
2451 
2452    case PVRX(PBESTATE_PACKMODE_U8U8):
2453       if (pbe_norm) {
2454          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2455          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2456       } else {
2457          pkd_color[0] = color[0].ui & 0xFFU;
2458          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2459       }
2460       break;
2461 
2462    case PVRX(PBESTATE_PACKMODE_S8S8):
2463       if (pbe_norm) {
2464          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2465          pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, false) << 16;
2466       } else {
2467          pkd_color[0] = color[0].ui & 0xFFU;
2468          pkd_color[0] |= (color[1].ui & 0xFFU) << 8;
2469          pkd_color[0] |= (color[2].ui & 0xFFU) << 16;
2470          pkd_color[0] |= (color[3].ui & 0xFFU) << 24;
2471       }
2472       break;
2473 
2474    case PVRX(PBESTATE_PACKMODE_U16U16):
2475       if (pbe_norm) {
2476          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2477          pkd_color[0] |= (pvr_float_to_ufixed(color[1].f, 16) & 0xFFFFU) << 16;
2478       } else {
2479          pkd_color[0] = color[0].ui & 0xFFFFU;
2480          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2481       }
2482       break;
2483 
2484    case PVRX(PBESTATE_PACKMODE_S16S16):
2485       if (pbe_norm) {
2486          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2487          pkd_color[0] |= (pvr_float_to_sfixed(color[1].f, 16) & 0xFFFFU) << 16;
2488       } else {
2489          pkd_color[0] = color[0].ui & 0xFFFFU;
2490          pkd_color[0] |= (color[1].ui & 0xFFFFU) << 16;
2491       }
2492       break;
2493 
2494    case PVRX(PBESTATE_PACKMODE_F16F16):
2495       pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2496       pkd_color[0] |= (uint32_t)pvr_float_to_f16(color[1].f, true) << 16;
2497       break;
2498 
2499    case PVRX(PBESTATE_PACKMODE_U32U32):
2500       pkd_color[0] = color[0].ui;
2501       pkd_color[1] = color[1].ui;
2502       break;
2503 
2504    case PVRX(PBESTATE_PACKMODE_S32S32):
2505       pkd_color[0] = (uint32_t)color[0].i;
2506       pkd_color[1] = (uint32_t)color[1].i;
2507       break;
2508 
2509    case PVRX(PBESTATE_PACKMODE_X24U8F32):
2510    case PVRX(PBESTATE_PACKMODE_X24X8F32):
2511       memcpy(pkd_color, &color[0].f, 1U * sizeof(float));
2512       pkd_color[1] = color[1].ui & 0xFFU;
2513       break;
2514 
2515    case PVRX(PBESTATE_PACKMODE_F32F32):
2516       memcpy(pkd_color, &color[0].f, 2U * sizeof(float));
2517       break;
2518 
2519    case PVRX(PBESTATE_PACKMODE_ST8U24):
2520       pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2521       pkd_color[0] |= color[1].ui << 24;
2522       break;
2523 
2524    case PVRX(PBESTATE_PACKMODE_U8):
2525       if (format == VK_FORMAT_S8_UINT)
2526          pkd_color[0] = color[1].ui & 0xFFU;
2527       else if (pbe_norm)
2528          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2529       else
2530          pkd_color[0] = color[0].ui & 0xFFU;
2531 
2532       break;
2533 
2534    case PVRX(PBESTATE_PACKMODE_S8):
2535       if (pbe_norm)
2536          pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, false);
2537       else
2538          pkd_color[0] = color[0].ui & 0xFFU;
2539       break;
2540 
2541    case PVRX(PBESTATE_PACKMODE_U16):
2542       if (pbe_norm)
2543          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 16) & 0xFFFFU;
2544       else
2545          pkd_color[0] = color[0].ui & 0xFFFFU;
2546       break;
2547 
2548    case PVRX(PBESTATE_PACKMODE_S16):
2549       if (pbe_norm)
2550          pkd_color[0] = pvr_float_to_sfixed(color[0].f, 16) & 0xFFFFU;
2551       else
2552          pkd_color[0] = color[0].ui & 0xFFFFU;
2553       break;
2554 
2555    case PVRX(PBESTATE_PACKMODE_F16):
2556       pkd_color[0] = (uint32_t)pvr_float_to_f16(color[0].f, true);
2557       break;
2558 
2559    /* U32 */
2560    case PVRX(PBESTATE_PACKMODE_U32):
2561       if (format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2562          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2563       } else if (format == VK_FORMAT_D24_UNORM_S8_UINT) {
2564          pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2565          pkd_color[0] |= (color[1].ui & 0xFFU) << 24;
2566       } else if (format == VK_FORMAT_A2B10G10R10_UINT_PACK32) {
2567          pkd_color[0] = color[0].ui & 0x3FFU;
2568          pkd_color[0] |= (color[1].ui & 0x3FFU) << 10;
2569          pkd_color[0] |= (color[2].ui & 0x3FFU) << 20;
2570          pkd_color[0] |= (color[3].ui & 0x3U) << 30;
2571       } else {
2572          pkd_color[0] = color[0].ui;
2573       }
2574       break;
2575 
2576    /* U24ST8 */
2577    case PVRX(PBESTATE_PACKMODE_U24ST8):
2578       pkd_color[1] = (color[1].ui & 0xFFU) << 24;
2579       pkd_color[1] |= pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2580       break;
2581 
2582    /* S32 */
2583    case PVRX(PBESTATE_PACKMODE_S32):
2584       pkd_color[0] = (uint32_t)color[0].i;
2585       break;
2586 
2587    /* F32 */
2588    case PVRX(PBESTATE_PACKMODE_F32):
2589       memcpy(pkd_color, &color[0].f, sizeof(float));
2590       break;
2591 
2592    /* X8U24 */
2593    case PVRX(PBESTATE_PACKMODE_X8U24):
2594       pkd_color[0] = pvr_float_to_ufixed(color[0].f, 24) & 0xFFFFFFU;
2595       break;
2596 
2597    default:
2598       break;
2599    }
2600 
2601    return VK_SUCCESS;
2602 }
2603 
2604 static VkResult
pvr_isp_scan_direction(struct pvr_transfer_cmd * transfer_cmd,bool custom_mapping,enum PVRX (CR_DIR_TYPE)* const dir_type_out)2605 pvr_isp_scan_direction(struct pvr_transfer_cmd *transfer_cmd,
2606                        bool custom_mapping,
2607                        enum PVRX(CR_DIR_TYPE) *const dir_type_out)
2608 {
2609    pvr_dev_addr_t dst_dev_addr = transfer_cmd->dst.dev_addr;
2610    bool backwards_in_x = false;
2611    bool backwards_in_y = false;
2612    bool done_dest_rect = false;
2613    VkRect2D dst_rect;
2614    int32_t dst_x1;
2615    int32_t dst_y1;
2616 
2617    for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
2618       struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];
2619       pvr_dev_addr_t src_dev_addr = src->surface.dev_addr;
2620 
2621       if (src_dev_addr.addr == dst_dev_addr.addr && !custom_mapping) {
2622          VkRect2D *src_rect = &src->mappings[0].src_rect;
2623          int32_t src_x1 = src_rect->offset.x + src_rect->extent.width;
2624          int32_t src_y1 = src_rect->offset.y + src_rect->extent.height;
2625 
2626          if (!done_dest_rect) {
2627             dst_rect = src->mappings[0].dst_rect;
2628 
2629             dst_x1 = dst_rect.offset.x + dst_rect.extent.width;
2630             dst_y1 = dst_rect.offset.y + dst_rect.extent.height;
2631 
2632             done_dest_rect = true;
2633          }
2634 
2635          if ((dst_rect.offset.x < src_x1 && dst_x1 > src_rect->offset.x) &&
2636              (dst_rect.offset.y < src_y1 && dst_y1 > src_rect->offset.y)) {
2637             if (src_rect->extent.width != dst_rect.extent.width ||
2638                 src_rect->extent.height != dst_rect.extent.height) {
2639                /* Scaling is not possible. */
2640                return vk_error(NULL, VK_ERROR_FORMAT_NOT_SUPPORTED);
2641             }
2642 
2643             /* Direction is to the right. */
2644             backwards_in_x = dst_rect.offset.x > src_rect->offset.x;
2645 
2646             /* Direction is to the bottom. */
2647             backwards_in_y = dst_rect.offset.y > src_rect->offset.y;
2648          }
2649       }
2650    }
2651 
2652    if (backwards_in_x) {
2653       if (backwards_in_y)
2654          *dir_type_out = PVRX(CR_DIR_TYPE_BR2TL);
2655       else
2656          *dir_type_out = PVRX(CR_DIR_TYPE_TR2BL);
2657    } else {
2658       if (backwards_in_y)
2659          *dir_type_out = PVRX(CR_DIR_TYPE_BL2TR);
2660       else
2661          *dir_type_out = PVRX(CR_DIR_TYPE_TL2BR);
2662    }
2663 
2664    return VK_SUCCESS;
2665 }
2666 
pvr_3d_copy_blit_core(struct pvr_transfer_ctx * ctx,struct pvr_transfer_cmd * transfer_cmd,struct pvr_transfer_prep_data * prep_data,uint32_t pass_idx,bool * finished_out)2667 static VkResult pvr_3d_copy_blit_core(struct pvr_transfer_ctx *ctx,
2668                                       struct pvr_transfer_cmd *transfer_cmd,
2669                                       struct pvr_transfer_prep_data *prep_data,
2670                                       uint32_t pass_idx,
2671                                       bool *finished_out)
2672 {
2673    struct pvr_transfer_3d_state *const state = &prep_data->state;
2674    struct pvr_winsys_transfer_regs *const regs = &state->regs;
2675    struct pvr_device *const device = ctx->device;
2676    const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
2677 
2678    VkResult result;
2679 
2680    *finished_out = true;
2681 
2682    state->common_ptr = 0U;
2683    state->dynamic_const_reg_ptr = 0U;
2684    state->usc_const_reg_ptr = 0U;
2685 
2686    if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U) {
2687       uint32_t packed_color[4U] = { 0U };
2688 
2689       if (transfer_cmd->source_count != 0U)
2690          return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
2691 
2692       if (vk_format_is_compressed(transfer_cmd->dst.vk_format))
2693          return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
2694 
2695       /* No shader. */
2696       state->pds_temps = 0U;
2697       state->uniform_data_size = 0U;
2698       state->tex_state_data_size = 0U;
2699 
2700       /* No background enabled. */
2701       /* clang-format off */
2702       pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
2703       /* clang-format on */
2704       pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
2705          reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
2706       }
2707 
2708       result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
2709                                     transfer_cmd->clear_color,
2710                                     packed_color);
2711       if (result != VK_SUCCESS)
2712          return result;
2713 
2714       pvr_csb_pack (&regs->usc_clear_register0, CR_USC_CLEAR_REGISTER, reg) {
2715          reg.val = packed_color[0U];
2716       }
2717 
2718       pvr_csb_pack (&regs->usc_clear_register1, CR_USC_CLEAR_REGISTER, reg) {
2719          reg.val = packed_color[1U];
2720       }
2721 
2722       pvr_csb_pack (&regs->usc_clear_register2, CR_USC_CLEAR_REGISTER, reg) {
2723          reg.val = packed_color[2U];
2724       }
2725 
2726       pvr_csb_pack (&regs->usc_clear_register3, CR_USC_CLEAR_REGISTER, reg) {
2727          reg.val = packed_color[3U];
2728       }
2729 
2730       state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
2731       state->pds_shader_task_offset = 0U;
2732       state->uni_tex_code_offset = 0U;
2733       state->tex_state_data_offset = 0U;
2734    } else if (transfer_cmd->source_count > 0U) {
2735       const struct pvr_tq_frag_sh_reg_layout nop_sh_reg_layout = {
2736          /* TODO: Setting this to 1 so that we don't try to pvr_bo_alloc() with
2737           * zero size. The device will ignore the PDS program if USC_SHAREDSIZE
2738           * is zero and in the case of the nop shader we're expecting it to be
2739           * zero. See if we can safely pass PVR_DEV_ADDR_INVALID for the unitex
2740           * program.
2741           */
2742          .driver_total = 1,
2743       };
2744       const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
2745       struct pvr_pds_pixel_shader_sa_program unitex_prog = { 0U };
2746       uint32_t tex_state_dma_size_dw;
2747       struct pvr_suballoc_bo *pvr_bo;
2748       uint32_t *dma_space;
2749 
2750       result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
2751       if (result != VK_SUCCESS)
2752          return result;
2753 
2754       pvr_uv_space(dev_info, transfer_cmd, state);
2755 
2756       state->shader_props.iterated = false;
2757 
2758       state->shader_props.layer_props.sample =
2759          transfer_cmd->sources[0].surface.mem_layout ==
2760          PVR_MEMLAYOUT_3DTWIDDLED;
2761 
2762       result = pvr_msaa_state(dev_info, transfer_cmd, state, 0);
2763       if (result != VK_SUCCESS)
2764          return result;
2765 
2766       state->shader_props.pick_component =
2767          pvr_pick_component_needed(&state->custom_mapping);
2768 
2769       if (state->filter[0] == PVR_FILTER_LINEAR &&
2770           pvr_requires_usc_linear_filter(
2771              transfer_cmd->sources[0].surface.vk_format)) {
2772          if (pvr_int_pbe_usc_linear_filter(
2773                 state->shader_props.layer_props.pbe_format,
2774                 state->shader_props.layer_props.sample,
2775                 state->shader_props.layer_props.msaa,
2776                 state->shader_props.full_rate)) {
2777             state->shader_props.layer_props.linear = true;
2778          } else {
2779             mesa_logw("Transfer: F32 linear filter not supported.");
2780          }
2781       }
2782 
2783       if (state->empty_dst) {
2784          sh_reg_layout = &nop_sh_reg_layout;
2785          state->pds_shader_task_offset = device->nop_program.pds.data_offset;
2786       } else {
2787          pvr_dev_addr_t kick_usc_pds_dev_addr;
2788 
2789          result =
2790             pvr_transfer_frag_store_get_shader_info(device,
2791                                                     &ctx->frag_store,
2792                                                     &state->shader_props,
2793                                                     &kick_usc_pds_dev_addr,
2794                                                     &sh_reg_layout);
2795          if (result != VK_SUCCESS)
2796             return result;
2797 
2798          assert(kick_usc_pds_dev_addr.addr <= UINT32_MAX);
2799          state->pds_shader_task_offset = (uint32_t)kick_usc_pds_dev_addr.addr;
2800       }
2801 
2802       unitex_prog.kick_usc = false;
2803       unitex_prog.clear = false;
2804 
2805       tex_state_dma_size_dw =
2806          sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;
2807 
2808       unitex_prog.num_texture_dma_kicks = 1U;
2809       unitex_prog.num_uniform_dma_kicks = 0U;
2810 
2811       result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
2812                                         device->heaps.general_heap,
2813                                         PVR_DW_TO_BYTES(tex_state_dma_size_dw),
2814                                         &pvr_bo);
2815       if (result != VK_SUCCESS)
2816          return result;
2817 
2818       dma_space = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
2819 
2820       result = pvr_sampler_image_state(ctx,
2821                                        transfer_cmd,
2822                                        sh_reg_layout,
2823                                        state,
2824                                        dma_space);
2825       if (result != VK_SUCCESS)
2826          return result;
2827 
2828       pvr_dma_texture_floats(transfer_cmd, state, sh_reg_layout, dma_space);
2829 
2830       if (transfer_cmd->sources[0].surface.mem_layout ==
2831           PVR_MEMLAYOUT_3DTWIDDLED) {
2832          dma_space[pvr_dynamic_const_reg_advance(sh_reg_layout, state)] =
2833             fui(transfer_cmd->sources[0].surface.z_position);
2834       }
2835 
2836       pvr_write_usc_constants(sh_reg_layout, dma_space);
2837 
2838       if (pvr_pick_component_needed(&state->custom_mapping))
2839          pvr_dma_texel_unwind(state, sh_reg_layout, dma_space);
2840 
2841       pvr_pds_encode_dma_burst(unitex_prog.texture_dma_control,
2842                                unitex_prog.texture_dma_address,
2843                                state->common_ptr,
2844                                tex_state_dma_size_dw,
2845                                pvr_bo->dev_addr.addr,
2846                                true,
2847                                dev_info);
2848 
2849       state->common_ptr += tex_state_dma_size_dw;
2850 
2851       result =
2852          pvr_pds_unitex(dev_info, ctx, transfer_cmd, &unitex_prog, prep_data);
2853       if (result != VK_SUCCESS)
2854          return result;
2855 
2856       pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg) {
2857          reg.enablebgtag = true;
2858       }
2859 
2860       /* clang-format off */
2861       pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg);
2862       /* clang-format on */
2863    } else {
2864       /* No shader. */
2865       state->pds_temps = 0U;
2866       state->uniform_data_size = 0U;
2867       state->tex_state_data_size = 0U;
2868 
2869       /* No background enabled. */
2870       /* clang-format off */
2871       pvr_csb_pack (&regs->isp_bgobjvals, CR_ISP_BGOBJVALS, reg);
2872       /* clang-format on */
2873       pvr_csb_pack (&regs->isp_aa, CR_ISP_AA, reg) {
2874          reg.mode = pvr_cr_isp_aa_mode_type(transfer_cmd->dst.sample_count);
2875       }
2876       state->msaa_multiplier = transfer_cmd->dst.sample_count & ~1U;
2877       state->pds_shader_task_offset = 0U;
2878       state->uni_tex_code_offset = 0U;
2879       state->tex_state_data_offset = 0U;
2880 
2881       result = pvr_pbe_src_format(transfer_cmd, state, &state->shader_props);
2882       if (result != VK_SUCCESS)
2883          return result;
2884    }
2885 
2886    pvr_setup_hwbg_object(dev_info, state);
2887 
2888    pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
2889       reg.mode_type = PVRX(CR_ISP_RENDER_MODE_TYPE_FAST_SCALE);
2890 
2891       result = pvr_isp_scan_direction(transfer_cmd,
2892                                       state->custom_mapping.pass_count,
2893                                       &reg.dir_type);
2894       if (result != VK_SUCCESS)
2895          return result;
2896    }
2897 
2898    /* Set up pixel event handling. */
2899    result = pvr_pbe_setup(transfer_cmd, ctx, state);
2900    if (result != VK_SUCCESS)
2901       return result;
2902 
2903    result = pvr_isp_tiles(device, state);
2904    if (result != VK_SUCCESS)
2905       return result;
2906 
2907    if (PVR_HAS_FEATURE(&device->pdevice->dev_info, gpu_multicore_support)) {
2908       pvr_csb_pack (&regs->frag_screen, CR_FRAG_SCREEN, reg) {
2909          reg.xmax = transfer_cmd->dst.width - 1;
2910          reg.ymax = transfer_cmd->dst.height - 1;
2911       }
2912    }
2913 
2914    if ((pass_idx + 1U) < state->custom_mapping.pass_count)
2915       *finished_out = false;
2916 
2917    return VK_SUCCESS;
2918 }
2919 
2920 static VkResult
pvr_pbe_src_format_f2d(uint32_t merge_flags,struct pvr_transfer_cmd_source * src,VkFormat dst_format,bool down_scale,bool dont_force_pbe,enum pvr_transfer_pbe_pixel_src * pixel_format_out)2921 pvr_pbe_src_format_f2d(uint32_t merge_flags,
2922                        struct pvr_transfer_cmd_source *src,
2923                        VkFormat dst_format,
2924                        bool down_scale,
2925                        bool dont_force_pbe,
2926                        enum pvr_transfer_pbe_pixel_src *pixel_format_out)
2927 {
2928    VkFormat src_format = src->surface.vk_format;
2929 
2930    /* This has to come before the rest as S8 for instance is integer and
2931     * signedsess check fails on D24S8.
2932     */
2933    if (vk_format_is_depth_or_stencil(src_format) ||
2934        vk_format_is_depth_or_stencil(dst_format) ||
2935        merge_flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) {
2936       return pvr_pbe_src_format_ds(&src->surface,
2937                                    src->filter,
2938                                    dst_format,
2939                                    merge_flags,
2940                                    down_scale,
2941                                    pixel_format_out);
2942    }
2943 
2944    return pvr_pbe_src_format_normal(src_format,
2945                                     dst_format,
2946                                     down_scale,
2947                                     dont_force_pbe,
2948                                     pixel_format_out);
2949 }
2950 
2951 /** Writes the coefficient loading PDS task. */
2952 static inline VkResult
pvr_pds_coeff_task(struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const bool sample_3d,struct pvr_transfer_prep_data * prep_data)2953 pvr_pds_coeff_task(struct pvr_transfer_ctx *ctx,
2954                    const struct pvr_transfer_cmd *transfer_cmd,
2955                    const bool sample_3d,
2956                    struct pvr_transfer_prep_data *prep_data)
2957 {
2958    struct pvr_transfer_3d_state *state = &prep_data->state;
2959    struct pvr_pds_coeff_loading_program program = { 0U };
2960    struct pvr_suballoc_bo *pvr_bo;
2961    VkResult result;
2962 
2963    program.num_fpu_iterators = 1U;
2964 
2965    pvr_csb_pack (&program.FPU_iterators[0U],
2966                  PDSINST_DOUT_FIELDS_DOUTI_SRC,
2967                  reg) {
2968       if (sample_3d)
2969          reg.size = PVRX(PDSINST_DOUTI_SIZE_3D);
2970       else
2971          reg.size = PVRX(PDSINST_DOUTI_SIZE_2D);
2972 
2973       reg.perspective = false;
2974 
2975       /* Varying wrap on the TSP means that the TSP chooses the shorter path
2976        * out of the normal and the wrapping path i.e. chooses between u0->u1
2977        * and u1->1.0 == 0.0 -> u0. We don't need this behavior.
2978        */
2979       /*
2980        * if RHW ever needed offset SRC_F32 to the first U in 16 bit units
2981        * l0 U    <= offs 0
2982        * l0 V
2983        * l1 U    <= offs 4
2984        * ...
2985        */
2986       reg.shademodel = PVRX(PDSINST_DOUTI_SHADEMODEL_GOURUAD);
2987       reg.f32_offset = 0U;
2988    }
2989 
2990    if (sample_3d)
2991       state->usc_coeff_regs = 12U;
2992    else
2993       state->usc_coeff_regs = 8U;
2994 
2995    pvr_pds_set_sizes_coeff_loading(&program);
2996 
2997    result = pvr_cmd_buffer_alloc_mem(
2998       transfer_cmd->cmd_buffer,
2999       ctx->device->heaps.pds_heap,
3000       PVR_DW_TO_BYTES(program.data_size + program.code_size),
3001       &pvr_bo);
3002    if (result != VK_SUCCESS)
3003       return result;
3004 
3005    state->pds_coeff_task_offset =
3006       pvr_bo->dev_addr.addr - ctx->device->heaps.pds_heap->base_addr.addr;
3007 
3008    pvr_pds_generate_coeff_loading_program(&program,
3009                                           pvr_bo_suballoc_get_map_addr(pvr_bo));
3010 
3011    state->coeff_data_size = program.data_size;
3012    state->pds_temps = program.temps_used;
3013 
3014    return VK_SUCCESS;
3015 }
3016 
3017 #define X 0U
3018 #define Y 1U
3019 #define Z 2U
3020 
pvr_tsp_floats(const struct pvr_device_info * dev_info,VkRect2D * rect,const float recips[3U],bool custom_filter,bool z_present,float z_value,struct pvr_transfer_3d_iteration * layer)3021 static void pvr_tsp_floats(const struct pvr_device_info *dev_info,
3022                            VkRect2D *rect,
3023                            const float recips[3U],
3024                            bool custom_filter,
3025                            bool z_present,
3026                            float z_value,
3027                            struct pvr_transfer_3d_iteration *layer)
3028 {
3029 #define U0 0U
3030 #define U1 1U
3031 #define V0 2U
3032 #define V1 3U
3033 
3034    const uint32_t indices[8U] = { U0, V0, U0, V1, U1, V1, U1, V0 };
3035    float delta[2U] = { 0.0f, 0.0f };
3036    int32_t non_normalized[4U];
3037    uint32_t src_flipped[2U];
3038    uint32_t normalized[4U];
3039    int32_t src_span[2U];
3040 
3041    non_normalized[U0] = rect->offset.x;
3042    non_normalized[U1] = rect->offset.x + rect->extent.width;
3043    non_normalized[V0] = rect->offset.y;
3044    non_normalized[V1] = rect->offset.y + rect->extent.height;
3045 
3046    /* Filter adjust. */
3047    src_span[X] = rect->extent.width;
3048    src_flipped[X] = src_span[X] > 0U ? 0U : 1U;
3049    src_span[Y] = rect->extent.height;
3050    src_flipped[Y] = src_span[Y] > 0U ? 0U : 1U;
3051    /*
3052     * | X  | Y  | srcFlipX | srcFlipY |
3053     * +----+----+----------+----------|
3054     * | X  | Y  | 0        | 0        |
3055     * | -X | Y  | 1        | 0        |
3056     * | X  | -Y | 0        | 1        |
3057     * | -X | -Y | 1        | 1        |
3058     */
3059    for (uint32_t i = X; i <= Y; i++) {
3060       if (custom_filter) {
3061          if (src_flipped[i] != 0U)
3062             delta[i] += 0.25;
3063          else
3064             delta[i] -= 0.25;
3065       }
3066    }
3067 
3068    /* Normalize. */
3069    for (uint32_t i = 0U; i < ARRAY_SIZE(normalized); i++) {
3070       uint32_t tmp;
3071       float ftmp;
3072 
3073       ftmp = (float)non_normalized[i] + delta[i >> 1U];
3074       ftmp *= recips[i >> 1U];
3075 
3076       tmp = fui(ftmp);
3077       if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3078          tmp = XXH_rotl32(tmp, 1U);
3079 
3080       normalized[i] = tmp;
3081    }
3082 
3083    /* Apply indices. */
3084    for (uint32_t i = 0U; i < 8U; i++)
3085       layer->texture_coords[i] = normalized[indices[i]];
3086 
3087    if (z_present) {
3088       uint32_t tmp = fui(z_value * recips[2U]);
3089 
3090       if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3091          tmp = XXH_rotl32(tmp, 1U);
3092 
3093       for (uint32_t i = 8U; i < 12U; i++)
3094          layer->texture_coords[i] = tmp;
3095    }
3096 
3097 #undef U0
3098 #undef U1
3099 #undef V0
3100 #undef V1
3101 }
3102 
3103 static void
pvr_isp_prim_block_tsp_vertex_block(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd_source * src,struct pvr_rect_mapping * mappings,bool custom_filter,uint32_t num_mappings,uint32_t mapping_offset,uint32_t tsp_comp_format_in_dw,uint32_t ** const cs_ptr_out)3104 pvr_isp_prim_block_tsp_vertex_block(const struct pvr_device_info *dev_info,
3105                                     const struct pvr_transfer_cmd_source *src,
3106                                     struct pvr_rect_mapping *mappings,
3107                                     bool custom_filter,
3108                                     uint32_t num_mappings,
3109                                     uint32_t mapping_offset,
3110                                     uint32_t tsp_comp_format_in_dw,
3111                                     uint32_t **const cs_ptr_out)
3112 {
3113    struct pvr_transfer_3d_iteration layer;
3114    uint32_t *cs_ptr = *cs_ptr_out;
3115 
3116    /*  |<-32b->|
3117     *  +-------+-----
3118     *  |  RHW  |    | X num_isp_vertices
3119     *  +-------+--  |
3120     *  |  U    | |  |
3121     *  |  V    | | X PVR_TRANSFER_NUM_LAYERS
3122     *  +-------+-----
3123     *
3124     * RHW is not there any more in the Transfer. The comment still explains
3125     * where it should go if ever needed.
3126     */
3127    for (uint32_t i = mapping_offset; i < mapping_offset + num_mappings; i++) {
3128       bool z_present = src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED;
3129       const float recips[3U] = {
3130          [X] = 1.0f / (float)src->surface.width,
3131          [Y] = 1.0f / (float)src->surface.height,
3132          [Z] = z_present ? 1.0f / (float)src->surface.depth : 0.0f,
3133       };
3134       float z_pos = (src->filter < PVR_FILTER_LINEAR)
3135                        ? floor(src->surface.z_position + 0.5f)
3136                        : src->surface.z_position;
3137 
3138       pvr_tsp_floats(dev_info,
3139                      &mappings[i].src_rect,
3140                      recips,
3141                      custom_filter,
3142                      z_present,
3143                      z_pos,
3144                      &layer);
3145 
3146       /* We request UVs from TSP for ISP triangle:
3147        *  0 u 1
3148        *  +---,
3149        * v|  /|
3150        *  | / |
3151        * 2'/--'3
3152        */
3153       for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3154          *cs_ptr++ = layer.texture_coords[0U];
3155          *cs_ptr++ = layer.texture_coords[1U];
3156       }
3157 
3158       if (z_present) {
3159          *cs_ptr++ = layer.texture_coords[8U];
3160          *cs_ptr++ = 0U;
3161       }
3162 
3163       for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3164          *cs_ptr++ = layer.texture_coords[6U];
3165          *cs_ptr++ = layer.texture_coords[7U];
3166       }
3167 
3168       if (z_present) {
3169          *cs_ptr++ = layer.texture_coords[11U];
3170          *cs_ptr++ = 0U;
3171       }
3172 
3173       for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3174          *cs_ptr++ = layer.texture_coords[2U];
3175          *cs_ptr++ = layer.texture_coords[3U];
3176       }
3177 
3178       if (z_present) {
3179          *cs_ptr++ = layer.texture_coords[9U];
3180          *cs_ptr++ = 0U;
3181       }
3182 
3183       for (uint32_t j = 0U; j < PVR_TRANSFER_NUM_LAYERS; j++) {
3184          *cs_ptr++ = layer.texture_coords[4U];
3185          *cs_ptr++ = layer.texture_coords[5U];
3186       }
3187 
3188       if (z_present) {
3189          *cs_ptr++ = layer.texture_coords[10U];
3190          *cs_ptr++ = 0U;
3191       }
3192    }
3193 
3194    if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3195       /* Skipped optional primitive id. */
3196       for (uint32_t i = 0U; i < tsp_comp_format_in_dw; i++)
3197          *cs_ptr++ = 0x88888888U;
3198    } else {
3199       /* Align back to 64 bits. */
3200       if (((uintptr_t)cs_ptr & 7U) != 0U)
3201          cs_ptr++;
3202    }
3203 
3204    *cs_ptr_out = cs_ptr;
3205 }
3206 
3207 #undef X
3208 #undef Y
3209 #undef Z
3210 
/* Emits the TA_STATE_PDS_* words of a primitive block: the device addresses
 * of the PDS programs recorded in *state (shader task, texture/unicode,
 * varying/coeff, texture data) plus the two SIZEINFO words describing the
 * PDS/USC allocation sizes.
 *
 * Advances *cs_ptr_out past the emitted words. NOTE(review): ctx is not used
 * in this function — kept for signature symmetry with its callers, presumably.
 */
static void pvr_isp_prim_block_pds_state(const struct pvr_device_info *dev_info,
                                         struct pvr_transfer_ctx *ctx,
                                         struct pvr_transfer_3d_state *state,
                                         uint32_t **const cs_ptr_out)
{
   uint32_t *cs_ptr = *cs_ptr_out;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SHADERBASE, shader_base) {
      shader_base.addr = PVR_DEV_ADDR(state->pds_shader_task_offset);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXUNICODEBASE, tex_base) {
      tex_base.addr = PVR_DEV_ADDR(state->uni_tex_code_offset);
   }
   cs_ptr++;

   /* All size fields are expressed in hardware allocation granules: each raw
    * size is (aligned where required and) divided by its *_UNIT_SIZE.
    */
   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO1, info1) {
      info1.pds_uniformsize =
         state->uniform_data_size /
         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE);

      info1.pds_texturestatesize =
         state->tex_state_data_size /
         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);

      info1.pds_varyingsize =
         state->coeff_data_size /
         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE);

      info1.usc_varyingsize =
         ALIGN_POT(state->usc_coeff_regs,
                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE)) /
         PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE);

      info1.pds_tempsize =
         ALIGN_POT(state->pds_temps,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE)) /
         PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_VARYINGBASE, base) {
      base.addr = PVR_DEV_ADDR(state->pds_coeff_task_offset);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_TEXTUREDATABASE, base) {
      base.addr = PVR_DEV_ADDR(state->tex_state_data_offset);
   }
   cs_ptr++;

   /* PDS uniform program not used. */
   pvr_csb_pack (cs_ptr, TA_STATE_PDS_UNIFORMDATABASE, base) {
      base.addr = PVR_DEV_ADDR(0U);
   }
   cs_ptr++;

   pvr_csb_pack (cs_ptr, TA_STATE_PDS_SIZEINFO2, info) {
      info.usc_sharedsize =
         ALIGN_POT(state->common_ptr,
                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE)) /
         PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
      /* Triangle merging is only left enabled when ERN 42307 is present. */
      info.pds_tri_merge_disable = !PVR_HAS_ERN(dev_info, 42307);
      info.pds_batchnum = 0U;
   }
   cs_ptr++;

   /* Get back to 64 bits boundary. */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
      cs_ptr++;

   *cs_ptr_out = cs_ptr;
}
3285 
pvr_isp_prim_block_isp_state(const struct pvr_device_info * dev_info,UNUSED uint32_t tsp_comp_format_in_dw,uint32_t tsp_data_size_in_bytes,uint32_t num_isp_vertices,bool read_bgnd,uint32_t ** const cs_ptr_out)3286 static void pvr_isp_prim_block_isp_state(const struct pvr_device_info *dev_info,
3287                                          UNUSED uint32_t tsp_comp_format_in_dw,
3288                                          uint32_t tsp_data_size_in_bytes,
3289                                          uint32_t num_isp_vertices,
3290                                          bool read_bgnd,
3291                                          uint32_t **const cs_ptr_out)
3292 {
3293    const bool has_simple_internal_parameter_format_v2 =
3294       PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2);
3295    uint32_t *cs_ptr = *cs_ptr_out;
3296 
3297    if (has_simple_internal_parameter_format_v2) {
3298       const uint32_t tsp_data_per_vrx_in_bytes =
3299          tsp_data_size_in_bytes / num_isp_vertices;
3300 
3301       pvr_csb_pack ((uint64_t *)cs_ptr,
3302                     IPF_VERTEX_FORMAT_WORD_SIPF2,
3303                     vert_fmt) {
3304          vert_fmt.vf_isp_state_size =
3305             pvr_cmd_length(TA_STATE_ISPCTL) + pvr_cmd_length(TA_STATE_ISPA);
3306 
3307          vert_fmt.vf_tsp_vtx_raw = true;
3308          vert_fmt.vf_isp_vtx_raw = true;
3309 
3310          vert_fmt.vf_varying_vertex_bits = tsp_data_per_vrx_in_bytes * 8U;
3311          vert_fmt.vf_primitive_total = (num_isp_vertices / 2U) - 1U;
3312          vert_fmt.vf_vertex_total = num_isp_vertices - 1U;
3313       }
3314       cs_ptr += pvr_cmd_length(IPF_VERTEX_FORMAT_WORD_SIPF2);
3315    }
3316 
3317    /* ISP state words. */
3318 
3319    /* clang-format off */
3320    pvr_csb_pack (cs_ptr, TA_STATE_ISPCTL, ispctl);
3321    /* clang-format on */
3322    cs_ptr += pvr_cmd_length(TA_STATE_ISPCTL);
3323 
3324    pvr_csb_pack (cs_ptr, TA_STATE_ISPA, ispa) {
3325       ispa.objtype = PVRX(TA_OBJTYPE_TRIANGLE);
3326       ispa.passtype = read_bgnd ? PVRX(TA_PASSTYPE_TRANSLUCENT)
3327                                 : PVRX(TA_PASSTYPE_OPAQUE);
3328       ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
3329       ispa.dwritedisable = true;
3330    }
3331    cs_ptr += pvr_cmd_length(TA_STATE_ISPA);
3332 
3333    if (has_simple_internal_parameter_format_v2) {
3334       *cs_ptr_out = cs_ptr;
3335       return;
3336    }
3337 
3338    /* How many bytes the TSP compression format needs? */
3339    pvr_csb_pack (cs_ptr, IPF_COMPRESSION_SIZE_WORD, word) {
3340       word.cs_isp_comp_table_size = 0U;
3341       word.cs_tsp_comp_format_size = tsp_comp_format_in_dw;
3342       word.cs_tsp_comp_table_size = 0U;
3343       word.cs_tsp_comp_vertex_size = tsp_data_size_in_bytes / num_isp_vertices;
3344    }
3345    cs_ptr += pvr_cmd_length(IPF_COMPRESSION_SIZE_WORD);
3346 
3347    /* ISP vertex compression. */
3348    pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_0, word0) {
3349       word0.cf_isp_comp_fmt_x0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3350       word0.cf_isp_comp_fmt_x1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3351       word0.cf_isp_comp_fmt_x2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3352       word0.cf_isp_comp_fmt_y0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3353       word0.cf_isp_comp_fmt_y1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3354       word0.cf_isp_comp_fmt_y2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3355       word0.cf_isp_comp_fmt_z0 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3356       word0.cf_isp_comp_fmt_z1 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3357    }
3358    cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_0);
3359 
3360    pvr_csb_pack (cs_ptr, IPF_ISP_COMPRESSION_WORD_1, word1) {
3361       word1.vf_prim_msaa = 0U;
3362       word1.vf_prim_id_pres = 0U;
3363       word1.vf_vertex_clipped = 0U;
3364       word1.vf_vertex_total = num_isp_vertices - 1U;
3365       word1.cf_isp_comp_fmt_z3 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3366       word1.cf_isp_comp_fmt_z2 = PVRX(IPF_COMPRESSION_FORMAT_RAW_BYTE);
3367    }
3368    cs_ptr += pvr_cmd_length(IPF_ISP_COMPRESSION_WORD_1);
3369 
3370    *cs_ptr_out = cs_ptr;
3371 }
3372 
/* Emits the triangle index data for num_mappings quads. Quad i uses vertices
 * 4*i .. 4*i+3, two triangles per quad.
 *
 * SIPF cores pack eight indices (two quads) into one 64-bit word. Older
 * cores pack four triangles into three dwords (t0t0t0t1_t1t1t2t2_t2t3t3t3),
 * so consecutive even/odd quads share a dword that gets OR-patched on the
 * odd pass.
 */
static void
pvr_isp_prim_block_index_block(const struct pvr_device_info *dev_info,
                               uint32_t num_mappings,
                               uint32_t **const cs_ptr_out)
{
   uint32_t *cs_ptr = *cs_ptr_out;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      /* Two quads (8 vertices) per 64-bit index word; a trailing half-full
       * word is fine since the vertex total bounds what the hardware reads.
       */
      for (uint32_t i = 0U; i < DIV_ROUND_UP(num_mappings, 2U); i++) {
         const uint32_t idx = i * 8U;

         pvr_csb_pack ((uint64_t *)cs_ptr,
                       IPF_INDEX_DATA_WORDS_SIPF,
                       idx_data_word) {
            idx_data_word.ix_triangle3_index_2 = idx + 5U;
            idx_data_word.ix_triangle3_index_1 = idx + 6U;
            idx_data_word.ix_triangle3_index_0 = idx + 7U;

            idx_data_word.ix_triangle2_index_2 = idx + 6U;
            idx_data_word.ix_triangle2_index_1 = idx + 5U;
            idx_data_word.ix_triangle2_index_0 = idx + 4U;

            idx_data_word.ix_triangle1_index_2 = idx + 1U;
            idx_data_word.ix_triangle1_index_1 = idx + 2U;
            idx_data_word.ix_triangle1_index_0 = idx + 3U;

            idx_data_word.ix_triangle0_index_2 = idx + 2U;
            idx_data_word.ix_triangle0_index_1 = idx + 1U;
            idx_data_word.ix_triangle0_index_0 = idx + 0U;
         }
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA_WORDS_SIPF);
      }

      *cs_ptr_out = cs_ptr;
      return;
   }

   /* j is the first vertex index of quad i. */
   for (uint32_t i = 0U, j = 0U; i < num_mappings; i++, j += 4U) {
      if ((i & 1U) == 0U) {
         pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
            word.ix_index0_0 = j;
            word.ix_index0_1 = j + 1U;
            word.ix_index0_2 = j + 2U;
            word.ix_index1_0 = j + 3U;
         }
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);

         /* Don't increment cs_ptr here. IPF_INDEX_DATA is patched in the
          * else part and then cs_ptr is incremented.
          */
         pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
            word.ix_index0_0 = j + 2U;
            word.ix_index0_1 = j + 1U;
         }
      } else {
         uint32_t tmp;

         /* OR the odd quad's first indices into the dword the even quad
          * already half-filled above.
          */
         pvr_csb_pack (&tmp, IPF_INDEX_DATA, word) {
            word.ix_index0_2 = j;
            word.ix_index1_0 = j + 1U;
         }
         *cs_ptr |= tmp;
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);

         pvr_csb_pack (cs_ptr, IPF_INDEX_DATA, word) {
            word.ix_index0_0 = j + 2U;
            word.ix_index0_1 = j + 3U;
            word.ix_index0_2 = j + 2U;
            word.ix_index1_0 = j + 1U;
         }
         cs_ptr += pvr_cmd_length(IPF_INDEX_DATA);
      }
   }

   /* The last pass didn't ++. */
   if ((num_mappings & 1U) != 0U)
      cs_ptr++;

   *cs_ptr_out = cs_ptr;
}
3453 
3454 /* Calculates a 24 bit fixed point (biased) representation of a signed integer.
3455  */
3456 static inline VkResult
pvr_int32_to_isp_xy_vtx(const struct pvr_device_info * dev_info,int32_t val,bool bias,uint32_t * word_out)3457 pvr_int32_to_isp_xy_vtx(const struct pvr_device_info *dev_info,
3458                         int32_t val,
3459                         bool bias,
3460                         uint32_t *word_out)
3461 {
3462    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3463       const uint32_t max_fractional = PVRX(IPF_ISP_VERTEX_XY_SIPF_FRAC_MAX_VAL);
3464       const uint32_t max_integer = PVRX(IPF_ISP_VERTEX_XY_SIPF_INTEGER_MAX_VAL);
3465 
3466       uint32_t fractional;
3467       uint32_t integer;
3468 
3469       if (bias)
3470          val += PVRX(IPF_ISP_VERTEX_XY_BIAS_VALUE_SIPF);
3471 
3472       if (val < 0 || val > max_integer + 1) {
3473          mesa_loge("ISP vertex xy value out of range.");
3474          return vk_error(NULL, VK_ERROR_UNKNOWN);
3475       }
3476 
3477       if (val <= max_integer) {
3478          integer = val;
3479          fractional = 0;
3480       } else if (val == max_integer + 1) {
3481          /* The integer field is 13 bits long so the max value is
3482           * 2 ^ 13 - 1 = 8191. For 8k support we need to handle 8192 so we set
3483           * all fractional bits to get as close as possible. The best we can do
3484           * is: 0x1FFF.F = 8191.9375 ≈ 8192 .
3485           */
3486          integer = max_integer;
3487          fractional = max_fractional;
3488       }
3489 
3490       pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY_SIPF, word) {
3491          word.integer = integer;
3492          word.frac = fractional;
3493       }
3494 
3495       return VK_SUCCESS;
3496    }
3497 
3498    val += PVRX(IPF_ISP_VERTEX_XY_BIAS_VALUE);
3499 
3500    if (((uint32_t)val & 0x7fff8000U) != 0U)
3501       return vk_error(NULL, VK_ERROR_UNKNOWN);
3502 
3503    pvr_csb_pack (word_out, IPF_ISP_VERTEX_XY, word) {
3504       word.sign = val < 0;
3505       word.integer = val;
3506    }
3507 
3508    return VK_SUCCESS;
3509 }
3510 
/* Emits the ISP screen-space vertex data for each destination rectangle
 * mapping: four corners per rectangle (top-left, top-right, bottom-left,
 * bottom-right), converted to the hardware fixed point XY format by
 * pvr_int32_to_isp_xy_vtx(). On the non-SIPF path Z is emitted as 0.
 *
 * Returns VK_ERROR_UNKNOWN if any coordinate is out of range for the
 * hardware format, VK_SUCCESS otherwise.
 */
static VkResult
pvr_isp_prim_block_isp_vertices(const struct pvr_device_info *dev_info,
                                struct pvr_transfer_3d_state *state,
                                struct pvr_rect_mapping *mappings,
                                uint32_t num_mappings,
                                uint32_t mapping_offset,
                                uint32_t **const cs_ptr_out)
{
   uint32_t *cs_ptr = *cs_ptr_out;
   bool bias = true;
   uint32_t i;

   /* On 8K-capable cores the bias is only applied for small render sizes
    * (<= 256 tiles in each dimension).
    */
   if (PVR_HAS_FEATURE(dev_info, screen_size8K))
      bias = state->width_in_tiles <= 256U && state->height_in_tiles <= 256U;

   for (i = mapping_offset; i < mapping_offset + num_mappings; i++) {
      uint32_t bottom = 0U;
      uint32_t right = 0U;
      uint32_t left = 0U;
      uint32_t top = 0U;
      VkResult result;

      /* ISP vertex data (X, Y, Z). */
      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.y,
                                       bias,
                                       &top);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.y +
                                          mappings[i].dst_rect.extent.height,
                                       bias,
                                       &bottom);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.x,
                                       bias,
                                       &left);
      if (result != VK_SUCCESS)
         return result;

      result = pvr_int32_to_isp_xy_vtx(dev_info,
                                       mappings[i].dst_rect.offset.x +
                                          mappings[i].dst_rect.extent.width,
                                       bias,
                                       &right);
      if (result != VK_SUCCESS)
         return result;

      if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
         /* One 64-bit XY word per vertex; order: TL, TR, BL, BR. */
         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = top;
            word.x = left;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = top;
            word.x = right;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = bottom;
            word.x = left;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         pvr_csb_pack ((uint64_t *)cs_ptr, IPF_ISP_VERTEX_WORD_SIPF, word) {
            word.y = bottom;
            word.x = right;
         }
         cs_ptr += pvr_cmd_length(IPF_ISP_VERTEX_WORD_SIPF);

         continue;
      }

      /* Non-SIPF: each vertex pair is packed across five dwords, with the
       * 24-bit coordinates split between words (hence the masks/shifts).
       */

      /* ISP vertices 0 and 1. */
      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
         word0.x0 = left;
         word0.y0 = top & 0xFF;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
         word1.y0 = top >> PVRX(IPF_ISP_VERTEX_WORD_1_Y0_SHIFT);
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
         word2.x1 = right & 0xFFFF;
         word2.z0 = 0U;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
         word3.x1 = right >> PVRX(IPF_ISP_VERTEX_WORD_3_X1_SHIFT);
         word3.y1 = top;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
         word4.z1 = 0U;
      }
      cs_ptr++;

      /* ISP vertices 2 and 3. */
      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_0, word0) {
         word0.x0 = left;
         word0.y0 = bottom & 0xFF;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_1, word1) {
         word1.y0 = bottom >> PVRX(IPF_ISP_VERTEX_WORD_1_Y0_SHIFT);
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_2, word2) {
         word2.x1 = right & 0xFFFF;
         word2.z0 = 0U;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_3, word3) {
         word3.x1 = right >> PVRX(IPF_ISP_VERTEX_WORD_3_X1_SHIFT);
         word3.y1 = bottom;
      }
      cs_ptr++;

      pvr_csb_pack (cs_ptr, IPF_ISP_VERTEX_WORD_4, word4) {
         word4.z1 = 0U;
      }
      cs_ptr++;
   }
   *cs_ptr_out = cs_ptr;

   return VK_SUCCESS;
}
3654 
3655 static uint32_t
pvr_isp_primitive_block_size(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd_source * src,uint32_t num_mappings)3656 pvr_isp_primitive_block_size(const struct pvr_device_info *dev_info,
3657                              const struct pvr_transfer_cmd_source *src,
3658                              uint32_t num_mappings)
3659 {
3660    uint32_t num_isp_vertices = num_mappings * 4U;
3661    uint32_t num_tsp_vertices_per_isp_vertex;
3662    uint32_t isp_vertex_data_size_dw;
3663    bool color_fill = (src == NULL);
3664    uint32_t tsp_comp_format_dw;
3665    uint32_t isp_state_size_dw;
3666    uint32_t pds_state_size_dw;
3667    uint32_t idx_data_size_dw;
3668    uint32_t tsp_data_size;
3669    uint32_t stream_size;
3670 
3671    if (color_fill) {
3672       num_tsp_vertices_per_isp_vertex = 0U;
3673    } else {
3674       num_tsp_vertices_per_isp_vertex =
3675          src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
3676    }
3677 
3678    tsp_data_size = PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
3679                                    num_tsp_vertices_per_isp_vertex);
3680 
3681    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
3682       /* An XYZ vertex is 16/16/32 bits => 8 bytes. */
3683       isp_vertex_data_size_dw = num_isp_vertices * 2U;
3684 
3685       /* Round to even for 64 bit boundary. */
3686       idx_data_size_dw = ALIGN_POT(num_mappings, 2U);
3687       tsp_comp_format_dw = 0U;
3688       isp_state_size_dw = 4U;
3689       pds_state_size_dw = 8U;
3690    } else {
3691       tsp_comp_format_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;
3692 
3693       if (!color_fill) {
3694          if (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
3695             tsp_comp_format_dw *= 2U;
3696       }
3697 
3698       /* An XYZ vertex is 24/24/32 bits => 10 bytes with last padded to 4 byte
3699        * burst align.
3700        */
3701       isp_vertex_data_size_dw = DIV_ROUND_UP(num_isp_vertices * 10U, 4U);
3702 
3703       /* 4 triangles fit in 3 dw: t0t0t0t1_t1t1t2t2_t2t3t3t3. */
3704       idx_data_size_dw = num_mappings + DIV_ROUND_UP(num_mappings, 2U);
3705       isp_state_size_dw = 5U;
3706       pds_state_size_dw = 7U;
3707    }
3708 
3709    stream_size =
3710       tsp_data_size + PVR_DW_TO_BYTES(idx_data_size_dw + tsp_comp_format_dw +
3711                                       isp_vertex_data_size_dw +
3712                                       isp_state_size_dw + pds_state_size_dw);
3713 
3714    return stream_size;
3715 }
3716 
/* Writes a complete ISP primitive block into the control stream at
 * *cs_ptr_out.
 *
 * The sub-block ordering differs between formats:
 *  - SIPF cores: ISP state, index data, ISP vertices, PDS state, then
 *    (unless color fill) TSP vertex data; *cs_start_offset is 0.
 *  - Older cores: TSP vertex data first (unless color fill), PDS state,
 *    then ISP state / indices / vertices; *cs_start_offset is set to the
 *    byte offset of the ISP state words so CS_PRIM_BASE can point there.
 *
 * src == NULL means a color fill (no TSP vertex data is emitted).
 * On success *cs_ptr_out is advanced past the whole block, whose size must
 * match pvr_isp_primitive_block_size() (asserted below).
 */
static VkResult
pvr_isp_primitive_block(const struct pvr_device_info *dev_info,
                        struct pvr_transfer_ctx *ctx,
                        const struct pvr_transfer_cmd *transfer_cmd,
                        struct pvr_transfer_prep_data *prep_data,
                        const struct pvr_transfer_cmd_source *src,
                        bool custom_filter,
                        struct pvr_rect_mapping *mappings,
                        uint32_t num_mappings,
                        uint32_t mapping_offset,
                        bool read_bgnd,
                        uint32_t *cs_start_offset,
                        uint32_t **cs_ptr_out)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   uint32_t num_isp_vertices = num_mappings * 4U;
   uint32_t num_tsp_vertices_per_isp_vert;
   uint32_t tsp_data_size_in_bytes;
   uint32_t tsp_comp_format_in_dw;
   bool color_fill = src == NULL;
   uint32_t stream_size_in_bytes;
   uint32_t *cs_ptr_start;
   VkResult result;

   if (color_fill) {
      num_tsp_vertices_per_isp_vert = 0U;
   } else {
      /* 3D-twiddled sources carry a Z coordinate per vertex as well. */
      num_tsp_vertices_per_isp_vert =
         src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED ? 4U : 2U;
   }

   tsp_data_size_in_bytes =
      PVR_DW_TO_BYTES(num_isp_vertices * PVR_TRANSFER_NUM_LAYERS *
                      num_tsp_vertices_per_isp_vert);

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      tsp_comp_format_in_dw = 0U;
   } else {
      tsp_comp_format_in_dw = color_fill ? 0U : PVR_TRANSFER_NUM_LAYERS;

      if (!color_fill && src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
         tsp_comp_format_in_dw *= 2U;
   }

   /* Used only by the size assertion at the end of the function. */
   stream_size_in_bytes =
      pvr_isp_primitive_block_size(dev_info, src, num_mappings);

   cs_ptr_start = *cs_ptr_out;

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
      /* This includes:
       *    Vertex formats.
       *    ISP state words.
       */
      pvr_isp_prim_block_isp_state(dev_info,
                                   tsp_comp_format_in_dw,
                                   tsp_data_size_in_bytes,
                                   num_isp_vertices,
                                   read_bgnd,
                                   cs_ptr_out);

      /* This includes:
       *    Index data / point pitch.
       */
      pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);

      result = pvr_isp_prim_block_isp_vertices(dev_info,
                                               state,
                                               mappings,
                                               num_mappings,
                                               mapping_offset,
                                               cs_ptr_out);
      if (result != VK_SUCCESS)
         return result;

      pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);

      if (!color_fill) {
         /* This includes:
          *    TSP vertex formats.
          */
         pvr_isp_prim_block_tsp_vertex_block(dev_info,
                                             src,
                                             mappings,
                                             custom_filter,
                                             num_mappings,
                                             mapping_offset,
                                             tsp_comp_format_in_dw,
                                             cs_ptr_out);
      }

      *cs_start_offset = 0;
   } else {
      if (!color_fill) {
         /* This includes:
          *    Compressed TSP vertex data & tables.
          *    Primitive id.
          *    TSP compression formats.
          */
         pvr_isp_prim_block_tsp_vertex_block(dev_info,
                                             src,
                                             mappings,
                                             custom_filter,
                                             num_mappings,
                                             mapping_offset,
                                             tsp_comp_format_in_dw,
                                             cs_ptr_out);
      }

      pvr_isp_prim_block_pds_state(dev_info, ctx, state, cs_ptr_out);

      /* Point the CS_PRIM_BASE here. */
      *cs_start_offset = (*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]);

      /* This includes:
       *    ISP state words.
       *    Compression size word.
       *    ISP compression and vertex formats.
       */
      pvr_isp_prim_block_isp_state(dev_info,
                                   tsp_comp_format_in_dw,
                                   tsp_data_size_in_bytes,
                                   num_isp_vertices,
                                   read_bgnd,
                                   cs_ptr_out);

      pvr_isp_prim_block_index_block(dev_info, num_mappings, cs_ptr_out);

      result = pvr_isp_prim_block_isp_vertices(dev_info,
                                               state,
                                               mappings,
                                               num_mappings,
                                               mapping_offset,
                                               cs_ptr_out);
      if (result != VK_SUCCESS)
         return result;
   }

   /* The emitted stream must match the precomputed size exactly. */
   assert((*cs_ptr_out - cs_ptr_start) * sizeof(cs_ptr_start[0]) ==
          stream_size_in_bytes);

   return VK_SUCCESS;
}
3860 
3861 static inline uint32_t
pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info * dev_info)3862 pvr_transfer_prim_blocks_per_alloc(const struct pvr_device_info *dev_info)
3863 {
3864    uint32_t ret = PVR_DW_TO_BYTES(PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS));
3865 
3866    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
3867       return ret / sizeof(uint64_t) / 2U;
3868 
3869    return ret / sizeof(uint32_t) / 2U - 1U;
3870 }
3871 
3872 static inline uint32_t
pvr_transfer_max_quads_per_pb(const struct pvr_device_info * dev_info)3873 pvr_transfer_max_quads_per_pb(const struct pvr_device_info *dev_info)
3874 {
3875    return PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U
3876                                                                       : 16U;
3877 }
3878 
static inline uint8_t *pvr_isp_ctrl_stream_sipf_write_aligned(uint8_t *stream,
                                                              uint32_t data,
                                                              uint32_t size)
{
   /* Append `size` bytes of `data` to a byte-granular stream using 32-bit
    * read-modify-write accesses on the underlying aligned words. Returns the
    * advanced byte pointer.
    */
   const uint32_t byte_offset = (uintptr_t)stream & 0x3U;
   uint32_t *const word_ptr = (uint32_t *)(stream - byte_offset);
   /* Mask preserving the bytes already written below the current offset. */
   const uint32_t keep_mask = (1U << (byte_offset * 8U)) - 1U;

   assert(size > 0 && size <= 4U);

   /* Merge the new data above the bytes already present in this word. */
   *word_ptr = (*word_ptr & keep_mask) | (data << (byte_offset * 8U));

   /* Spill any bytes that did not fit into the start of the next word. */
   if (byte_offset + size > 4U)
      word_ptr[1] = data >> ((4U - byte_offset) * 8U);

   return stream + size;
}
3898 
/**
 * Writes ISP ctrl stream.
 *
 * We change sampler/texture state when we process a new TQ source. The
 * primitive block contains the shader pointers, but we supply the primitive
 * blocks with shaders from here.
 *
 * Layout of the allocated buffer: region-array control-stream words first,
 * followed by the primitive blocks they point at.
 *
 * \param dev_info      Device features used to pick the stream encoding.
 * \param ctx           Transfer context (heaps, frag store).
 * \param transfer_cmd  Transfer command being prepared.
 * \param prep_data     Receives 3D state and register values for the kick.
 * \return VK_SUCCESS, or an error from allocation / shader / state setup.
 */
static VkResult pvr_isp_ctrl_stream(const struct pvr_device_info *dev_info,
                                    struct pvr_transfer_ctx *ctx,
                                    struct pvr_transfer_cmd *transfer_cmd,
                                    struct pvr_transfer_prep_data *prep_data)
{
   const uint32_t max_mappings_per_pb = pvr_transfer_max_quads_per_pb(dev_info);
   bool fill_blit = (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U;
   uint32_t free_ctrl_stream_words = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS);
   struct pvr_transfer_3d_state *const state = &prep_data->state;
   struct pvr_winsys_transfer_regs *const regs = &state->regs;
   struct pvr_transfer_pass *pass = NULL;
   uint32_t flags = transfer_cmd->flags;
   struct pvr_suballoc_bo *pvr_cs_bo;
   pvr_dev_addr_t stream_base_vaddr;
   uint32_t num_prim_blks = 0U;
   uint32_t prim_blk_size = 0U;
   uint32_t region_arrays_size;
   uint32_t num_region_arrays;
   uint32_t total_stream_size;
   bool was_linked = false;
   uint32_t rem_mappings;
   uint32_t num_sources;
   uint32_t *blk_cs_ptr;
   uint32_t *cs_ptr;
   uint32_t source;
   VkResult result;

   /* Sizing pass: count primitive blocks and sum their byte sizes so the
    * whole control stream can be allocated up front. At most
    * max_mappings_per_pb mappings go into one primitive block.
    */
   if (state->custom_mapping.pass_count > 0U) {
      /* Custom mapping: block counts come from the current pass. */
      pass = &state->custom_mapping.passes[state->pass_idx];

      num_sources = pass->source_count;

      for (source = 0; source < num_sources; source++) {
         uint32_t num_mappings = pass->sources[source].mapping_count;

         while (num_mappings > 0U) {
            if (fill_blit) {
               prim_blk_size += pvr_isp_primitive_block_size(
                  dev_info,
                  NULL,
                  MIN2(max_mappings_per_pb, num_mappings));
            }

            if (transfer_cmd->source_count > 0) {
               prim_blk_size += pvr_isp_primitive_block_size(
                  dev_info,
                  &transfer_cmd->sources[source],
                  MIN2(max_mappings_per_pb, num_mappings));
            }

            num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
            num_prim_blks++;
         }
      }
   } else {
      num_sources = fill_blit ? 1U : transfer_cmd->source_count;

      if (fill_blit) {
         num_prim_blks = 1U;
         prim_blk_size +=
            pvr_isp_primitive_block_size(dev_info,
                                         NULL,
                                         MIN2(max_mappings_per_pb, 1U));

         /* Fill blits can also have a source; fallthrough to handle. */
      }

      for (source = 0; source < transfer_cmd->source_count; source++) {
         uint32_t num_mappings = transfer_cmd->sources[source].mapping_count;

         while (num_mappings > 0U) {
            prim_blk_size += pvr_isp_primitive_block_size(
               dev_info,
               &transfer_cmd->sources[source],
               MIN2(max_mappings_per_pb, num_mappings));

            num_mappings -= MIN2(max_mappings_per_pb, num_mappings);
            num_prim_blks++;
         }
      }
   }

   /* Round up: each region array addresses a fixed number of prim blocks. */
   num_region_arrays =
      (num_prim_blks + (pvr_transfer_prim_blocks_per_alloc(dev_info) - 1U)) /
      pvr_transfer_prim_blocks_per_alloc(dev_info);
   region_arrays_size = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS) *
                        sizeof(uint32_t) * num_region_arrays;
   total_stream_size = region_arrays_size + prim_blk_size;

   /* Allocate space for IPF control stream. */
   result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                     ctx->device->heaps.transfer_frag_heap,
                                     total_stream_size,
                                     &pvr_cs_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Addresses emitted into the stream are relative to the transfer frag
    * heap base.
    */
   stream_base_vaddr =
      PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
                   ctx->device->heaps.transfer_frag_heap->base_addr.addr);

   /* Region-array words first; primitive blocks start right after them. */
   cs_ptr = pvr_bo_suballoc_get_map_addr(pvr_cs_bo);
   blk_cs_ptr = cs_ptr + region_arrays_size / sizeof(uint32_t);

   source = 0;
   while (source < num_sources) {
      if (fill_blit)
         rem_mappings = pass ? pass->sources[source].mapping_count : 1U;
      else
         rem_mappings = transfer_cmd->sources[source].mapping_count;

      if ((transfer_cmd->source_count > 0 || fill_blit) && rem_mappings != 0U) {
         struct pvr_pds_pixel_shader_sa_program unitex_pds_prog = { 0U };
         struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[source];
         struct pvr_rect_mapping fill_mapping;
         uint32_t mapping_offset = 0U;
         bool read_bgnd = false;

         if (fill_blit) {
            /* Fill path: pack the clear color into the USC clear registers
             * and point the PDS task at the device's NOP program.
             */
            uint32_t packed_color[4U] = { 0U };

            if (vk_format_is_compressed(transfer_cmd->dst.vk_format)) {
               return vk_error(transfer_cmd->cmd_buffer,
                               VK_ERROR_FORMAT_NOT_SUPPORTED);
            }

            state->pds_shader_task_offset = 0U;
            state->uni_tex_code_offset = 0U;
            state->tex_state_data_offset = 0U;
            state->common_ptr = 0U;

            result = pvr_pack_clear_color(transfer_cmd->dst.vk_format,
                                          transfer_cmd->clear_color,
                                          packed_color);
            if (result != VK_SUCCESS)
               return result;

            fill_mapping.dst_rect = transfer_cmd->scissor;

            pvr_csb_pack (&regs->usc_clear_register0,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[0U];
            }

            pvr_csb_pack (&regs->usc_clear_register1,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[1U];
            }

            pvr_csb_pack (&regs->usc_clear_register2,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[2U];
            }

            pvr_csb_pack (&regs->usc_clear_register3,
                          CR_USC_CLEAR_REGISTER,
                          reg) {
               reg.val = packed_color[3U];
            }

            state->pds_shader_task_offset =
               transfer_cmd->cmd_buffer->device->nop_program.pds.data_offset;

            unitex_pds_prog.kick_usc = false;
            unitex_pds_prog.clear = false;
         } else {
            /* Copy path: pick the fragment shader, build sampler/image state
             * and a PDS DMA that uploads it into the shared register bank.
             */
            const bool down_scale = transfer_cmd->sources[source].resolve_op ==
                                       PVR_RESOLVE_BLEND &&
                                    src->surface.sample_count > 1U &&
                                    transfer_cmd->dst.sample_count <= 1U;
            struct pvr_tq_shader_properties *shader_props =
               &state->shader_props;
            struct pvr_tq_layer_properties *layer = &shader_props->layer_props;
            const struct pvr_tq_frag_sh_reg_layout *sh_reg_layout;
            enum pvr_transfer_pbe_pixel_src pbe_src_format;
            struct pvr_suballoc_bo *pvr_bo;
            uint32_t tex_state_dma_size;
            pvr_dev_addr_t dev_offset;

            /* Reset the shared register bank ptrs each src implies new texture
             * state (Note that we don't change texture state per prim block).
             */
            state->common_ptr = 0U;
            state->usc_const_reg_ptr = 0U;
            /* We don't use state->dynamic_const_reg_ptr here. */

            if (flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE)
               read_bgnd = true;

            result = pvr_pbe_src_format_f2d(flags,
                                            src,
                                            transfer_cmd->dst.vk_format,
                                            down_scale,
                                            state->dont_force_pbe,
                                            &pbe_src_format);
            if (result != VK_SUCCESS)
               return result;

            memset(shader_props, 0U, sizeof(*shader_props));

            layer->pbe_format = pbe_src_format;
            layer->sample =
               (src->surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED);
            shader_props->iterated = true;

            shader_props->pick_component =
               pvr_pick_component_needed(&state->custom_mapping);

            result = pvr_msaa_state(dev_info, transfer_cmd, state, source);
            if (result != VK_SUCCESS)
               return result;

            /* Some formats need the USC to do the linear filtering; fall back
             * to a warning when the combination is unsupported.
             */
            if (state->filter[source] == PVR_FILTER_LINEAR &&
                pvr_requires_usc_linear_filter(src->surface.vk_format)) {
               if (pvr_int_pbe_usc_linear_filter(layer->pbe_format,
                                                 layer->sample,
                                                 layer->msaa,
                                                 shader_props->full_rate)) {
                  layer->linear = true;
               } else {
                  mesa_logw("Transfer: F32 linear filter not supported.");
               }
            }

            result = pvr_transfer_frag_store_get_shader_info(
               transfer_cmd->cmd_buffer->device,
               &ctx->frag_store,
               shader_props,
               &dev_offset,
               &sh_reg_layout);
            if (result != VK_SUCCESS)
               return result;

            assert(dev_offset.addr <= UINT32_MAX);
            prep_data->state.pds_shader_task_offset = (uint32_t)dev_offset.addr;

            result =
               pvr_pds_coeff_task(ctx, transfer_cmd, layer->sample, prep_data);
            if (result != VK_SUCCESS)
               return result;

            unitex_pds_prog.kick_usc = false;
            unitex_pds_prog.clear = false;

            tex_state_dma_size =
               sh_reg_layout->driver_total + sh_reg_layout->compiler_out_total;

            unitex_pds_prog.num_texture_dma_kicks = 1U;
            unitex_pds_prog.num_uniform_dma_kicks = 0U;

            /* Allocate memory for DMA. */
            result = pvr_cmd_buffer_alloc_mem(transfer_cmd->cmd_buffer,
                                              ctx->device->heaps.general_heap,
                                              tex_state_dma_size << 2U,
                                              &pvr_bo);
            if (result != VK_SUCCESS)
               return result;

            result = pvr_sampler_state_for_surface(
               dev_info,
               &transfer_cmd->sources[source].surface,
               state->filter[source],
               sh_reg_layout,
               0U,
               pvr_bo_suballoc_get_map_addr(pvr_bo));
            if (result != VK_SUCCESS)
               return result;

            result = pvr_image_state_for_surface(
               ctx,
               transfer_cmd,
               &transfer_cmd->sources[source].surface,
               0U,
               source,
               sh_reg_layout,
               state,
               0U,
               pvr_bo_suballoc_get_map_addr(pvr_bo));
            if (result != VK_SUCCESS)
               return result;

            pvr_pds_encode_dma_burst(unitex_pds_prog.texture_dma_control,
                                     unitex_pds_prog.texture_dma_address,
                                     state->common_ptr,
                                     tex_state_dma_size,
                                     pvr_bo->dev_addr.addr,
                                     true,
                                     dev_info);

            state->common_ptr += tex_state_dma_size;

            pvr_write_usc_constants(sh_reg_layout,
                                    pvr_bo_suballoc_get_map_addr(pvr_bo));

            if (pvr_pick_component_needed(&state->custom_mapping)) {
               pvr_dma_texel_unwind(state,
                                    sh_reg_layout,
                                    pvr_bo_suballoc_get_map_addr(pvr_bo));
            }
         }

         result = pvr_pds_unitex(dev_info,
                                 ctx,
                                 transfer_cmd,
                                 &unitex_pds_prog,
                                 prep_data);
         if (result != VK_SUCCESS)
            return result;

         /* Emit one primitive block per batch of up to max_mappings_per_pb
          * mappings, plus the control-stream words referencing it.
          */
         while (rem_mappings > 0U) {
            const uint32_t min_free_ctrl_stream_words =
               PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 2
                                                                           : 3;
            const uint32_t num_mappings =
               MIN2(max_mappings_per_pb, rem_mappings);
            struct pvr_rect_mapping *mappings = NULL;
            uint32_t stream_start_offset = 0U;
            pvr_dev_addr_t prim_blk_addr;

            /* Not enough room in the current region array: chain the next
             * one with a link entry and continue there.
             */
            if (free_ctrl_stream_words < min_free_ctrl_stream_words) {
               pvr_dev_addr_t next_region_array_vaddr = stream_base_vaddr;

               num_region_arrays++;
               next_region_array_vaddr.addr +=
                  num_region_arrays *
                  PVR_DW_TO_BYTES(PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS));

               if (PVR_HAS_FEATURE(dev_info,
                                   simple_internal_parameter_format_v2)) {
                  uint32_t link_addr;

                  pvr_csb_pack (&link_addr,
                                IPF_CONTROL_STREAM_LINK_SIPF2,
                                control_stream) {
                     control_stream.cs_ctrl_type =
                        PVRX(IPF_CS_CTRL_TYPE_SIPF2_LINK);
                     control_stream.cs_link.addr = next_region_array_vaddr.addr;
                  }

                  pvr_isp_ctrl_stream_sipf_write_aligned(
                     (uint8_t *)cs_ptr,
                     link_addr,
                     PVR_DW_TO_BYTES(
                        pvr_cmd_length(IPF_CONTROL_STREAM_LINK_SIPF2)));
               } else {
                  pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, control_stream) {
                     control_stream.cs_type = PVRX(IPF_CS_TYPE_LINK);
                     control_stream.cs_link.addr = next_region_array_vaddr.addr;
                  }
               }

               cs_ptr =
                  (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_cs_bo) +
                  num_region_arrays * PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS);
               free_ctrl_stream_words = PVRX(IPF_CONTROL_STREAM_SIZE_DWORDS);

               was_linked = PVR_HAS_FEATURE(dev_info, ipf_creq_pf);
            }

            if (fill_blit)
               mappings = pass ? pass->sources[source].mappings : &fill_mapping;
            else
               mappings = transfer_cmd->sources[source].mappings;

            /* Device-visible address of the block we are about to write. */
            prim_blk_addr = stream_base_vaddr;
            prim_blk_addr.addr +=
               (uintptr_t)blk_cs_ptr -
               (uintptr_t)pvr_bo_suballoc_get_map_addr(pvr_cs_bo);

            result = pvr_isp_primitive_block(dev_info,
                                             ctx,
                                             transfer_cmd,
                                             prep_data,
                                             fill_blit ? NULL : src,
                                             state->custom_filter,
                                             mappings,
                                             num_mappings,
                                             mapping_offset,
                                             read_bgnd,
                                             &stream_start_offset,
                                             &blk_cs_ptr);
            if (result != VK_SUCCESS)
               return result;

            prim_blk_addr.addr += stream_start_offset;

            if (PVR_HAS_FEATURE(dev_info,
                                simple_internal_parameter_format_v2)) {
               uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
               uint32_t tmp;

               /* This part of the control stream is byte granular. */

               pvr_csb_pack (&tmp, IPF_PRIMITIVE_HEADER_SIPF2, prim_header) {
                  prim_header.cs_prim_base_size = 1;
                  prim_header.cs_mask_num_bytes = 1;
                  prim_header.cs_valid_tile0 = true;
               }
               cs_byte_ptr =
                  pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);

               pvr_csb_pack (&tmp, IPF_PRIMITIVE_BASE_SIPF2, word) {
                  word.cs_prim_base = prim_blk_addr;
               }
               cs_byte_ptr =
                  pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 4);

               /* IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 since
                * IPF_PRIMITIVE_HEADER_SIPF2.cs_mask_num_bytes == 1.
                */
               pvr_csb_pack (&tmp,
                             IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2,
                             mask) {
                  /* Two mask bits are set per mapping. */
                  switch (num_mappings) {
                  case 4:
                     mask.cs_mask_one_byte_tile0_7 = true;
                     mask.cs_mask_one_byte_tile0_6 = true;
                     FALLTHROUGH;
                  case 3:
                     mask.cs_mask_one_byte_tile0_5 = true;
                     mask.cs_mask_one_byte_tile0_4 = true;
                     FALLTHROUGH;
                  case 2:
                     mask.cs_mask_one_byte_tile0_3 = true;
                     mask.cs_mask_one_byte_tile0_2 = true;
                     FALLTHROUGH;
                  case 1:
                     mask.cs_mask_one_byte_tile0_1 = true;
                     mask.cs_mask_one_byte_tile0_0 = true;
                     break;
                  default:
                     /* Unreachable since we clamped the value earlier so
                      * reaching this is an implementation error.
                      */
                     unreachable("num_mapping exceeded max_mappings_per_pb");
                     break;
                  }
               }
               /* Only 1 byte since there's only 1 valid tile within the single
                * IPF_BYTE_BASED_MASK_ONE_BYTE_WORD_0_SIPF2 mask.
                * ROGUE_IPF_PRIMITIVE_HEADER_SIPF2.cs_valid_tile0 == true.
                */
               cs_byte_ptr =
                  pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);

               cs_ptr = (uint32_t *)cs_byte_ptr;

               free_ctrl_stream_words -= 2;
            } else {
               pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_FORMAT, word) {
                  word.cs_type = PVRX(IPF_CS_TYPE_PRIM);
                  word.cs_isp_state_read = true;
                  word.cs_isp_state_size = 2U;
                  word.cs_prim_total = 2U * num_mappings - 1U;
                  word.cs_mask_fmt = PVRX(IPF_CS_MASK_FMT_FULL);
                  word.cs_prim_base_pres = true;
               }
               cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_FORMAT);

               pvr_csb_pack (cs_ptr, IPF_PRIMITIVE_BASE, word) {
                  word.cs_prim_base = prim_blk_addr;
               }
               cs_ptr += pvr_cmd_length(IPF_PRIMITIVE_BASE);

               free_ctrl_stream_words -= 2;
            }

            rem_mappings -= num_mappings;
            mapping_offset += num_mappings;
         }
      }

      source++;

      /* A fill blit may also have sources for normal blits. */
      if (fill_blit && transfer_cmd->source_count > 0) {
         /* Fill blit count for custom mapping equals source blit count. While
          * normal blits use only one fill blit.
          */
         /* NOTE(review): with num_sources == 1 in the fill case, `source`
          * cannot exceed num_sources after one increment, so this reset
          * appears unreachable — confirm whether `>=` was intended here.
          */
         if (state->custom_mapping.pass_count == 0 && source > num_sources) {
            fill_blit = false;
            source = 0;
         }
      }
   }

   /* On ipf_creq_pf devices a link must have been emitted iff more than one
    * region array was used.
    */
   if (PVR_HAS_FEATURE(dev_info, ipf_creq_pf))
      assert((num_region_arrays > 1) == was_linked);

   /* Terminate the control stream. */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2)) {
      uint8_t *cs_byte_ptr = (uint8_t *)cs_ptr;
      uint32_t tmp;

      /* clang-format off */
      pvr_csb_pack (&tmp, IPF_CONTROL_STREAM_TERMINATE_SIPF2, term);
      /* clang-format on */

      cs_byte_ptr = pvr_isp_ctrl_stream_sipf_write_aligned(cs_byte_ptr, tmp, 1);

      cs_ptr = (uint32_t *)cs_byte_ptr;
   } else {
      pvr_csb_pack (cs_ptr, IPF_CONTROL_STREAM, word) {
         word.cs_type = PVRX(IPF_CS_TYPE_TERM);
      }
      cs_ptr += pvr_cmd_length(IPF_CONTROL_STREAM);
   }

   /* Heap-relative base of the stream for the macrotile base register. */
   pvr_csb_pack (&regs->isp_mtile_base, CR_ISP_MTILE_BASE, reg) {
      reg.addr =
         PVR_DEV_ADDR(pvr_cs_bo->dev_addr.addr -
                      ctx->device->heaps.transfer_frag_heap->base_addr.addr);
   }

   pvr_csb_pack (&regs->isp_render, CR_ISP_RENDER, reg) {
      reg.mode_type = PVRX(CR_ISP_RENDER_MODE_TYPE_FAST_2D);
   }

   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format_v2) &&
       PVR_HAS_FEATURE(dev_info, ipf_creq_pf)) {
      pvr_csb_pack (&regs->isp_rgn, CR_ISP_RGN_SIPF, isp_rgn) {
         /* Bit 0 in CR_ISP_RGN.cs_size_ipf_creq_pf is used to indicate the
          * presence of a link.
          */
         isp_rgn.cs_size_ipf_creq_pf = was_linked;
      }
   } else {
      /* clang-format off */
      pvr_csb_pack(&regs->isp_rgn, CR_ISP_RGN, isp_rgn);
      /* clang-format on */
   }

   return VK_SUCCESS;
}
4442 
pvr_transfer_set_filter(struct pvr_transfer_cmd * transfer_cmd,struct pvr_transfer_3d_state * state)4443 static void pvr_transfer_set_filter(struct pvr_transfer_cmd *transfer_cmd,
4444                                     struct pvr_transfer_3d_state *state)
4445 {
4446    for (uint32_t i = 0; i < transfer_cmd->source_count; i++) {
4447       VkRect2D *src = &transfer_cmd->sources[i].mappings[0U].src_rect;
4448       VkRect2D *dst = &transfer_cmd->sources[i].mappings[0U].dst_rect;
4449 
4450       /* If no scaling is applied to the copy region, we can use point
4451        * filtering.
4452        */
4453       if (!state->custom_filter && (src->extent.width == dst->extent.width) &&
4454           (src->extent.height == dst->extent.height))
4455          state->filter[i] = PVR_FILTER_POINT;
4456       else
4457          state->filter[i] = transfer_cmd->sources[i].filter;
4458    }
4459 }
4460 
/** Generates hw resources to kick a 3D clip blit.
 *
 * First prepares a background load of the destination via
 * pvr_3d_copy_blit_core(), then emits the ISP control stream for the actual
 * transfer on top of it. Saves and restores custom_mapping.texel_unwind_src
 * around the background pass.
 */
static VkResult pvr_3d_clip_blit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_transfer_cmd *transfer_cmd,
                                 struct pvr_transfer_prep_data *prep_data,
                                 uint32_t pass_idx,
                                 bool *finished_out)
{
   struct pvr_transfer_3d_state *state = &prep_data->state;
   /* Saved so it can be restored after the background pass below. */
   uint32_t texel_unwind_src = state->custom_mapping.texel_unwind_src;
   struct pvr_transfer_cmd bg_cmd = { 0U };
   uint32_t control_reg;
   VkResult result;

   /* Build the background command: same scissor/destination, but with the
    * flags that don't apply to a background load masked off.
    */
   state->dont_force_pbe = false;
   bg_cmd.scissor = transfer_cmd->scissor;
   bg_cmd.cmd_buffer = transfer_cmd->cmd_buffer;
   bg_cmd.flags = transfer_cmd->flags;
   bg_cmd.flags &=
      ~(PVR_TRANSFER_CMD_FLAGS_FAST2D | PVR_TRANSFER_CMD_FLAGS_FILL |
        PVR_TRANSFER_CMD_FLAGS_DSMERGE | PVR_TRANSFER_CMD_FLAGS_PICKD);

   bg_cmd.source_count = state->custom_mapping.pass_count > 0U ? 0 : 1;
   if (bg_cmd.source_count > 0) {
      struct pvr_transfer_cmd_source *src = &bg_cmd.sources[0];

      /* Identity-map the destination onto itself for the background load. */
      src->mappings[0U].src_rect = transfer_cmd->scissor;
      src->mappings[0U].dst_rect = transfer_cmd->scissor;
      src->resolve_op = PVR_RESOLVE_BLEND;
      src->surface = transfer_cmd->dst;
   }

   state->filter[0] = PVR_FILTER_DONTCARE;
   bg_cmd.dst = transfer_cmd->dst;
   /* The background reads the destination surface, so use the destination's
    * texel unwind while preparing it.
    */
   state->custom_mapping.texel_unwind_src =
      state->custom_mapping.texel_unwind_dst;

   result =
      pvr_3d_copy_blit_core(ctx, &bg_cmd, prep_data, pass_idx, finished_out);
   if (result != VK_SUCCESS)
      return result;

   /* If the destination has 4 channels and the source has at most 2, we still
    * need all 4 channels from the USC into the PBE.
    */
   state->dont_force_pbe = true;
   state->custom_mapping.texel_unwind_src = texel_unwind_src;

   /* We need the viewport mask, otherwise all pixels would be disabled. */
   pvr_csb_pack (&control_reg, CR_ISP_BGOBJVALS, reg) {
      reg.mask = true;
   }
   state->regs.isp_bgobjvals |= control_reg;

   pvr_transfer_set_filter(transfer_cmd, state);
   result = pvr_isp_ctrl_stream(&ctx->device->pdevice->dev_info,
                                ctx,
                                transfer_cmd,
                                prep_data);
   if (result != VK_SUCCESS)
      return result;

   /* In case of resolve M -> S, the accumulation is read from and written to a
    * single sampled surface. Make sure that we are resolving and we have the
    * right number of tiles.
    */
   if (state->down_scale) {
      uint64_t tmp;

      pvr_csb_pack (&tmp, CR_PBE_WORD0_MRT0, reg) {
         reg.downscale = true;
      }
      state->regs.pbe_wordx_mrty[0U] |= tmp;

      result = pvr_isp_tiles(ctx->device, state);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}
4541 
static bool pvr_texel_unwind(uint32_t bpp,
                             pvr_dev_addr_t dev_addr,
                             bool is_input,
                             uint32_t texel_extend,
                             uint32_t *texel_unwind_out)
{
   /* Step the address back one texel at a time (at most 15 steps) until the
    * surface is suitably aligned; report how many steps were needed. Returns
    * false when no aligned position was found within the supported range.
    */
   for (uint32_t unwind = 0U; unwind < 16U; unwind++) {
      if (pvr_is_surface_aligned(dev_addr, is_input, bpp)) {
         *texel_unwind_out = unwind;
         return true;
      }

      /* Back up by one texel (dev_addr is a by-value copy). */
      dev_addr.addr -= (bpp / texel_extend) / 8U;
   }

   return false;
}
4567 
pvr_is_identity_mapping(const struct pvr_rect_mapping * mapping)4568 static bool pvr_is_identity_mapping(const struct pvr_rect_mapping *mapping)
4569 {
4570    return (mapping->src_rect.offset.x == mapping->dst_rect.offset.x &&
4571            mapping->src_rect.offset.y == mapping->dst_rect.offset.y &&
4572            mapping->src_rect.extent.width == mapping->dst_rect.extent.width &&
4573            mapping->src_rect.extent.height == mapping->dst_rect.extent.height);
4574 }
4575 
pvr_is_pbe_stride_aligned(const uint32_t stride)4576 static inline bool pvr_is_pbe_stride_aligned(const uint32_t stride)
4577 {
4578    if (stride == 1U)
4579       return true;
4580 
4581    return ((stride & (PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE) - 1U)) ==
4582            0x0U);
4583 }
4584 
4585 static struct pvr_transfer_pass *
pvr_create_pass(struct pvr_transfer_custom_mapping * custom_mapping,uint32_t dst_offset)4586 pvr_create_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4587                 uint32_t dst_offset)
4588 {
4589    struct pvr_transfer_pass *pass;
4590 
4591    assert(custom_mapping->pass_count < PVR_TRANSFER_MAX_PASSES);
4592 
4593    pass = &custom_mapping->passes[custom_mapping->pass_count];
4594    pass->clip_rects_count = 0U;
4595    pass->dst_offset = dst_offset;
4596    pass->source_count = 0U;
4597 
4598    custom_mapping->pass_count++;
4599 
4600    return pass;
4601 }
4602 
4603 /* Acquire pass with given offset. If one doesn't exist, create new. */
4604 static struct pvr_transfer_pass *
pvr_acquire_pass(struct pvr_transfer_custom_mapping * custom_mapping,uint32_t dst_offset)4605 pvr_acquire_pass(struct pvr_transfer_custom_mapping *custom_mapping,
4606                  uint32_t dst_offset)
4607 {
4608    for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
4609       if (custom_mapping->passes[i].dst_offset == dst_offset)
4610          return &custom_mapping->passes[i];
4611    }
4612 
4613    return pvr_create_pass(custom_mapping, dst_offset);
4614 }
4615 
4616 static struct pvr_transfer_wa_source *
pvr_create_source(struct pvr_transfer_pass * pass,uint32_t src_offset,bool extend_height)4617 pvr_create_source(struct pvr_transfer_pass *pass,
4618                   uint32_t src_offset,
4619                   bool extend_height)
4620 {
4621    struct pvr_transfer_wa_source *src;
4622 
4623    assert(pass->source_count < ARRAY_SIZE(pass->sources));
4624 
4625    src = &pass->sources[pass->source_count];
4626    src->mapping_count = 0U;
4627    src->extend_height = extend_height;
4628 
4629    pass->source_count++;
4630 
4631    return src;
4632 }
4633 
4634 /* Acquire source with given offset. If one doesn't exist, create new. */
4635 static struct pvr_transfer_wa_source *
pvr_acquire_source(struct pvr_transfer_pass * pass,uint32_t src_offset,bool extend_height)4636 pvr_acquire_source(struct pvr_transfer_pass *pass,
4637                    uint32_t src_offset,
4638                    bool extend_height)
4639 {
4640    for (uint32_t i = 0U; i < pass->source_count; i++) {
4641       if (pass->sources[i].src_offset == src_offset &&
4642           pass->sources[i].extend_height == extend_height)
4643          return &pass->sources[i];
4644    }
4645 
4646    return pvr_create_source(pass, src_offset, extend_height);
4647 }
4648 
pvr_remove_source(struct pvr_transfer_pass * pass,uint32_t idx)4649 static void pvr_remove_source(struct pvr_transfer_pass *pass, uint32_t idx)
4650 {
4651    assert(idx < pass->source_count);
4652 
4653    for (uint32_t i = idx; i < (pass->source_count - 1U); i++)
4654       pass->sources[i] = pass->sources[i + 1U];
4655 
4656    pass->source_count--;
4657 }
4658 
pvr_remove_mapping(struct pvr_transfer_wa_source * src,uint32_t idx)4659 static void pvr_remove_mapping(struct pvr_transfer_wa_source *src, uint32_t idx)
4660 {
4661    assert(idx < src->mapping_count);
4662 
4663    for (uint32_t i = idx; i < (src->mapping_count - 1U); i++)
4664       src->mappings[i] = src->mappings[i + 1U];
4665 
4666    src->mapping_count--;
4667 }
4668 
4669 static struct pvr_rect_mapping *
pvr_create_mapping(struct pvr_transfer_wa_source * src)4670 pvr_create_mapping(struct pvr_transfer_wa_source *src)
4671 {
4672    assert(src->mapping_count < ARRAY_SIZE(src->mappings));
4673 
4674    return &src->mappings[src->mapping_count++];
4675 }
4676 
4677 /**
4678  * If PBE can't write to surfaces with odd stride, the stride of
4679  * destination surface is doubled to make it even. Height of the surface is
4680  * halved. The source surface is not resized. Each half of the modified
4681  * destination surface samples every second row from the source surface. This
4682  * only works with nearest filtering.
4683  */
pvr_double_stride(struct pvr_transfer_pass * pass,uint32_t stride)4684 static bool pvr_double_stride(struct pvr_transfer_pass *pass, uint32_t stride)
4685 {
4686    struct pvr_rect_mapping *mappings = pass->sources[0].mappings;
4687    uint32_t new_mapping = 0;
4688 
4689    if (stride == 1U)
4690       return false;
4691 
4692    if (mappings[0U].dst_rect.extent.height == 1U &&
4693        pass->sources[0].mapping_count == 1U) {
4694       /* Only one mapping required if height is 1. */
4695       if ((mappings[0U].dst_rect.offset.y & 1U) != 0U) {
4696          mappings[0U].dst_rect.offset.x += (int32_t)stride;
4697          mappings[0U].dst_rect.offset.y /= 2U;
4698          mappings[0U].dst_rect.extent.height =
4699             (mappings[0U].dst_rect.extent.height + 1U) / 2U;
4700       } else {
4701          mappings[0U].dst_rect.extent.height =
4702             (mappings[0U].dst_rect.offset.y +
4703              mappings[0U].dst_rect.extent.height + 1U) /
4704                2U -
4705             mappings[0U].dst_rect.offset.y;
4706          mappings[0U].dst_rect.offset.y /= 2U;
4707       }
4708 
4709       return true;
4710    }
4711 
4712    for (uint32_t i = 0; i < pass->sources[0].mapping_count; i++) {
4713       struct pvr_rect_mapping *mapping_a = &mappings[i];
4714       struct pvr_rect_mapping *mapping_b =
4715          &mappings[pass->sources[0].mapping_count + new_mapping];
4716       int32_t mapping_a_src_rect_y1 =
4717          mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height;
4718       int32_t mapping_b_src_rect_y1 = mapping_a_src_rect_y1;
4719       const bool dst_starts_odd_row = !!(mapping_a->dst_rect.offset.y & 1);
4720       const bool dst_ends_odd_row =
4721          !!((mapping_a->dst_rect.offset.y + mapping_a->dst_rect.extent.height) &
4722             1);
4723       const bool src_starts_odd_row = !!(mapping_a->src_rect.offset.y & 1);
4724       const bool src_ends_odd_row =
4725          !!((mapping_a->src_rect.offset.y + mapping_a->src_rect.extent.height) &
4726             1);
4727 
4728       assert(pass->sources[0].mapping_count + new_mapping <
4729              ARRAY_SIZE(pass->sources[0].mappings));
4730       *mapping_b = *mapping_a;
4731 
4732       mapping_a->src_rect.offset.y = ALIGN_POT(mapping_a->src_rect.offset.y, 2);
4733       if (dst_starts_odd_row && !src_starts_odd_row)
4734          mapping_a->src_rect.offset.y++;
4735       else if (!dst_starts_odd_row && src_starts_odd_row)
4736          mapping_a->src_rect.offset.y--;
4737 
4738       mapping_a_src_rect_y1 = ALIGN_POT(mapping_a_src_rect_y1, 2);
4739       if (dst_ends_odd_row && !src_ends_odd_row)
4740          mapping_a_src_rect_y1++;
4741       else if (!dst_ends_odd_row && src_ends_odd_row)
4742          mapping_a_src_rect_y1--;
4743 
4744       mapping_a->src_rect.extent.height =
4745          mapping_a_src_rect_y1 - mapping_a->src_rect.offset.y;
4746 
4747       mapping_b->src_rect.offset.y = ALIGN_POT(mapping_b->src_rect.offset.y, 2);
4748       if (dst_starts_odd_row && src_starts_odd_row)
4749          mapping_b->src_rect.offset.y--;
4750       else if (!dst_starts_odd_row && !src_starts_odd_row)
4751          mapping_b->src_rect.offset.y++;
4752 
4753       mapping_b_src_rect_y1 = ALIGN_POT(mapping_b_src_rect_y1, 2);
4754       if (dst_ends_odd_row && src_ends_odd_row)
4755          mapping_b_src_rect_y1--;
4756       else if (!dst_ends_odd_row && !src_ends_odd_row)
4757          mapping_b_src_rect_y1++;
4758 
4759       mapping_b->src_rect.extent.height =
4760          mapping_b_src_rect_y1 - mapping_b->src_rect.offset.y;
4761 
4762       /* Destination rectangles. */
4763       mapping_a->dst_rect.offset.y = mapping_a->dst_rect.offset.y / 2;
4764 
4765       if (dst_starts_odd_row)
4766          mapping_a->dst_rect.offset.y++;
4767 
4768       mapping_b->dst_rect.offset.x += stride;
4769       mapping_b->dst_rect.offset.y /= 2;
4770       mapping_b->dst_rect.extent.height /= 2;
4771       mapping_a->dst_rect.extent.height -= mapping_b->dst_rect.extent.height;
4772 
4773       if (!mapping_a->src_rect.extent.width ||
4774           !mapping_a->src_rect.extent.height) {
4775          *mapping_a = *mapping_b;
4776       } else if (mapping_b->src_rect.extent.width &&
4777                  mapping_b->src_rect.extent.height) {
4778          new_mapping++;
4779       }
4780    }
4781 
4782    pass->sources[0].mapping_count++;
4783 
4784    return true;
4785 }
4786 
/* Split a width x height surface at the texel-unwind column: rect_a covers
 * the leading (stride - texel_unwind) texels of each row, rect_b the
 * trailing texel_unwind texels.
 */
static void pvr_split_rect(uint32_t stride,
                           uint32_t height,
                           uint32_t texel_unwind,
                           VkRect2D *rect_a,
                           VkRect2D *rect_b)
{
   const uint32_t split_x = stride - texel_unwind;

   *rect_a = (VkRect2D){
      .offset = { .x = 0, .y = 0 },
      .extent = { .width = split_x, .height = height },
   };

   *rect_b = (VkRect2D){
      .offset = { .x = (int32_t)split_x, .y = 0 },
      .extent = { .width = texel_unwind, .height = height },
   };
}
4803 
pvr_rect_width_covered_by(const VkRect2D * rect_a,const VkRect2D * rect_b)4804 static bool pvr_rect_width_covered_by(const VkRect2D *rect_a,
4805                                       const VkRect2D *rect_b)
4806 {
4807    return (rect_b->offset.x <= rect_a->offset.x &&
4808            (rect_b->offset.x + rect_b->extent.width) >=
4809               (rect_a->offset.x + rect_a->extent.width));
4810 }
4811 
/* Rewrite the rect mappings of the pass' first source to undo a texel
 * unwind of texel_unwind texels on a width x height surface.
 *
 * When input is true the src_rects are adjusted, otherwise the dst_rects.
 * Rects entirely left of the split column are shifted right; rects entirely
 * right of it wrap onto the next row; rects straddling it are split into
 * two mappings.
 */
static void pvr_unwind_rects(uint32_t width,
                             uint32_t height,
                             uint32_t texel_unwind,
                             bool input,
                             struct pvr_transfer_pass *pass)
{
   struct pvr_transfer_wa_source *const source = &pass->sources[0];
   struct pvr_rect_mapping *const mappings = source->mappings;
   /* Snapshot the count: splits append mappings which must not be
    * re-processed by this loop.
    */
   const uint32_t num_mappings = source->mapping_count;
   VkRect2D rect_a, rect_b;

   /* Surface already aligned - nothing to unwind. */
   if (texel_unwind == 0)
      return;

   pvr_split_rect(width, height, texel_unwind, &rect_a, &rect_b);

   for (uint32_t i = 0; i < num_mappings; i++) {
      VkRect2D *const old_rect = input ? &mappings[i].src_rect
                                       : &mappings[i].dst_rect;

      if (height == 1) {
         /* 1D surface: the unwind is a plain horizontal shift. */
         old_rect->offset.x += texel_unwind;
      } else if (width == 1) {
         /* Single-texel-wide surface: rows stand in for texels. */
         old_rect->offset.y += texel_unwind;
      } else if (pvr_rect_width_covered_by(old_rect, &rect_a)) {
         /* Entirely before the split column: shift right. */
         old_rect->offset.x += texel_unwind;
      } else if (pvr_rect_width_covered_by(old_rect, &rect_b)) {
         /* Entirely after the split column: wrap to the start of the next
          * row.
          */
         old_rect->offset.x = texel_unwind - width + old_rect->offset.x;
         old_rect->offset.y++;
      } else {
         /* Mapping requires split. */
         const uint32_t new_mapping = source->mapping_count++;

         VkRect2D *const new_rect = input ? &mappings[new_mapping].src_rect
                                          : &mappings[new_mapping].dst_rect;

         /* The opposite-side rects (dst when adjusting src, and vice versa)
          * are split proportionally so both mappings stay consistent.
          */
         VkRect2D *const new_rect_opp = input ? &mappings[new_mapping].dst_rect
                                              : &mappings[new_mapping].src_rect;
         VkRect2D *const old_rect_opp = input ? &mappings[i].dst_rect
                                              : &mappings[i].src_rect;

         const uint32_t split_point = width - texel_unwind;
         /* Texels of this rect that spill past the split column. */
         const uint32_t split_width =
            old_rect->offset.x + old_rect->extent.width - split_point;

         assert(new_mapping < ARRAY_SIZE(source->mappings));
         mappings[new_mapping] = mappings[i];

         old_rect_opp->extent.width -= split_width;
         new_rect_opp->extent.width = split_width;
         new_rect_opp->offset.x =
            old_rect_opp->offset.x + old_rect_opp->extent.width;

         /* Left part: shifted right up to the end of the row. */
         old_rect->offset.x += texel_unwind;
         old_rect->extent.width = width - old_rect->offset.x;

         /* Right part: wraps to the start of the next row. */
         new_rect->offset.x = 0;
         new_rect->offset.y++;
         new_rect->extent.width = split_width;
      }
   }
}
4874 
/**
 * Assign clip rects to rectangle mappings. TDM can only do two PBE clip
 * rects per screen.
 *
 * Each mapping's dst_rect either merges into an existing clip rect of its
 * pass, becomes a new clip rect, or - when the pass is out of clip rect
 * budget - is moved into a newly created pass. pass_count can grow while
 * iterating; new passes are picked up by the same outer loop.
 */
static void
pvr_map_clip_rects(struct pvr_transfer_custom_mapping *custom_mapping)
{
   for (uint32_t i = 0U; i < custom_mapping->pass_count; i++) {
      struct pvr_transfer_pass *pass = &custom_mapping->passes[i];

      pass->clip_rects_count = 0U;

      for (uint32_t s = 0U; s < pass->source_count; s++) {
         struct pvr_transfer_wa_source *src = &pass->sources[s];

         for (uint32_t j = 0U; j < src->mapping_count; j++) {
            struct pvr_rect_mapping *mappings = src->mappings;
            VkRect2D *clip_rects = pass->clip_rects;
            bool merged = false;

            /* Try merge adjacent clip rects. */
            for (uint32_t k = 0U; k < pass->clip_rects_count; k++) {
               /* Same row span, mapping directly to the right: grow right. */
               if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
                   clip_rects[k].extent.height ==
                      mappings[j].dst_rect.extent.height &&
                   clip_rects[k].offset.x + clip_rects[k].extent.width ==
                      mappings[j].dst_rect.offset.x) {
                  clip_rects[k].extent.width +=
                     mappings[j].dst_rect.extent.width;
                  merged = true;
                  break;
               }

               /* Same row span, mapping directly to the left: grow left. */
               if (clip_rects[k].offset.y == mappings[j].dst_rect.offset.y &&
                   clip_rects[k].extent.height ==
                      mappings[j].dst_rect.extent.height &&
                   clip_rects[k].offset.x ==
                      mappings[j].dst_rect.offset.x +
                         mappings[j].dst_rect.extent.width) {
                  clip_rects[k].offset.x = mappings[j].dst_rect.offset.x;
                  clip_rects[k].extent.width +=
                     mappings[j].dst_rect.extent.width;
                  merged = true;
                  break;
               }

               /* Same column span, mapping directly below: grow down. */
               if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
                   clip_rects[k].extent.width ==
                      mappings[j].dst_rect.extent.width &&
                   clip_rects[k].offset.y + clip_rects[k].extent.height ==
                      mappings[j].dst_rect.offset.y) {
                  clip_rects[k].extent.height +=
                     mappings[j].dst_rect.extent.height;
                  merged = true;
                  break;
               }

               /* Same column span, mapping directly above: grow up. */
               if (clip_rects[k].offset.x == mappings[j].dst_rect.offset.x &&
                   clip_rects[k].extent.width ==
                      mappings[j].dst_rect.extent.width &&
                   clip_rects[k].offset.y ==
                      mappings[j].dst_rect.offset.y +
                         mappings[j].dst_rect.extent.height) {
                  clip_rects[k].extent.height +=
                     mappings[j].dst_rect.extent.height;
                  clip_rects[k].offset.y = mappings[j].dst_rect.offset.y;
                  merged = true;
                  break;
               }
            }

            if (merged)
               continue;

            /* Create new pass if needed, TDM can only have 2 clip rects. */
            if (pass->clip_rects_count >= custom_mapping->max_clip_rects) {
               struct pvr_transfer_pass *new_pass =
                  pvr_create_pass(custom_mapping, pass->dst_offset);
               struct pvr_transfer_wa_source *new_source =
                  pvr_create_source(new_pass,
                                    src->src_offset,
                                    src->extend_height);
               struct pvr_rect_mapping *new_mapping =
                  pvr_create_mapping(new_source);

               /* Provisional; recomputed (reset to 0 and rebuilt) when the
                * outer loop reaches the new pass.
                */
               new_pass->clip_rects_count = 1U;
               *new_mapping = src->mappings[j];

               pvr_remove_mapping(src, j);

               if (src->mapping_count == 0) {
                  pvr_remove_source(pass, s);
                  /* Re-examine the source shifted into slot s. */
                  s--;
               } else {
                  /* Redo - mapping was replaced. */
                  j--;
               }
            } else {
               pass->clip_rects[pass->clip_rects_count] =
                  src->mappings[j].dst_rect;

               pass->clip_rects_count++;

               assert(pass->clip_rects_count <= ARRAY_SIZE(pass->clip_rects));
            }
         }
      }
   }
}
4984 
pvr_extend_height(const VkRect2D * rect,const uint32_t height,const uint32_t unwind_src)4985 static bool pvr_extend_height(const VkRect2D *rect,
4986                               const uint32_t height,
4987                               const uint32_t unwind_src)
4988 {
4989    if (rect->offset.x >= (int32_t)unwind_src)
4990       return false;
4991 
4992    return (rect->offset.y > (int32_t)height) ||
4993           ((rect->offset.y + rect->extent.height) > (int32_t)height);
4994 }
4995 
/* Build the workaround rect mappings and clip rects for all passes.
 *
 * Dimensions are first scaled into the texel-extended space. The source
 * texel unwind is applied to the src rects, then (optionally) the
 * destination stride is doubled, then the destination texel unwind is
 * applied to the dst rects. Finally clip rects are assigned and sources
 * whose mappings disagree on height extension are split apart.
 *
 * Note: dst_mem_layout is currently unused in this function body.
 */
static void
pvr_generate_custom_mapping(uint32_t src_stride,
                            uint32_t src_width,
                            uint32_t src_height,
                            uint32_t dst_stride,
                            uint32_t dst_width,
                            uint32_t dst_height,
                            enum pvr_memlayout dst_mem_layout,
                            struct pvr_transfer_custom_mapping *custom_mapping)
{
   /* Work in texel-extended units from here on. */
   src_stride *= custom_mapping->texel_extend_src;
   src_width *= custom_mapping->texel_extend_src;
   dst_stride *= custom_mapping->texel_extend_dst;
   dst_width *= custom_mapping->texel_extend_dst;

   if (custom_mapping->texel_unwind_src > 0U) {
      pvr_unwind_rects(src_stride,
                       src_height,
                       custom_mapping->texel_unwind_src,
                       true,
                       &custom_mapping->passes[0U]);
   }

   if (custom_mapping->double_stride) {
      /* pvr_double_stride() may decline (stride == 1); record the actual
       * outcome.
       */
      custom_mapping->double_stride =
         pvr_double_stride(&custom_mapping->passes[0U], dst_stride);

      dst_stride *= 2U;
   }

   pvr_unwind_rects(dst_stride,
                    dst_height,
                    custom_mapping->texel_unwind_dst,
                    false,
                    &custom_mapping->passes[0U]);

   pvr_map_clip_rects(custom_mapping);

   /* If the last row of the source mapping is sampled, height of the surface
    * can only be increased if the new area contains a valid region. Some blits
    * are split to two sources.
    */
   if (custom_mapping->texel_unwind_src > 0U) {
      for (uint32_t i = 0; i < custom_mapping->pass_count; i++) {
         struct pvr_transfer_pass *pass = &custom_mapping->passes[i];

         for (uint32_t j = 0; j < pass->source_count; j++) {
            struct pvr_transfer_wa_source *src = &pass->sources[j];

            for (uint32_t k = 0; k < src->mapping_count; k++) {
               VkRect2D *src_rect = &src->mappings[k].src_rect;
               bool extend_height =
                  pvr_extend_height(src_rect,
                                    src_height,
                                    custom_mapping->texel_unwind_src);

               if (src->mapping_count == 1) {
                  /* Sole mapping: the source simply adopts the flag. */
                  src->extend_height = extend_height;
               } else if (!src->extend_height && extend_height) {
                  /* Mismatch: move this mapping to a source that has the
                   * extend_height flag set (creating it if needed).
                   */
                  struct pvr_transfer_wa_source *new_src =
                     pvr_acquire_source(pass, src->src_offset, extend_height);

                  new_src->mappings[new_src->mapping_count] = src->mappings[k];
                  new_src->src_offset = src->src_offset;

                  /* Close the gap left behind in the old source. */
                  for (uint32_t l = k + 1; l < src->mapping_count; l++)
                     src->mappings[l - 1] = src->mappings[l];

                  new_src->mapping_count++;
                  src->mapping_count--;
                  /* Re-examine the mapping shifted into slot k. */
                  k--;
               }
            }
         }
      }
   }
}
5073 
5074 static bool
pvr_get_custom_mapping(const struct pvr_device_info * dev_info,const struct pvr_transfer_cmd * transfer_cmd,uint32_t max_clip_rects,struct pvr_transfer_custom_mapping * custom_mapping)5075 pvr_get_custom_mapping(const struct pvr_device_info *dev_info,
5076                        const struct pvr_transfer_cmd *transfer_cmd,
5077                        uint32_t max_clip_rects,
5078                        struct pvr_transfer_custom_mapping *custom_mapping)
5079 {
5080    const uint32_t dst_bpp =
5081       vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
5082    const struct pvr_transfer_cmd_source *src = NULL;
5083    struct pvr_transfer_pass *pass;
5084    bool ret;
5085 
5086    custom_mapping->max_clip_rects = max_clip_rects;
5087    custom_mapping->texel_unwind_src = 0U;
5088    custom_mapping->texel_unwind_dst = 0U;
5089    custom_mapping->texel_extend_src = 1U;
5090    custom_mapping->texel_extend_dst = 1U;
5091    custom_mapping->pass_count = 0U;
5092 
5093    if (transfer_cmd->source_count > 1)
5094       return false;
5095 
5096    custom_mapping->max_clip_size = PVR_MAX_CLIP_SIZE(dev_info);
5097 
5098    ret = pvr_texel_unwind(dst_bpp,
5099                           transfer_cmd->dst.dev_addr,
5100                           false,
5101                           1U,
5102                           &custom_mapping->texel_unwind_dst);
5103    if (!ret) {
5104       custom_mapping->texel_extend_dst = dst_bpp / 8U;
5105       if (transfer_cmd->source_count > 0) {
5106          if (transfer_cmd->sources[0].surface.mem_layout ==
5107              PVR_MEMLAYOUT_LINEAR) {
5108             custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
5109          } else if (transfer_cmd->sources[0].surface.mem_layout ==
5110                        PVR_MEMLAYOUT_TWIDDLED &&
5111                     transfer_cmd->sources[0].surface.height == 1U) {
5112             custom_mapping->texel_extend_src = custom_mapping->texel_extend_dst;
5113          }
5114       }
5115 
5116       ret = pvr_texel_unwind(dst_bpp,
5117                              transfer_cmd->dst.dev_addr,
5118                              false,
5119                              custom_mapping->texel_extend_dst,
5120                              &custom_mapping->texel_unwind_dst);
5121       if (!ret)
5122          return false;
5123    }
5124 
5125    if (transfer_cmd->source_count > 0) {
5126       src = &transfer_cmd->sources[0];
5127       const uint32_t src_bpp =
5128          vk_format_get_blocksizebits(src->surface.vk_format);
5129 
5130       ret = pvr_is_surface_aligned(src->surface.dev_addr, true, src_bpp);
5131 
5132       if (!ret && (src->surface.mem_layout == PVR_MEMLAYOUT_LINEAR ||
5133                    src->surface.height == 1U)) {
5134          ret = pvr_texel_unwind(src_bpp,
5135                                 src->surface.dev_addr,
5136                                 true,
5137                                 custom_mapping->texel_extend_src,
5138                                 &custom_mapping->texel_unwind_src);
5139       }
5140 
5141       if (!ret) {
5142          custom_mapping->texel_extend_src = dst_bpp / 8U;
5143          custom_mapping->texel_extend_dst = custom_mapping->texel_extend_src;
5144 
5145          ret = pvr_texel_unwind(src_bpp,
5146                                 src->surface.dev_addr,
5147                                 true,
5148                                 custom_mapping->texel_extend_src,
5149                                 &custom_mapping->texel_unwind_src);
5150       }
5151 
5152       if (!ret)
5153          return false;
5154    }
5155 
5156    VkRect2D rect = transfer_cmd->scissor;
5157    assert(
5158       (rect.offset.x + rect.extent.width) <= custom_mapping->max_clip_size &&
5159       (rect.offset.y + rect.extent.height) <= custom_mapping->max_clip_size);
5160 
5161    /* Texel extend only works with strided memory layout, because pixel width is
5162     * changed. Texel unwind only works with strided memory layout. 1D blits are
5163     * allowed.
5164     */
5165    if (src && src->surface.height > 1U &&
5166        (custom_mapping->texel_extend_src > 1U ||
5167         custom_mapping->texel_unwind_src > 0U) &&
5168        src->surface.mem_layout != PVR_MEMLAYOUT_LINEAR) {
5169       return false;
5170    }
5171 
5172    /* Texel extend only works with strided memory layout, because pixel width is
5173     * changed. Texel unwind only works with strided memory layout. 1D blits are
5174     * allowed.
5175     */
5176    if ((custom_mapping->texel_extend_dst > 1U ||
5177         custom_mapping->texel_unwind_dst > 0U) &&
5178        transfer_cmd->dst.mem_layout != PVR_MEMLAYOUT_LINEAR &&
5179        transfer_cmd->dst.height > 1U) {
5180       return false;
5181    }
5182 
5183    if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_LINEAR) {
5184       custom_mapping->double_stride = !pvr_is_pbe_stride_aligned(
5185          transfer_cmd->dst.stride * custom_mapping->texel_extend_dst);
5186    }
5187 
5188    if (custom_mapping->texel_unwind_src > 0U ||
5189        custom_mapping->texel_unwind_dst > 0U || custom_mapping->double_stride) {
5190       struct pvr_transfer_wa_source *wa_src;
5191       struct pvr_rect_mapping *mapping;
5192 
5193       pass = pvr_acquire_pass(custom_mapping, 0U);
5194       wa_src = pvr_create_source(pass, 0U, false);
5195       mapping = pvr_create_mapping(wa_src);
5196 
5197       if (transfer_cmd->source_count > 0) {
5198          *mapping = src->mappings[0U];
5199       } else {
5200          mapping->src_rect = transfer_cmd->scissor;
5201          mapping->dst_rect = transfer_cmd->scissor;
5202       }
5203    } else {
5204       return false;
5205    }
5206 
5207    if (custom_mapping->texel_extend_src > 1U ||
5208        custom_mapping->texel_extend_dst > 1U) {
5209       pass->sources[0].mappings[0U].src_rect.offset.x *=
5210          (int32_t)custom_mapping->texel_extend_dst;
5211       pass->sources[0].mappings[0U].src_rect.extent.width *=
5212          (int32_t)custom_mapping->texel_extend_dst;
5213       pass->sources[0].mappings[0U].dst_rect.offset.x *=
5214          (int32_t)custom_mapping->texel_extend_dst;
5215       pass->sources[0].mappings[0U].dst_rect.extent.width *=
5216          (int32_t)custom_mapping->texel_extend_dst;
5217    }
5218 
5219    if (transfer_cmd->source_count > 0) {
5220       pvr_generate_custom_mapping(transfer_cmd->sources[0].surface.stride,
5221                                   transfer_cmd->sources[0].surface.width,
5222                                   transfer_cmd->sources[0].surface.height,
5223                                   transfer_cmd->dst.stride,
5224                                   transfer_cmd->dst.width,
5225                                   transfer_cmd->dst.height,
5226                                   transfer_cmd->dst.mem_layout,
5227                                   custom_mapping);
5228    } else {
5229       pvr_generate_custom_mapping(0U,
5230                                   0U,
5231                                   0U,
5232                                   transfer_cmd->dst.stride,
5233                                   transfer_cmd->dst.width,
5234                                   transfer_cmd->dst.height,
5235                                   transfer_cmd->dst.mem_layout,
5236                                   custom_mapping);
5237    }
5238 
5239    return true;
5240 }
5241 
/* Scale a rect horizontally by the texel-extend factor; the vertical
 * extent is left untouched.
 */
static void pvr_pbe_extend_rect(uint32_t texel_extend, VkRect2D *rect)
{
   VkRect2D scaled = *rect;

   scaled.offset.x *= texel_extend;
   scaled.extent.width *= texel_extend;

   *rect = scaled;
}
5247 
/* Clamp rect_a in place to its intersection with rect_b. */
static void pvr_pbe_rect_intersect(VkRect2D *rect_a, VkRect2D *rect_b)
{
   /* Compute all edges before writing anything back, since the extents are
    * derived from the original offsets.
    */
   const int32_t x0 = MAX2(rect_a->offset.x, rect_b->offset.x);
   const uint32_t x1 = MIN2(rect_a->offset.x + rect_a->extent.width,
                            rect_b->offset.x + rect_b->extent.width);
   const int32_t y0 = MAX2(rect_a->offset.y, rect_b->offset.y);
   const uint32_t y1 = MIN2(rect_a->offset.y + rect_a->extent.height,
                            rect_b->offset.y + rect_b->extent.height);

   rect_a->offset.x = x0;
   rect_a->extent.width = x1 - x0;
   rect_a->offset.y = y0;
   rect_a->extent.height = y1 - y0;
}
5259 
/* Pick the multi-channel UINT format whose total width matches the source
 * format's bits per pixel, for use while texel extending. Falls back to
 * R8_UINT for any other width.
 */
static VkFormat pvr_texel_extend_src_format(VkFormat vk_format)
{
   const uint32_t bpp = vk_format_get_blocksizebits(vk_format);

   switch (bpp) {
   case 16:
      return VK_FORMAT_R8G8_UINT;
   case 32:
      return VK_FORMAT_R8G8B8A8_UINT;
   case 48:
      return VK_FORMAT_R16G16B16_UINT;
   default:
      return VK_FORMAT_R8_UINT;
   }
}
5282 
5283 static void
pvr_modify_command(struct pvr_transfer_custom_mapping *custom_mapping,
                   uint32_t pass_idx,
                   struct pvr_transfer_cmd *transfer_cmd)
{
   /* Rewrite transfer_cmd in place for pass 'pass_idx' of a custom-mapping
    * workaround blit: apply texel extend/unwind and double-stride adjustments
    * to formats, sizes and rectangles, copy this pass's per-source rect
    * mappings into the command, and offset the device addresses accordingly.
    * NOTE(review): called once per pass; the adjustments below assume the
    * command has not already been modified for this pass.
    */
   struct pvr_transfer_pass *pass = &custom_mapping->passes[pass_idx];
   uint32_t bpp;

   if (custom_mapping->texel_extend_src > 1U) {
      struct pvr_rect_mapping *mapping = &transfer_cmd->sources[0].mappings[0];

      /* Widen both rects and reinterpret both surfaces as R8_UINT so each
       * original texel is handled as texel_extend_src bytes.
       */
      pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->dst_rect);
      pvr_pbe_extend_rect(custom_mapping->texel_extend_src, &mapping->src_rect);

      transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->dst.width *= custom_mapping->texel_extend_src;
      transfer_cmd->dst.stride *= custom_mapping->texel_extend_src;
      transfer_cmd->sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->sources[0].surface.width *=
         custom_mapping->texel_extend_src;
      transfer_cmd->sources[0].surface.stride *=
         custom_mapping->texel_extend_src;
   } else if (custom_mapping->texel_extend_dst > 1U) {
      /* Destination-side extend: widen the scissor, clamped to the largest
       * clip rect the HW supports.
       */
      VkRect2D max_clip = {
         .offset = { 0, 0 },
         .extent = { custom_mapping->max_clip_size,
                     custom_mapping->max_clip_size },
      };

      pvr_pbe_extend_rect(custom_mapping->texel_extend_dst,
                          &transfer_cmd->scissor);

      pvr_pbe_rect_intersect(&transfer_cmd->scissor, &max_clip);

      if (transfer_cmd->source_count > 0) {
         transfer_cmd->sources[0].surface.width *=
            custom_mapping->texel_extend_dst;
         transfer_cmd->sources[0].surface.stride *=
            custom_mapping->texel_extend_dst;

         transfer_cmd->sources[0].surface.vk_format =
            pvr_texel_extend_src_format(
               transfer_cmd->sources[0].surface.vk_format);
      }

      transfer_cmd->dst.vk_format = VK_FORMAT_R8_UINT;
      transfer_cmd->dst.width *= custom_mapping->texel_extend_dst;
      transfer_cmd->dst.stride *= custom_mapping->texel_extend_dst;
   }

   if (custom_mapping->double_stride) {
      transfer_cmd->dst.width *= 2U;
      transfer_cmd->dst.stride *= 2U;
   }

   if (custom_mapping->texel_unwind_src > 0U) {
      /* The source base address is moved back by texel_unwind_src texels
       * further down, so grow the surface to keep the unwound texels
       * addressable.
       */
      if (transfer_cmd->sources[0].surface.height == 1U) {
         transfer_cmd->sources[0].surface.width +=
            custom_mapping->texel_unwind_src;
         transfer_cmd->sources[0].surface.stride +=
            custom_mapping->texel_unwind_src;
      } else if (transfer_cmd->sources[0].surface.stride == 1U) {
         transfer_cmd->sources[0].surface.height +=
            custom_mapping->texel_unwind_src;
      } else {
         /* Increase the source width by the texel unwind. If the texel unwind
          * is less than the distance between width and stride, the blit can be
          * done with one rectangle mapping, but the width of the surface needs
          * to be increased in case we sample from the area between width and
          * stride.
          */
         transfer_cmd->sources[0].surface.width =
            MIN2(transfer_cmd->sources[0].surface.width +
                    custom_mapping->texel_unwind_src,
                 transfer_cmd->sources[0].surface.stride);
      }
   }

   /* Install this pass's rect mappings. Sources beyond the first are cloned
    * from source 0 before being given their own mappings.
    */
   for (uint32_t i = 0U; i < pass->source_count; i++) {
      struct pvr_transfer_wa_source *src = &pass->sources[i];

      if (i > 0)
         transfer_cmd->sources[i] = transfer_cmd->sources[0];

      transfer_cmd->sources[i].mapping_count = src->mapping_count;
      for (uint32_t j = 0U; j < transfer_cmd->sources[i].mapping_count; j++)
         transfer_cmd->sources[i].mappings[j] = src->mappings[j];

      if (src->extend_height)
         transfer_cmd->sources[i].surface.height += 1U;

      /* Clamp surface dimensions to the HW maximum. */
      transfer_cmd->sources[i].surface.width =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.width);
      transfer_cmd->sources[i].surface.height =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.height);
      transfer_cmd->sources[i].surface.stride =
         MIN2(PVR_MAX_WIDTH, transfer_cmd->sources[i].surface.stride);
   }

   /* A one-texel-high destination is re-expressed as a twiddled surface wide
    * enough to cover the stride plus the unwind.
    */
   if (transfer_cmd->dst.height == 1U) {
      transfer_cmd->dst.width =
         transfer_cmd->dst.stride + custom_mapping->texel_unwind_dst;
      transfer_cmd->dst.mem_layout = PVR_MEMLAYOUT_TWIDDLED;
   }

   if (transfer_cmd->dst.mem_layout == PVR_MEMLAYOUT_TWIDDLED) {
      transfer_cmd->dst.width =
         MIN2((uint32_t)custom_mapping->max_clip_size, transfer_cmd->dst.width);
      transfer_cmd->dst.height = MIN2((uint32_t)custom_mapping->max_clip_size,
                                      transfer_cmd->dst.height);
   } else {
      transfer_cmd->dst.width = MIN2(PVR_MAX_WIDTH, transfer_cmd->dst.width);
   }

   /* Rebase device addresses: back up by the unwind and advance by this
    * pass's per-source byte offset (scaled by sample count).
    */
   if (transfer_cmd->source_count > 0) {
      for (uint32_t i = 0; i < pass->source_count; i++) {
         struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[i];

         bpp = vk_format_get_blocksizebits(src->surface.vk_format);

         src->surface.dev_addr.addr -=
            custom_mapping->texel_unwind_src * bpp / 8U;
         src->surface.dev_addr.addr += MAX2(src->surface.sample_count, 1U) *
                                       pass->sources[i].src_offset * bpp / 8U;
      }
   }

   bpp = vk_format_get_blocksizebits(transfer_cmd->dst.vk_format);
   transfer_cmd->dst.dev_addr.addr -=
      custom_mapping->texel_unwind_dst * bpp / 8U;
   transfer_cmd->dst.dev_addr.addr +=
      MAX2(transfer_cmd->dst.sample_count, 1U) * pass->dst_offset * bpp / 8U;

   /* The pass may split one source into several; publish the new count. */
   if (transfer_cmd->source_count > 0)
      transfer_cmd->source_count = pass->source_count;
}
5418 
5419 /* Route a copy_blit (FastScale HW) to a clip_blit (Fast2D HW).
5420  * Destination rectangle can be specified in dst_rect, or NULL to use existing.
5421  */
pvr_reroute_to_clip(struct pvr_transfer_ctx * ctx,const struct pvr_transfer_cmd * transfer_cmd,const struct VkRect2D * dst_rect,struct pvr_transfer_prep_data * prep_data,uint32_t pass_idx,bool * finished_out)5422 static VkResult pvr_reroute_to_clip(struct pvr_transfer_ctx *ctx,
5423                                     const struct pvr_transfer_cmd *transfer_cmd,
5424                                     const struct VkRect2D *dst_rect,
5425                                     struct pvr_transfer_prep_data *prep_data,
5426                                     uint32_t pass_idx,
5427                                     bool *finished_out)
5428 {
5429    struct pvr_transfer_cmd clip_transfer_cmd;
5430 
5431    clip_transfer_cmd = *transfer_cmd;
5432    clip_transfer_cmd.flags |= PVR_TRANSFER_CMD_FLAGS_FAST2D;
5433 
5434    if (transfer_cmd->source_count <= 1U) {
5435       if (dst_rect)
5436          clip_transfer_cmd.scissor = *dst_rect;
5437 
5438       return pvr_3d_clip_blit(ctx,
5439                               &clip_transfer_cmd,
5440                               prep_data,
5441                               pass_idx,
5442                               finished_out);
5443    }
5444 
5445    return vk_error(ctx->device, VK_ERROR_FORMAT_NOT_SUPPORTED);
5446 }
5447 
static VkResult pvr_3d_copy_blit(struct pvr_transfer_ctx *ctx,
                                 struct pvr_transfer_cmd *transfer_cmd,
                                 struct pvr_transfer_prep_data *prep_data,
                                 uint32_t pass_idx,
                                 bool *finished_out)
{
   /* Prepare one pass of a copy blit on the FastScale HW path. May remap
    * formats to equivalent RAW uint formats, apply custom-mapping
    * workarounds (possibly rerouting to the clip blit path), or reroute
    * DS-merge blits to a clip blit. 'finished_out' tells the caller whether
    * further passes are needed.
    */
   const struct pvr_device_info *const dev_info =
      &ctx->device->pdevice->dev_info;

   struct pvr_transfer_3d_state *state = &prep_data->state;
   /* 'active_cmd' points at either the caller's command or the local,
    * modified copy 'int_cmd'; the caller's command is left untouched when a
    * copy is made.
    */
   struct pvr_transfer_cmd *active_cmd = transfer_cmd;
   struct pvr_transfer_cmd int_cmd;
   VkResult result;

   state->dont_force_pbe = false;
   state->pass_idx = pass_idx;

   pvr_transfer_set_filter(transfer_cmd, state);

   if (transfer_cmd->source_count == 1U) {
      struct pvr_transfer_cmd_source *src = &transfer_cmd->sources[0];

      /* Try to work out a condition to map pixel formats to RAW. That is only
       * possible if we don't perform any kind of 2D operation on the blit as we
       * don't know the actual pixel values - i.e. it has to be point sampled -
       * scaling doesn't matter as long as point sampled.
       */
      if (src->surface.vk_format == transfer_cmd->dst.vk_format &&
          state->filter[0] == PVR_FILTER_POINT &&
          src->surface.sample_count <= transfer_cmd->dst.sample_count &&
          (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) == 0U) {
         uint32_t bpp;

         int_cmd = *transfer_cmd;
         active_cmd = &int_cmd;
         bpp = vk_format_get_blocksizebits(int_cmd.dst.vk_format);

         /* Pick a RAW uint format of the same bit width; unknown widths fall
          * back to the original (unmodified) command.
          */
         if (bpp > 0U) {
            switch (bpp) {
            case 8U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8_UINT;
               break;
            case 16U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8_UINT;
               break;
            case 24U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R8G8B8_UINT;
               break;
            case 32U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32_UINT;
               break;
            case 48U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R16G16B16_UINT;
               break;
            case 64U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32_UINT;
               break;
            case 96U:
               int_cmd.sources[0].surface.vk_format = VK_FORMAT_R32G32B32_UINT;
               break;
            case 128U:
               int_cmd.sources[0].surface.vk_format =
                  VK_FORMAT_R32G32B32A32_UINT;
               break;
            default:
               active_cmd = transfer_cmd;
               break;
            }
         }

         /* Keep src and dst formats identical (harmless when int_cmd was
          * abandoned above, as it is then unused).
          */
         int_cmd.dst.vk_format = int_cmd.sources[0].surface.vk_format;
      }
   }

   /* Work out the custom-mapping workaround once, on the first pass; the
    * state persists across the remaining passes.
    */
   if (pass_idx == 0U) {
      pvr_get_custom_mapping(dev_info, active_cmd, 3U, &state->custom_mapping);

      if (state->custom_mapping.texel_extend_src > 1U)
         state->custom_mapping.texel_extend_dst = 1U;
   }

   if (state->custom_mapping.pass_count > 0U) {
      struct pvr_transfer_pass *pass = &state->custom_mapping.passes[pass_idx];

      /* Make sure we modify a private copy, never the caller's command. */
      if (active_cmd != &int_cmd) {
         int_cmd = *active_cmd;
         active_cmd = &int_cmd;
      }

      state->custom_filter = true;

      pvr_modify_command(&state->custom_mapping, pass_idx, active_cmd);

      /* Multiple mappings/sources or a doubled stride need the clip path;
       * otherwise a plain copy with the (rescaled) single mapping suffices.
       */
      if (state->custom_mapping.double_stride ||
          pass->sources[0].mapping_count > 1U || pass->source_count > 1U) {
         result =
            pvr_3d_clip_blit(ctx, active_cmd, prep_data, pass_idx, finished_out);
      } else {
         struct pvr_rect_mapping *mappings = &pass->sources[0].mappings[0U];

         /* Undo the destination texel extend on the source rect. */
         mappings[0U].src_rect.offset.x /=
            MAX2(1U, state->custom_mapping.texel_extend_dst);
         mappings[0U].src_rect.extent.width /=
            MAX2(1U, state->custom_mapping.texel_extend_dst);

         if (int_cmd.source_count > 0) {
            for (uint32_t i = 0U; i < pass->sources[0].mapping_count; i++)
               active_cmd->sources[0].mappings[i] = mappings[i];
         }

         active_cmd->scissor = mappings[0U].dst_rect;

         result = pvr_3d_copy_blit_core(ctx,
                                        active_cmd,
                                        prep_data,
                                        pass_idx,
                                        finished_out);
      }

      return result;
   }

   /* Route DS merge blits to Clip blit. Background object is used to preserve
    * the unmerged channel.
    */
   if ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_DSMERGE) != 0U) {
      /* PBE byte mask could be used for DS merge with FastScale. Clearing the
       * other channel on a DS merge requires Clip blit.
       */
      if (!PVR_HAS_ERN(dev_info, 42064) ||
          ((transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FILL) != 0U)) {
         return pvr_reroute_to_clip(ctx,
                                    active_cmd,
                                    &active_cmd->scissor,
                                    prep_data,
                                    pass_idx,
                                    finished_out);
      }
   }

   return pvr_3d_copy_blit_core(ctx,
                                active_cmd,
                                prep_data,
                                pass_idx,
                                finished_out);
}
5594 
5595 /* TODO: This should be generated in csbgen. */
5596 #define TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK \
5597    BITFIELD64_RANGE(2, (53 - 16) + 1)
5598 
static bool pvr_validate_source_addr(pvr_dev_addr_t addr)
{
   /* A source address is usable as a texture-state stride-image address only
    * if it meets the TEXADDR alignment and fits within the TEXADDR bitfield.
    */
   const bool aligned = pvr_dev_addr_is_aligned(
      addr,
      PVRX(TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_ALIGNMENT));
   const bool in_range =
      (addr.addr & ~TEXSTATE_STRIDE_IMAGE_WORD1_TEXADDR_MASK) == 0;

   return aligned && in_range;
}
5612 
pvr_supports_texel_unwind(struct pvr_transfer_cmd * transfer_cmd)5613 static bool pvr_supports_texel_unwind(struct pvr_transfer_cmd *transfer_cmd)
5614 {
5615    struct pvr_transfer_cmd_surface *dst = &transfer_cmd->dst;
5616 
5617    if (transfer_cmd->source_count > 1)
5618       return false;
5619 
5620    if (transfer_cmd->source_count) {
5621       struct pvr_transfer_cmd_surface *src = &transfer_cmd->sources[0].surface;
5622 
5623       if (src->height == 1) {
5624          if (src->mem_layout != PVR_MEMLAYOUT_LINEAR &&
5625              src->mem_layout != PVR_MEMLAYOUT_TWIDDLED &&
5626              src->mem_layout != PVR_MEMLAYOUT_3DTWIDDLED) {
5627             return false;
5628          }
5629       } else if (src->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
5630                  src->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
5631          if (!pvr_validate_source_addr(src->dev_addr))
5632             return false;
5633       } else {
5634          if (src->mem_layout != PVR_MEMLAYOUT_LINEAR)
5635             return false;
5636       }
5637    }
5638 
5639    if (dst->mem_layout != PVR_MEMLAYOUT_LINEAR &&
5640        dst->mem_layout != PVR_MEMLAYOUT_TWIDDLED) {
5641       return false;
5642    }
5643 
5644    return true;
5645 }
5646 
pvr_3d_validate_addr(struct pvr_transfer_cmd * transfer_cmd)5647 static bool pvr_3d_validate_addr(struct pvr_transfer_cmd *transfer_cmd)
5648 {
5649    if (!pvr_supports_texel_unwind(transfer_cmd)) {
5650       return pvr_dev_addr_is_aligned(
5651          transfer_cmd->dst.dev_addr,
5652          PVRX(PBESTATE_STATE_WORD0_ADDRESS_LOW_ALIGNMENT));
5653    }
5654 
5655    return true;
5656 }
5657 
static void
pvr_submit_info_stream_init(struct pvr_transfer_ctx *ctx,
                            struct pvr_transfer_prep_data *prep_data,
                            struct pvr_winsys_transfer_cmd *cmd)
{
   /* Serialize the prepared HW register values into the FW command stream.
    * The write order below is the stream layout the FW expects — do not
    * reorder these stores.
    */
   const struct pvr_winsys_transfer_regs *const regs = &prep_data->state.regs;
   const struct pvr_physical_device *const pdevice = ctx->device->pdevice;
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;

   uint32_t *stream_ptr = (uint32_t *)cmd->fw_stream;
   uint32_t *stream_len_ptr = stream_ptr;

   /* Leave space for stream header. */
   stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);

   /* 64-bit registers first. */
   *(uint64_t *)stream_ptr = regs->pds_bgnd0_base;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND0_BASE);

   *(uint64_t *)stream_ptr = regs->pds_bgnd1_base;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND1_BASE);

   *(uint64_t *)stream_ptr = regs->pds_bgnd3_sizeinfo;
   stream_ptr += pvr_cmd_length(CR_PDS_BGRND3_SIZEINFO);

   *(uint64_t *)stream_ptr = regs->isp_mtile_base;
   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_BASE);

   /* PBE words: 9 render targets, one 64-bit word each. */
   STATIC_ASSERT(ARRAY_SIZE(regs->pbe_wordx_mrty) == 9U);
   STATIC_ASSERT(sizeof(regs->pbe_wordx_mrty[0]) == sizeof(uint64_t));
   memcpy(stream_ptr, regs->pbe_wordx_mrty, sizeof(regs->pbe_wordx_mrty));
   stream_ptr += 9U * 2U;

   /* 32-bit registers follow. */
   *stream_ptr = regs->isp_bgobjvals;
   stream_ptr += pvr_cmd_length(CR_ISP_BGOBJVALS);

   *stream_ptr = regs->usc_pixel_output_ctrl;
   stream_ptr += pvr_cmd_length(CR_USC_PIXEL_OUTPUT_CTRL);

   *stream_ptr = regs->usc_clear_register0;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register1;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register2;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->usc_clear_register3;
   stream_ptr += pvr_cmd_length(CR_USC_CLEAR_REGISTER);

   *stream_ptr = regs->isp_mtile_size;
   stream_ptr += pvr_cmd_length(CR_ISP_MTILE_SIZE);

   *stream_ptr = regs->isp_render_origin;
   stream_ptr += pvr_cmd_length(CR_ISP_RENDER_ORIGIN);

   *stream_ptr = regs->isp_ctl;
   stream_ptr += pvr_cmd_length(CR_ISP_CTL);

   *stream_ptr = regs->isp_aa;
   stream_ptr += pvr_cmd_length(CR_ISP_AA);

   *stream_ptr = regs->event_pixel_pds_info;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_INFO);

   *stream_ptr = regs->event_pixel_pds_code;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_CODE);

   *stream_ptr = regs->event_pixel_pds_data;
   stream_ptr += pvr_cmd_length(CR_EVENT_PIXEL_PDS_DATA);

   *stream_ptr = regs->isp_render;
   stream_ptr += pvr_cmd_length(CR_ISP_RENDER);

   *stream_ptr = regs->isp_rgn;
   stream_ptr++;

   /* Multicore-only trailing word. */
   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
      *stream_ptr = regs->frag_screen;
      stream_ptr++;
   }

   /* Record the byte length and patch it into the header reserved above. */
   cmd->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)cmd->fw_stream;
   assert(cmd->fw_stream_len <= ARRAY_SIZE(cmd->fw_stream));

   pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
      value.length = cmd->fw_stream_len;
   }
}
5747 
5748 static void
pvr_submit_info_flags_init(const struct pvr_device_info * const dev_info,const struct pvr_transfer_prep_data * const prep_data,struct pvr_winsys_transfer_cmd_flags * flags)5749 pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info,
5750                            const struct pvr_transfer_prep_data *const prep_data,
5751                            struct pvr_winsys_transfer_cmd_flags *flags)
5752 {
5753    *flags = prep_data->flags;
5754    flags->use_single_core = PVR_HAS_FEATURE(dev_info, gpu_multicore_support);
5755 }
5756 
pvr_transfer_job_ws_submit_info_init(struct pvr_transfer_ctx * ctx,struct pvr_transfer_submit * submit,struct vk_sync * wait,struct pvr_winsys_transfer_submit_info * submit_info)5757 static void pvr_transfer_job_ws_submit_info_init(
5758    struct pvr_transfer_ctx *ctx,
5759    struct pvr_transfer_submit *submit,
5760    struct vk_sync *wait,
5761    struct pvr_winsys_transfer_submit_info *submit_info)
5762 {
5763    const struct pvr_device *const device = ctx->device;
5764    const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
5765 
5766    submit_info->frame_num = device->global_queue_present_count;
5767    submit_info->job_num = device->global_cmd_buffer_submit_count;
5768    submit_info->wait = wait;
5769    submit_info->cmd_count = submit->prep_count;
5770 
5771    for (uint32_t i = 0U; i < submit->prep_count; i++) {
5772       struct pvr_winsys_transfer_cmd *const cmd = &submit_info->cmds[i];
5773       struct pvr_transfer_prep_data *prep_data = &submit->prep_array[i];
5774 
5775       pvr_submit_info_stream_init(ctx, prep_data, cmd);
5776       pvr_submit_info_flags_init(dev_info, prep_data, &cmd->flags);
5777    }
5778 }
5779 
pvr_submit_transfer(struct pvr_transfer_ctx * ctx,struct pvr_transfer_submit * submit,struct vk_sync * wait,struct vk_sync * signal_sync)5780 static VkResult pvr_submit_transfer(struct pvr_transfer_ctx *ctx,
5781                                     struct pvr_transfer_submit *submit,
5782                                     struct vk_sync *wait,
5783                                     struct vk_sync *signal_sync)
5784 {
5785    struct pvr_winsys_transfer_submit_info submit_info;
5786 
5787    pvr_transfer_job_ws_submit_info_init(ctx, submit, wait, &submit_info);
5788 
5789    return ctx->device->ws->ops->transfer_submit(ctx->ws_ctx,
5790                                                 &submit_info,
5791                                                 &ctx->device->pdevice->dev_info,
5792                                                 signal_sync);
5793 }
5794 
static VkResult pvr_queue_transfer(struct pvr_transfer_ctx *ctx,
                                   struct pvr_transfer_cmd *transfer_cmd,
                                   struct vk_sync *wait,
                                   struct vk_sync *signal_sync)
{
   /* Prepare one blit, possibly over multiple passes, and submit the
    * accumulated prepares; only the final submit signals 'signal_sync'.
    */
   struct pvr_transfer_prep_data *prep_data = NULL;
   struct pvr_transfer_prep_data *prev_prep_data;
   struct pvr_transfer_submit submit = { 0U };
   bool finished = false;
   uint32_t pass = 0U;
   VkResult result;

   /* Transfer queue might decide to do a blit in multiple passes. When the
    * prepare doesn't set the finished flag this code will keep calling the
    * prepare with increasing pass. If queued transfers are submitted from
    * here we submit them straight away. That's why we only need a single
    * prepare for the blit rather then one for each pass. Otherwise we insert
    * each prepare into the prepare array. When the client does blit batching
    * and we split the blit into multiple passes each pass in each queued
    * transfer adds one more prepare. Thus the prepare array after 2
    * pvr_queue_transfer calls might look like:
    *
    * +------+------++-------+-------+-------+
    * |B0/P0 |B0/P1 || B1/P0 | B1/P1 | B1/P2 |
    * +------+------++-------+-------+-------+
    * F           S/U F                    S/U
    *
    * Bn/Pm : nth blit (queue transfer call) / mth prepare
    * F     : fence point
    * S/U   : update / server sync update point
    */

   while (!finished) {
      /* Take the next free prepare slot; remember the previous one so later
       * passes can inherit its state.
       */
      prev_prep_data = prep_data;
      prep_data = &submit.prep_array[submit.prep_count++];

      /* Clear down the memory before we write to this prep. */
      memset(prep_data, 0U, sizeof(*prep_data));

      if (pass == 0U) {
         if (!pvr_3d_validate_addr(transfer_cmd))
            return vk_error(ctx->device, VK_ERROR_FEATURE_NOT_PRESENT);
      } else {
         /* Transfer queue workarounds could use more than one pass with 3D
          * path.
          */
         prep_data->state = prev_prep_data->state;
      }

      /* Fast2D commands go down the clip-blit path; everything else starts
       * on the copy-blit path (which may itself reroute to clip).
       */
      if (transfer_cmd->flags & PVR_TRANSFER_CMD_FLAGS_FAST2D) {
         result =
            pvr_3d_clip_blit(ctx, transfer_cmd, prep_data, pass, &finished);
      } else {
         result =
            pvr_3d_copy_blit(ctx, transfer_cmd, prep_data, pass, &finished);
      }
      if (result != VK_SUCCESS)
         return result;

      /* Submit if we have finished the blit or if we are out of prepares. */
      if (finished || submit.prep_count == ARRAY_SIZE(submit.prep_array)) {
         /* Only the final submit carries the caller's signal sync. */
         result = pvr_submit_transfer(ctx,
                                      &submit,
                                      wait,
                                      finished ? signal_sync : NULL);
         if (result != VK_SUCCESS)
            return result;

         /* Check if we need to reset prep_count. */
         if (submit.prep_count == ARRAY_SIZE(submit.prep_array))
            submit.prep_count = 0U;
      }

      pass++;
   }

   return VK_SUCCESS;
}
5873 
pvr_transfer_job_submit(struct pvr_transfer_ctx * ctx,struct pvr_sub_cmd_transfer * sub_cmd,struct vk_sync * wait_sync,struct vk_sync * signal_sync)5874 VkResult pvr_transfer_job_submit(struct pvr_transfer_ctx *ctx,
5875                                  struct pvr_sub_cmd_transfer *sub_cmd,
5876                                  struct vk_sync *wait_sync,
5877                                  struct vk_sync *signal_sync)
5878 {
5879    list_for_each_entry_safe (struct pvr_transfer_cmd,
5880                              transfer_cmd,
5881                              sub_cmd->transfer_cmds,
5882                              link) {
5883       /* The fw guarantees that any kick on the same context will be
5884        * synchronized in submission order. This means only the first kick must
5885        * wait, and only the last kick need signal.
5886        */
5887       struct vk_sync *first_cmd_wait_sync = NULL;
5888       struct vk_sync *last_cmd_signal_sync = NULL;
5889       VkResult result;
5890 
5891       if (list_first_entry(sub_cmd->transfer_cmds,
5892                            struct pvr_transfer_cmd,
5893                            link) == transfer_cmd) {
5894          first_cmd_wait_sync = wait_sync;
5895       }
5896 
5897       if (list_last_entry(sub_cmd->transfer_cmds,
5898                           struct pvr_transfer_cmd,
5899                           link) == transfer_cmd) {
5900          last_cmd_signal_sync = signal_sync;
5901       }
5902 
5903       result = pvr_queue_transfer(ctx,
5904                                   transfer_cmd,
5905                                   first_cmd_wait_sync,
5906                                   last_cmd_signal_sync);
5907       if (result != VK_SUCCESS)
5908          return result;
5909    }
5910 
5911    return VK_SUCCESS;
5912 }
5913