xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_job_common.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <stdbool.h>
25 #include <stdint.h>
26 
27 #include "hwdef/rogue_hw_defs.h"
28 #include "hwdef/rogue_hw_utils.h"
29 #include "pvr_csb_enum_helpers.h"
30 #include "pvr_device_info.h"
31 #include "pvr_formats.h"
32 #include "pvr_job_common.h"
33 #include "pvr_private.h"
34 #include "util/macros.h"
35 #include "util/u_math.h"
36 #include "vk_alloc.h"
37 #include "vk_format.h"
38 #include "vk_object.h"
39 
/* Selects the PBE source format and the gamma setting to use when emitting
 * pixels of the given Vulkan format.
 *
 * vk_format: format of the surface being written.
 * default_gamma: value stored to *gamma_out unless the format is sRGB, in
 *    which case PVR_PBE_GAMMA_ENABLED is stored instead.
 * with_packed_usc_channel: only consulted for formats that reach the final
 *    fallback (first channel no wider than 8 bits); selects 8-per-channel
 *    when true and F16-per-channel when false.
 * src_format_out: receives a PBESTATE_SOURCE_FORMAT_* value.
 * gamma_out: receives the gamma setting.
 */
void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
                                      enum pvr_pbe_gamma default_gamma,
                                      bool with_packed_usc_channel,
                                      uint32_t *const src_format_out,
                                      enum pvr_pbe_gamma *const gamma_out)
{
   const uint32_t fmt_8_per_chan = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
   const uint32_t fmt_f16_per_chan =
      PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
   const uint32_t first_chan_bits =
      vk_format_description(vk_format)->channel[0].size;

   *gamma_out = default_gamma;

   /* Formats with 32-bit components and integer formats. */
   if (pvr_vk_format_has_32bit_component(vk_format) ||
       vk_format_is_int(vk_format)) {
      *src_format_out = fmt_8_per_chan;
      return;
   }

   if (vk_format_is_float(vk_format)) {
      *src_format_out = fmt_f16_per_chan;
      return;
   }

   if (vk_format_is_srgb(vk_format)) {
      /* F16 source for gamma'd formats. */
      *gamma_out = PVR_PBE_GAMMA_ENABLED;
      *src_format_out = fmt_f16_per_chan;
      return;
   }

   /* Depth wider than 16 bits. */
   if (vk_format_has_depth(vk_format) &&
       vk_format_get_component_bits(vk_format, UTIL_FORMAT_COLORSPACE_ZS, 0) >
          16) {
      *src_format_out = fmt_8_per_chan;
      return;
   }

   /* Any format that actually carries stencil bits. */
   if (vk_format_has_stencil(vk_format) &&
       vk_format_get_component_bits(vk_format, UTIL_FORMAT_COLORSPACE_ZS, 1) >
          0) {
      *src_format_out = fmt_8_per_chan;
      return;
   }

   /* Remaining formats are classified by the width of their first channel. */
   if (first_chan_bits > 16)
      *src_format_out = fmt_8_per_chan;
   else if (first_chan_bits > 8)
      *src_format_out = fmt_f16_per_chan;
   else
      *src_format_out = with_packed_usc_channel ? fmt_8_per_chan
                                                : fmt_f16_per_chan;
}
82 
pvr_pbe_pack_state(const struct pvr_device_info * dev_info,const struct pvr_pbe_surf_params * surface_params,const struct pvr_pbe_render_params * render_params,uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])83 void pvr_pbe_pack_state(
84    const struct pvr_device_info *dev_info,
85    const struct pvr_pbe_surf_params *surface_params,
86    const struct pvr_pbe_render_params *render_params,
87    uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
88    uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
89 {
90    /* This function needs updating if the value of
91     * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected
92     * value.
93     */
94    STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2);
95 
96    /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS
97     * changes, so check that it's the expected value.
98     */
99    STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
100 
101    pbe_reg_words[2] = 0;
102 
103    if (surface_params->z_only_render) {
104       pbe_cs_words[0] = 0;
105 
106       pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
107          state.emptytile = true;
108       }
109 
110       pbe_reg_words[0] = 0;
111       pbe_reg_words[1] = 0;
112 
113       return;
114    }
115 
116    pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
117       state.address_low = surface_params->addr;
118    }
119 
120    pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
121       state.address_high = surface_params->addr;
122 
123       state.source_format = surface_params->source_format;
124 
125       state.source_pos = pvr_pbestate_source_pos(render_params->source_start);
126       if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
127          state.source_pos_offset_128 = render_params->source_start >=
128                                        PVR_PBE_STARTPOS_BIT128;
129       } else {
130          assert(render_params->source_start < PVR_PBE_STARTPOS_BIT128);
131       }
132 
133       /* MRT index (Use 0 for a single render target)/ */
134       state.mrt_index = render_params->mrt_index;
135 
136       /* Normalization flag based on output format. */
137       state.norm = surface_params->is_normalized;
138 
139       state.packmode = surface_params->pbe_packmode;
140    }
141 
142    pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
143       reg.tilerelative = true;
144 
145       switch (surface_params->mem_layout) {
146       case PVR_MEMLAYOUT_TWIDDLED:
147          reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
148          break;
149 
150       case PVR_MEMLAYOUT_3DTWIDDLED:
151          reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
152          break;
153 
154       case PVR_MEMLAYOUT_LINEAR:
155       default:
156          reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
157          break;
158       }
159 
160       /* FIXME: Remove rotation and y_flip hardcoding if needed. */
161       reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
162       reg.y_flip = false;
163 
164       /* Note: Due to gamma being overridden above, anything other than
165        * ENABLED/NONE is ignored.
166        */
167       if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
168          reg.gamma = true;
169 
170          if (surface_params->nr_components == 2)
171             reg.twocomp_gamma =
172                PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
173       }
174 
175       reg.linestride = (surface_params->stride - 1) /
176                        PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
177       reg.minclip_x = render_params->min_x_clip;
178 
179       /* r, y or depth*/
180       switch (surface_params->swizzle[0]) {
181       case PIPE_SWIZZLE_X:
182          reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
183          break;
184       case PIPE_SWIZZLE_Y:
185          reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
186          break;
187       case PIPE_SWIZZLE_Z:
188          reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
189          break;
190       case PIPE_SWIZZLE_W:
191          reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
192          break;
193       case PIPE_SWIZZLE_0:
194       case PIPE_SWIZZLE_NONE:
195          reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_ZERO;
196          break;
197       case PIPE_SWIZZLE_1:
198          reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_ONE;
199          break;
200       default:
201          unreachable("Unknown enum pipe_swizzle");
202          break;
203       }
204       /* g, u or stencil*/
205       switch (surface_params->swizzle[1]) {
206       case PIPE_SWIZZLE_X:
207          reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
208          break;
209       case PIPE_SWIZZLE_Y:
210          reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
211          break;
212       case PIPE_SWIZZLE_Z:
213          reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
214          break;
215       case PIPE_SWIZZLE_W:
216          reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
217          break;
218       case PIPE_SWIZZLE_0:
219       case PIPE_SWIZZLE_NONE:
220          reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_ZERO;
221          break;
222       case PIPE_SWIZZLE_1:
223          reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_ONE;
224          break;
225       default:
226          unreachable("Unknown enum pipe_swizzle");
227          break;
228       }
229       /* b or v*/
230       switch (surface_params->swizzle[2]) {
231       case PIPE_SWIZZLE_X:
232          reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
233          break;
234       case PIPE_SWIZZLE_Y:
235          reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
236          break;
237       case PIPE_SWIZZLE_Z:
238          reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
239          break;
240       case PIPE_SWIZZLE_W:
241          reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
242          break;
243       case PIPE_SWIZZLE_0:
244       case PIPE_SWIZZLE_NONE:
245          reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_ZERO;
246          break;
247       case PIPE_SWIZZLE_1:
248          reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_ONE;
249          break;
250       default:
251          unreachable("Unknown enum pipe_swizzle");
252          break;
253       }
254       /* a */
255       switch (surface_params->swizzle[3]) {
256       case PIPE_SWIZZLE_X:
257          reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
258          break;
259       case PIPE_SWIZZLE_Y:
260          reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
261          break;
262       case PIPE_SWIZZLE_Z:
263          reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
264          break;
265       case PIPE_SWIZZLE_W:
266          reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
267          break;
268       case PIPE_SWIZZLE_0:
269       case PIPE_SWIZZLE_NONE:
270          reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_ZERO;
271          break;
272       case PIPE_SWIZZLE_1:
273          reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_ONE;
274          break;
275       default:
276          unreachable("Unknown enum pipe_swizzle");
277          break;
278       }
279 
280       if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
281          reg.size_z = util_logbase2_ceil(surface_params->depth);
282 
283       reg.downscale = surface_params->down_scale;
284    }
285 
286    pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
287       if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
288           surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
289          reg.size_x = util_logbase2_ceil(surface_params->width);
290          reg.size_y = util_logbase2_ceil(surface_params->height);
291       }
292 
293       reg.minclip_y = render_params->min_y_clip;
294       reg.maxclip_x = render_params->max_x_clip;
295       reg.zslice = render_params->slice;
296       reg.maxclip_y = render_params->max_y_clip;
297    }
298 }
299 
300 /* TODO: Split this into smaller functions to make it easier to follow. When
301  * doing this, it would be nice to have a function that returns
302  * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
303  * pvr_render_job_ws_fragment_state_init().
304  */
pvr_setup_tiles_in_flight(const struct pvr_device_info * dev_info,const struct pvr_device_runtime_info * dev_runtime_info,uint32_t msaa_mode,uint32_t pixel_width,bool paired_tiles,uint32_t max_tiles_in_flight,uint32_t * const isp_ctl_out,uint32_t * const pixel_ctl_out)305 void pvr_setup_tiles_in_flight(
306    const struct pvr_device_info *dev_info,
307    const struct pvr_device_runtime_info *dev_runtime_info,
308    uint32_t msaa_mode,
309    uint32_t pixel_width,
310    bool paired_tiles,
311    uint32_t max_tiles_in_flight,
312    uint32_t *const isp_ctl_out,
313    uint32_t *const pixel_ctl_out)
314 {
315    uint32_t total_tiles_in_flight = 0;
316    uint32_t usable_partition_size;
317    uint32_t partitions_available;
318    uint32_t usc_min_output_regs;
319    uint32_t max_partitions;
320    uint32_t partition_size;
321    uint32_t max_phantoms;
322    uint32_t tile_size_x;
323    uint32_t tile_size_y;
324    uint32_t isp_samples;
325 
326    /* Round up the pixel width to the next allocation granularity. */
327    usc_min_output_regs =
328       PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
329    pixel_width = MAX2(pixel_width, usc_min_output_regs);
330    pixel_width = util_next_power_of_two(pixel_width);
331 
332    assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
333 
334    partition_size = pixel_width;
335 
336    isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
337    if (isp_samples == 2) {
338       if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
339          partition_size *= 2U;
340    } else if (isp_samples == 4) {
341       if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
342           msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
343          partition_size *= 4U;
344       else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
345          partition_size *= 2U;
346    }
347 
348    /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
349     * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
350     * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
351     */
352    tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
353    tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
354 
355    /* We only support square tiles. */
356    assert(tile_size_x == tile_size_y);
357 
358    if (tile_size_x == 16U) {
359       /* Cores with 16x16 tiles does not use tile quadrants. */
360       partition_size *= tile_size_x * tile_size_y;
361    } else {
362       /* Size of a tile quadrant (in dwords). */
363       partition_size *= (tile_size_x * tile_size_y / 4U);
364    }
365 
366    /* Maximum available partition space for partitions of this size. */
367    max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
368    usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size,
369                                 partition_size * max_partitions);
370 
371    if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
372        (1024 * 4 * 4)) {
373       /* Do not apply the limit for cores with 16x16 tile size (no quadrant
374        * affinity). */
375       if (tile_size_x != 16) {
376          /* This is to counter the extremely limited CS size on some cores.
377           */
378          /* Available partition space is limited to 8 tile quadrants. */
379          usable_partition_size =
380             MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
381       }
382    }
383 
384    /* Ensure that maximum number of partitions in use is not greater
385     * than the total number of partitions available.
386     */
387    partitions_available =
388       MIN2(max_partitions, usable_partition_size / partition_size);
389 
390    if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
391       max_phantoms = dev_runtime_info->num_phantoms;
392    else if (PVR_HAS_FEATURE(dev_info, roguexe))
393       max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
394    else
395       max_phantoms = 1;
396 
397    for (uint32_t i = 0; i < max_phantoms; i++) {
398       uint32_t usc_tiles_in_flight = partitions_available;
399       uint32_t isp_tiles_in_flight;
400 
401       /* Cores with tiles size other than 16x16 use tile quadrants. */
402       if (tile_size_x != 16) {
403          uint32_t num_clusters =
404             PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U);
405          usc_tiles_in_flight =
406             (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U;
407       }
408 
409       assert(usc_tiles_in_flight > 0);
410 
411       isp_tiles_in_flight =
412          PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
413       /* Ensure that maximum number of ISP tiles in flight is not greater
414        * than the maximum number of USC tiles in flight.
415        */
416       if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
417           PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
418              2) {
419          isp_tiles_in_flight /= dev_runtime_info->num_phantoms;
420       }
421 
422       isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
423 
424       /* Limit the number of tiles in flight if the shaders have
425        * requested a large allocation of local memory.
426        */
427       if (max_tiles_in_flight > 0U) {
428          isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight);
429 
430          if (PVR_HAS_FEATURE(dev_info, roguexe)) {
431             if (tile_size_x == 16) {
432                /* The FW infers the tiles in flight value from the
433                 * partitions setting.
434                 */
435                /* Partitions per tile. */
436                partitions_available = isp_tiles_in_flight;
437             } else {
438                /* Partitions per tile quadrant. */
439                partitions_available = isp_tiles_in_flight * 4U;
440             }
441          }
442       }
443 
444       /* Due to limitations of ISP_CTL_PIPE there can only be a difference of
445        * 1 between Phantoms.
446        */
447       if (total_tiles_in_flight > (isp_tiles_in_flight + 1U))
448          total_tiles_in_flight = isp_tiles_in_flight + 1U;
449 
450       total_tiles_in_flight += isp_tiles_in_flight;
451    }
452 
453    if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
454        PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) ==
455           2) {
456       /* Limit the ISP tiles in flight to fit into the available USC partition
457        * store.
458        */
459       total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available);
460    }
461 
462    if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) {
463       total_tiles_in_flight =
464          MIN2(total_tiles_in_flight, partitions_available / 2);
465    }
466 
467    pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) {
468       if (pixel_width == 1 && usc_min_output_regs == 1) {
469          reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
470       } else if (pixel_width == 2) {
471          reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
472       } else if (pixel_width == 4) {
473          reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS);
474       } else if (pixel_width == 8 &&
475                  PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
476          reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS);
477       } else if (usc_min_output_regs == 1) {
478          reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
479       } else {
480          reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
481       }
482 
483       if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) {
484          /* Setup the partition mask based on the maximum number of
485           * partitions available.
486           */
487          reg.partition_mask = (1 << max_partitions) - 1;
488       } else {
489          reg.enable_4th_partition = true;
490 
491          /* Setup the partition mask based on the number of partitions
492           * available.
493           */
494          reg.partition_mask = (1U << partitions_available) - 1U;
495       }
496    }
497 
498    pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
499       if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
500          reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
501       else
502          reg.pipe_enable = total_tiles_in_flight - 1;
503    }
504 }
505