1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <stdbool.h>
25 #include <stdint.h>
26
27 #include "hwdef/rogue_hw_defs.h"
28 #include "hwdef/rogue_hw_utils.h"
29 #include "pvr_csb_enum_helpers.h"
30 #include "pvr_device_info.h"
31 #include "pvr_formats.h"
32 #include "pvr_job_common.h"
33 #include "pvr_private.h"
34 #include "util/macros.h"
35 #include "util/u_math.h"
36 #include "vk_alloc.h"
37 #include "vk_format.h"
38 #include "vk_object.h"
39
/* Pick the PBE source format and gamma setting to use for vk_format.
 *
 * vk_format:               format being written out.
 * default_gamma:           value stored to *gamma_out unless the format is
 *                          sRGB.
 * with_packed_usc_channel: only consulted when no earlier test matches and
 *                          channel 0 is at most 8 bits wide; if set the
 *                          8-per-channel source format is chosen, otherwise
 *                          F16-per-channel.
 * src_format_out:          receives a PBESTATE_SOURCE_FORMAT_* value.
 * gamma_out:               receives default_gamma, or PVR_PBE_GAMMA_ENABLED
 *                          for sRGB formats.
 */
void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
                                      enum pvr_pbe_gamma default_gamma,
                                      bool with_packed_usc_channel,
                                      uint32_t *const src_format_out,
                                      enum pvr_pbe_gamma *const gamma_out)
{
   const struct util_format_description *const fmt_desc =
      vk_format_description(vk_format);
   const uint32_t first_chan_bits = fmt_desc->channel[0].size;
   uint32_t src_format;

   *gamma_out = default_gamma;

   if (pvr_vk_format_has_32bit_component(vk_format) ||
       vk_format_is_int(vk_format)) {
      src_format = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
   } else if (vk_format_is_float(vk_format)) {
      src_format = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
   } else if (vk_format_is_srgb(vk_format)) {
      /* Gamma'd formats always enable gamma and take an F16 source. */
      *gamma_out = PVR_PBE_GAMMA_ENABLED;
      src_format = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
   } else if ((vk_format_has_depth(vk_format) &&
               vk_format_get_component_bits(vk_format,
                                            UTIL_FORMAT_COLORSPACE_ZS,
                                            0) > 16) ||
              (vk_format_has_stencil(vk_format) &&
               vk_format_get_component_bits(vk_format,
                                            UTIL_FORMAT_COLORSPACE_ZS,
                                            1) > 0) ||
              first_chan_bits > 16) {
      /* Depth wider than 16 bits, any stencil, or a first channel wider
       * than 16 bits.
       */
      src_format = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
   } else if (first_chan_bits > 8 || !with_packed_usc_channel) {
      src_format = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
   } else {
      src_format = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
   }

   *src_format_out = src_format;
}
82
pvr_pbe_pack_state(const struct pvr_device_info * dev_info,const struct pvr_pbe_surf_params * surface_params,const struct pvr_pbe_render_params * render_params,uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])83 void pvr_pbe_pack_state(
84 const struct pvr_device_info *dev_info,
85 const struct pvr_pbe_surf_params *surface_params,
86 const struct pvr_pbe_render_params *render_params,
87 uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
88 uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
89 {
90 /* This function needs updating if the value of
91 * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected
92 * value.
93 */
94 STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2);
95
96 /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS
97 * changes, so check that it's the expected value.
98 */
99 STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
100
101 pbe_reg_words[2] = 0;
102
103 if (surface_params->z_only_render) {
104 pbe_cs_words[0] = 0;
105
106 pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
107 state.emptytile = true;
108 }
109
110 pbe_reg_words[0] = 0;
111 pbe_reg_words[1] = 0;
112
113 return;
114 }
115
116 pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
117 state.address_low = surface_params->addr;
118 }
119
120 pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
121 state.address_high = surface_params->addr;
122
123 state.source_format = surface_params->source_format;
124
125 state.source_pos = pvr_pbestate_source_pos(render_params->source_start);
126 if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
127 state.source_pos_offset_128 = render_params->source_start >=
128 PVR_PBE_STARTPOS_BIT128;
129 } else {
130 assert(render_params->source_start < PVR_PBE_STARTPOS_BIT128);
131 }
132
133 /* MRT index (Use 0 for a single render target)/ */
134 state.mrt_index = render_params->mrt_index;
135
136 /* Normalization flag based on output format. */
137 state.norm = surface_params->is_normalized;
138
139 state.packmode = surface_params->pbe_packmode;
140 }
141
142 pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
143 reg.tilerelative = true;
144
145 switch (surface_params->mem_layout) {
146 case PVR_MEMLAYOUT_TWIDDLED:
147 reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
148 break;
149
150 case PVR_MEMLAYOUT_3DTWIDDLED:
151 reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
152 break;
153
154 case PVR_MEMLAYOUT_LINEAR:
155 default:
156 reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
157 break;
158 }
159
160 /* FIXME: Remove rotation and y_flip hardcoding if needed. */
161 reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
162 reg.y_flip = false;
163
164 /* Note: Due to gamma being overridden above, anything other than
165 * ENABLED/NONE is ignored.
166 */
167 if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
168 reg.gamma = true;
169
170 if (surface_params->nr_components == 2)
171 reg.twocomp_gamma =
172 PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
173 }
174
175 reg.linestride = (surface_params->stride - 1) /
176 PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
177 reg.minclip_x = render_params->min_x_clip;
178
179 /* r, y or depth*/
180 switch (surface_params->swizzle[0]) {
181 case PIPE_SWIZZLE_X:
182 reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
183 break;
184 case PIPE_SWIZZLE_Y:
185 reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
186 break;
187 case PIPE_SWIZZLE_Z:
188 reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
189 break;
190 case PIPE_SWIZZLE_W:
191 reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
192 break;
193 case PIPE_SWIZZLE_0:
194 case PIPE_SWIZZLE_NONE:
195 reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_ZERO;
196 break;
197 case PIPE_SWIZZLE_1:
198 reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_ONE;
199 break;
200 default:
201 unreachable("Unknown enum pipe_swizzle");
202 break;
203 }
204 /* g, u or stencil*/
205 switch (surface_params->swizzle[1]) {
206 case PIPE_SWIZZLE_X:
207 reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
208 break;
209 case PIPE_SWIZZLE_Y:
210 reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
211 break;
212 case PIPE_SWIZZLE_Z:
213 reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
214 break;
215 case PIPE_SWIZZLE_W:
216 reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
217 break;
218 case PIPE_SWIZZLE_0:
219 case PIPE_SWIZZLE_NONE:
220 reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_ZERO;
221 break;
222 case PIPE_SWIZZLE_1:
223 reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_ONE;
224 break;
225 default:
226 unreachable("Unknown enum pipe_swizzle");
227 break;
228 }
229 /* b or v*/
230 switch (surface_params->swizzle[2]) {
231 case PIPE_SWIZZLE_X:
232 reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
233 break;
234 case PIPE_SWIZZLE_Y:
235 reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
236 break;
237 case PIPE_SWIZZLE_Z:
238 reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
239 break;
240 case PIPE_SWIZZLE_W:
241 reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
242 break;
243 case PIPE_SWIZZLE_0:
244 case PIPE_SWIZZLE_NONE:
245 reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_ZERO;
246 break;
247 case PIPE_SWIZZLE_1:
248 reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_ONE;
249 break;
250 default:
251 unreachable("Unknown enum pipe_swizzle");
252 break;
253 }
254 /* a */
255 switch (surface_params->swizzle[3]) {
256 case PIPE_SWIZZLE_X:
257 reg.swiz_chan0 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
258 break;
259 case PIPE_SWIZZLE_Y:
260 reg.swiz_chan1 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
261 break;
262 case PIPE_SWIZZLE_Z:
263 reg.swiz_chan2 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
264 break;
265 case PIPE_SWIZZLE_W:
266 reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
267 break;
268 case PIPE_SWIZZLE_0:
269 case PIPE_SWIZZLE_NONE:
270 reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_ZERO;
271 break;
272 case PIPE_SWIZZLE_1:
273 reg.swiz_chan3 = ROGUE_PBESTATE_SWIZ_ONE;
274 break;
275 default:
276 unreachable("Unknown enum pipe_swizzle");
277 break;
278 }
279
280 if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
281 reg.size_z = util_logbase2_ceil(surface_params->depth);
282
283 reg.downscale = surface_params->down_scale;
284 }
285
286 pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
287 if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
288 surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
289 reg.size_x = util_logbase2_ceil(surface_params->width);
290 reg.size_y = util_logbase2_ceil(surface_params->height);
291 }
292
293 reg.minclip_y = render_params->min_y_clip;
294 reg.maxclip_x = render_params->max_x_clip;
295 reg.zslice = render_params->slice;
296 reg.maxclip_y = render_params->max_y_clip;
297 }
298 }
299
300 /* TODO: Split this into smaller functions to make it easier to follow. When
301 * doing this, it would be nice to have a function that returns
302 * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
303 * pvr_render_job_ws_fragment_state_init().
304 */
pvr_setup_tiles_in_flight(const struct pvr_device_info * dev_info,const struct pvr_device_runtime_info * dev_runtime_info,uint32_t msaa_mode,uint32_t pixel_width,bool paired_tiles,uint32_t max_tiles_in_flight,uint32_t * const isp_ctl_out,uint32_t * const pixel_ctl_out)305 void pvr_setup_tiles_in_flight(
306 const struct pvr_device_info *dev_info,
307 const struct pvr_device_runtime_info *dev_runtime_info,
308 uint32_t msaa_mode,
309 uint32_t pixel_width,
310 bool paired_tiles,
311 uint32_t max_tiles_in_flight,
312 uint32_t *const isp_ctl_out,
313 uint32_t *const pixel_ctl_out)
314 {
315 uint32_t total_tiles_in_flight = 0;
316 uint32_t usable_partition_size;
317 uint32_t partitions_available;
318 uint32_t usc_min_output_regs;
319 uint32_t max_partitions;
320 uint32_t partition_size;
321 uint32_t max_phantoms;
322 uint32_t tile_size_x;
323 uint32_t tile_size_y;
324 uint32_t isp_samples;
325
326 /* Round up the pixel width to the next allocation granularity. */
327 usc_min_output_regs =
328 PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
329 pixel_width = MAX2(pixel_width, usc_min_output_regs);
330 pixel_width = util_next_power_of_two(pixel_width);
331
332 assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
333
334 partition_size = pixel_width;
335
336 isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
337 if (isp_samples == 2) {
338 if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
339 partition_size *= 2U;
340 } else if (isp_samples == 4) {
341 if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
342 msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
343 partition_size *= 4U;
344 else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
345 partition_size *= 2U;
346 }
347
348 /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
349 * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
350 * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
351 */
352 tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
353 tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
354
355 /* We only support square tiles. */
356 assert(tile_size_x == tile_size_y);
357
358 if (tile_size_x == 16U) {
359 /* Cores with 16x16 tiles does not use tile quadrants. */
360 partition_size *= tile_size_x * tile_size_y;
361 } else {
362 /* Size of a tile quadrant (in dwords). */
363 partition_size *= (tile_size_x * tile_size_y / 4U);
364 }
365
366 /* Maximum available partition space for partitions of this size. */
367 max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
368 usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size,
369 partition_size * max_partitions);
370
371 if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
372 (1024 * 4 * 4)) {
373 /* Do not apply the limit for cores with 16x16 tile size (no quadrant
374 * affinity). */
375 if (tile_size_x != 16) {
376 /* This is to counter the extremely limited CS size on some cores.
377 */
378 /* Available partition space is limited to 8 tile quadrants. */
379 usable_partition_size =
380 MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
381 }
382 }
383
384 /* Ensure that maximum number of partitions in use is not greater
385 * than the total number of partitions available.
386 */
387 partitions_available =
388 MIN2(max_partitions, usable_partition_size / partition_size);
389
390 if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
391 max_phantoms = dev_runtime_info->num_phantoms;
392 else if (PVR_HAS_FEATURE(dev_info, roguexe))
393 max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
394 else
395 max_phantoms = 1;
396
397 for (uint32_t i = 0; i < max_phantoms; i++) {
398 uint32_t usc_tiles_in_flight = partitions_available;
399 uint32_t isp_tiles_in_flight;
400
401 /* Cores with tiles size other than 16x16 use tile quadrants. */
402 if (tile_size_x != 16) {
403 uint32_t num_clusters =
404 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U);
405 usc_tiles_in_flight =
406 (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U;
407 }
408
409 assert(usc_tiles_in_flight > 0);
410
411 isp_tiles_in_flight =
412 PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
413 /* Ensure that maximum number of ISP tiles in flight is not greater
414 * than the maximum number of USC tiles in flight.
415 */
416 if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
417 PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
418 2) {
419 isp_tiles_in_flight /= dev_runtime_info->num_phantoms;
420 }
421
422 isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
423
424 /* Limit the number of tiles in flight if the shaders have
425 * requested a large allocation of local memory.
426 */
427 if (max_tiles_in_flight > 0U) {
428 isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight);
429
430 if (PVR_HAS_FEATURE(dev_info, roguexe)) {
431 if (tile_size_x == 16) {
432 /* The FW infers the tiles in flight value from the
433 * partitions setting.
434 */
435 /* Partitions per tile. */
436 partitions_available = isp_tiles_in_flight;
437 } else {
438 /* Partitions per tile quadrant. */
439 partitions_available = isp_tiles_in_flight * 4U;
440 }
441 }
442 }
443
444 /* Due to limitations of ISP_CTL_PIPE there can only be a difference of
445 * 1 between Phantoms.
446 */
447 if (total_tiles_in_flight > (isp_tiles_in_flight + 1U))
448 total_tiles_in_flight = isp_tiles_in_flight + 1U;
449
450 total_tiles_in_flight += isp_tiles_in_flight;
451 }
452
453 if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
454 PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) ==
455 2) {
456 /* Limit the ISP tiles in flight to fit into the available USC partition
457 * store.
458 */
459 total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available);
460 }
461
462 if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) {
463 total_tiles_in_flight =
464 MIN2(total_tiles_in_flight, partitions_available / 2);
465 }
466
467 pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) {
468 if (pixel_width == 1 && usc_min_output_regs == 1) {
469 reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
470 } else if (pixel_width == 2) {
471 reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
472 } else if (pixel_width == 4) {
473 reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS);
474 } else if (pixel_width == 8 &&
475 PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
476 reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS);
477 } else if (usc_min_output_regs == 1) {
478 reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
479 } else {
480 reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
481 }
482
483 if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) {
484 /* Setup the partition mask based on the maximum number of
485 * partitions available.
486 */
487 reg.partition_mask = (1 << max_partitions) - 1;
488 } else {
489 reg.enable_4th_partition = true;
490
491 /* Setup the partition mask based on the number of partitions
492 * available.
493 */
494 reg.partition_mask = (1U << partitions_available) - 1U;
495 }
496 }
497
498 pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
499 if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
500 reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
501 else
502 reg.pipe_enable = total_tiles_in_flight - 1;
503 }
504 }
505