xref: /aosp_15_r20/external/mesa3d/src/amd/vpelib/src/chip/vpe10/vpe10_dpp.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* Copyright 2022 Advanced Micro Devices, Inc.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a
4  * copy of this software and associated documentation files (the "Software"),
5  * to deal in the Software without restriction, including without limitation
6  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7  * and/or sell copies of the Software, and to permit persons to whom the
8  * Software is furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19  * OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * Authors: AMD
22  *
23  */
24 
25 #include <string.h>
26 #include <math.h>
27 #include "common.h"
28 #include "vpe_priv.h"
29 #include "vpe10_dpp.h"
30 #include "color.h"
31 #include "vpe10/inc/vpe10_cm_common.h"
32 #include "hw_shared.h"
33 #include "reg_helper.h"
34 
/*
 * Context identifiers for this translation unit.
 * NOTE(review): presumably consumed by the register-programming macros that
 * come in through reg_helper.h / vpe10_dpp.h -- confirm against those headers.
 */
#define CTX_BASE dpp
#define CTX vpe10_dpp
37 
38 static struct dpp_funcs vpe10_dpp_funcs = {
39 
40     // cnv
41     .program_cnv            = vpe10_dpp_program_cnv,
42     .program_pre_dgam       = vpe10_dpp_cnv_program_pre_dgam,
43     .program_cnv_bias_scale = vpe10_dpp_program_cnv_bias_scale,
44     .program_alpha_keyer    = vpe10_dpp_cnv_program_alpha_keyer,
45     .program_crc            = vpe10_dpp_program_crc,
46 
47     // cm
48     .program_input_transfer_func = vpe10_dpp_program_input_transfer_func,
49     .program_gamut_remap         = vpe10_dpp_program_gamut_remap,
50     .program_post_csc            = vpe10_dpp_program_post_csc,
51     .set_hdr_multiplier          = vpe10_dpp_set_hdr_multiplier,
52 
53     // scaler
54     .get_optimal_number_of_taps  = vpe10_dpp_get_optimal_number_of_taps,
55     .dscl_calc_lb_num_partitions = vpe10_dscl_calc_lb_num_partitions,
56     .set_segment_scaler          = vpe10_dpp_set_segment_scaler,
57     .set_frame_scaler            = vpe10_dpp_set_frame_scaler,
58     .get_line_buffer_size        = vpe10_get_line_buffer_size,
59     .validate_number_of_taps     = vpe10_dpp_validate_number_of_taps,
60 };
61 
vpe10_construct_dpp(struct vpe_priv * vpe_priv,struct dpp * dpp)62 void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp)
63 {
64     dpp->vpe_priv = vpe_priv;
65     dpp->funcs    = &vpe10_dpp_funcs;
66 }
67 
vpe10_dpp_get_optimal_number_of_taps(struct vpe_rect * src_rect,struct vpe_rect * dst_rect,struct vpe_scaling_taps * taps)68 bool vpe10_dpp_get_optimal_number_of_taps(
69     struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps)
70 {
71     double   h_ratio = 1.0, v_ratio = 1.0;
72     uint32_t h_taps = 1, v_taps = 1;
73     if (taps->h_taps > 8 || taps->v_taps > 8 || taps->h_taps_c > 8 || taps->v_taps_c > 8)
74         return false;
75 
76     /*
77      * if calculated taps are greater than 8, it means the downscaling ratio is greater than 4:1,
78      * and since the given taps are used by default, if the given taps are less than the
79      * calculated ones, the image quality will not be good, so vpelib would reject this case.
80      */
81 
82     // Horizontal taps
83 
84     h_ratio = (double)src_rect->width / (double)dst_rect->width;
85 
86     if (src_rect->width == dst_rect->width) {
87         h_taps = 1;
88     } else if (h_ratio > 1) {
89         h_taps = (uint32_t)max(4, ceil(h_ratio * 2.0));
90     } else {
91         h_taps = 4;
92     }
93 
94     if (h_taps != 1) {
95         h_taps += h_taps % 2;
96     }
97 
98     if (taps->h_taps == 0 && h_taps <= 8) {
99         taps->h_taps = h_taps;
100     } else if (taps->h_taps < h_taps || h_taps > 8) {
101         return false;
102     }
103 
104     // Vertical taps
105     v_ratio = (double)src_rect->height / (double)dst_rect->height;
106 
107     if (src_rect->height == dst_rect->height) {
108         v_taps = 1;
109     } else if (v_ratio > 1) {
110         v_taps = (uint32_t)max(4, ceil(v_ratio * 2.0));
111     } else {
112         v_taps = 4;
113     }
114 
115     if (v_taps != 1) {
116         v_taps += v_taps % 2;
117     }
118 
119     if (taps->v_taps == 0 && v_taps <= 8) {
120         taps->v_taps = v_taps;
121     } else if (taps->v_taps < v_taps || v_taps > 8) {
122         return false;
123     }
124 
125     // Chroma taps
126     if (taps->h_taps_c == 0) {
127         taps->h_taps_c = 2;
128     }
129 
130     if (taps->v_taps_c == 0) {
131         taps->v_taps_c = 2;
132     }
133 
134     return true;
135 }
136 
vpe10_dscl_calc_lb_num_partitions(const struct scaler_data * scl_data,enum lb_memory_config lb_config,uint32_t * num_part_y,uint32_t * num_part_c)137 void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data,
138     enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c)
139 {
140     uint32_t memory_line_size_y, memory_line_size_c, memory_line_size_a, lb_memory_size,
141         lb_memory_size_c, lb_memory_size_a, num_partitions_a;
142 
143     uint32_t line_size   = scl_data->viewport.width < scl_data->recout.width
144                                ? scl_data->viewport.width
145                                : scl_data->recout.width;
146     uint32_t line_size_c = scl_data->viewport_c.width < scl_data->recout.width
147                                ? scl_data->viewport_c.width
148                                : scl_data->recout.width;
149 
150     if (line_size == 0)
151         line_size = 1;
152 
153     if (line_size_c == 0)
154         line_size_c = 1;
155 
156     memory_line_size_y = (line_size + 5) / 6;   /* +5 to ceil */
157     memory_line_size_c = (line_size_c + 5) / 6; /* +5 to ceil */
158     memory_line_size_a = (line_size + 5) / 6;   /* +5 to ceil */
159 
160     // only has 1-piece lb config in vpe1
161     lb_memory_size   = 696;
162     lb_memory_size_c = 696;
163     lb_memory_size_a = 696;
164 
165     *num_part_y      = lb_memory_size / memory_line_size_y;
166     *num_part_c      = lb_memory_size_c / memory_line_size_c;
167     num_partitions_a = lb_memory_size_a / memory_line_size_a;
168 
169     if (scl_data->lb_params.alpha_en && (num_partitions_a < *num_part_y))
170         *num_part_y = num_partitions_a;
171 
172     if (*num_part_y > 12)
173         *num_part_y = 12;
174     if (*num_part_c > 12)
175         *num_part_c = 12;
176 }
177 
178 /* Not used as we don't enable prealpha dealpha currently
179  * Can skip for optimize performance and use default val
180  */
vpe10_dpp_program_prealpha_dealpha(struct dpp * dpp)181 static void vpe10_dpp_program_prealpha_dealpha(struct dpp *dpp)
182 {
183     uint32_t dealpha_en = 0, dealpha_ablnd_en = 0;
184     uint32_t realpha_en = 0, realpha_ablnd_en = 0;
185     uint32_t program_prealpha_dealpha = 0;
186     PROGRAM_ENTRY();
187 
188     if (program_prealpha_dealpha) {
189         dealpha_en = 1;
190         realpha_en = 1;
191     }
192     REG_SET_2(
193         VPCNVC_PRE_DEALPHA, 0, PRE_DEALPHA_EN, dealpha_en, PRE_DEALPHA_ABLND_EN, dealpha_ablnd_en);
194     REG_SET_2(
195         VPCNVC_PRE_REALPHA, 0, PRE_REALPHA_EN, realpha_en, PRE_REALPHA_ABLND_EN, realpha_ablnd_en);
196 }
197 
198 /* Not used as we don't have special 2bit LUt currently
199  * Can skip for optimize performance and use default val
200  */
vpe10_dpp_program_alpha_2bit_lut(struct dpp * dpp,struct cnv_alpha_2bit_lut * alpha_2bit_lut)201 static void vpe10_dpp_program_alpha_2bit_lut(
202     struct dpp *dpp, struct cnv_alpha_2bit_lut *alpha_2bit_lut)
203 {
204     PROGRAM_ENTRY();
205 
206     if (alpha_2bit_lut != NULL) {
207         REG_SET_4(VPCNVC_ALPHA_2BIT_LUT, 0, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0, ALPHA_2BIT_LUT1,
208             alpha_2bit_lut->lut1, ALPHA_2BIT_LUT2, alpha_2bit_lut->lut2, ALPHA_2BIT_LUT3,
209             alpha_2bit_lut->lut3);
210     } else { // restore to default
211         REG_SET_DEFAULT(VPCNVC_ALPHA_2BIT_LUT);
212     }
213 }
214 
vpe10_dpp_program_cnv(struct dpp * dpp,enum vpe_surface_pixel_format format,enum vpe_expansion_mode mode)215 void vpe10_dpp_program_cnv(
216     struct dpp *dpp, enum vpe_surface_pixel_format format, enum vpe_expansion_mode mode)
217 {
218     uint32_t alpha_en     = 1;
219     uint32_t pixel_format = 0;
220     uint32_t hw_expansion_mode = 0;
221 
222     PROGRAM_ENTRY();
223 
224     switch (mode) {
225     case VPE_EXPANSION_MODE_DYNAMIC:
226         hw_expansion_mode = 0;
227         break;
228     case VPE_EXPANSION_MODE_ZERO:
229         hw_expansion_mode = 1;
230         break;
231     default:
232         VPE_ASSERT(0);
233         break;
234     }
235 
236     switch (format) {
237     case VPE_SURFACE_PIXEL_FORMAT_GRPH_XRGB8888:
238     case VPE_SURFACE_PIXEL_FORMAT_GRPH_XBGR8888:
239         alpha_en = 0;
240     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
241     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
242         pixel_format = 8;
243         break;
244     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBX8888:
245     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRX8888:
246         alpha_en = 0;
247     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA8888:
248     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA8888:
249         pixel_format = 9;
250         break;
251     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
252     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
253         pixel_format = 10;
254         break;
255     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA1010102:
256     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA1010102:
257         pixel_format = 11;
258         break;
259     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888:
260     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_AYCbCr8888:
261         pixel_format = 12;
262         break;
263     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb:
264         pixel_format = 64;
265         alpha_en     = 0;
266         break;
267     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr:
268         pixel_format = 65;
269         alpha_en     = 0;
270         break;
271     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb:
272         pixel_format = 66;
273         alpha_en     = 0;
274         break;
275     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr:
276         pixel_format = 67;
277         alpha_en     = 0;
278         break;
279     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
280         pixel_format = 22;
281         break;
282     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
283     case VPE_SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
284         pixel_format = 24;
285         break;
286     case VPE_SURFACE_PIXEL_FORMAT_GRPH_RGBA16161616F:
287     case VPE_SURFACE_PIXEL_FORMAT_GRPH_BGRA16161616F:
288         pixel_format = 25;
289         break;
290     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_ACrYCb2101010:
291         pixel_format = 114;
292         break;
293     case VPE_SURFACE_PIXEL_FORMAT_VIDEO_CrYCbA1010102:
294         pixel_format = 115;
295         break;
296     default:
297         break;
298     }
299 
300     REG_SET(VPCNVC_SURFACE_PIXEL_FORMAT, 0, VPCNVC_SURFACE_PIXEL_FORMAT, pixel_format);
301 
302     REG_SET_7(VPCNVC_FORMAT_CONTROL, 0, FORMAT_EXPANSION_MODE, hw_expansion_mode, FORMAT_CNV16, 0,
303         FORMAT_CONTROL__ALPHA_EN, alpha_en, VPCNVC_BYPASS, dpp->vpe_priv->init.debug.vpcnvc_bypass,
304         VPCNVC_BYPASS_MSB_ALIGN, 0, CLAMP_POSITIVE, 0, CLAMP_POSITIVE_C, 0);
305 }
306 
vpe10_dpp_program_cnv_bias_scale(struct dpp * dpp,struct bias_and_scale * bias_and_scale)307 void vpe10_dpp_program_cnv_bias_scale(struct dpp *dpp, struct bias_and_scale *bias_and_scale)
308 {
309     PROGRAM_ENTRY();
310 
311     REG_SET(VPCNVC_FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, bias_and_scale->bias_red);
312     REG_SET(VPCNVC_FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, bias_and_scale->bias_green);
313     REG_SET(VPCNVC_FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, bias_and_scale->bias_blue);
314 
315     REG_SET(VPCNVC_FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, bias_and_scale->scale_red);
316     REG_SET(VPCNVC_FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, bias_and_scale->scale_green);
317     REG_SET(VPCNVC_FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, bias_and_scale->scale_blue);
318 }
319 
vpe10_dpp_cnv_program_pre_dgam(struct dpp * dpp,enum color_transfer_func tr)320 void vpe10_dpp_cnv_program_pre_dgam(struct dpp *dpp, enum color_transfer_func tr)
321 {
322     int pre_degam_en          = 1;
323     int degamma_lut_selection = 0;
324 
325     PROGRAM_ENTRY();
326 
327     switch (tr) {
328     case TRANSFER_FUNC_LINEAR:
329         pre_degam_en = 0; // bypass
330         break;
331     case TRANSFER_FUNC_SRGB:
332         degamma_lut_selection = 0;
333         break;
334     case TRANSFER_FUNC_BT709:
335         degamma_lut_selection = 4;
336         break;
337     case TRANSFER_FUNC_PQ2084:
338         degamma_lut_selection = 5;
339         break;
340     default:
341         pre_degam_en = 0;
342         break;
343     }
344 
345     REG_SET_2(
346         VPCNVC_PRE_DEGAM, 0, PRE_DEGAM_MODE, pre_degam_en, PRE_DEGAM_SELECT, degamma_lut_selection);
347 }
348 
vpe10_dpp_cnv_program_alpha_keyer(struct dpp * dpp,struct cnv_color_keyer_params * color_keyer)349 void vpe10_dpp_cnv_program_alpha_keyer(struct dpp *dpp, struct cnv_color_keyer_params *color_keyer)
350 {
351     PROGRAM_ENTRY();
352 
353     REG_SET_2(VPCNVC_COLOR_KEYER_CONTROL, 0, COLOR_KEYER_EN, color_keyer->color_keyer_en,
354         COLOR_KEYER_MODE, color_keyer->color_keyer_mode);
355 
356     REG_SET_2(VPCNVC_COLOR_KEYER_ALPHA, 0, COLOR_KEYER_ALPHA_LOW,
357         color_keyer->color_keyer_alpha_low, COLOR_KEYER_ALPHA_HIGH,
358         color_keyer->color_keyer_alpha_high);
359 
360     REG_SET_2(VPCNVC_COLOR_KEYER_RED, 0, COLOR_KEYER_RED_LOW, color_keyer->color_keyer_red_low,
361         COLOR_KEYER_RED_HIGH, color_keyer->color_keyer_red_high);
362 
363     REG_SET_2(VPCNVC_COLOR_KEYER_GREEN, 0, COLOR_KEYER_GREEN_LOW,
364         color_keyer->color_keyer_green_low, COLOR_KEYER_GREEN_HIGH,
365         color_keyer->color_keyer_green_high);
366 
367     REG_SET_2(VPCNVC_COLOR_KEYER_BLUE, 0, COLOR_KEYER_BLUE_LOW, color_keyer->color_keyer_blue_low,
368         COLOR_KEYER_BLUE_HIGH, color_keyer->color_keyer_blue_high);
369 }
370 
vpe10_get_line_buffer_size()371 uint32_t vpe10_get_line_buffer_size()
372 {
373     return MAX_LINE_SIZE * MAX_LINE_CNT;
374 }
375 
vpe10_dpp_validate_number_of_taps(struct dpp * dpp,struct scaler_data * scl_data)376 bool vpe10_dpp_validate_number_of_taps(struct dpp *dpp, struct scaler_data *scl_data)
377 {
378     uint32_t num_part_y, num_part_c;
379     uint32_t max_taps_y, max_taps_c;
380     uint32_t min_taps_y, min_taps_c;
381 
382     /*Ensure we can support the requested number of vtaps*/
383     min_taps_y = (uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert);
384     min_taps_c = (uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert_c);
385 
386     dpp->funcs->dscl_calc_lb_num_partitions(scl_data, LB_MEMORY_CONFIG_1, &num_part_y, &num_part_c);
387 
388     /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */
389     if (vpe_fixpt_ceil(scl_data->ratios.vert) > 2)
390         max_taps_y = num_part_y - ((uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert) - 2);
391     else
392         max_taps_y = num_part_y;
393 
394     if (vpe_fixpt_ceil(scl_data->ratios.vert_c) > 2)
395         max_taps_c = num_part_c - ((uint32_t)vpe_fixpt_ceil(scl_data->ratios.vert_c) - 2);
396     else
397         max_taps_c = num_part_c;
398 
399     if (max_taps_y < min_taps_y)
400         return false;
401     else if (max_taps_c < min_taps_c)
402         return false;
403 
404     if (scl_data->taps.v_taps > max_taps_y)
405         scl_data->taps.v_taps = max_taps_y;
406 
407     if (scl_data->taps.v_taps_c > max_taps_c)
408         scl_data->taps.v_taps_c = max_taps_c;
409 
410     if (IDENTITY_RATIO(scl_data->ratios.vert))
411         scl_data->taps.v_taps = 1;
412 
413     if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
414         scl_data->taps.v_taps++;
415 
416     if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
417         scl_data->taps.v_taps_c++;
418 
419     return true;
420 }
421 
vpe10_dpp_program_crc(struct dpp * dpp,bool enable)422 void vpe10_dpp_program_crc(struct dpp *dpp, bool enable)
423 {
424     PROGRAM_ENTRY();
425     REG_UPDATE(VPDPP_CRC_CTRL, VPDPP_CRC_EN, enable);
426 }
427 
428