xref: /aosp_15_r20/external/mesa3d/src/amd/vpelib/src/chip/vpe10/vpe10_dpp_dscl.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* Copyright 2022 Advanced Micro Devices, Inc.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a
4  * copy of this software and associated documentation files (the "Software"),
5  * to deal in the Software without restriction, including without limitation
6  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7  * and/or sell copies of the Software, and to permit persons to whom the
8  * Software is furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19  * OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * Authors: AMD
22  *
23  */
24 
25 #include "vpe_priv.h"
26 #include "vpe10_dpp.h"
27 
/* Names bound for the register-programming helper macros used throughout this
 * file (presumably picked up by PROGRAM_ENTRY()/REG_SET*() - confirm in the
 * DPP headers).
 */
#define CTX_BASE dpp
#define CTX      vpe10_dpp

/* The coefficient upload loop in this file walks NUM_PHASES / 2 + 1 phases. */
#define NUM_PHASES 64

#define HORZ_MAX_TAPS 8
#define VERT_MAX_TAPS 8

/* Value written to the LB_MAX_PARTITIONS field of VPLB_MEMORY_CTRL. */
#define LB_MAX_PARTITION 12
36 
/* Selector written to SCL_COEF_RAM_FILTER_TYPE before a coefficient table is
 * uploaded. Even values are vertical filters, odd values horizontal ones.
 */
enum vpe10_coef_filter_type_sel {
    /* luma */
    SCL_COEF_LUMA_VERT_FILTER = 0,
    SCL_COEF_LUMA_HORZ_FILTER = 1,
    /* chroma */
    SCL_COEF_CHROMA_VERT_FILTER = 2,
    SCL_COEF_CHROMA_HORZ_FILTER = 3,
    /* alpha */
    SCL_COEF_ALPHA_VERT_FILTER = 4,
    SCL_COEF_ALPHA_HORZ_FILTER = 5
};
45 
/* Auto-calculation modes of the scaler. */
enum dscl_autocal_mode {
    /* Auto-calculation disabled. */
    AUTOCAL_MODE_OFF = 0,

    /* Autocal calculates the scaling ratio and the initial phase;
     * DSCL_MODE_SEL must be set to 1.
     */
    AUTOCAL_MODE_AUTOSCALE = 1,

    /* Autocal performs auto centering without replication;
     * DSCL_MODE_SEL must be set to 0.
     */
    AUTOCAL_MODE_AUTOCENTER = 2,

    /* Autocal performs auto centering and auto replication;
     * DSCL_MODE_SEL must be set to 0.
     */
    AUTOCAL_MODE_AUTOREPLICATE = 3
};
62 
/* Scaler operating modes; the chosen value is written to the VPDSCL_MODE
 * field of the VPDSCL_MODE register.
 */
enum dscl_mode_sel {
    /* 4:4:4 paths */
    DSCL_MODE_SCALING_444_BYPASS       = 0,
    DSCL_MODE_SCALING_444_RGB_ENABLE   = 1,
    DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2,

    /* 4:2:0 paths */
    DSCL_MODE_SCALING_420_YCBCR_ENABLE  = 3,
    DSCL_MODE_SCALING_420_LUMA_BYPASS   = 4,
    DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,

    /* scaler fully bypassed */
    DSCL_MODE_DSCL_BYPASS = 6
};
72 
dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)73 static bool dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
74 {
75     return format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
76            format <= VPE_SURFACE_PIXEL_FORMAT_VIDEO_END;
77 }
78 
dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format format)79 static bool dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format format)
80 {
81     return (format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
82             format <= VPE_SURFACE_PIXEL_FORMAT_SUBSAMPLE_END);
83 }
84 
dpp1_dscl_get_dscl_mode(const struct scaler_data * data)85 static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data)
86 {
87 
88     // TODO Check if bypass bit enabled
89     const long long one = vpe_fixpt_one.value;
90 
91     if (data->ratios.horz.value == one && data->ratios.vert.value == one &&
92         data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
93         return DSCL_MODE_DSCL_BYPASS;
94 
95     if (!dpp1_dscl_is_ycbcr(data->format))
96         return DSCL_MODE_SCALING_444_RGB_ENABLE;
97 
98     if (!dpp1_dscl_is_video_subsampled(data->format))
99         return DSCL_MODE_SCALING_444_YCBCR_ENABLE;
100 
101     if (data->ratios.horz.value == one && data->ratios.vert.value == one)
102         return DSCL_MODE_SCALING_420_LUMA_BYPASS;
103 
104     return DSCL_MODE_SCALING_420_YCBCR_ENABLE;
105 }
106 
dpp1_dscl_set_dscl_mode(struct dpp * dpp,enum dscl_mode_sel dscl_mode)107 static void dpp1_dscl_set_dscl_mode(struct dpp *dpp, enum dscl_mode_sel dscl_mode)
108 {
109 
110     PROGRAM_ENTRY();
111 
112     REG_SET(VPDSCL_MODE, 0, VPDSCL_MODE, dscl_mode);
113 }
114 
dpp1_dscl_set_recout(struct dpp * dpp,const struct vpe_rect * recout)115 static void dpp1_dscl_set_recout(struct dpp *dpp, const struct vpe_rect *recout)
116 {
117 
118     PROGRAM_ENTRY();
119 
120     REG_SET_2(VPDSCL_RECOUT_START, 0, RECOUT_START_X, recout->x, RECOUT_START_Y, recout->y);
121 
122     REG_SET_2(VPDSCL_RECOUT_SIZE, 0, RECOUT_WIDTH, recout->width, RECOUT_HEIGHT, recout->height);
123 }
124 
dpp1_dscl_set_mpc_size(struct dpp * dpp,const struct scaler_data * scl_data)125 static void dpp1_dscl_set_mpc_size(struct dpp *dpp, const struct scaler_data *scl_data)
126 {
127 
128     PROGRAM_ENTRY();
129 
130     REG_SET_2(VPMPC_SIZE, 0, VPMPC_WIDTH, scl_data->h_active, VPMPC_HEIGHT, scl_data->v_active);
131 }
132 
dpp1_dscl_set_h_blank(struct dpp * dpp,uint16_t start,uint16_t end)133 static void dpp1_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
134 {
135 
136     PROGRAM_ENTRY();
137     REG_SET_2(VPOTG_H_BLANK, 0, OTG_H_BLANK_END, end, OTG_H_BLANK_START, start);
138 }
139 
dpp1_dscl_set_v_blank(struct dpp * dpp,uint16_t start,uint16_t end)140 static void dpp1_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
141 {
142 
143     PROGRAM_ENTRY();
144     REG_SET_2(VPOTG_V_BLANK, 0, OTG_V_BLANK_END, end, OTG_V_BLANK_START, start);
145 }
146 
dpp1_dscl_set_taps(struct dpp * dpp,const struct scaler_data * scl_data)147 static void dpp1_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
148 {
149 
150     PROGRAM_ENTRY();
151 
152     REG_SET_4(VPDSCL_TAP_CONTROL, 0, SCL_V_NUM_TAPS, scl_data->taps.v_taps - 1, SCL_H_NUM_TAPS,
153         scl_data->taps.h_taps - 1, SCL_V_NUM_TAPS_C, scl_data->taps.v_taps_c - 1, SCL_H_NUM_TAPS_C,
154         scl_data->taps.h_taps_c - 1);
155 }
156 
/* Looks up the built-in 64-phase coefficient table for a @taps-tap filter.
 *
 * @ratio is forwarded to the 8/6/4-tap lookups; the 2-tap bilinear table does
 * not take it. Returns NULL for a single tap and for any tap count not
 * handled here.
 */
static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
{
    switch (taps) {
    case 8:
        return vpe_get_filter_8tap_64p(ratio);
    case 6:
        return vpe_get_filter_6tap_64p(ratio);
    case 4:
        return vpe_get_filter_4tap_64p(ratio);
    case 2:
        return vpe_get_2tap_bilinear_64p();
    case 1:
        return NULL;
    default:
        /* should never happen, bug */
        return NULL;
    }
}
174 
dpp1_dscl_set_scaler_filter(struct dpp * dpp,uint32_t taps,enum vpe10_coef_filter_type_sel filter_type,const uint16_t * filter)175 static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
176     enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter)
177 {
178     const int tap_pairs = (taps + 1) / 2;
179     int       phase;
180     int       pair;
181     uint16_t  odd_coef, even_coef;
182 
183     PROGRAM_ENTRY();
184 
185     REG_SET_3(VPDSCL_COEF_RAM_TAP_SELECT, 0, SCL_COEF_RAM_TAP_PAIR_IDX, 0, SCL_COEF_RAM_PHASE, 0,
186         SCL_COEF_RAM_FILTER_TYPE, filter_type);
187 
188     for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) {
189         for (pair = 0; pair < tap_pairs; pair++) {
190             even_coef = filter[phase * (int)taps + 2 * pair];
191             if ((pair * 2 + 1) < (int)taps)
192                 odd_coef = filter[phase * (int)taps + 2 * pair + 1];
193             else
194                 odd_coef = 0;
195 
196             REG_SET_4(VPDSCL_COEF_RAM_TAP_DATA, 0,
197                 /* Even tap coefficient (bits 1:0 fixed to 0) */
198                 SCL_COEF_RAM_EVEN_TAP_COEF, even_coef,
199                 /* Write/read control for even coefficient */
200                 SCL_COEF_RAM_EVEN_TAP_COEF_EN, 1,
201                 /* Odd tap coefficient (bits 1:0 fixed to 0) */
202                 SCL_COEF_RAM_ODD_TAP_COEF, odd_coef,
203                 /* Write/read control for odd coefficient */
204                 SCL_COEF_RAM_ODD_TAP_COEF_EN, 1);
205         }
206     }
207 }
208 
dpp1_dscl_set_scl_filter(struct dpp * dpp,const struct scaler_data * scl_data,enum dscl_mode_sel scl_mode,bool chroma_coef_mode)209 static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
210     enum dscl_mode_sel scl_mode, bool chroma_coef_mode)
211 {
212 
213     const uint16_t *filter_h   = NULL;
214     const uint16_t *filter_v   = NULL;
215     const uint16_t *filter_h_c = NULL;
216     const uint16_t *filter_v_c = NULL;
217 
218     PROGRAM_ENTRY();
219 
220     if (scl_data->polyphase_filter_coeffs == 0) /*no externally provided set of coeffs and taps*/
221     {
222         filter_h = (uint16_t *)dpp1_dscl_get_filter_coeffs_64p(
223             (int)scl_data->taps.h_taps, scl_data->ratios.horz);
224         filter_v =
225             dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps, scl_data->ratios.vert);
226     } else {
227         filter_h = (const uint16_t *)&scl_data->polyphase_filter_coeffs->horiz_polyphase_coeffs;
228         filter_v = (const uint16_t *)&scl_data->polyphase_filter_coeffs->vert_polyphase_coeffs;
229     }
230     if (filter_h != NULL)
231         dpp1_dscl_set_scaler_filter(
232             dpp, scl_data->taps.h_taps, SCL_COEF_LUMA_HORZ_FILTER, filter_h);
233 
234     if (filter_v != NULL)
235         dpp1_dscl_set_scaler_filter(
236             dpp, scl_data->taps.v_taps, SCL_COEF_LUMA_VERT_FILTER, filter_v);
237 
238     if (chroma_coef_mode) {
239 
240         filter_h_c =
241             dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.h_taps_c, scl_data->ratios.horz_c);
242         filter_v_c =
243             dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps_c, scl_data->ratios.vert_c);
244 
245         if (filter_h_c != NULL)
246             dpp1_dscl_set_scaler_filter(
247                 dpp, scl_data->taps.h_taps_c, SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c);
248 
249         if (filter_v_c != NULL)
250             dpp1_dscl_set_scaler_filter(
251                 dpp, scl_data->taps.v_taps_c, SCL_COEF_CHROMA_VERT_FILTER, filter_v_c);
252     }
253 
254     REG_UPDATE(VPDSCL_MODE, SCL_CHROMA_COEF_MODE, chroma_coef_mode);
255 }
256 
dpp1_dscl_set_lb(struct dpp * dpp,const struct line_buffer_params * lb_params,enum lb_memory_config mem_size_config)257 static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
258     enum lb_memory_config mem_size_config)
259 {
260 
261     PROGRAM_ENTRY();
262 
263     REG_SET(VPLB_DATA_FORMAT, 0, ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
264 
265     REG_SET_2(
266         VPLB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, LB_MAX_PARTITIONS, LB_MAX_PARTITION);
267 }
268 
dpp1_dscl_set_scale_ratio(struct dpp * dpp,const struct scaler_data * data)269 static void dpp1_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
270 {
271 
272     PROGRAM_ENTRY();
273 
274     REG_SET(VPDSCL_HORZ_FILTER_SCALE_RATIO, 0, SCL_H_SCALE_RATIO,
275         vpe_fixpt_u3d19(data->ratios.horz) << 5);
276 
277     REG_SET(VPDSCL_VERT_FILTER_SCALE_RATIO, 0, SCL_V_SCALE_RATIO,
278         vpe_fixpt_u3d19(data->ratios.vert) << 5);
279 
280     REG_SET(VPDSCL_HORZ_FILTER_SCALE_RATIO_C, 0, SCL_H_SCALE_RATIO_C,
281         vpe_fixpt_u3d19(data->ratios.horz_c) << 5);
282 
283     REG_SET(VPDSCL_VERT_FILTER_SCALE_RATIO_C, 0, SCL_V_SCALE_RATIO_C,
284         vpe_fixpt_u3d19(data->ratios.vert_c) << 5);
285 }
286 
dpp1_dscl_set_scaler_position(struct dpp * dpp,const struct scaler_data * data)287 static void dpp1_dscl_set_scaler_position(struct dpp *dpp, const struct scaler_data *data)
288 {
289     uint32_t init_frac = 0;
290     uint32_t init_int  = 0;
291 
292     PROGRAM_ENTRY();
293 
294     /*
295      * 0.24 format for fraction, first five bits zeroed
296      */
297     init_frac = vpe_fixpt_u0d19(data->inits.h) << 5;
298     init_int  = (uint32_t)vpe_fixpt_floor(data->inits.h);
299     REG_SET_2(VPDSCL_HORZ_FILTER_INIT, 0, SCL_H_INIT_FRAC, init_frac, SCL_H_INIT_INT, init_int);
300 
301     init_frac = vpe_fixpt_u0d19(data->inits.h_c) << 5;
302     init_int  = (uint32_t)vpe_fixpt_floor(data->inits.h_c);
303     REG_SET_2(
304         VPDSCL_HORZ_FILTER_INIT_C, 0, SCL_H_INIT_FRAC_C, init_frac, SCL_H_INIT_INT_C, init_int);
305 
306     init_frac = vpe_fixpt_u0d19(data->inits.v) << 5;
307     init_int  = (uint32_t)vpe_fixpt_floor(data->inits.v);
308     REG_SET_2(VPDSCL_VERT_FILTER_INIT, 0, SCL_V_INIT_FRAC, init_frac, SCL_V_INIT_INT, init_int);
309 
310     init_frac = vpe_fixpt_u0d19(data->inits.v_c) << 5;
311     init_int  = (uint32_t)vpe_fixpt_floor(data->inits.v_c);
312     REG_SET_2(
313         VPDSCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int);
314 }
315 
dpp1_power_on_dscl(struct dpp * dpp,bool power_on)316 static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on)
317 {
318     PROGRAM_ENTRY();
319 
320     if (dpp->vpe_priv->init.debug.enable_mem_low_power.bits.dscl) {
321         if (power_on) {
322             REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
323                 LUT_MEM_PWR_FORCE, 0);
324 
325             // introduce a delay by dummy set
326             REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
327                 LUT_MEM_PWR_FORCE, 0);
328 
329             REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
330                 LUT_MEM_PWR_FORCE, 0);
331         } else {
332             REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
333                 LUT_MEM_PWR_FORCE, 3);
334         }
335     } else {
336         if (power_on) {
337             REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 1,
338                 LUT_MEM_PWR_FORCE, 0);
339         } else {
340             REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
341                 LUT_MEM_PWR_FORCE, 0);
342         }
343     }
344 }
345 
vpe10_dpp_set_segment_scaler(struct dpp * dpp,const struct scaler_data * scl_data)346 void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
347 {
348 
349     enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
350 
351     dpp1_dscl_set_recout(dpp, &scl_data->recout);
352     dpp1_dscl_set_mpc_size(dpp, scl_data);
353 
354     if (dscl_mode == DSCL_MODE_DSCL_BYPASS)
355         return;
356 
357     dpp1_dscl_set_scaler_position(dpp, scl_data);
358 }
359 
vpe10_dpp_set_frame_scaler(struct dpp * dpp,const struct scaler_data * scl_data)360 void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
361 {
362 
363     enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
364     bool               ycbcr     = dpp1_dscl_is_ycbcr(scl_data->format);
365 
366     dpp1_dscl_set_h_blank(dpp, 1, 0);
367     dpp1_dscl_set_v_blank(dpp, 1, 0);
368 
369     if (dscl_mode != DSCL_MODE_DSCL_BYPASS)
370         dpp1_power_on_dscl(dpp, true);
371 
372     dpp1_dscl_set_dscl_mode(dpp, dscl_mode);
373 
374     if (dscl_mode == DSCL_MODE_DSCL_BYPASS) {
375         dpp1_power_on_dscl(dpp, false);
376         return;
377     }
378 
379     dpp1_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
380     dpp1_dscl_set_scale_ratio(dpp, scl_data);
381     dpp1_dscl_set_taps(dpp, scl_data);
382     dpp1_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
383 }
384