1 /* Copyright 2022 Advanced Micro Devices, Inc.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 * OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors: AMD
22 *
23 */
24
25 #include "vpe_priv.h"
26 #include "vpe10_dpp.h"
27
/*
 * Context names for this file.
 * NOTE(review): presumably consumed by the PROGRAM_ENTRY()/REG_* helper
 * macros, which are defined outside this file - confirm. CTX_BASE matches
 * the name of the "dpp" parameter of every function that uses them.
 */
#define CTX      vpe10_dpp
#define CTX_BASE dpp

/*
 * Polyphase filter geometry. The coefficient upload loop programs
 * NUM_PHASES / 2 + 1 phases; the largest tap count handled by the
 * coefficient lookup in this file is 8.
 */
#define NUM_PHASES    64
#define HORZ_MAX_TAPS 8
#define VERT_MAX_TAPS 8

/* Value written to the LB_MAX_PARTITIONS field of VPLB_MEMORY_CTRL. */
#define LB_MAX_PARTITION 12
36
/*
 * Filter selector written to the SCL_COEF_RAM_FILTER_TYPE field of
 * VPDSCL_COEF_RAM_TAP_SELECT before a coefficient table is uploaded.
 */
enum vpe10_coef_filter_type_sel {
    /* luma */
    SCL_COEF_LUMA_VERT_FILTER   = 0,
    SCL_COEF_LUMA_HORZ_FILTER   = 1,
    /* chroma */
    SCL_COEF_CHROMA_VERT_FILTER = 2,
    SCL_COEF_CHROMA_HORZ_FILTER = 3,
    /* alpha */
    SCL_COEF_ALPHA_VERT_FILTER  = 4,
    SCL_COEF_ALPHA_HORZ_FILTER  = 5
};
45
/* DSCL auto-calibration ("autocal") modes. */
enum dscl_autocal_mode {
    /* Autocal disabled. */
    AUTOCAL_MODE_OFF = 0,

    /*
     * Autocal calculates the scaling ratio and the initial phase;
     * DSCL_MODE_SEL must be set to 1.
     */
    AUTOCAL_MODE_AUTOSCALE = 1,

    /*
     * Autocal performs auto centering without replication;
     * DSCL_MODE_SEL must be set to 0.
     */
    AUTOCAL_MODE_AUTOCENTER = 2,

    /*
     * Autocal performs auto centering and auto replication;
     * DSCL_MODE_SEL must be set to 0.
     */
    AUTOCAL_MODE_AUTOREPLICATE = 3
};
62
/*
 * Scaler operating mode; the chosen value is written to the VPDSCL_MODE
 * field of the VPDSCL_MODE register.
 */
enum dscl_mode_sel {
    /* 4:4:4 modes */
    DSCL_MODE_SCALING_444_BYPASS        = 0,
    DSCL_MODE_SCALING_444_RGB_ENABLE    = 1,
    DSCL_MODE_SCALING_444_YCBCR_ENABLE  = 2,
    /* 4:2:0 modes */
    DSCL_MODE_SCALING_420_YCBCR_ENABLE  = 3,
    DSCL_MODE_SCALING_420_LUMA_BYPASS   = 4,
    DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5,
    /* whole scaler bypassed */
    DSCL_MODE_DSCL_BYPASS               = 6
};
72
dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)73 static bool dpp1_dscl_is_ycbcr(const enum vpe_surface_pixel_format format)
74 {
75 return format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
76 format <= VPE_SURFACE_PIXEL_FORMAT_VIDEO_END;
77 }
78
dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format format)79 static bool dpp1_dscl_is_video_subsampled(const enum vpe_surface_pixel_format format)
80 {
81 return (format >= VPE_SURFACE_PIXEL_FORMAT_VIDEO_BEGIN &&
82 format <= VPE_SURFACE_PIXEL_FORMAT_SUBSAMPLE_END);
83 }
84
dpp1_dscl_get_dscl_mode(const struct scaler_data * data)85 static enum dscl_mode_sel dpp1_dscl_get_dscl_mode(const struct scaler_data *data)
86 {
87
88 // TODO Check if bypass bit enabled
89 const long long one = vpe_fixpt_one.value;
90
91 if (data->ratios.horz.value == one && data->ratios.vert.value == one &&
92 data->ratios.horz_c.value == one && data->ratios.vert_c.value == one)
93 return DSCL_MODE_DSCL_BYPASS;
94
95 if (!dpp1_dscl_is_ycbcr(data->format))
96 return DSCL_MODE_SCALING_444_RGB_ENABLE;
97
98 if (!dpp1_dscl_is_video_subsampled(data->format))
99 return DSCL_MODE_SCALING_444_YCBCR_ENABLE;
100
101 if (data->ratios.horz.value == one && data->ratios.vert.value == one)
102 return DSCL_MODE_SCALING_420_LUMA_BYPASS;
103
104 return DSCL_MODE_SCALING_420_YCBCR_ENABLE;
105 }
106
dpp1_dscl_set_dscl_mode(struct dpp * dpp,enum dscl_mode_sel dscl_mode)107 static void dpp1_dscl_set_dscl_mode(struct dpp *dpp, enum dscl_mode_sel dscl_mode)
108 {
109
110 PROGRAM_ENTRY();
111
112 REG_SET(VPDSCL_MODE, 0, VPDSCL_MODE, dscl_mode);
113 }
114
dpp1_dscl_set_recout(struct dpp * dpp,const struct vpe_rect * recout)115 static void dpp1_dscl_set_recout(struct dpp *dpp, const struct vpe_rect *recout)
116 {
117
118 PROGRAM_ENTRY();
119
120 REG_SET_2(VPDSCL_RECOUT_START, 0, RECOUT_START_X, recout->x, RECOUT_START_Y, recout->y);
121
122 REG_SET_2(VPDSCL_RECOUT_SIZE, 0, RECOUT_WIDTH, recout->width, RECOUT_HEIGHT, recout->height);
123 }
124
dpp1_dscl_set_mpc_size(struct dpp * dpp,const struct scaler_data * scl_data)125 static void dpp1_dscl_set_mpc_size(struct dpp *dpp, const struct scaler_data *scl_data)
126 {
127
128 PROGRAM_ENTRY();
129
130 REG_SET_2(VPMPC_SIZE, 0, VPMPC_WIDTH, scl_data->h_active, VPMPC_HEIGHT, scl_data->v_active);
131 }
132
dpp1_dscl_set_h_blank(struct dpp * dpp,uint16_t start,uint16_t end)133 static void dpp1_dscl_set_h_blank(struct dpp *dpp, uint16_t start, uint16_t end)
134 {
135
136 PROGRAM_ENTRY();
137 REG_SET_2(VPOTG_H_BLANK, 0, OTG_H_BLANK_END, end, OTG_H_BLANK_START, start);
138 }
139
dpp1_dscl_set_v_blank(struct dpp * dpp,uint16_t start,uint16_t end)140 static void dpp1_dscl_set_v_blank(struct dpp *dpp, uint16_t start, uint16_t end)
141 {
142
143 PROGRAM_ENTRY();
144 REG_SET_2(VPOTG_V_BLANK, 0, OTG_V_BLANK_END, end, OTG_V_BLANK_START, start);
145 }
146
dpp1_dscl_set_taps(struct dpp * dpp,const struct scaler_data * scl_data)147 static void dpp1_dscl_set_taps(struct dpp *dpp, const struct scaler_data *scl_data)
148 {
149
150 PROGRAM_ENTRY();
151
152 REG_SET_4(VPDSCL_TAP_CONTROL, 0, SCL_V_NUM_TAPS, scl_data->taps.v_taps - 1, SCL_H_NUM_TAPS,
153 scl_data->taps.h_taps - 1, SCL_V_NUM_TAPS_C, scl_data->taps.v_taps_c - 1, SCL_H_NUM_TAPS_C,
154 scl_data->taps.h_taps_c - 1);
155 }
156
/*
 * Look up the built-in 64-phase coefficient table for a tap count.
 *
 * @taps:  number of filter taps (8, 6, 4 or 2 have a table)
 * @ratio: scaling ratio, forwarded to the 8/6/4-tap lookups; the 2-tap
 *         bilinear lookup does not take it
 *
 * Returns the coefficient table, or NULL for 1 tap and for any other tap
 * count.
 */
static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
{
    switch (taps) {
    case 8:
        return vpe_get_filter_8tap_64p(ratio);
    case 6:
        return vpe_get_filter_6tap_64p(ratio);
    case 4:
        return vpe_get_filter_4tap_64p(ratio);
    case 2:
        return vpe_get_2tap_bilinear_64p();
    case 1:
        /* single tap: no table to return */
        return NULL;
    default:
        /* should never happen, bug */
        return NULL;
    }
}
174
dpp1_dscl_set_scaler_filter(struct dpp * dpp,uint32_t taps,enum vpe10_coef_filter_type_sel filter_type,const uint16_t * filter)175 static void dpp1_dscl_set_scaler_filter(struct dpp *dpp, uint32_t taps,
176 enum vpe10_coef_filter_type_sel filter_type, const uint16_t *filter)
177 {
178 const int tap_pairs = (taps + 1) / 2;
179 int phase;
180 int pair;
181 uint16_t odd_coef, even_coef;
182
183 PROGRAM_ENTRY();
184
185 REG_SET_3(VPDSCL_COEF_RAM_TAP_SELECT, 0, SCL_COEF_RAM_TAP_PAIR_IDX, 0, SCL_COEF_RAM_PHASE, 0,
186 SCL_COEF_RAM_FILTER_TYPE, filter_type);
187
188 for (phase = 0; phase < (NUM_PHASES / 2 + 1); phase++) {
189 for (pair = 0; pair < tap_pairs; pair++) {
190 even_coef = filter[phase * (int)taps + 2 * pair];
191 if ((pair * 2 + 1) < (int)taps)
192 odd_coef = filter[phase * (int)taps + 2 * pair + 1];
193 else
194 odd_coef = 0;
195
196 REG_SET_4(VPDSCL_COEF_RAM_TAP_DATA, 0,
197 /* Even tap coefficient (bits 1:0 fixed to 0) */
198 SCL_COEF_RAM_EVEN_TAP_COEF, even_coef,
199 /* Write/read control for even coefficient */
200 SCL_COEF_RAM_EVEN_TAP_COEF_EN, 1,
201 /* Odd tap coefficient (bits 1:0 fixed to 0) */
202 SCL_COEF_RAM_ODD_TAP_COEF, odd_coef,
203 /* Write/read control for odd coefficient */
204 SCL_COEF_RAM_ODD_TAP_COEF_EN, 1);
205 }
206 }
207 }
208
dpp1_dscl_set_scl_filter(struct dpp * dpp,const struct scaler_data * scl_data,enum dscl_mode_sel scl_mode,bool chroma_coef_mode)209 static void dpp1_dscl_set_scl_filter(struct dpp *dpp, const struct scaler_data *scl_data,
210 enum dscl_mode_sel scl_mode, bool chroma_coef_mode)
211 {
212
213 const uint16_t *filter_h = NULL;
214 const uint16_t *filter_v = NULL;
215 const uint16_t *filter_h_c = NULL;
216 const uint16_t *filter_v_c = NULL;
217
218 PROGRAM_ENTRY();
219
220 if (scl_data->polyphase_filter_coeffs == 0) /*no externally provided set of coeffs and taps*/
221 {
222 filter_h = (uint16_t *)dpp1_dscl_get_filter_coeffs_64p(
223 (int)scl_data->taps.h_taps, scl_data->ratios.horz);
224 filter_v =
225 dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps, scl_data->ratios.vert);
226 } else {
227 filter_h = (const uint16_t *)&scl_data->polyphase_filter_coeffs->horiz_polyphase_coeffs;
228 filter_v = (const uint16_t *)&scl_data->polyphase_filter_coeffs->vert_polyphase_coeffs;
229 }
230 if (filter_h != NULL)
231 dpp1_dscl_set_scaler_filter(
232 dpp, scl_data->taps.h_taps, SCL_COEF_LUMA_HORZ_FILTER, filter_h);
233
234 if (filter_v != NULL)
235 dpp1_dscl_set_scaler_filter(
236 dpp, scl_data->taps.v_taps, SCL_COEF_LUMA_VERT_FILTER, filter_v);
237
238 if (chroma_coef_mode) {
239
240 filter_h_c =
241 dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.h_taps_c, scl_data->ratios.horz_c);
242 filter_v_c =
243 dpp1_dscl_get_filter_coeffs_64p((int)scl_data->taps.v_taps_c, scl_data->ratios.vert_c);
244
245 if (filter_h_c != NULL)
246 dpp1_dscl_set_scaler_filter(
247 dpp, scl_data->taps.h_taps_c, SCL_COEF_CHROMA_HORZ_FILTER, filter_h_c);
248
249 if (filter_v_c != NULL)
250 dpp1_dscl_set_scaler_filter(
251 dpp, scl_data->taps.v_taps_c, SCL_COEF_CHROMA_VERT_FILTER, filter_v_c);
252 }
253
254 REG_UPDATE(VPDSCL_MODE, SCL_CHROMA_COEF_MODE, chroma_coef_mode);
255 }
256
dpp1_dscl_set_lb(struct dpp * dpp,const struct line_buffer_params * lb_params,enum lb_memory_config mem_size_config)257 static void dpp1_dscl_set_lb(struct dpp *dpp, const struct line_buffer_params *lb_params,
258 enum lb_memory_config mem_size_config)
259 {
260
261 PROGRAM_ENTRY();
262
263 REG_SET(VPLB_DATA_FORMAT, 0, ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
264
265 REG_SET_2(
266 VPLB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, LB_MAX_PARTITIONS, LB_MAX_PARTITION);
267 }
268
dpp1_dscl_set_scale_ratio(struct dpp * dpp,const struct scaler_data * data)269 static void dpp1_dscl_set_scale_ratio(struct dpp *dpp, const struct scaler_data *data)
270 {
271
272 PROGRAM_ENTRY();
273
274 REG_SET(VPDSCL_HORZ_FILTER_SCALE_RATIO, 0, SCL_H_SCALE_RATIO,
275 vpe_fixpt_u3d19(data->ratios.horz) << 5);
276
277 REG_SET(VPDSCL_VERT_FILTER_SCALE_RATIO, 0, SCL_V_SCALE_RATIO,
278 vpe_fixpt_u3d19(data->ratios.vert) << 5);
279
280 REG_SET(VPDSCL_HORZ_FILTER_SCALE_RATIO_C, 0, SCL_H_SCALE_RATIO_C,
281 vpe_fixpt_u3d19(data->ratios.horz_c) << 5);
282
283 REG_SET(VPDSCL_VERT_FILTER_SCALE_RATIO_C, 0, SCL_V_SCALE_RATIO_C,
284 vpe_fixpt_u3d19(data->ratios.vert_c) << 5);
285 }
286
dpp1_dscl_set_scaler_position(struct dpp * dpp,const struct scaler_data * data)287 static void dpp1_dscl_set_scaler_position(struct dpp *dpp, const struct scaler_data *data)
288 {
289 uint32_t init_frac = 0;
290 uint32_t init_int = 0;
291
292 PROGRAM_ENTRY();
293
294 /*
295 * 0.24 format for fraction, first five bits zeroed
296 */
297 init_frac = vpe_fixpt_u0d19(data->inits.h) << 5;
298 init_int = (uint32_t)vpe_fixpt_floor(data->inits.h);
299 REG_SET_2(VPDSCL_HORZ_FILTER_INIT, 0, SCL_H_INIT_FRAC, init_frac, SCL_H_INIT_INT, init_int);
300
301 init_frac = vpe_fixpt_u0d19(data->inits.h_c) << 5;
302 init_int = (uint32_t)vpe_fixpt_floor(data->inits.h_c);
303 REG_SET_2(
304 VPDSCL_HORZ_FILTER_INIT_C, 0, SCL_H_INIT_FRAC_C, init_frac, SCL_H_INIT_INT_C, init_int);
305
306 init_frac = vpe_fixpt_u0d19(data->inits.v) << 5;
307 init_int = (uint32_t)vpe_fixpt_floor(data->inits.v);
308 REG_SET_2(VPDSCL_VERT_FILTER_INIT, 0, SCL_V_INIT_FRAC, init_frac, SCL_V_INIT_INT, init_int);
309
310 init_frac = vpe_fixpt_u0d19(data->inits.v_c) << 5;
311 init_int = (uint32_t)vpe_fixpt_floor(data->inits.v_c);
312 REG_SET_2(
313 VPDSCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int);
314 }
315
dpp1_power_on_dscl(struct dpp * dpp,bool power_on)316 static void dpp1_power_on_dscl(struct dpp *dpp, bool power_on)
317 {
318 PROGRAM_ENTRY();
319
320 if (dpp->vpe_priv->init.debug.enable_mem_low_power.bits.dscl) {
321 if (power_on) {
322 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
323 LUT_MEM_PWR_FORCE, 0);
324
325 // introduce a delay by dummy set
326 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
327 LUT_MEM_PWR_FORCE, 0);
328
329 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
330 LUT_MEM_PWR_FORCE, 0);
331 } else {
332 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
333 LUT_MEM_PWR_FORCE, 3);
334 }
335 } else {
336 if (power_on) {
337 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 1,
338 LUT_MEM_PWR_FORCE, 0);
339 } else {
340 REG_SET_2(VPDSCL_MEM_PWR_CTRL, REG_DEFAULT(VPDSCL_MEM_PWR_CTRL), LUT_MEM_PWR_DIS, 0,
341 LUT_MEM_PWR_FORCE, 0);
342 }
343 }
344 }
345
vpe10_dpp_set_segment_scaler(struct dpp * dpp,const struct scaler_data * scl_data)346 void vpe10_dpp_set_segment_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
347 {
348
349 enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
350
351 dpp1_dscl_set_recout(dpp, &scl_data->recout);
352 dpp1_dscl_set_mpc_size(dpp, scl_data);
353
354 if (dscl_mode == DSCL_MODE_DSCL_BYPASS)
355 return;
356
357 dpp1_dscl_set_scaler_position(dpp, scl_data);
358 }
359
vpe10_dpp_set_frame_scaler(struct dpp * dpp,const struct scaler_data * scl_data)360 void vpe10_dpp_set_frame_scaler(struct dpp *dpp, const struct scaler_data *scl_data)
361 {
362
363 enum dscl_mode_sel dscl_mode = dpp1_dscl_get_dscl_mode(scl_data);
364 bool ycbcr = dpp1_dscl_is_ycbcr(scl_data->format);
365
366 dpp1_dscl_set_h_blank(dpp, 1, 0);
367 dpp1_dscl_set_v_blank(dpp, 1, 0);
368
369 if (dscl_mode != DSCL_MODE_DSCL_BYPASS)
370 dpp1_power_on_dscl(dpp, true);
371
372 dpp1_dscl_set_dscl_mode(dpp, dscl_mode);
373
374 if (dscl_mode == DSCL_MODE_DSCL_BYPASS) {
375 dpp1_power_on_dscl(dpp, false);
376 return;
377 }
378
379 dpp1_dscl_set_lb(dpp, &scl_data->lb_params, LB_MEMORY_CONFIG_0);
380 dpp1_dscl_set_scale_ratio(dpp, scl_data);
381 dpp1_dscl_set_taps(dpp, scl_data);
382 dpp1_dscl_set_scl_filter(dpp, scl_data, dscl_mode, ycbcr);
383 }
384