xref: /aosp_15_r20/external/libavc/encoder/svc/isvce_downscaler.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1*495ae853SAndroid Build Coastguard Worker /******************************************************************************
2*495ae853SAndroid Build Coastguard Worker  *
3*495ae853SAndroid Build Coastguard Worker  * Copyright (C) 2022 The Android Open Source Project
4*495ae853SAndroid Build Coastguard Worker  *
5*495ae853SAndroid Build Coastguard Worker  * Licensed under the Apache License, Version 2.0 (the "License");
6*495ae853SAndroid Build Coastguard Worker  * you may not use this file except in compliance with the License.
7*495ae853SAndroid Build Coastguard Worker  * You may obtain a copy of the License at:
8*495ae853SAndroid Build Coastguard Worker  *
9*495ae853SAndroid Build Coastguard Worker  * http://www.apache.org/licenses/LICENSE-2.0
10*495ae853SAndroid Build Coastguard Worker  *
11*495ae853SAndroid Build Coastguard Worker  * Unless required by applicable law or agreed to in writing, software
12*495ae853SAndroid Build Coastguard Worker  * distributed under the License is distributed on an "AS IS" BASIS,
13*495ae853SAndroid Build Coastguard Worker  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*495ae853SAndroid Build Coastguard Worker  * See the License for the specific language governing permissions and
15*495ae853SAndroid Build Coastguard Worker  * limitations under the License.
16*495ae853SAndroid Build Coastguard Worker  *
17*495ae853SAndroid Build Coastguard Worker  *****************************************************************************
18*495ae853SAndroid Build Coastguard Worker  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*495ae853SAndroid Build Coastguard Worker  */
20*495ae853SAndroid Build Coastguard Worker 
21*495ae853SAndroid Build Coastguard Worker /**
22*495ae853SAndroid Build Coastguard Worker *******************************************************************************
23*495ae853SAndroid Build Coastguard Worker * @file
24*495ae853SAndroid Build Coastguard Worker *  isvce_downscaler.c
25*495ae853SAndroid Build Coastguard Worker *
26*495ae853SAndroid Build Coastguard Worker * @brief
27*495ae853SAndroid Build Coastguard Worker *  Contains downscaler functions required by the SVC encoder
28*495ae853SAndroid Build Coastguard Worker *
29*495ae853SAndroid Build Coastguard Worker * @author
30*495ae853SAndroid Build Coastguard Worker *  ittiam
31*495ae853SAndroid Build Coastguard Worker *
32*495ae853SAndroid Build Coastguard Worker * @par List of Functions:
33*495ae853SAndroid Build Coastguard Worker *  - isvce_get_downscaler_data_size()
34*495ae853SAndroid Build Coastguard Worker *  - isvce_get_downscaler_padding_dims()
35*495ae853SAndroid Build Coastguard Worker *  - isvce_get_downscaler_normalized_filtered_pixel()
36*495ae853SAndroid Build Coastguard Worker *  - isvce_horizontal_downscale_and_transpose()
37*495ae853SAndroid Build Coastguard Worker *  - isvce_process_downscaler()
38*495ae853SAndroid Build Coastguard Worker *  - isvce_initialize_downscaler()
39*495ae853SAndroid Build Coastguard Worker *
40*495ae853SAndroid Build Coastguard Worker * @remarks
41*495ae853SAndroid Build Coastguard Worker *  None
42*495ae853SAndroid Build Coastguard Worker *
43*495ae853SAndroid Build Coastguard Worker *******************************************************************************
44*495ae853SAndroid Build Coastguard Worker */
45*495ae853SAndroid Build Coastguard Worker 
46*495ae853SAndroid Build Coastguard Worker /*****************************************************************************/
47*495ae853SAndroid Build Coastguard Worker /* File Includes                                                             */
48*495ae853SAndroid Build Coastguard Worker /*****************************************************************************/
49*495ae853SAndroid Build Coastguard Worker 
50*495ae853SAndroid Build Coastguard Worker /* system include files */
51*495ae853SAndroid Build Coastguard Worker #include <stdio.h>
52*495ae853SAndroid Build Coastguard Worker #include <stdlib.h>
53*495ae853SAndroid Build Coastguard Worker 
54*495ae853SAndroid Build Coastguard Worker #include "ih264_typedefs.h"
55*495ae853SAndroid Build Coastguard Worker #include "ih264_macros.h"
56*495ae853SAndroid Build Coastguard Worker #include "isvc_macros.h"
57*495ae853SAndroid Build Coastguard Worker #include "ih264_platform_macros.h"
58*495ae853SAndroid Build Coastguard Worker #include "iv2.h"
59*495ae853SAndroid Build Coastguard Worker #include "isvc_defs.h"
60*495ae853SAndroid Build Coastguard Worker #include "isvce_defs.h"
61*495ae853SAndroid Build Coastguard Worker #include "isvc_structs.h"
62*495ae853SAndroid Build Coastguard Worker #include "isvc_structs.h"
63*495ae853SAndroid Build Coastguard Worker #include "isvce_downscaler.h"
64*495ae853SAndroid Build Coastguard Worker #include "isvce_downscaler_private_defs.h"
65*495ae853SAndroid Build Coastguard Worker 
66*495ae853SAndroid Build Coastguard Worker /**
67*495ae853SAndroid Build Coastguard Worker ******************************************************************************
68*495ae853SAndroid Build Coastguard Worker * @brief  lanczos filter coefficients for 2x downscaling
69*495ae853SAndroid Build Coastguard Worker * @remarks Though the length of the filter is 8, the
70*495ae853SAndroid Build Coastguard Worker * same coefficients
71*495ae853SAndroid Build Coastguard Worker * are replicated so that 2 rows can be processed at one
72*495ae853SAndroid Build Coastguard Worker * go in SIMD
73*495ae853SAndroid Build Coastguard Worker ******************************************************************************
74*495ae853SAndroid Build Coastguard Worker */
75*495ae853SAndroid Build Coastguard Worker static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
76*495ae853SAndroid Build Coastguard Worker     {-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
77*495ae853SAndroid Build Coastguard Worker     {-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
78*495ae853SAndroid Build Coastguard Worker     {-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
79*495ae853SAndroid Build Coastguard Worker     {-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
80*495ae853SAndroid Build Coastguard Worker     {-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
81*495ae853SAndroid Build Coastguard Worker     {-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
82*495ae853SAndroid Build Coastguard Worker     {-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
83*495ae853SAndroid Build Coastguard Worker     {0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};
84*495ae853SAndroid Build Coastguard Worker 
85*495ae853SAndroid Build Coastguard Worker /**
86*495ae853SAndroid Build Coastguard Worker ******************************************************************************
87*495ae853SAndroid Build Coastguard Worker * @brief  lanczos filter coefficients for 1.5x downscaling
88*495ae853SAndroid Build Coastguard Worker * @remarks Though the length of the filter is 8, the same coefficients
89*495ae853SAndroid Build Coastguard Worker * are replicated so that 2 rows can be processed at one go in SIMD.
90*495ae853SAndroid Build Coastguard Worker ******************************************************************************
91*495ae853SAndroid Build Coastguard Worker */
92*495ae853SAndroid Build Coastguard Worker static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
93*495ae853SAndroid Build Coastguard Worker     {{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
94*495ae853SAndroid Build Coastguard Worker      {0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
95*495ae853SAndroid Build Coastguard Worker      {0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
96*495ae853SAndroid Build Coastguard Worker      {0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
97*495ae853SAndroid Build Coastguard Worker      {0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
98*495ae853SAndroid Build Coastguard Worker      {0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
99*495ae853SAndroid Build Coastguard Worker      {0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
100*495ae853SAndroid Build Coastguard Worker      {0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};
101*495ae853SAndroid Build Coastguard Worker 
102*495ae853SAndroid Build Coastguard Worker /**
103*495ae853SAndroid Build Coastguard Worker *******************************************************************************
104*495ae853SAndroid Build Coastguard Worker *
105*495ae853SAndroid Build Coastguard Worker * @brief
106*495ae853SAndroid Build Coastguard Worker *   gets the memory size required for downscaler
107*495ae853SAndroid Build Coastguard Worker *
108*495ae853SAndroid Build Coastguard Worker * @par Description:
109*495ae853SAndroid Build Coastguard Worker *   returns the memory required by the downscaler context and state structs
110*495ae853SAndroid Build Coastguard Worker *   for allocation.
111*495ae853SAndroid Build Coastguard Worker *
112*495ae853SAndroid Build Coastguard Worker * @returns
113*495ae853SAndroid Build Coastguard Worker *
114*495ae853SAndroid Build Coastguard Worker * @remarks
115*495ae853SAndroid Build Coastguard Worker *
116*495ae853SAndroid Build Coastguard Worker *
117*495ae853SAndroid Build Coastguard Worker *******************************************************************************
118*495ae853SAndroid Build Coastguard Worker */
119*495ae853SAndroid Build Coastguard Worker 
isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers,DOUBLE d_scaling_factor,UWORD32 u4_width,UWORD32 u4_height)120*495ae853SAndroid Build Coastguard Worker UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
121*495ae853SAndroid Build Coastguard Worker                                        UWORD32 u4_width, UWORD32 u4_height)
122*495ae853SAndroid Build Coastguard Worker {
123*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_size = 0;
124*495ae853SAndroid Build Coastguard Worker 
125*495ae853SAndroid Build Coastguard Worker     if(u1_num_spatial_layers > 1)
126*495ae853SAndroid Build Coastguard Worker     {
127*495ae853SAndroid Build Coastguard Worker         u4_size += sizeof(downscaler_state_t);
128*495ae853SAndroid Build Coastguard Worker 
129*495ae853SAndroid Build Coastguard Worker         u4_size +=
130*495ae853SAndroid Build Coastguard Worker             (u4_height + NUM_SCALER_FILTER_TAPS * 2) * ((UWORD32) (u4_width / d_scaling_factor));
131*495ae853SAndroid Build Coastguard Worker     }
132*495ae853SAndroid Build Coastguard Worker 
133*495ae853SAndroid Build Coastguard Worker     return u4_size;
134*495ae853SAndroid Build Coastguard Worker }
135*495ae853SAndroid Build Coastguard Worker 
136*495ae853SAndroid Build Coastguard Worker /**
137*495ae853SAndroid Build Coastguard Worker *******************************************************************************
138*495ae853SAndroid Build Coastguard Worker *
139*495ae853SAndroid Build Coastguard Worker * @brief
140*495ae853SAndroid Build Coastguard Worker *   gets the padding size required for filtering
141*495ae853SAndroid Build Coastguard Worker *
142*495ae853SAndroid Build Coastguard Worker * @par Description:
143*495ae853SAndroid Build Coastguard Worker *   gets the padding size required for filtering
144*495ae853SAndroid Build Coastguard Worker *
145*495ae853SAndroid Build Coastguard Worker * @returns
146*495ae853SAndroid Build Coastguard Worker *
147*495ae853SAndroid Build Coastguard Worker * @remarks
148*495ae853SAndroid Build Coastguard Worker *
149*495ae853SAndroid Build Coastguard Worker *
150*495ae853SAndroid Build Coastguard Worker *******************************************************************************
151*495ae853SAndroid Build Coastguard Worker */
152*495ae853SAndroid Build Coastguard Worker 
isvce_get_downscaler_padding_dims(padding_dims_t * ps_pad_dims)153*495ae853SAndroid Build Coastguard Worker void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
154*495ae853SAndroid Build Coastguard Worker {
155*495ae853SAndroid Build Coastguard Worker     ps_pad_dims->u1_left_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
156*495ae853SAndroid Build Coastguard Worker     ps_pad_dims->u1_right_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
157*495ae853SAndroid Build Coastguard Worker     ps_pad_dims->u1_top_pad_size = NUM_SCALER_FILTER_TAPS / 2;
158*495ae853SAndroid Build Coastguard Worker     ps_pad_dims->u1_bottom_pad_size = NUM_SCALER_FILTER_TAPS / 2;
159*495ae853SAndroid Build Coastguard Worker }
160*495ae853SAndroid Build Coastguard Worker 
161*495ae853SAndroid Build Coastguard Worker /**
162*495ae853SAndroid Build Coastguard Worker *******************************************************************************
163*495ae853SAndroid Build Coastguard Worker *
164*495ae853SAndroid Build Coastguard Worker * @brief
165*495ae853SAndroid Build Coastguard Worker *   processes downscaler
166*495ae853SAndroid Build Coastguard Worker *
167*495ae853SAndroid Build Coastguard Worker * @par Description:
168*495ae853SAndroid Build Coastguard Worker *   calls the function for padding and scaling
169*495ae853SAndroid Build Coastguard Worker *
170*495ae853SAndroid Build Coastguard Worker * @param[in] ps_scaler
171*495ae853SAndroid Build Coastguard Worker *  pointer to downdownscaler context
172*495ae853SAndroid Build Coastguard Worker *
173*495ae853SAndroid Build Coastguard Worker * @param[in] ps_src_buf_props
174*495ae853SAndroid Build Coastguard Worker *  pointer to source buffer props struct
175*495ae853SAndroid Build Coastguard Worker *
176*495ae853SAndroid Build Coastguard Worker * @param[in] u4_blk_wd
177*495ae853SAndroid Build Coastguard Worker *  width of the block to be processed
178*495ae853SAndroid Build Coastguard Worker *
179*495ae853SAndroid Build Coastguard Worker * @param[in] u4_blk_ht
180*495ae853SAndroid Build Coastguard Worker *  height of the block to be processed
181*495ae853SAndroid Build Coastguard Worker *
182*495ae853SAndroid Build Coastguard Worker * @returns
183*495ae853SAndroid Build Coastguard Worker *
184*495ae853SAndroid Build Coastguard Worker * @remarks
185*495ae853SAndroid Build Coastguard Worker *
186*495ae853SAndroid Build Coastguard Worker *
187*495ae853SAndroid Build Coastguard Worker *******************************************************************************
188*495ae853SAndroid Build Coastguard Worker */
189*495ae853SAndroid Build Coastguard Worker 
isvce_process_downscaler(downscaler_ctxt_t * ps_scaler,yuv_buf_props_t * ps_src_buf_props,yuv_buf_props_t * ps_dst_buf_props,UWORD32 u4_blk_wd,UWORD32 u4_blk_ht)190*495ae853SAndroid Build Coastguard Worker void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
191*495ae853SAndroid Build Coastguard Worker                               yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
192*495ae853SAndroid Build Coastguard Worker                               UWORD32 u4_blk_ht)
193*495ae853SAndroid Build Coastguard Worker {
194*495ae853SAndroid Build Coastguard Worker     buffer_container_t s_src_buf;
195*495ae853SAndroid Build Coastguard Worker     buffer_container_t s_dst_buf;
196*495ae853SAndroid Build Coastguard Worker 
197*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_scaled_block_size_x, u4_scaled_block_size_y;
198*495ae853SAndroid Build Coastguard Worker 
199*495ae853SAndroid Build Coastguard Worker     downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
200*495ae853SAndroid Build Coastguard Worker 
201*495ae853SAndroid Build Coastguard Worker     ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);
202*495ae853SAndroid Build Coastguard Worker 
203*495ae853SAndroid Build Coastguard Worker     u4_scaled_block_size_x = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
204*495ae853SAndroid Build Coastguard Worker     u4_scaled_block_size_y = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);
205*495ae853SAndroid Build Coastguard Worker 
206*495ae853SAndroid Build Coastguard Worker     /* luma */
207*495ae853SAndroid Build Coastguard Worker     s_src_buf = ps_src_buf_props->as_component_bufs[Y];
208*495ae853SAndroid Build Coastguard Worker     s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) -
209*495ae853SAndroid Build Coastguard Worker                         (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
210*495ae853SAndroid Build Coastguard Worker 
211*495ae853SAndroid Build Coastguard Worker     s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
212*495ae853SAndroid Build Coastguard Worker     s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
213*495ae853SAndroid Build Coastguard Worker 
214*495ae853SAndroid Build Coastguard Worker     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
215*495ae853SAndroid Build Coastguard Worker                                    u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 0);
216*495ae853SAndroid Build Coastguard Worker 
217*495ae853SAndroid Build Coastguard Worker     s_src_buf = s_dst_buf;
218*495ae853SAndroid Build Coastguard Worker     s_dst_buf = ps_dst_buf_props->as_component_bufs[Y];
219*495ae853SAndroid Build Coastguard Worker 
220*495ae853SAndroid Build Coastguard Worker     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
221*495ae853SAndroid Build Coastguard Worker                                    u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
222*495ae853SAndroid Build Coastguard Worker 
223*495ae853SAndroid Build Coastguard Worker     /* chroma */
224*495ae853SAndroid Build Coastguard Worker     u4_blk_ht /= 2;
225*495ae853SAndroid Build Coastguard Worker     u4_scaled_block_size_y /= 2;
226*495ae853SAndroid Build Coastguard Worker 
227*495ae853SAndroid Build Coastguard Worker     s_src_buf = ps_src_buf_props->as_component_bufs[U];
228*495ae853SAndroid Build Coastguard Worker     s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - NUM_SCALER_FILTER_TAPS -
229*495ae853SAndroid Build Coastguard Worker                         (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
230*495ae853SAndroid Build Coastguard Worker 
231*495ae853SAndroid Build Coastguard Worker     s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
232*495ae853SAndroid Build Coastguard Worker     s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
233*495ae853SAndroid Build Coastguard Worker 
234*495ae853SAndroid Build Coastguard Worker     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
235*495ae853SAndroid Build Coastguard Worker                                    u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 1);
236*495ae853SAndroid Build Coastguard Worker 
237*495ae853SAndroid Build Coastguard Worker     s_src_buf = s_dst_buf;
238*495ae853SAndroid Build Coastguard Worker     s_dst_buf = ps_dst_buf_props->as_component_bufs[U];
239*495ae853SAndroid Build Coastguard Worker 
240*495ae853SAndroid Build Coastguard Worker     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
241*495ae853SAndroid Build Coastguard Worker                                    u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
242*495ae853SAndroid Build Coastguard Worker }
243*495ae853SAndroid Build Coastguard Worker 
244*495ae853SAndroid Build Coastguard Worker /**
245*495ae853SAndroid Build Coastguard Worker *******************************************************************************
246*495ae853SAndroid Build Coastguard Worker *
247*495ae853SAndroid Build Coastguard Worker * @brief
248*495ae853SAndroid Build Coastguard Worker *   normalized dot product computer for downscaler
249*495ae853SAndroid Build Coastguard Worker *
250*495ae853SAndroid Build Coastguard Worker * @par Description:
251*495ae853SAndroid Build Coastguard Worker *   Given the downscaler filter coefficients, source buffer, the function
252*495ae853SAndroid Build Coastguard Worker *   calculates the dot product between them, adds an offset and normalizes it
253*495ae853SAndroid Build Coastguard Worker *
254*495ae853SAndroid Build Coastguard Worker * @param[in] ps_scaler
255*495ae853SAndroid Build Coastguard Worker *  pointer to src buf
256*495ae853SAndroid Build Coastguard Worker *
257*495ae853SAndroid Build Coastguard Worker * @param[in] pi1_filter
258*495ae853SAndroid Build Coastguard Worker *  pointer to filter coefficients
259*495ae853SAndroid Build Coastguard Worker *
260*495ae853SAndroid Build Coastguard Worker * @returns
261*495ae853SAndroid Build Coastguard Worker *
262*495ae853SAndroid Build Coastguard Worker * @remarks
263*495ae853SAndroid Build Coastguard Worker *
264*495ae853SAndroid Build Coastguard Worker *******************************************************************************
265*495ae853SAndroid Build Coastguard Worker */
266*495ae853SAndroid Build Coastguard Worker 
isvce_get_downscaler_normalized_filtered_pixel(UWORD8 * pu1_src,WORD8 * pi1_filter)267*495ae853SAndroid Build Coastguard Worker static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
268*495ae853SAndroid Build Coastguard Worker {
269*495ae853SAndroid Build Coastguard Worker     WORD32 i;
270*495ae853SAndroid Build Coastguard Worker     WORD32 i4_norm_dot_product;
271*495ae853SAndroid Build Coastguard Worker     UWORD8 u1_out_pixel;
272*495ae853SAndroid Build Coastguard Worker     WORD32 i4_dot_product_sum = 0;
273*495ae853SAndroid Build Coastguard Worker     WORD32 i4_rounding_offset = 1 << (FILTER_COEFF_Q - 1);
274*495ae853SAndroid Build Coastguard Worker     WORD32 i4_normalizing_factor = 1 << FILTER_COEFF_Q;
275*495ae853SAndroid Build Coastguard Worker 
276*495ae853SAndroid Build Coastguard Worker     for(i = 0; i < NUM_SCALER_FILTER_TAPS; i++)
277*495ae853SAndroid Build Coastguard Worker     {
278*495ae853SAndroid Build Coastguard Worker         i4_dot_product_sum += (pu1_src[i] * pi1_filter[i]);
279*495ae853SAndroid Build Coastguard Worker     }
280*495ae853SAndroid Build Coastguard Worker 
281*495ae853SAndroid Build Coastguard Worker     i4_norm_dot_product = ((i4_dot_product_sum + i4_rounding_offset) / i4_normalizing_factor);
282*495ae853SAndroid Build Coastguard Worker     u1_out_pixel = (UWORD8) CLIP_U8(i4_norm_dot_product);
283*495ae853SAndroid Build Coastguard Worker 
284*495ae853SAndroid Build Coastguard Worker     return u1_out_pixel;
285*495ae853SAndroid Build Coastguard Worker }
286*495ae853SAndroid Build Coastguard Worker 
287*495ae853SAndroid Build Coastguard Worker /**
288*495ae853SAndroid Build Coastguard Worker *******************************************************************************
289*495ae853SAndroid Build Coastguard Worker *
290*495ae853SAndroid Build Coastguard Worker * @brief
291*495ae853SAndroid Build Coastguard Worker *   horizontal scaler function
292*495ae853SAndroid Build Coastguard Worker *
293*495ae853SAndroid Build Coastguard Worker * @par Description:
294*495ae853SAndroid Build Coastguard Worker *   Does horizontal scaling for the given block
295*495ae853SAndroid Build Coastguard Worker *
296*495ae853SAndroid Build Coastguard Worker * @param[in] ps_scaler
297*495ae853SAndroid Build Coastguard Worker *  pointer to downscaler context
298*495ae853SAndroid Build Coastguard Worker *
299*495ae853SAndroid Build Coastguard Worker * @param[in] ps_src
300*495ae853SAndroid Build Coastguard Worker *  pointer to source buffer container
301*495ae853SAndroid Build Coastguard Worker *
302*495ae853SAndroid Build Coastguard Worker * @param[in] ps_dst
303*495ae853SAndroid Build Coastguard Worker *  pointer to destination buffer container
304*495ae853SAndroid Build Coastguard Worker *
305*495ae853SAndroid Build Coastguard Worker * @param[in] pai1_filters
306*495ae853SAndroid Build Coastguard Worker *  pointer to array of downscaler filters
307*495ae853SAndroid Build Coastguard Worker *
308*495ae853SAndroid Build Coastguard Worker * @param[in] u4_blk_wd
309*495ae853SAndroid Build Coastguard Worker *  width of the block after horizontal scaling (output block width)
310*495ae853SAndroid Build Coastguard Worker *
311*495ae853SAndroid Build Coastguard Worker * @param[in] u4_blk_ht
312*495ae853SAndroid Build Coastguard Worker *  height of the current block (input block height)
313*495ae853SAndroid Build Coastguard Worker *
314*495ae853SAndroid Build Coastguard Worker * @param[in] u1_is_chroma
315*495ae853SAndroid Build Coastguard Worker *  flag suggesting whether the buffer is luma or chroma
316*495ae853SAndroid Build Coastguard Worker *
317*495ae853SAndroid Build Coastguard Worker *
318*495ae853SAndroid Build Coastguard Worker * @returns
319*495ae853SAndroid Build Coastguard Worker *
320*495ae853SAndroid Build Coastguard Worker * @remarks
*  The same function is used for vertical scaling too, as
*  the horizontally scaled input is stored in transposed fashion.
323*495ae853SAndroid Build Coastguard Worker *
324*495ae853SAndroid Build Coastguard Worker *******************************************************************************
325*495ae853SAndroid Build Coastguard Worker */
326*495ae853SAndroid Build Coastguard Worker 
isvce_horizontal_downscale_and_transpose(downscaler_ctxt_t * ps_scaler,buffer_container_t * ps_src,buffer_container_t * ps_dst,FILTER_COEFF_ARRAY pai1_filters,UWORD32 u4_blk_wd,UWORD32 u4_blk_ht,UWORD8 u1_is_chroma)327*495ae853SAndroid Build Coastguard Worker static void isvce_horizontal_downscale_and_transpose(
328*495ae853SAndroid Build Coastguard Worker     downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
329*495ae853SAndroid Build Coastguard Worker     FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
330*495ae853SAndroid Build Coastguard Worker {
331*495ae853SAndroid Build Coastguard Worker     WORD32 i, j, k;
332*495ae853SAndroid Build Coastguard Worker     UWORD8 u1_phase;
333*495ae853SAndroid Build Coastguard Worker     UWORD8 u1_filtered_out_pixel;
334*495ae853SAndroid Build Coastguard Worker     UWORD8 *pu1_src_j, *pu1_dst_j;
335*495ae853SAndroid Build Coastguard Worker     UWORD8 u1_filtered_out_u_pixel, u1_filtered_out_v_pixel;
336*495ae853SAndroid Build Coastguard Worker     UWORD8 *pu1_in_pixel;
337*495ae853SAndroid Build Coastguard Worker     UWORD8 *pu1_out_pixel;
338*495ae853SAndroid Build Coastguard Worker     WORD8 *pi1_filter_grid;
339*495ae853SAndroid Build Coastguard Worker     UWORD16 u2_full_pixel_inc;
340*495ae853SAndroid Build Coastguard Worker     UWORD8 au1_temp_u_buff[NUM_SCALER_FILTER_TAPS];
341*495ae853SAndroid Build Coastguard Worker     UWORD8 au1_temp_v_buff[NUM_SCALER_FILTER_TAPS];
342*495ae853SAndroid Build Coastguard Worker 
343*495ae853SAndroid Build Coastguard Worker     downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
344*495ae853SAndroid Build Coastguard Worker 
345*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
346*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
347*495ae853SAndroid Build Coastguard Worker     UWORD8 *pu1_src = ps_src->pv_data;
348*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_in_stride = ps_src->i4_data_stride;
349*495ae853SAndroid Build Coastguard Worker     UWORD8 *pu1_dst = ps_dst->pv_data;
350*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_out_stride = ps_dst->i4_data_stride;
351*495ae853SAndroid Build Coastguard Worker     UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;
352*495ae853SAndroid Build Coastguard Worker 
353*495ae853SAndroid Build Coastguard Worker     /* Offset the input so that the input pixel to be processed
354*495ae853SAndroid Build Coastguard Worker     co-incides with the centre of filter (4th coefficient)*/
355*495ae853SAndroid Build Coastguard Worker     pu1_src += (1 + u1_is_chroma);
356*495ae853SAndroid Build Coastguard Worker 
357*495ae853SAndroid Build Coastguard Worker     ASSERT((1 << DOWNSCALER_Q) == ps_scaler_state->u4_vert_increment);
358*495ae853SAndroid Build Coastguard Worker 
359*495ae853SAndroid Build Coastguard Worker     if(!u1_is_chroma)
360*495ae853SAndroid Build Coastguard Worker     {
361*495ae853SAndroid Build Coastguard Worker         for(j = 0; j < (WORD32) u4_blk_ht; j++)
362*495ae853SAndroid Build Coastguard Worker         {
363*495ae853SAndroid Build Coastguard Worker             pu1_src_j = pu1_src + (j * u4_in_stride);
364*495ae853SAndroid Build Coastguard Worker             pu1_dst_j = pu1_dst + j;
365*495ae853SAndroid Build Coastguard Worker 
366*495ae853SAndroid Build Coastguard Worker             u4_center_pixel_pos = u4_center_pixel_pos_src;
367*495ae853SAndroid Build Coastguard Worker 
368*495ae853SAndroid Build Coastguard Worker             for(i = 0; i < (WORD32) u4_blk_wd; i++)
369*495ae853SAndroid Build Coastguard Worker             {
370*495ae853SAndroid Build Coastguard Worker                 u1_phase = get_filter_phase(u4_center_pixel_pos);
371*495ae853SAndroid Build Coastguard Worker                 pi1_filter_grid = pai1_filters[u1_phase];
372*495ae853SAndroid Build Coastguard Worker 
373*495ae853SAndroid Build Coastguard Worker                 /* Doing the Calculation for current Loop Count  */
374*495ae853SAndroid Build Coastguard Worker                 u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
375*495ae853SAndroid Build Coastguard Worker                 pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
376*495ae853SAndroid Build Coastguard Worker                 pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
377*495ae853SAndroid Build Coastguard Worker 
378*495ae853SAndroid Build Coastguard Worker                 u1_filtered_out_pixel =
379*495ae853SAndroid Build Coastguard Worker                     isvce_get_downscaler_normalized_filtered_pixel(pu1_in_pixel, pi1_filter_grid);
380*495ae853SAndroid Build Coastguard Worker                 *pu1_out_pixel = u1_filtered_out_pixel;
381*495ae853SAndroid Build Coastguard Worker 
382*495ae853SAndroid Build Coastguard Worker                 /* Update the context for next Loop Count */
383*495ae853SAndroid Build Coastguard Worker                 u4_center_pixel_pos += u4_src_horz_increments;
384*495ae853SAndroid Build Coastguard Worker             }
385*495ae853SAndroid Build Coastguard Worker         }
386*495ae853SAndroid Build Coastguard Worker     }
387*495ae853SAndroid Build Coastguard Worker     else
388*495ae853SAndroid Build Coastguard Worker     {
389*495ae853SAndroid Build Coastguard Worker         for(j = 0; j < (WORD32) u4_blk_ht; j++)
390*495ae853SAndroid Build Coastguard Worker         {
391*495ae853SAndroid Build Coastguard Worker             pu1_src_j = pu1_src + (j * u4_in_stride);
392*495ae853SAndroid Build Coastguard Worker             pu1_dst_j = pu1_dst + j;
393*495ae853SAndroid Build Coastguard Worker 
394*495ae853SAndroid Build Coastguard Worker             u4_center_pixel_pos = u4_center_pixel_pos_src;
395*495ae853SAndroid Build Coastguard Worker 
396*495ae853SAndroid Build Coastguard Worker             for(i = 0; i < (WORD32) u4_blk_wd; i++)
397*495ae853SAndroid Build Coastguard Worker             {
398*495ae853SAndroid Build Coastguard Worker                 u1_phase = get_filter_phase(u4_center_pixel_pos);
399*495ae853SAndroid Build Coastguard Worker                 pi1_filter_grid = pai1_filters[u1_phase];
400*495ae853SAndroid Build Coastguard Worker 
401*495ae853SAndroid Build Coastguard Worker                 /*Doing the Calculation for current Loop Count  */
402*495ae853SAndroid Build Coastguard Worker                 u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
403*495ae853SAndroid Build Coastguard Worker                 pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
404*495ae853SAndroid Build Coastguard Worker                 pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
405*495ae853SAndroid Build Coastguard Worker 
406*495ae853SAndroid Build Coastguard Worker                 for(k = 0; k < NUM_SCALER_FILTER_TAPS; k++)
407*495ae853SAndroid Build Coastguard Worker                 {
408*495ae853SAndroid Build Coastguard Worker                     au1_temp_u_buff[k] = *(pu1_in_pixel + (2 * k));
409*495ae853SAndroid Build Coastguard Worker                     au1_temp_v_buff[k] = *(pu1_in_pixel + ((2 * k) + 1));
410*495ae853SAndroid Build Coastguard Worker                 }
411*495ae853SAndroid Build Coastguard Worker 
412*495ae853SAndroid Build Coastguard Worker                 u1_filtered_out_u_pixel = isvce_get_downscaler_normalized_filtered_pixel(
413*495ae853SAndroid Build Coastguard Worker                     au1_temp_u_buff, pi1_filter_grid);
414*495ae853SAndroid Build Coastguard Worker                 u1_filtered_out_v_pixel = isvce_get_downscaler_normalized_filtered_pixel(
415*495ae853SAndroid Build Coastguard Worker                     au1_temp_v_buff, pi1_filter_grid);
416*495ae853SAndroid Build Coastguard Worker                 *pu1_out_pixel = u1_filtered_out_u_pixel;
417*495ae853SAndroid Build Coastguard Worker                 *(pu1_out_pixel + u4_out_stride) = u1_filtered_out_v_pixel;
418*495ae853SAndroid Build Coastguard Worker 
419*495ae853SAndroid Build Coastguard Worker                 /* Update the context for next Loop Count */
420*495ae853SAndroid Build Coastguard Worker                 u4_center_pixel_pos += u4_src_horz_increments;
421*495ae853SAndroid Build Coastguard Worker             }
422*495ae853SAndroid Build Coastguard Worker         }
423*495ae853SAndroid Build Coastguard Worker     }
424*495ae853SAndroid Build Coastguard Worker }
425*495ae853SAndroid Build Coastguard Worker 
/**
*******************************************************************************
*
* @brief
*   picks the horizontal downscale-and-transpose routine for an architecture
*
* @par Description:
*   stores a function pointer in ps_scaler_state->pf_downscaler. The generic
*   C routine is used unless the build enables an architecture specific
*   variant (X86, ARMV8, or ARM without DISABLE_NEON) and e_arch matches one
*   of the architectures listed for that variant.
*
* @param[in,out] ps_scaler_state
*   downscaler state whose pf_downscaler member is written
*
* @param[in] e_arch
*   architecture type
*
* @returns
*   none
*
*******************************************************************************
*/
void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
{
    /* Start from the generic routine; a matching case below overrides it */
    ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_sse42;
            break;
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
            break;
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
            break;
#endif
        default:
            /* Any other architecture keeps the generic routine set above */
            break;
    }
}
467*495ae853SAndroid Build Coastguard Worker 
468*495ae853SAndroid Build Coastguard Worker /**
469*495ae853SAndroid Build Coastguard Worker *******************************************************************************
470*495ae853SAndroid Build Coastguard Worker *
471*495ae853SAndroid Build Coastguard Worker * @brief
472*495ae853SAndroid Build Coastguard Worker *   initializes the downscaler context
473*495ae853SAndroid Build Coastguard Worker *
474*495ae853SAndroid Build Coastguard Worker * @par Description:
475*495ae853SAndroid Build Coastguard Worker *   initializes the downscaler context for the given scaling factor
476*495ae853SAndroid Build Coastguard Worker *   with padding size, filter size, etc.
477*495ae853SAndroid Build Coastguard Worker *
478*495ae853SAndroid Build Coastguard Worker * @param[in] ps_scaler
479*495ae853SAndroid Build Coastguard Worker *   pointer downscaler context
480*495ae853SAndroid Build Coastguard Worker *
481*495ae853SAndroid Build Coastguard Worker * @param[in] ps_mem_rec
482*495ae853SAndroid Build Coastguard Worker *   pointer to memory allocated to downscaler process
483*495ae853SAndroid Build Coastguard Worker *
484*495ae853SAndroid Build Coastguard Worker * @param[in] d_scaling_factor
485*495ae853SAndroid Build Coastguard Worker *   scaling reatio of width/ height between two consecutive SVC layers
486*495ae853SAndroid Build Coastguard Worker *
487*495ae853SAndroid Build Coastguard Worker * @param[in] u1_num_spatial_layers
488*495ae853SAndroid Build Coastguard Worker *   scaling reatio of width/ height between two consecutive SVC layers
489*495ae853SAndroid Build Coastguard Worker *
490*495ae853SAndroid Build Coastguard Worker * @param[in] u4_wd
491*495ae853SAndroid Build Coastguard Worker *   width of the input
492*495ae853SAndroid Build Coastguard Worker *
493*495ae853SAndroid Build Coastguard Worker * @param[in] u4_ht
494*495ae853SAndroid Build Coastguard Worker *   height of the input
495*495ae853SAndroid Build Coastguard Worker *
496*495ae853SAndroid Build Coastguard Worker * @param[in] e_arch
497*495ae853SAndroid Build Coastguard Worker *   architecure type
498*495ae853SAndroid Build Coastguard Worker *
499*495ae853SAndroid Build Coastguard Worker * @returns
500*495ae853SAndroid Build Coastguard Worker *
501*495ae853SAndroid Build Coastguard Worker * @remarks
502*495ae853SAndroid Build Coastguard Worker *  when ARM intrinsics are added, update should be done here
503*495ae853SAndroid Build Coastguard Worker *
504*495ae853SAndroid Build Coastguard Worker *******************************************************************************
505*495ae853SAndroid Build Coastguard Worker */
506*495ae853SAndroid Build Coastguard Worker 
isvce_initialize_downscaler(downscaler_ctxt_t * ps_scaler,iv_mem_rec_t * ps_mem_rec,DOUBLE d_scaling_factor,UWORD8 u1_num_spatial_layers,UWORD32 u4_in_width,UWORD32 u4_in_height,IV_ARCH_T e_arch)507*495ae853SAndroid Build Coastguard Worker void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
508*495ae853SAndroid Build Coastguard Worker                                  DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
509*495ae853SAndroid Build Coastguard Worker                                  UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
510*495ae853SAndroid Build Coastguard Worker {
511*495ae853SAndroid Build Coastguard Worker     if(u1_num_spatial_layers > 1)
512*495ae853SAndroid Build Coastguard Worker     {
513*495ae853SAndroid Build Coastguard Worker         downscaler_state_t *ps_scaler_state;
514*495ae853SAndroid Build Coastguard Worker 
515*495ae853SAndroid Build Coastguard Worker         UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
516*495ae853SAndroid Build Coastguard Worker 
517*495ae853SAndroid Build Coastguard Worker         ps_scaler_state = (downscaler_state_t *) pu1_buf;
518*495ae853SAndroid Build Coastguard Worker         pu1_buf += sizeof(ps_scaler_state[0]);
519*495ae853SAndroid Build Coastguard Worker 
520*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->pv_scratch_buf = pu1_buf;
521*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->u4_in_wd = u4_in_width;
522*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->u4_in_ht = u4_in_height;
523*495ae853SAndroid Build Coastguard Worker 
524*495ae853SAndroid Build Coastguard Worker         ps_scaler->pv_scaler_state = ps_scaler_state;
525*495ae853SAndroid Build Coastguard Worker         ps_scaler->d_scaling_factor = d_scaling_factor;
526*495ae853SAndroid Build Coastguard Worker         ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;
527*495ae853SAndroid Build Coastguard Worker 
528*495ae853SAndroid Build Coastguard Worker         isvce_downscaler_function_selector(ps_scaler_state, e_arch);
529*495ae853SAndroid Build Coastguard Worker 
530*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
531*495ae853SAndroid Build Coastguard Worker 
532*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->u4_vert_increment = (1 << DOWNSCALER_Q);
533*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->i4_init_offset = 0;
534*495ae853SAndroid Build Coastguard Worker         ps_scaler_state->pai1_filters = (d_scaling_factor == 2.0) ? gai1_lanczos_coefficients_2x
535*495ae853SAndroid Build Coastguard Worker                                                                   : gai1_lanczos_coefficients_3by2x;
536*495ae853SAndroid Build Coastguard Worker     }
537*495ae853SAndroid Build Coastguard Worker }
538