xref: /aosp_15_r20/external/libhevc/encoder/ihevce_stasino_helpers.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar /**
21*c83a76b0SSuyog Pawar *******************************************************************************
22*c83a76b0SSuyog Pawar * @file
23*c83a76b0SSuyog Pawar *  ihevce_stasino_helpers.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * @brief
26*c83a76b0SSuyog Pawar *
27*c83a76b0SSuyog Pawar * @author
28*c83a76b0SSuyog Pawar *  Ittiam
29*c83a76b0SSuyog Pawar *
30*c83a76b0SSuyog Pawar * @par List of Functions:
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * @remarks
33*c83a76b0SSuyog Pawar *  None
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar *******************************************************************************
36*c83a76b0SSuyog Pawar */
37*c83a76b0SSuyog Pawar 
38*c83a76b0SSuyog Pawar /*****************************************************************************/
39*c83a76b0SSuyog Pawar /* File Includes                                                             */
40*c83a76b0SSuyog Pawar /*****************************************************************************/
41*c83a76b0SSuyog Pawar /* System include files */
42*c83a76b0SSuyog Pawar #include <stdio.h>
43*c83a76b0SSuyog Pawar #include <stdlib.h>
44*c83a76b0SSuyog Pawar #include <assert.h>
45*c83a76b0SSuyog Pawar #include <string.h>
46*c83a76b0SSuyog Pawar 
47*c83a76b0SSuyog Pawar /* User include files */
48*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
49*c83a76b0SSuyog Pawar #include "itt_video_api.h"
50*c83a76b0SSuyog Pawar #include "ihevce_api.h"
51*c83a76b0SSuyog Pawar 
52*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
53*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
54*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
55*c83a76b0SSuyog Pawar 
56*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
57*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
58*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
59*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
60*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
61*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
62*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
63*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
64*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
65*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
66*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
67*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
69*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
70*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
71*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
72*c83a76b0SSuyog Pawar 
73*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
75*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
76*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
78*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
79*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
80*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
81*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
82*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
83*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
84*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
85*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
86*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
87*c83a76b0SSuyog Pawar #include "ihevce_stasino_helpers.h"
88*c83a76b0SSuyog Pawar 
89*c83a76b0SSuyog Pawar /*****************************************************************************/
90*c83a76b0SSuyog Pawar /* Function Definitions                                                      */
91*c83a76b0SSuyog Pawar /*****************************************************************************/
92*c83a76b0SSuyog Pawar 
93*c83a76b0SSuyog Pawar /**
94*c83a76b0SSuyog Pawar *******************************************************************************
95*c83a76b0SSuyog Pawar *
96*c83a76b0SSuyog Pawar * @brief
97*c83a76b0SSuyog Pawar *  This function calculates the variance of given data set.
98*c83a76b0SSuyog Pawar *
99*c83a76b0SSuyog Pawar * @par Description:
100*c83a76b0SSuyog Pawar *  This function is mainly used to find the variance of the block of pixel values.
101*c83a76b0SSuyog Pawar *  The block can be rectangular also. Single pass variance calculation
102*c83a76b0SSuyog Pawar *  implementation.
103*c83a76b0SSuyog Pawar *
104*c83a76b0SSuyog Pawar * @param[in] p_input
105*c83a76b0SSuyog Pawar *  The input buffer to calculate the variance.
106*c83a76b0SSuyog Pawar *
107*c83a76b0SSuyog Pawar * @param[out] pi4_mean
108*c83a76b0SSuyog Pawar *  Pointer ot the mean of the datset
109*c83a76b0SSuyog Pawar *
110*c83a76b0SSuyog Pawar * @param[out] pi4_variance
111*c83a76b0SSuyog Pawar *  Pointer tot he variabce of the data set
112*c83a76b0SSuyog Pawar *
113*c83a76b0SSuyog Pawar * @param[in] u1_is_hbd
114*c83a76b0SSuyog Pawar *  1 if the data is in  high bit depth
115*c83a76b0SSuyog Pawar *
116*c83a76b0SSuyog Pawar * @param[in] stride
117*c83a76b0SSuyog Pawar *  Stride for the input buffer
118*c83a76b0SSuyog Pawar *
119*c83a76b0SSuyog Pawar * @param[in] block_height
120*c83a76b0SSuyog Pawar *  height of the pixel block
121*c83a76b0SSuyog Pawar *
122*c83a76b0SSuyog Pawar * @param[in] block_width
123*c83a76b0SSuyog Pawar *  width of the pixel block
124*c83a76b0SSuyog Pawar *
125*c83a76b0SSuyog Pawar * @remarks
126*c83a76b0SSuyog Pawar *  None
127*c83a76b0SSuyog Pawar *
128*c83a76b0SSuyog Pawar *******************************************************************************
129*c83a76b0SSuyog Pawar */
ihevce_calc_variance(void * pv_input,WORD32 i4_stride,WORD32 * pi4_mean,UWORD32 * pu4_variance,UWORD8 u1_block_height,UWORD8 u1_block_width,UWORD8 u1_is_hbd,UWORD8 u1_disable_normalization)130*c83a76b0SSuyog Pawar void ihevce_calc_variance(
131*c83a76b0SSuyog Pawar     void *pv_input,
132*c83a76b0SSuyog Pawar     WORD32 i4_stride,
133*c83a76b0SSuyog Pawar     WORD32 *pi4_mean,
134*c83a76b0SSuyog Pawar     UWORD32 *pu4_variance,
135*c83a76b0SSuyog Pawar     UWORD8 u1_block_height,
136*c83a76b0SSuyog Pawar     UWORD8 u1_block_width,
137*c83a76b0SSuyog Pawar     UWORD8 u1_is_hbd,
138*c83a76b0SSuyog Pawar     UWORD8 u1_disable_normalization)
139*c83a76b0SSuyog Pawar {
140*c83a76b0SSuyog Pawar     UWORD8 *pui1_buffer;  // pointer for 8 bit usecase
141*c83a76b0SSuyog Pawar     WORD32 i, j;
142*c83a76b0SSuyog Pawar     WORD32 total_elements;
143*c83a76b0SSuyog Pawar 
144*c83a76b0SSuyog Pawar     LWORD64 mean;
145*c83a76b0SSuyog Pawar     ULWORD64 variance;
146*c83a76b0SSuyog Pawar     ULWORD64 sum;
147*c83a76b0SSuyog Pawar     ULWORD64 sq_sum;
148*c83a76b0SSuyog Pawar 
149*c83a76b0SSuyog Pawar     /* intialisation */
150*c83a76b0SSuyog Pawar     total_elements = u1_block_height * u1_block_width;
151*c83a76b0SSuyog Pawar     mean = 0;
152*c83a76b0SSuyog Pawar     variance = 0;
153*c83a76b0SSuyog Pawar     sum = 0;
154*c83a76b0SSuyog Pawar     sq_sum = 0;
155*c83a76b0SSuyog Pawar 
156*c83a76b0SSuyog Pawar     /* handle the case of 8/10 bit depth separately */
157*c83a76b0SSuyog Pawar     if(!u1_is_hbd)
158*c83a76b0SSuyog Pawar     {
159*c83a76b0SSuyog Pawar         pui1_buffer = (UWORD8 *)pv_input;
160*c83a76b0SSuyog Pawar 
161*c83a76b0SSuyog Pawar         /* loop over all the values in the block */
162*c83a76b0SSuyog Pawar         for(i = 0; i < u1_block_height; i++)
163*c83a76b0SSuyog Pawar         {
164*c83a76b0SSuyog Pawar             /* loop over a row in the block */
165*c83a76b0SSuyog Pawar             for(j = 0; j < u1_block_width; j++)
166*c83a76b0SSuyog Pawar             {
167*c83a76b0SSuyog Pawar                 sum += pui1_buffer[i * i4_stride + j];
168*c83a76b0SSuyog Pawar                 sq_sum += (pui1_buffer[i * i4_stride + j] * pui1_buffer[i * i4_stride + j]);
169*c83a76b0SSuyog Pawar             }
170*c83a76b0SSuyog Pawar         }
171*c83a76b0SSuyog Pawar 
172*c83a76b0SSuyog Pawar         if(!u1_disable_normalization)
173*c83a76b0SSuyog Pawar         {
174*c83a76b0SSuyog Pawar             mean = sum / total_elements;
175*c83a76b0SSuyog Pawar             variance =
176*c83a76b0SSuyog Pawar                 ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
177*c83a76b0SSuyog Pawar         }
178*c83a76b0SSuyog Pawar         else
179*c83a76b0SSuyog Pawar         {
180*c83a76b0SSuyog Pawar             mean = sum;
181*c83a76b0SSuyog Pawar             variance = ((total_elements * sq_sum) - (sum * sum));
182*c83a76b0SSuyog Pawar         }
183*c83a76b0SSuyog Pawar     }
184*c83a76b0SSuyog Pawar 
185*c83a76b0SSuyog Pawar     /* copy back the values to the output variables */
186*c83a76b0SSuyog Pawar     *pi4_mean = mean;
187*c83a76b0SSuyog Pawar     *pu4_variance = variance;
188*c83a76b0SSuyog Pawar }
189*c83a76b0SSuyog Pawar 
190*c83a76b0SSuyog Pawar /**
191*c83a76b0SSuyog Pawar *******************************************************************************
192*c83a76b0SSuyog Pawar *
193*c83a76b0SSuyog Pawar * @brief
194*c83a76b0SSuyog Pawar *  This function calcluates the variance of given data set which is WORD16
195*c83a76b0SSuyog Pawar *
196*c83a76b0SSuyog Pawar * @par Description:
197*c83a76b0SSuyog Pawar *  This function is mainly used to find the variance of the block of pixel values.
198*c83a76b0SSuyog Pawar *  Single pass variance calculation implementation.
199*c83a76b0SSuyog Pawar *
200*c83a76b0SSuyog Pawar * @param[in] pv_input
201*c83a76b0SSuyog Pawar *  The input buffer to calculate the variance.
202*c83a76b0SSuyog Pawar *
203*c83a76b0SSuyog Pawar *
204*c83a76b0SSuyog Pawar * @param[in] stride
205*c83a76b0SSuyog Pawar *  Stride for the input buffer
206*c83a76b0SSuyog Pawar *
207*c83a76b0SSuyog Pawar * @param[out] pi4_mean
208*c83a76b0SSuyog Pawar *  Pointer ot the mean of the datset
209*c83a76b0SSuyog Pawar *
210*c83a76b0SSuyog Pawar * @param[out] pi4_variance
211*c83a76b0SSuyog Pawar *  Pointer tot he variabce of the data set
212*c83a76b0SSuyog Pawar *
213*c83a76b0SSuyog Pawar * @param[in] block_height
214*c83a76b0SSuyog Pawar *  height of the pixel block
215*c83a76b0SSuyog Pawar *
216*c83a76b0SSuyog Pawar * @param[in] block_width
217*c83a76b0SSuyog Pawar *  width of the pixel block
218*c83a76b0SSuyog Pawar *
219*c83a76b0SSuyog Pawar *
220*c83a76b0SSuyog Pawar * @remarks
221*c83a76b0SSuyog Pawar *  None
222*c83a76b0SSuyog Pawar *
223*c83a76b0SSuyog Pawar *******************************************************************************/
ihevce_calc_variance_signed(WORD16 * pv_input,WORD32 i4_stride,WORD32 * pi4_mean,UWORD32 * pu4_variance,UWORD8 u1_block_height,UWORD8 u1_block_width)224*c83a76b0SSuyog Pawar void ihevce_calc_variance_signed(
225*c83a76b0SSuyog Pawar     WORD16 *pv_input,
226*c83a76b0SSuyog Pawar     WORD32 i4_stride,
227*c83a76b0SSuyog Pawar     WORD32 *pi4_mean,
228*c83a76b0SSuyog Pawar     UWORD32 *pu4_variance,
229*c83a76b0SSuyog Pawar     UWORD8 u1_block_height,
230*c83a76b0SSuyog Pawar     UWORD8 u1_block_width)
231*c83a76b0SSuyog Pawar {
232*c83a76b0SSuyog Pawar     WORD16 *pi2_buffer;  // poinbter for 10 bit use case
233*c83a76b0SSuyog Pawar 
234*c83a76b0SSuyog Pawar     WORD32 i, j;
235*c83a76b0SSuyog Pawar     WORD32 total_elements;
236*c83a76b0SSuyog Pawar 
237*c83a76b0SSuyog Pawar     LWORD64 mean;
238*c83a76b0SSuyog Pawar     LWORD64 variance;
239*c83a76b0SSuyog Pawar     LWORD64 sum;
240*c83a76b0SSuyog Pawar     LWORD64 sq_sum;
241*c83a76b0SSuyog Pawar 
242*c83a76b0SSuyog Pawar     /* intialisation */
243*c83a76b0SSuyog Pawar     total_elements = u1_block_height * u1_block_width;
244*c83a76b0SSuyog Pawar     mean = 0;
245*c83a76b0SSuyog Pawar     variance = 0;
246*c83a76b0SSuyog Pawar     sum = 0;
247*c83a76b0SSuyog Pawar     sq_sum = 0;
248*c83a76b0SSuyog Pawar 
249*c83a76b0SSuyog Pawar     pi2_buffer = pv_input;
250*c83a76b0SSuyog Pawar 
251*c83a76b0SSuyog Pawar     for(i = 0; i < u1_block_height; i++)
252*c83a76b0SSuyog Pawar     {
253*c83a76b0SSuyog Pawar         for(j = 0; j < u1_block_width; j++)
254*c83a76b0SSuyog Pawar         {
255*c83a76b0SSuyog Pawar             sum += pi2_buffer[i * i4_stride + j];
256*c83a76b0SSuyog Pawar             sq_sum += (pi2_buffer[i * i4_stride + j] * pi2_buffer[i * i4_stride + j]);
257*c83a76b0SSuyog Pawar         }
258*c83a76b0SSuyog Pawar     }
259*c83a76b0SSuyog Pawar 
260*c83a76b0SSuyog Pawar     mean = sum;  /// total_elements;
261*c83a76b0SSuyog Pawar     variance = ((total_elements * sq_sum) - (sum * sum));  // / (total_elements * (total_elements) )
262*c83a76b0SSuyog Pawar 
263*c83a76b0SSuyog Pawar     /* copy back the values to the output variables */
264*c83a76b0SSuyog Pawar     *pi4_mean = mean;
265*c83a76b0SSuyog Pawar     *pu4_variance = variance;
266*c83a76b0SSuyog Pawar }
267*c83a76b0SSuyog Pawar 
268*c83a76b0SSuyog Pawar /**
269*c83a76b0SSuyog Pawar *******************************************************************************
270*c83a76b0SSuyog Pawar *
271*c83a76b0SSuyog Pawar * @brief
272*c83a76b0SSuyog Pawar *  This function calculates the variance of a chrominance plane for 420SP data
273*c83a76b0SSuyog Pawar *
274*c83a76b0SSuyog Pawar * @par Description:
275*c83a76b0SSuyog Pawar *  This function is mainly used to find the variance of the block of pixel values.
276*c83a76b0SSuyog Pawar *  The block can be rectangular also. Single pass variance calculation
277*c83a76b0SSuyog Pawar *  implementation.
278*c83a76b0SSuyog Pawar *
279*c83a76b0SSuyog Pawar * @param[in] p_input
280*c83a76b0SSuyog Pawar *  The input buffer to calculate the variance.
281*c83a76b0SSuyog Pawar *
282*c83a76b0SSuyog Pawar * @param[in] stride
283*c83a76b0SSuyog Pawar *  Stride for the input buffer
284*c83a76b0SSuyog Pawar *
285*c83a76b0SSuyog Pawar * @param[out] pi4_mean
286*c83a76b0SSuyog Pawar *  Pointer ot the mean of the datset
287*c83a76b0SSuyog Pawar *
288*c83a76b0SSuyog Pawar * @param[out] pi4_variance
289*c83a76b0SSuyog Pawar *  Pointer tot he variabce of the data set
290*c83a76b0SSuyog Pawar *
291*c83a76b0SSuyog Pawar * @param[in] block_height
292*c83a76b0SSuyog Pawar *  height of the pixel block
293*c83a76b0SSuyog Pawar *
294*c83a76b0SSuyog Pawar * @param[in] block_width
295*c83a76b0SSuyog Pawar *  width of the pixel block
296*c83a76b0SSuyog Pawar *
297*c83a76b0SSuyog Pawar * @param[in] u1_is_hbd
298*c83a76b0SSuyog Pawar *  1 if the data is in  high bit depth
299*c83a76b0SSuyog Pawar *
300*c83a76b0SSuyog Pawar * @param[in] e_chroma_plane
301*c83a76b0SSuyog Pawar *  is U or V
302*c83a76b0SSuyog Pawar *
303*c83a76b0SSuyog Pawar * @remarks
304*c83a76b0SSuyog Pawar *  None
305*c83a76b0SSuyog Pawar *
306*c83a76b0SSuyog Pawar *******************************************************************************
307*c83a76b0SSuyog Pawar */
ihevce_calc_chroma_variance(void * pv_input,WORD32 i4_stride,WORD32 * pi4_mean,UWORD32 * pu4_variance,UWORD8 u1_block_height,UWORD8 u1_block_width,UWORD8 u1_is_hbd,CHROMA_PLANE_ID_T e_chroma_plane)308*c83a76b0SSuyog Pawar void ihevce_calc_chroma_variance(
309*c83a76b0SSuyog Pawar     void *pv_input,
310*c83a76b0SSuyog Pawar     WORD32 i4_stride,
311*c83a76b0SSuyog Pawar     WORD32 *pi4_mean,
312*c83a76b0SSuyog Pawar     UWORD32 *pu4_variance,
313*c83a76b0SSuyog Pawar     UWORD8 u1_block_height,
314*c83a76b0SSuyog Pawar     UWORD8 u1_block_width,
315*c83a76b0SSuyog Pawar     UWORD8 u1_is_hbd,
316*c83a76b0SSuyog Pawar     CHROMA_PLANE_ID_T e_chroma_plane)
317*c83a76b0SSuyog Pawar {
318*c83a76b0SSuyog Pawar     UWORD8 *pui1_buffer;  // pointer for 8 bit usecase
319*c83a76b0SSuyog Pawar     WORD32 i, j;
320*c83a76b0SSuyog Pawar     WORD32 total_elements;
321*c83a76b0SSuyog Pawar 
322*c83a76b0SSuyog Pawar     LWORD64 mean;
323*c83a76b0SSuyog Pawar     ULWORD64 variance;
324*c83a76b0SSuyog Pawar     LWORD64 sum;
325*c83a76b0SSuyog Pawar     LWORD64 sq_sum;
326*c83a76b0SSuyog Pawar 
327*c83a76b0SSuyog Pawar     /* intialisation */
328*c83a76b0SSuyog Pawar     total_elements = u1_block_height * u1_block_width;
329*c83a76b0SSuyog Pawar     mean = 0;
330*c83a76b0SSuyog Pawar     variance = 0;
331*c83a76b0SSuyog Pawar     sum = 0;
332*c83a76b0SSuyog Pawar     sq_sum = 0;
333*c83a76b0SSuyog Pawar 
334*c83a76b0SSuyog Pawar     /* handle the case of 8/10 bit depth separately */
335*c83a76b0SSuyog Pawar     if(!u1_is_hbd)
336*c83a76b0SSuyog Pawar     {
337*c83a76b0SSuyog Pawar         pui1_buffer = (UWORD8 *)pv_input;
338*c83a76b0SSuyog Pawar 
339*c83a76b0SSuyog Pawar         pui1_buffer += e_chroma_plane;
340*c83a76b0SSuyog Pawar 
341*c83a76b0SSuyog Pawar         /* loop over all the values in the block */
342*c83a76b0SSuyog Pawar         for(i = 0; i < u1_block_height; i++)
343*c83a76b0SSuyog Pawar         {
344*c83a76b0SSuyog Pawar             /* loop over a row in the block */
345*c83a76b0SSuyog Pawar             for(j = 0; j < u1_block_width; j++)
346*c83a76b0SSuyog Pawar             {
347*c83a76b0SSuyog Pawar                 sum += pui1_buffer[i * i4_stride + j * 2];
348*c83a76b0SSuyog Pawar                 sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]);
349*c83a76b0SSuyog Pawar             }
350*c83a76b0SSuyog Pawar         }
351*c83a76b0SSuyog Pawar 
352*c83a76b0SSuyog Pawar         mean = sum / total_elements;
353*c83a76b0SSuyog Pawar         variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
354*c83a76b0SSuyog Pawar     }
355*c83a76b0SSuyog Pawar 
356*c83a76b0SSuyog Pawar     /* copy back the values to the output variables */
357*c83a76b0SSuyog Pawar     *pi4_mean = mean;
358*c83a76b0SSuyog Pawar     *pu4_variance = variance;
359*c83a76b0SSuyog Pawar }
360*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*
* @brief
*  Applies a variance based noise term to a distortion value.
*
* @par Description:
*  When u1_enable_psyRDOPT is non-zero the distortion is returned untouched.
*  Otherwise the normalized variances of the source block and of the
*  prediction block are computed (ihevce_calc_variance when e_chroma_plane is
*  NULL_PLANE, ihevce_calc_chroma_variance for any other plane id),
*  ihevce_compute_noise_term() converts them into a noise term and
*  MULTIPLY_STIM_WITH_DISTORTION is applied to the distortion with it.
*
* @param[in] pv_src
*  Source block
*
* @param[in] i4_src_stride
*  Stride of the source block
*
* @param[in] pv_pred
*  Prediction block
*
* @param[in] i4_pred_stride
*  Stride of the prediction block
*
* @param[in] i8_distortion
*  Distortion value to be modified
*
* @param[in] i4_alpha_stim_multiplier
*  Forwarded unchanged to ihevce_compute_noise_term()
*
* @param[in] u1_blk_size
*  Used as both the height and the width of the block
*
* @param[in] u1_is_hbd
*  Forwarded to the variance helpers
*
* @param[in] u1_enable_psyRDOPT
*  Non-zero disables the stim injection done by this function
*
* @param[in] e_chroma_plane
*  NULL_PLANE selects the luma variance helper, anything else the chroma one
*
* @return
*  i8_distortion as left by MULTIPLY_STIM_WITH_DISTORTION (presumably scaled
*  in place; the macro is defined elsewhere - confirm), or the unmodified
*  i8_distortion when u1_enable_psyRDOPT is non-zero.
*
*******************************************************************************
*/
LWORD64 ihevce_inject_stim_into_distortion(
    void *pv_src,
    WORD32 i4_src_stride,
    void *pv_pred,
    WORD32 i4_pred_stride,
    LWORD64 i8_distortion,
    WORD32 i4_alpha_stim_multiplier,
    UWORD8 u1_blk_size,
    UWORD8 u1_is_hbd,
    UWORD8 u1_enable_psyRDOPT,
    CHROMA_PLANE_ID_T e_chroma_plane)
{
    UWORD32 u4_src_variance;
    UWORD32 u4_pred_variance;
    WORD32 i4_mean; /* required by the helpers, value is not used here */
    WORD32 i4_noise_term;

    /* nothing to inject when psy RDOPT is switched on */
    if(u1_enable_psyRDOPT)
    {
        return i8_distortion;
    }

    if(NULL_PLANE != e_chroma_plane)
    {
        /* chroma: variance of the selected plane of the interleaved data */
        ihevce_calc_chroma_variance(
            pv_src,
            i4_src_stride,
            &i4_mean,
            &u4_src_variance,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            e_chroma_plane);

        ihevce_calc_chroma_variance(
            pv_pred,
            i4_pred_stride,
            &i4_mean,
            &u4_pred_variance,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            e_chroma_plane);
    }
    else
    {
        /* luma: normalized variance (last argument 0 keeps normalization) */
        ihevce_calc_variance(
            pv_src,
            i4_src_stride,
            &i4_mean,
            &u4_src_variance,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            0);

        ihevce_calc_variance(
            pv_pred,
            i4_pred_stride,
            &i4_mean,
            &u4_pred_variance,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            0);
    }

    i4_noise_term =
        ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_src_variance, u4_pred_variance);

    MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);

    return i8_distortion;
}
437*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*
* @brief
*  Decides whether a CU is noisy from the per 8x8 block noise flags.
*
* @par Description:
*  For a CU of size 8 (or smaller) the flag stored at index
*  (u1_cu_x_pos / 8) + u1_cu_y_pos is returned as is. A larger CU is split
*  into its four quadrants, each quadrant is evaluated recursively and the CU
*  is reported as noisy when the four results add up to 2 or more.
*
* @param[in] pu1_is_8x8Blk_noisy
*  Array of per 8x8 block noise flags, indexed by (x / 8) + y.
*  NOTE(review): this indexing matches a grid of 8 flags per row only when
*  y is a multiple of 8 - confirm against the code that fills the array.
*
* @param[in] u1_cu_x_pos
*  Horizontal position of the CU
*
* @param[in] u1_cu_y_pos
*  Vertical position of the CU
*
* @param[in] u1_cu_size
*  Size of the CU. Expected to be 8 multiplied by a power of two; other
*  values are still accepted and terminate once the size drops to 8 or less.
*
* @return
*  The stored flag for a CU of size 8 or less, otherwise 1 when the quadrant
*  results sum to at least 2 and 0 if not.
*
*******************************************************************************
*/
UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data(
    UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size)
{
    UWORD8 u1_num_noisy_children = 0;
    UWORD8 u1_start_index = (u1_cu_x_pos / 8) + u1_cu_y_pos;
    UWORD8 u1_half_size;

    /* Base case. '<=' instead of '==' guarantees termination: a size that */
    /* never halves to exactly 8 (0, 4, 12, ...) would otherwise recurse   */
    /* without bound.                                                      */
    if(u1_cu_size <= 8)
    {
        return pu1_is_8x8Blk_noisy[u1_start_index];
    }

    u1_half_size = u1_cu_size / 2;

    /* top-left quadrant */
    u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
        pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos, u1_half_size);

    /* top-right quadrant */
    u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
        pu1_is_8x8Blk_noisy, u1_cu_x_pos + u1_half_size, u1_cu_y_pos, u1_half_size);

    /* bottom-left quadrant */
    u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
        pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos + u1_half_size, u1_half_size);

    /* bottom-right quadrant */
    u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
        pu1_is_8x8Blk_noisy,
        u1_cu_x_pos + u1_half_size,
        u1_cu_y_pos + u1_half_size,
        u1_half_size);

    return (u1_num_noisy_children >= 2);
}
466*c83a76b0SSuyog Pawar 
467*c83a76b0SSuyog Pawar /*!
468*c83a76b0SSuyog Pawar ******************************************************************************
469*c83a76b0SSuyog Pawar * \if Function name : ihevce_psy_rd_cost_croma \endif
470*c83a76b0SSuyog Pawar *
471*c83a76b0SSuyog Pawar * \brief
472*c83a76b0SSuyog Pawar *    Calculates the psyco visual cost for RD opt. This is
473*c83a76b0SSuyog Pawar *
474*c83a76b0SSuyog Pawar * \param[in] pui4_source_satd
475*c83a76b0SSuyog Pawar *   This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
476*c83a76b0SSuyog Pawar * \param[in] *pui1_recon
477*c83a76b0SSuyog Pawar *   This si the pointer to the pred data.
478*c83a76b0SSuyog Pawar * \param[in] recon_stride
479*c83a76b0SSuyog Pawar *   This si the pred stride
480*c83a76b0SSuyog Pawar * \param[in] pic_type
481*c83a76b0SSuyog Pawar *   Picture type.
482*c83a76b0SSuyog Pawar * \param[in] layer_id
483*c83a76b0SSuyog Pawar *   Indicates the temporal layer.
484*c83a76b0SSuyog Pawar * \param[in] lambda
485*c83a76b0SSuyog Pawar *   This is the weighting factor for the cost.
486*c83a76b0SSuyog Pawar * \param[in] is_hbd
487*c83a76b0SSuyog Pawar *   This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit.
488*c83a76b0SSuyog Pawar * \param[in] sub_sampling_type
489*c83a76b0SSuyog Pawar *   This is the chroma subsampling type. 11 - for 420 and 13 for 422
490*c83a76b0SSuyog Pawar * \return
491*c83a76b0SSuyog Pawar *    the cost for the psyRDopt
492*c83a76b0SSuyog Pawar *
493*c83a76b0SSuyog Pawar * \author
494*c83a76b0SSuyog Pawar *  Ittiam
495*c83a76b0SSuyog Pawar *
496*c83a76b0SSuyog Pawar *****************************************************************************
497*c83a76b0SSuyog Pawar */
ihevce_psy_rd_cost_croma(LWORD64 * pui4_source_satd,void * p_recon,WORD32 recon_stride_vert,WORD32 recond_stride_horz,WORD32 cu_size_luma,WORD32 pic_type,WORD32 layer_id,WORD32 lambda,WORD32 start_index,WORD32 is_hbd,WORD32 sub_sampling_type,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list)498*c83a76b0SSuyog Pawar LWORD64 ihevce_psy_rd_cost_croma(
499*c83a76b0SSuyog Pawar     LWORD64 *pui4_source_satd,
500*c83a76b0SSuyog Pawar     void *p_recon,
501*c83a76b0SSuyog Pawar     WORD32 recon_stride_vert,
502*c83a76b0SSuyog Pawar     WORD32 recond_stride_horz,
503*c83a76b0SSuyog Pawar     WORD32 cu_size_luma,
504*c83a76b0SSuyog Pawar     WORD32 pic_type,
505*c83a76b0SSuyog Pawar     WORD32 layer_id,
506*c83a76b0SSuyog Pawar     WORD32 lambda,
507*c83a76b0SSuyog Pawar     WORD32 start_index,
508*c83a76b0SSuyog Pawar     WORD32 is_hbd,
509*c83a76b0SSuyog Pawar     WORD32 sub_sampling_type,
510*c83a76b0SSuyog Pawar     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
511*c83a76b0SSuyog Pawar {
512*c83a76b0SSuyog Pawar     /* declare local variables to store the SATD values for the pred  for the current block. */
513*c83a76b0SSuyog Pawar     LWORD64 psy_rd_cost;
514*c83a76b0SSuyog Pawar     UWORD32 lambda_mod;
515*c83a76b0SSuyog Pawar     WORD32 psy_factor;
516*c83a76b0SSuyog Pawar 
517*c83a76b0SSuyog Pawar     /* declare local variables */
518*c83a76b0SSuyog Pawar     WORD32 i;
519*c83a76b0SSuyog Pawar     WORD32 cu_total_size;
520*c83a76b0SSuyog Pawar     WORD32 num_comp_had_blocks;
521*c83a76b0SSuyog Pawar 
522*c83a76b0SSuyog Pawar     UWORD8 *pu1_l0_block;
523*c83a76b0SSuyog Pawar     UWORD8 *pu1_l0_block_prev;
524*c83a76b0SSuyog Pawar     UWORD8 *pu1_recon;
525*c83a76b0SSuyog Pawar     WORD32 ht_offset;
526*c83a76b0SSuyog Pawar     WORD32 wd_offset;
527*c83a76b0SSuyog Pawar     WORD32 cu_ht;
528*c83a76b0SSuyog Pawar     WORD32 cu_wd;
529*c83a76b0SSuyog Pawar 
530*c83a76b0SSuyog Pawar     WORD32 num_horz_blocks;
531*c83a76b0SSuyog Pawar 
532*c83a76b0SSuyog Pawar     WORD16 pi2_residue_had[64];
533*c83a76b0SSuyog Pawar     /* this is used as a buffer with all values equal to 0. This is emulate the case with
534*c83a76b0SSuyog Pawar        pred being zero in HAD fucntion */
535*c83a76b0SSuyog Pawar     UWORD8 ai1_zeros_buffer[64];
536*c83a76b0SSuyog Pawar 
537*c83a76b0SSuyog Pawar     WORD32 had_block_size;
538*c83a76b0SSuyog Pawar     LWORD64 source_satd;  // to hold source for current 8x8 block
539*c83a76b0SSuyog Pawar     LWORD64 recon_satd;  // holds the current recon 8x8 satd
540*c83a76b0SSuyog Pawar 
541*c83a76b0SSuyog Pawar     WORD32 index_for_src_satd;
542*c83a76b0SSuyog Pawar 
543*c83a76b0SSuyog Pawar     (void)recond_stride_horz;
544*c83a76b0SSuyog Pawar     (void)pic_type;
545*c83a76b0SSuyog Pawar     (void)layer_id;
546*c83a76b0SSuyog Pawar     if(!is_hbd)
547*c83a76b0SSuyog Pawar     {
548*c83a76b0SSuyog Pawar         pu1_recon = (UWORD8 *)p_recon;
549*c83a76b0SSuyog Pawar     }
550*c83a76b0SSuyog Pawar 
551*c83a76b0SSuyog Pawar     /**** initialize the variables ****/
552*c83a76b0SSuyog Pawar     had_block_size = 4;
553*c83a76b0SSuyog Pawar 
554*c83a76b0SSuyog Pawar     if(sub_sampling_type == 1)  // 420
555*c83a76b0SSuyog Pawar     {
556*c83a76b0SSuyog Pawar         cu_ht = cu_size_luma / 2;
557*c83a76b0SSuyog Pawar         cu_wd = cu_size_luma / 2;
558*c83a76b0SSuyog Pawar     }
559*c83a76b0SSuyog Pawar     else
560*c83a76b0SSuyog Pawar     {
561*c83a76b0SSuyog Pawar         cu_ht = cu_size_luma;
562*c83a76b0SSuyog Pawar         cu_wd = cu_size_luma / 2;
563*c83a76b0SSuyog Pawar     }
564*c83a76b0SSuyog Pawar 
565*c83a76b0SSuyog Pawar     num_horz_blocks = 2 * cu_wd / had_block_size;  //ctb_width / had_block_size;
566*c83a76b0SSuyog Pawar     ht_offset = -had_block_size;
567*c83a76b0SSuyog Pawar     wd_offset = 0;  //-had_block_size;
568*c83a76b0SSuyog Pawar 
569*c83a76b0SSuyog Pawar     cu_total_size = cu_ht * cu_wd;
570*c83a76b0SSuyog Pawar     num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size);
571*c83a76b0SSuyog Pawar 
572*c83a76b0SSuyog Pawar     index_for_src_satd = start_index;
573*c83a76b0SSuyog Pawar 
574*c83a76b0SSuyog Pawar     for(i = 0; i < 64; i++)
575*c83a76b0SSuyog Pawar     {
576*c83a76b0SSuyog Pawar         ai1_zeros_buffer[i] = 0;
577*c83a76b0SSuyog Pawar     }
578*c83a76b0SSuyog Pawar 
579*c83a76b0SSuyog Pawar     psy_factor = PSY_STRENGTH_CHROMA;
580*c83a76b0SSuyog Pawar     psy_rd_cost = 0;
581*c83a76b0SSuyog Pawar     lambda_mod = lambda * psy_factor;
582*c83a76b0SSuyog Pawar 
583*c83a76b0SSuyog Pawar     /************************************************************/
584*c83a76b0SSuyog Pawar     /* loop over for every 4x4 blocks in the CU for Cb */
585*c83a76b0SSuyog Pawar     for(i = 0; i < num_comp_had_blocks; i++)
586*c83a76b0SSuyog Pawar     {
587*c83a76b0SSuyog Pawar         if(i % num_horz_blocks == 0)
588*c83a76b0SSuyog Pawar         {
589*c83a76b0SSuyog Pawar             wd_offset = -had_block_size;
590*c83a76b0SSuyog Pawar             ht_offset += had_block_size;
591*c83a76b0SSuyog Pawar         }
592*c83a76b0SSuyog Pawar         wd_offset += had_block_size;
593*c83a76b0SSuyog Pawar 
594*c83a76b0SSuyog Pawar         /* source satd for the current 8x8 block */
595*c83a76b0SSuyog Pawar         source_satd = pui4_source_satd[index_for_src_satd];
596*c83a76b0SSuyog Pawar 
597*c83a76b0SSuyog Pawar         if(i % 2 != 0)
598*c83a76b0SSuyog Pawar         {
599*c83a76b0SSuyog Pawar             if(!is_hbd)
600*c83a76b0SSuyog Pawar             {
601*c83a76b0SSuyog Pawar                 pu1_l0_block = pu1_l0_block_prev + 1;
602*c83a76b0SSuyog Pawar             }
603*c83a76b0SSuyog Pawar         }
604*c83a76b0SSuyog Pawar         else
605*c83a76b0SSuyog Pawar         {
606*c83a76b0SSuyog Pawar             if(!is_hbd)
607*c83a76b0SSuyog Pawar             {
608*c83a76b0SSuyog Pawar                 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
609*c83a76b0SSuyog Pawar                 pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
610*c83a76b0SSuyog Pawar                 pu1_l0_block_prev = pu1_l0_block;
611*c83a76b0SSuyog Pawar             }
612*c83a76b0SSuyog Pawar         }
613*c83a76b0SSuyog Pawar 
614*c83a76b0SSuyog Pawar         if(had_block_size == 4)
615*c83a76b0SSuyog Pawar         {
616*c83a76b0SSuyog Pawar             if(!is_hbd)
617*c83a76b0SSuyog Pawar             {
618*c83a76b0SSuyog Pawar                 recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit(
619*c83a76b0SSuyog Pawar                     pu1_l0_block,
620*c83a76b0SSuyog Pawar                     recon_stride_vert,
621*c83a76b0SSuyog Pawar                     ai1_zeros_buffer,
622*c83a76b0SSuyog Pawar                     had_block_size,
623*c83a76b0SSuyog Pawar                     pi2_residue_had,
624*c83a76b0SSuyog Pawar                     had_block_size);
625*c83a76b0SSuyog Pawar             }
626*c83a76b0SSuyog Pawar 
627*c83a76b0SSuyog Pawar             /* get the additional cost function based on the absolute SATD diff of source and recon. */
628*c83a76b0SSuyog Pawar             psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
629*c83a76b0SSuyog Pawar 
630*c83a76b0SSuyog Pawar             index_for_src_satd++;
631*c83a76b0SSuyog Pawar 
632*c83a76b0SSuyog Pawar             if((i % num_horz_blocks) == (num_horz_blocks - 1))
633*c83a76b0SSuyog Pawar             {
634*c83a76b0SSuyog Pawar                 index_for_src_satd -= num_horz_blocks;
635*c83a76b0SSuyog Pawar                 index_for_src_satd +=
636*c83a76b0SSuyog Pawar                     (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
637*c83a76b0SSuyog Pawar             }
638*c83a76b0SSuyog Pawar 
639*c83a76b0SSuyog Pawar         }  // if had block size ==4
640*c83a76b0SSuyog Pawar     }  // for loop for all 4x4 block in the cu
641*c83a76b0SSuyog Pawar 
642*c83a76b0SSuyog Pawar     psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT);
643*c83a76b0SSuyog Pawar     /* return the additional cost for the psy RD opt */
644*c83a76b0SSuyog Pawar     return (psy_rd_cost);
645*c83a76b0SSuyog Pawar }
646*c83a76b0SSuyog Pawar 
647*c83a76b0SSuyog Pawar /*!
648*c83a76b0SSuyog Pawar ******************************************************************************
649*c83a76b0SSuyog Pawar * \if Function name : ihevce_psy_rd_cost \endif
650*c83a76b0SSuyog Pawar *
651*c83a76b0SSuyog Pawar * \brief
652*c83a76b0SSuyog Pawar *    Calculates the psyco visual cost for RD opt. This is
653*c83a76b0SSuyog Pawar *
654*c83a76b0SSuyog Pawar * \param[in] pui4_source_satd
655*c83a76b0SSuyog Pawar *   This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
656*c83a76b0SSuyog Pawar * \param[in] *pui1_recon
657*c83a76b0SSuyog Pawar *   This si the pointer to the pred data.
658*c83a76b0SSuyog Pawar * \param[in] recon_stride
659*c83a76b0SSuyog Pawar *   This si the pred stride
660*c83a76b0SSuyog Pawar * \param[in] pic_type
661*c83a76b0SSuyog Pawar *   Picture type.
662*c83a76b0SSuyog Pawar * \param[in] layer_id
663*c83a76b0SSuyog Pawar *   Indicates the temporal layer.
664*c83a76b0SSuyog Pawar * \param[in] lambda
665*c83a76b0SSuyog Pawar *   This is the weighting factor for the cost.
666*c83a76b0SSuyog Pawar *
667*c83a76b0SSuyog Pawar * \return
668*c83a76b0SSuyog Pawar *    the cost for the psyRDopt
669*c83a76b0SSuyog Pawar *
670*c83a76b0SSuyog Pawar * \author
671*c83a76b0SSuyog Pawar *  Ittiam
672*c83a76b0SSuyog Pawar *
673*c83a76b0SSuyog Pawar *****************************************************************************
674*c83a76b0SSuyog Pawar */
ihevce_psy_rd_cost(LWORD64 * pui4_source_satd,void * pv_recon,WORD32 recon_stride_vert,WORD32 recond_stride_horz,WORD32 cu_size,WORD32 pic_type,WORD32 layer_id,WORD32 lambda,WORD32 start_index,WORD32 is_hbd,UWORD32 u4_psy_strength,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list)675*c83a76b0SSuyog Pawar LWORD64 ihevce_psy_rd_cost(
676*c83a76b0SSuyog Pawar     LWORD64 *pui4_source_satd,
677*c83a76b0SSuyog Pawar     void *pv_recon,
678*c83a76b0SSuyog Pawar     WORD32 recon_stride_vert,
679*c83a76b0SSuyog Pawar     WORD32 recond_stride_horz,
680*c83a76b0SSuyog Pawar     WORD32 cu_size,
681*c83a76b0SSuyog Pawar     WORD32 pic_type,
682*c83a76b0SSuyog Pawar     WORD32 layer_id,
683*c83a76b0SSuyog Pawar     WORD32 lambda,
684*c83a76b0SSuyog Pawar     WORD32 start_index,
685*c83a76b0SSuyog Pawar     WORD32 is_hbd,
686*c83a76b0SSuyog Pawar     UWORD32 u4_psy_strength,
687*c83a76b0SSuyog Pawar     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
688*c83a76b0SSuyog Pawar {
689*c83a76b0SSuyog Pawar     /* declare local variables to store the SATD values for the pred  for the current block. */
690*c83a76b0SSuyog Pawar     LWORD64 psy_rd_cost;  // TODO : check if overflow is there.
691*c83a76b0SSuyog Pawar     UWORD32 lambda_mod;
692*c83a76b0SSuyog Pawar     WORD32 psy_factor;
693*c83a76b0SSuyog Pawar 
694*c83a76b0SSuyog Pawar     /* declare local variables */
695*c83a76b0SSuyog Pawar     WORD32 i;
696*c83a76b0SSuyog Pawar     WORD32 cu_total_size;
697*c83a76b0SSuyog Pawar     WORD32 num_comp_had_blocks;
698*c83a76b0SSuyog Pawar 
699*c83a76b0SSuyog Pawar     UWORD8 *pu1_l0_block;
700*c83a76b0SSuyog Pawar     UWORD8 *pu1_recon;
701*c83a76b0SSuyog Pawar 
702*c83a76b0SSuyog Pawar     WORD32 ht_offset;
703*c83a76b0SSuyog Pawar     WORD32 wd_offset;
704*c83a76b0SSuyog Pawar     WORD32 cu_ht;
705*c83a76b0SSuyog Pawar     WORD32 cu_wd;
706*c83a76b0SSuyog Pawar 
707*c83a76b0SSuyog Pawar     WORD32 num_horz_blocks;
708*c83a76b0SSuyog Pawar 
709*c83a76b0SSuyog Pawar     //WORD16 pi2_residue_had[64];
710*c83a76b0SSuyog Pawar     WORD16 pi2_residue_had_zscan[64];
711*c83a76b0SSuyog Pawar     //WORD16 pi2_residue[64];
712*c83a76b0SSuyog Pawar     /* this is used as a buffer with all values equal to 0. This is emulate the case with
713*c83a76b0SSuyog Pawar        pred being zero in HAD fucntion */
714*c83a76b0SSuyog Pawar     UWORD8 ai1_zeros_buffer[64];
715*c83a76b0SSuyog Pawar 
716*c83a76b0SSuyog Pawar     WORD32 had_block_size;
717*c83a76b0SSuyog Pawar     LWORD64 source_satd;  // to hold source for current 8x8 block
718*c83a76b0SSuyog Pawar     LWORD64 recon_satd;  // holds the current recon 8x8 satd
719*c83a76b0SSuyog Pawar 
720*c83a76b0SSuyog Pawar     WORD32 index_for_src_satd;
721*c83a76b0SSuyog Pawar 
722*c83a76b0SSuyog Pawar     (void)recond_stride_horz;
723*c83a76b0SSuyog Pawar     (void)pic_type;
724*c83a76b0SSuyog Pawar     (void)layer_id;
725*c83a76b0SSuyog Pawar     /***** initialize the variables ****/
726*c83a76b0SSuyog Pawar     had_block_size = 8;
727*c83a76b0SSuyog Pawar     cu_ht = cu_size;
728*c83a76b0SSuyog Pawar     cu_wd = cu_size;
729*c83a76b0SSuyog Pawar 
730*c83a76b0SSuyog Pawar     num_horz_blocks = cu_wd / had_block_size;  //ctb_width / had_block_size;
731*c83a76b0SSuyog Pawar 
732*c83a76b0SSuyog Pawar     ht_offset = -had_block_size;
733*c83a76b0SSuyog Pawar     wd_offset = 0 - had_block_size;
734*c83a76b0SSuyog Pawar 
735*c83a76b0SSuyog Pawar     cu_total_size = cu_ht * cu_wd;
736*c83a76b0SSuyog Pawar     num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size);
737*c83a76b0SSuyog Pawar 
738*c83a76b0SSuyog Pawar     index_for_src_satd = start_index;
739*c83a76b0SSuyog Pawar 
740*c83a76b0SSuyog Pawar     for(i = 0; i < 64; i++)
741*c83a76b0SSuyog Pawar     {
742*c83a76b0SSuyog Pawar         ai1_zeros_buffer[i] = 0;
743*c83a76b0SSuyog Pawar     }
744*c83a76b0SSuyog Pawar     psy_factor = u4_psy_strength;  //PSY_STRENGTH;
745*c83a76b0SSuyog Pawar     psy_rd_cost = 0;
746*c83a76b0SSuyog Pawar     lambda_mod = lambda * psy_factor;
747*c83a76b0SSuyog Pawar 
748*c83a76b0SSuyog Pawar     if(!is_hbd)
749*c83a76b0SSuyog Pawar     {
750*c83a76b0SSuyog Pawar         pu1_recon = (UWORD8 *)pv_recon;
751*c83a76b0SSuyog Pawar     }
752*c83a76b0SSuyog Pawar 
753*c83a76b0SSuyog Pawar     /**************************************************************/
754*c83a76b0SSuyog Pawar     /* loop over for every 8x8 blocks in the CU */
755*c83a76b0SSuyog Pawar     for(i = 0; i < num_comp_had_blocks; i++)
756*c83a76b0SSuyog Pawar     {
757*c83a76b0SSuyog Pawar         if(i % num_horz_blocks == 0)
758*c83a76b0SSuyog Pawar         {
759*c83a76b0SSuyog Pawar             wd_offset = -had_block_size;
760*c83a76b0SSuyog Pawar             ht_offset += had_block_size;
761*c83a76b0SSuyog Pawar         }
762*c83a76b0SSuyog Pawar         wd_offset += had_block_size;
763*c83a76b0SSuyog Pawar 
764*c83a76b0SSuyog Pawar         /* source satd for the current 8x8 block */
765*c83a76b0SSuyog Pawar         source_satd = pui4_source_satd[index_for_src_satd];
766*c83a76b0SSuyog Pawar 
767*c83a76b0SSuyog Pawar         if(had_block_size == 8)
768*c83a76b0SSuyog Pawar         {
769*c83a76b0SSuyog Pawar             //WORD32 index;
770*c83a76b0SSuyog Pawar             //WORD32 u4_satd;
771*c83a76b0SSuyog Pawar             //WORD32 dst_strd = 8;
772*c83a76b0SSuyog Pawar             //WORD32 i4_frm_qstep = 0;
773*c83a76b0SSuyog Pawar             //WORD32 early_cbf;
774*c83a76b0SSuyog Pawar             if(!is_hbd)
775*c83a76b0SSuyog Pawar             {
776*c83a76b0SSuyog Pawar                 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
777*c83a76b0SSuyog Pawar                 pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
778*c83a76b0SSuyog Pawar 
779*c83a76b0SSuyog Pawar                 recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit(
780*c83a76b0SSuyog Pawar                     pu1_l0_block,
781*c83a76b0SSuyog Pawar                     recon_stride_vert,
782*c83a76b0SSuyog Pawar                     ai1_zeros_buffer,
783*c83a76b0SSuyog Pawar                     had_block_size,
784*c83a76b0SSuyog Pawar                     pi2_residue_had_zscan,
785*c83a76b0SSuyog Pawar                     had_block_size);
786*c83a76b0SSuyog Pawar             }
787*c83a76b0SSuyog Pawar 
788*c83a76b0SSuyog Pawar             /* get the additional cost function based on the absolute SATD diff of source and recon. */
789*c83a76b0SSuyog Pawar             psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
790*c83a76b0SSuyog Pawar 
791*c83a76b0SSuyog Pawar             index_for_src_satd++;
792*c83a76b0SSuyog Pawar             if((i % num_horz_blocks) == (num_horz_blocks - 1))
793*c83a76b0SSuyog Pawar             {
794*c83a76b0SSuyog Pawar                 index_for_src_satd -= num_horz_blocks;
795*c83a76b0SSuyog Pawar                 index_for_src_satd +=
796*c83a76b0SSuyog Pawar                     (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
797*c83a76b0SSuyog Pawar             }
798*c83a76b0SSuyog Pawar         }  // if
799*c83a76b0SSuyog Pawar     }  // for loop
800*c83a76b0SSuyog Pawar     psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT);
801*c83a76b0SSuyog Pawar 
802*c83a76b0SSuyog Pawar     /* reutrn the additional cost for the psy RD opt */
803*c83a76b0SSuyog Pawar     return (psy_rd_cost);
804*c83a76b0SSuyog Pawar }
805*c83a76b0SSuyog Pawar 
/*!
******************************************************************************
* \brief
*    Computes (sigmaXSquared - sigmaX * sigmaX) for the partition i4_part_id.
*    When the inverse weight differs from the default source weight, the
*    result is rescaled by the square of (i4_inv_wpred_wt >> i4_inv_wt_shift_val)
*    through SHR_NEG. The value is then reduced to at most 27 bits (as
*    reported by GETRANGE64) before being stored.
*
* \param[in]  pu8_sigmaX           per-partition sigmaX values
* \param[in]  pu8_sigmaXSquared    per-partition sigmaXSquared values
* \param[out] u8_var               receives the (possibly right shifted) result
* \param[in]  i4_inv_wpred_wt      inverse weight
* \param[in]  i4_inv_wt_shift_val  right shift applied to the inverse weight
* \param[in]  i4_wpred_log_wdc     log WDC term used in the rescaling shift
* \param[in]  i4_part_id           index of the partition to process
*
* \return
*    Number of bits by which *u8_var was right shifted (0 if none)
*****************************************************************************
*/
unsigned long ihevce_calc_stim_injected_variance(
    ULWORD64 *pu8_sigmaX,
    ULWORD64 *pu8_sigmaXSquared,
    ULWORD64 *u8_var,
    WORD32 i4_inv_wpred_wt,
    WORD32 i4_inv_wt_shift_val,
    WORD32 i4_wpred_log_wdc,
    WORD32 i4_part_id)
{
    const WORD32 i4_unity_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
    const ULWORD64 u8_sum = pu8_sigmaX[i4_part_id];
    ULWORD64 u8_variance = pu8_sigmaXSquared[i4_part_id] - (u8_sum * u8_sum);
    WORD32 i4_num_bits;
    WORD32 i4_excess_bits = 0;

    /* undo the weighting only when a non-default weight was applied */
    if(i4_inv_wpred_wt != i4_unity_wt)
    {
        const WORD32 i4_scaled_wt = i4_inv_wpred_wt >> i4_inv_wt_shift_val;

        u8_variance = SHR_NEG(
            (u8_variance * i4_scaled_wt * i4_scaled_wt),
            (30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2));
    }

    GETRANGE64(i4_num_bits, u8_variance);

    /* keep at most 27 bits; the dropped bit count is reported to the caller */
    if(i4_num_bits > 27)
    {
        i4_excess_bits = i4_num_bits - 27;
    }

    *u8_var = u8_variance >> i4_excess_bits;

    return i4_excess_bits;
}
845*c83a76b0SSuyog Pawar 
/*!
******************************************************************************
* \brief
*    Combines the sigmaX / sigmaXSquared statistics of up to
*    MAX_NUM_INTER_PARTS partitions of a CU into a single value
*       N * (sumSq[0] + sumSq[1]) - (sum[0] + sum[1])^2,  N = u1_cu_size^2
*    where sumSq[k] is sigmaXSquared of partition k divided by the number of
*    pixels in that partition. The value is then reduced to at most 27 bits
*    (as reported by GETRANGE64) before being stored.
*
* \param[in]     pu8_sigmaX            sigmaX values
* \param[in]     pu8_sigmaXSquared     sigmaXSquared values
* \param[out]    u8_var                receives the (possibly right shifted) result
* \param[in,out] pi4_inv_wt            per-partition inverse weights. When
*                                      u1_is_for_src is set and a weight is not
*                                      the default, the entry is right shifted
*                                      IN PLACE by pi4_inv_wt_shift_val[k].
* \param[in]     pi4_inv_wt_shift_val  per-partition shift for the inverse weight
* \param[in]     ps_result             per-partition results; pu.b4_wd / pu.b4_ht
*                                      give the partition size in 4x4 units, minus 1
* \param[in]     i4_wpred_log_wdc      log WDC term used in the rescaling shifts
* \param[in]     pe_part_id            partition ids, used to index the sigma
*                                      arrays when u1_is_for_src is set
* \param[in]     u1_cu_size            CU width (= height) in pixels
* \param[in]     u1_num_parts          number of partitions to process; entries
*                                      not processed contribute 0
* \param[in]     u1_is_for_src         non-zero: index the sigma arrays by
*                                      pe_part_id[k] and apply inverse weights;
*                                      zero: index by k and apply no weighting
*
* \return
*    Number of bits by which *u8_var was right shifted (0 if none)
*****************************************************************************
*/
unsigned long ihevce_calc_variance_for_diff_weights(
    ULWORD64 *pu8_sigmaX,
    ULWORD64 *pu8_sigmaXSquared,
    ULWORD64 *u8_var,
    WORD32 *pi4_inv_wt,
    WORD32 *pi4_inv_wt_shift_val,
    pu_result_t *ps_result,
    WORD32 i4_wpred_log_wdc,
    PART_ID_T *pe_part_id,
    UWORD8 u1_cu_size,
    UWORD8 u1_num_parts,
    UWORD8 u1_is_for_src)
{
    WORD32 i4_k;
    UWORD32 u4_wd, u4_ht;
    /* Kept in 32 bits: (b4_wd + 1) * (b4_ht + 1) can reach 256 if these are  */
    /* 4 bit fields, which would wrap to 0 in a UWORD8 and make the division  */
    /* below a divide by zero.                                                */
    UWORD32 u4_num_base_blks;
    UWORD32 u4_num_pixels_in_part;
    UWORD8 u1_index;
    WORD32 i4_bits_req;

    const UWORD32 u4_base_blk_size = 4;
    UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size;
    ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 };
    ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 };
    ULWORD64 u8_z;

    const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

    for(i4_k = 0; i4_k < u1_num_parts; i4_k++)
    {
        /* partition dimensions in units of 4x4 base blocks */
        u4_wd = ps_result[i4_k].pu.b4_wd + 1;
        u4_ht = ps_result[i4_k].pu.b4_ht + 1;
        u4_num_base_blks = u4_wd * u4_ht;
        u4_num_pixels_in_part = u4_num_base_blks * u4_base_blk_size * u4_base_blk_size;

        if(u1_is_for_src)
        {
            u1_index = pe_part_id[i4_k];
        }
        else
        {
            u1_index = i4_k;
        }

        u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part;
        u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index];

        /* rescale by the inverse weight only for a non-default weight */
        if(u1_is_for_src && (pi4_inv_wt[i4_k] != i4_default_src_wt))
        {
            /* NOTE: the caller's weight array is updated in place */
            pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k];

            u8_temp_sigmaX[i4_k] = SHR_NEG(
                (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]),
                (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc));
            u8_temp_sigmaXsquared[i4_k] = SHR_NEG(
                (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]),
                (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2));
        }
    }

    u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) -
           ((u8_temp_sigmaX[0] + u8_temp_sigmaX[1]) * (u8_temp_sigmaX[0] + u8_temp_sigmaX[1]));

    GETRANGE64(i4_bits_req, u8_z);

    /* keep at most 27 bits; the dropped bit count is reported to the caller */
    if(i4_bits_req > 27)
    {
        *u8_var = u8_z >> (i4_bits_req - 27);
        return (i4_bits_req - 27);
    }
    else
    {
        *u8_var = u8_z;
        return 0;
    }
}
924