1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar /**
21*c83a76b0SSuyog Pawar *******************************************************************************
22*c83a76b0SSuyog Pawar * @file
23*c83a76b0SSuyog Pawar * ihevce_stasino_helpers.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * @brief
26*c83a76b0SSuyog Pawar *
27*c83a76b0SSuyog Pawar * @author
28*c83a76b0SSuyog Pawar * Ittiam
29*c83a76b0SSuyog Pawar *
30*c83a76b0SSuyog Pawar * @par List of Functions:
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * @remarks
33*c83a76b0SSuyog Pawar * None
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar *******************************************************************************
36*c83a76b0SSuyog Pawar */
37*c83a76b0SSuyog Pawar
38*c83a76b0SSuyog Pawar /*****************************************************************************/
39*c83a76b0SSuyog Pawar /* File Includes */
40*c83a76b0SSuyog Pawar /*****************************************************************************/
41*c83a76b0SSuyog Pawar /* System include files */
42*c83a76b0SSuyog Pawar #include <stdio.h>
43*c83a76b0SSuyog Pawar #include <stdlib.h>
44*c83a76b0SSuyog Pawar #include <assert.h>
45*c83a76b0SSuyog Pawar #include <string.h>
46*c83a76b0SSuyog Pawar
47*c83a76b0SSuyog Pawar /* User include files */
48*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
49*c83a76b0SSuyog Pawar #include "itt_video_api.h"
50*c83a76b0SSuyog Pawar #include "ihevce_api.h"
51*c83a76b0SSuyog Pawar
52*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
53*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
54*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
55*c83a76b0SSuyog Pawar
56*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
57*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
58*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
59*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
60*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
61*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
62*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
63*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
64*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
65*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
66*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
67*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
69*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
70*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
71*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
72*c83a76b0SSuyog Pawar
73*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
75*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
76*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
78*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
79*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
80*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
81*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
82*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
83*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
84*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
85*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
86*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
87*c83a76b0SSuyog Pawar #include "ihevce_stasino_helpers.h"
88*c83a76b0SSuyog Pawar
89*c83a76b0SSuyog Pawar /*****************************************************************************/
90*c83a76b0SSuyog Pawar /* Function Definitions */
91*c83a76b0SSuyog Pawar /*****************************************************************************/
92*c83a76b0SSuyog Pawar
93*c83a76b0SSuyog Pawar /**
94*c83a76b0SSuyog Pawar *******************************************************************************
95*c83a76b0SSuyog Pawar *
96*c83a76b0SSuyog Pawar * @brief
97*c83a76b0SSuyog Pawar * This function calculates the variance of given data set.
98*c83a76b0SSuyog Pawar *
99*c83a76b0SSuyog Pawar * @par Description:
100*c83a76b0SSuyog Pawar * This function is mainly used to find the variance of the block of pixel values.
101*c83a76b0SSuyog Pawar * The block can be rectangular also. Single pass variance calculation
102*c83a76b0SSuyog Pawar * implementation.
103*c83a76b0SSuyog Pawar *
104*c83a76b0SSuyog Pawar * @param[in] p_input
105*c83a76b0SSuyog Pawar * The input buffer to calculate the variance.
106*c83a76b0SSuyog Pawar *
107*c83a76b0SSuyog Pawar * @param[out] pi4_mean
108*c83a76b0SSuyog Pawar * Pointer ot the mean of the datset
109*c83a76b0SSuyog Pawar *
110*c83a76b0SSuyog Pawar * @param[out] pi4_variance
111*c83a76b0SSuyog Pawar * Pointer tot he variabce of the data set
112*c83a76b0SSuyog Pawar *
113*c83a76b0SSuyog Pawar * @param[in] u1_is_hbd
114*c83a76b0SSuyog Pawar * 1 if the data is in high bit depth
115*c83a76b0SSuyog Pawar *
116*c83a76b0SSuyog Pawar * @param[in] stride
117*c83a76b0SSuyog Pawar * Stride for the input buffer
118*c83a76b0SSuyog Pawar *
119*c83a76b0SSuyog Pawar * @param[in] block_height
120*c83a76b0SSuyog Pawar * height of the pixel block
121*c83a76b0SSuyog Pawar *
122*c83a76b0SSuyog Pawar * @param[in] block_width
123*c83a76b0SSuyog Pawar * width of the pixel block
124*c83a76b0SSuyog Pawar *
125*c83a76b0SSuyog Pawar * @remarks
126*c83a76b0SSuyog Pawar * None
127*c83a76b0SSuyog Pawar *
128*c83a76b0SSuyog Pawar *******************************************************************************
129*c83a76b0SSuyog Pawar */
ihevce_calc_variance(void * pv_input,WORD32 i4_stride,WORD32 * pi4_mean,UWORD32 * pu4_variance,UWORD8 u1_block_height,UWORD8 u1_block_width,UWORD8 u1_is_hbd,UWORD8 u1_disable_normalization)130*c83a76b0SSuyog Pawar void ihevce_calc_variance(
131*c83a76b0SSuyog Pawar void *pv_input,
132*c83a76b0SSuyog Pawar WORD32 i4_stride,
133*c83a76b0SSuyog Pawar WORD32 *pi4_mean,
134*c83a76b0SSuyog Pawar UWORD32 *pu4_variance,
135*c83a76b0SSuyog Pawar UWORD8 u1_block_height,
136*c83a76b0SSuyog Pawar UWORD8 u1_block_width,
137*c83a76b0SSuyog Pawar UWORD8 u1_is_hbd,
138*c83a76b0SSuyog Pawar UWORD8 u1_disable_normalization)
139*c83a76b0SSuyog Pawar {
140*c83a76b0SSuyog Pawar UWORD8 *pui1_buffer; // pointer for 8 bit usecase
141*c83a76b0SSuyog Pawar WORD32 i, j;
142*c83a76b0SSuyog Pawar WORD32 total_elements;
143*c83a76b0SSuyog Pawar
144*c83a76b0SSuyog Pawar LWORD64 mean;
145*c83a76b0SSuyog Pawar ULWORD64 variance;
146*c83a76b0SSuyog Pawar ULWORD64 sum;
147*c83a76b0SSuyog Pawar ULWORD64 sq_sum;
148*c83a76b0SSuyog Pawar
149*c83a76b0SSuyog Pawar /* intialisation */
150*c83a76b0SSuyog Pawar total_elements = u1_block_height * u1_block_width;
151*c83a76b0SSuyog Pawar mean = 0;
152*c83a76b0SSuyog Pawar variance = 0;
153*c83a76b0SSuyog Pawar sum = 0;
154*c83a76b0SSuyog Pawar sq_sum = 0;
155*c83a76b0SSuyog Pawar
156*c83a76b0SSuyog Pawar /* handle the case of 8/10 bit depth separately */
157*c83a76b0SSuyog Pawar if(!u1_is_hbd)
158*c83a76b0SSuyog Pawar {
159*c83a76b0SSuyog Pawar pui1_buffer = (UWORD8 *)pv_input;
160*c83a76b0SSuyog Pawar
161*c83a76b0SSuyog Pawar /* loop over all the values in the block */
162*c83a76b0SSuyog Pawar for(i = 0; i < u1_block_height; i++)
163*c83a76b0SSuyog Pawar {
164*c83a76b0SSuyog Pawar /* loop over a row in the block */
165*c83a76b0SSuyog Pawar for(j = 0; j < u1_block_width; j++)
166*c83a76b0SSuyog Pawar {
167*c83a76b0SSuyog Pawar sum += pui1_buffer[i * i4_stride + j];
168*c83a76b0SSuyog Pawar sq_sum += (pui1_buffer[i * i4_stride + j] * pui1_buffer[i * i4_stride + j]);
169*c83a76b0SSuyog Pawar }
170*c83a76b0SSuyog Pawar }
171*c83a76b0SSuyog Pawar
172*c83a76b0SSuyog Pawar if(!u1_disable_normalization)
173*c83a76b0SSuyog Pawar {
174*c83a76b0SSuyog Pawar mean = sum / total_elements;
175*c83a76b0SSuyog Pawar variance =
176*c83a76b0SSuyog Pawar ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
177*c83a76b0SSuyog Pawar }
178*c83a76b0SSuyog Pawar else
179*c83a76b0SSuyog Pawar {
180*c83a76b0SSuyog Pawar mean = sum;
181*c83a76b0SSuyog Pawar variance = ((total_elements * sq_sum) - (sum * sum));
182*c83a76b0SSuyog Pawar }
183*c83a76b0SSuyog Pawar }
184*c83a76b0SSuyog Pawar
185*c83a76b0SSuyog Pawar /* copy back the values to the output variables */
186*c83a76b0SSuyog Pawar *pi4_mean = mean;
187*c83a76b0SSuyog Pawar *pu4_variance = variance;
188*c83a76b0SSuyog Pawar }
189*c83a76b0SSuyog Pawar
190*c83a76b0SSuyog Pawar /**
191*c83a76b0SSuyog Pawar *******************************************************************************
192*c83a76b0SSuyog Pawar *
193*c83a76b0SSuyog Pawar * @brief
194*c83a76b0SSuyog Pawar * This function calcluates the variance of given data set which is WORD16
195*c83a76b0SSuyog Pawar *
196*c83a76b0SSuyog Pawar * @par Description:
197*c83a76b0SSuyog Pawar * This function is mainly used to find the variance of the block of pixel values.
198*c83a76b0SSuyog Pawar * Single pass variance calculation implementation.
199*c83a76b0SSuyog Pawar *
200*c83a76b0SSuyog Pawar * @param[in] pv_input
201*c83a76b0SSuyog Pawar * The input buffer to calculate the variance.
202*c83a76b0SSuyog Pawar *
203*c83a76b0SSuyog Pawar *
204*c83a76b0SSuyog Pawar * @param[in] stride
205*c83a76b0SSuyog Pawar * Stride for the input buffer
206*c83a76b0SSuyog Pawar *
207*c83a76b0SSuyog Pawar * @param[out] pi4_mean
208*c83a76b0SSuyog Pawar * Pointer ot the mean of the datset
209*c83a76b0SSuyog Pawar *
210*c83a76b0SSuyog Pawar * @param[out] pi4_variance
211*c83a76b0SSuyog Pawar * Pointer tot he variabce of the data set
212*c83a76b0SSuyog Pawar *
213*c83a76b0SSuyog Pawar * @param[in] block_height
214*c83a76b0SSuyog Pawar * height of the pixel block
215*c83a76b0SSuyog Pawar *
216*c83a76b0SSuyog Pawar * @param[in] block_width
217*c83a76b0SSuyog Pawar * width of the pixel block
218*c83a76b0SSuyog Pawar *
219*c83a76b0SSuyog Pawar *
220*c83a76b0SSuyog Pawar * @remarks
221*c83a76b0SSuyog Pawar * None
222*c83a76b0SSuyog Pawar *
223*c83a76b0SSuyog Pawar *******************************************************************************/
ihevce_calc_variance_signed(WORD16 * pv_input,WORD32 i4_stride,WORD32 * pi4_mean,UWORD32 * pu4_variance,UWORD8 u1_block_height,UWORD8 u1_block_width)224*c83a76b0SSuyog Pawar void ihevce_calc_variance_signed(
225*c83a76b0SSuyog Pawar WORD16 *pv_input,
226*c83a76b0SSuyog Pawar WORD32 i4_stride,
227*c83a76b0SSuyog Pawar WORD32 *pi4_mean,
228*c83a76b0SSuyog Pawar UWORD32 *pu4_variance,
229*c83a76b0SSuyog Pawar UWORD8 u1_block_height,
230*c83a76b0SSuyog Pawar UWORD8 u1_block_width)
231*c83a76b0SSuyog Pawar {
232*c83a76b0SSuyog Pawar WORD16 *pi2_buffer; // poinbter for 10 bit use case
233*c83a76b0SSuyog Pawar
234*c83a76b0SSuyog Pawar WORD32 i, j;
235*c83a76b0SSuyog Pawar WORD32 total_elements;
236*c83a76b0SSuyog Pawar
237*c83a76b0SSuyog Pawar LWORD64 mean;
238*c83a76b0SSuyog Pawar LWORD64 variance;
239*c83a76b0SSuyog Pawar LWORD64 sum;
240*c83a76b0SSuyog Pawar LWORD64 sq_sum;
241*c83a76b0SSuyog Pawar
242*c83a76b0SSuyog Pawar /* intialisation */
243*c83a76b0SSuyog Pawar total_elements = u1_block_height * u1_block_width;
244*c83a76b0SSuyog Pawar mean = 0;
245*c83a76b0SSuyog Pawar variance = 0;
246*c83a76b0SSuyog Pawar sum = 0;
247*c83a76b0SSuyog Pawar sq_sum = 0;
248*c83a76b0SSuyog Pawar
249*c83a76b0SSuyog Pawar pi2_buffer = pv_input;
250*c83a76b0SSuyog Pawar
251*c83a76b0SSuyog Pawar for(i = 0; i < u1_block_height; i++)
252*c83a76b0SSuyog Pawar {
253*c83a76b0SSuyog Pawar for(j = 0; j < u1_block_width; j++)
254*c83a76b0SSuyog Pawar {
255*c83a76b0SSuyog Pawar sum += pi2_buffer[i * i4_stride + j];
256*c83a76b0SSuyog Pawar sq_sum += (pi2_buffer[i * i4_stride + j] * pi2_buffer[i * i4_stride + j]);
257*c83a76b0SSuyog Pawar }
258*c83a76b0SSuyog Pawar }
259*c83a76b0SSuyog Pawar
260*c83a76b0SSuyog Pawar mean = sum; /// total_elements;
261*c83a76b0SSuyog Pawar variance = ((total_elements * sq_sum) - (sum * sum)); // / (total_elements * (total_elements) )
262*c83a76b0SSuyog Pawar
263*c83a76b0SSuyog Pawar /* copy back the values to the output variables */
264*c83a76b0SSuyog Pawar *pi4_mean = mean;
265*c83a76b0SSuyog Pawar *pu4_variance = variance;
266*c83a76b0SSuyog Pawar }
267*c83a76b0SSuyog Pawar
268*c83a76b0SSuyog Pawar /**
269*c83a76b0SSuyog Pawar *******************************************************************************
270*c83a76b0SSuyog Pawar *
271*c83a76b0SSuyog Pawar * @brief
272*c83a76b0SSuyog Pawar * This function calculates the variance of a chrominance plane for 420SP data
273*c83a76b0SSuyog Pawar *
274*c83a76b0SSuyog Pawar * @par Description:
275*c83a76b0SSuyog Pawar * This function is mainly used to find the variance of the block of pixel values.
276*c83a76b0SSuyog Pawar * The block can be rectangular also. Single pass variance calculation
277*c83a76b0SSuyog Pawar * implementation.
278*c83a76b0SSuyog Pawar *
279*c83a76b0SSuyog Pawar * @param[in] p_input
280*c83a76b0SSuyog Pawar * The input buffer to calculate the variance.
281*c83a76b0SSuyog Pawar *
282*c83a76b0SSuyog Pawar * @param[in] stride
283*c83a76b0SSuyog Pawar * Stride for the input buffer
284*c83a76b0SSuyog Pawar *
285*c83a76b0SSuyog Pawar * @param[out] pi4_mean
286*c83a76b0SSuyog Pawar * Pointer ot the mean of the datset
287*c83a76b0SSuyog Pawar *
288*c83a76b0SSuyog Pawar * @param[out] pi4_variance
289*c83a76b0SSuyog Pawar * Pointer tot he variabce of the data set
290*c83a76b0SSuyog Pawar *
291*c83a76b0SSuyog Pawar * @param[in] block_height
292*c83a76b0SSuyog Pawar * height of the pixel block
293*c83a76b0SSuyog Pawar *
294*c83a76b0SSuyog Pawar * @param[in] block_width
295*c83a76b0SSuyog Pawar * width of the pixel block
296*c83a76b0SSuyog Pawar *
297*c83a76b0SSuyog Pawar * @param[in] u1_is_hbd
298*c83a76b0SSuyog Pawar * 1 if the data is in high bit depth
299*c83a76b0SSuyog Pawar *
300*c83a76b0SSuyog Pawar * @param[in] e_chroma_plane
301*c83a76b0SSuyog Pawar * is U or V
302*c83a76b0SSuyog Pawar *
303*c83a76b0SSuyog Pawar * @remarks
304*c83a76b0SSuyog Pawar * None
305*c83a76b0SSuyog Pawar *
306*c83a76b0SSuyog Pawar *******************************************************************************
307*c83a76b0SSuyog Pawar */
ihevce_calc_chroma_variance(void * pv_input,WORD32 i4_stride,WORD32 * pi4_mean,UWORD32 * pu4_variance,UWORD8 u1_block_height,UWORD8 u1_block_width,UWORD8 u1_is_hbd,CHROMA_PLANE_ID_T e_chroma_plane)308*c83a76b0SSuyog Pawar void ihevce_calc_chroma_variance(
309*c83a76b0SSuyog Pawar void *pv_input,
310*c83a76b0SSuyog Pawar WORD32 i4_stride,
311*c83a76b0SSuyog Pawar WORD32 *pi4_mean,
312*c83a76b0SSuyog Pawar UWORD32 *pu4_variance,
313*c83a76b0SSuyog Pawar UWORD8 u1_block_height,
314*c83a76b0SSuyog Pawar UWORD8 u1_block_width,
315*c83a76b0SSuyog Pawar UWORD8 u1_is_hbd,
316*c83a76b0SSuyog Pawar CHROMA_PLANE_ID_T e_chroma_plane)
317*c83a76b0SSuyog Pawar {
318*c83a76b0SSuyog Pawar UWORD8 *pui1_buffer; // pointer for 8 bit usecase
319*c83a76b0SSuyog Pawar WORD32 i, j;
320*c83a76b0SSuyog Pawar WORD32 total_elements;
321*c83a76b0SSuyog Pawar
322*c83a76b0SSuyog Pawar LWORD64 mean;
323*c83a76b0SSuyog Pawar ULWORD64 variance;
324*c83a76b0SSuyog Pawar LWORD64 sum;
325*c83a76b0SSuyog Pawar LWORD64 sq_sum;
326*c83a76b0SSuyog Pawar
327*c83a76b0SSuyog Pawar /* intialisation */
328*c83a76b0SSuyog Pawar total_elements = u1_block_height * u1_block_width;
329*c83a76b0SSuyog Pawar mean = 0;
330*c83a76b0SSuyog Pawar variance = 0;
331*c83a76b0SSuyog Pawar sum = 0;
332*c83a76b0SSuyog Pawar sq_sum = 0;
333*c83a76b0SSuyog Pawar
334*c83a76b0SSuyog Pawar /* handle the case of 8/10 bit depth separately */
335*c83a76b0SSuyog Pawar if(!u1_is_hbd)
336*c83a76b0SSuyog Pawar {
337*c83a76b0SSuyog Pawar pui1_buffer = (UWORD8 *)pv_input;
338*c83a76b0SSuyog Pawar
339*c83a76b0SSuyog Pawar pui1_buffer += e_chroma_plane;
340*c83a76b0SSuyog Pawar
341*c83a76b0SSuyog Pawar /* loop over all the values in the block */
342*c83a76b0SSuyog Pawar for(i = 0; i < u1_block_height; i++)
343*c83a76b0SSuyog Pawar {
344*c83a76b0SSuyog Pawar /* loop over a row in the block */
345*c83a76b0SSuyog Pawar for(j = 0; j < u1_block_width; j++)
346*c83a76b0SSuyog Pawar {
347*c83a76b0SSuyog Pawar sum += pui1_buffer[i * i4_stride + j * 2];
348*c83a76b0SSuyog Pawar sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]);
349*c83a76b0SSuyog Pawar }
350*c83a76b0SSuyog Pawar }
351*c83a76b0SSuyog Pawar
352*c83a76b0SSuyog Pawar mean = sum / total_elements;
353*c83a76b0SSuyog Pawar variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
354*c83a76b0SSuyog Pawar }
355*c83a76b0SSuyog Pawar
356*c83a76b0SSuyog Pawar /* copy back the values to the output variables */
357*c83a76b0SSuyog Pawar *pi4_mean = mean;
358*c83a76b0SSuyog Pawar *pu4_variance = variance;
359*c83a76b0SSuyog Pawar }
360*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  Scales a distortion value by a noise term derived from the variances of the
*  source block and the prediction block.
*
* @par Description:
*  When u1_enable_psyRDOPT is non-zero the distortion is returned untouched.
*  Otherwise the variances of the source and prediction blocks are computed
*  (luma helper when e_chroma_plane is NULL_PLANE, chroma helper otherwise),
*  ihevce_compute_noise_term() turns them into a noise term, and
*  MULTIPLY_STIM_WITH_DISTORTION applies that term to the distortion.
*
* @param[in] pv_src
*  Source block
*
* @param[in] i4_src_stride
*  Stride of the source block
*
* @param[in] pv_pred
*  Prediction block
*
* @param[in] i4_pred_stride
*  Stride of the prediction block
*
* @param[in] i8_distortion
*  Distortion to be modified
*
* @param[in] i4_alpha_stim_multiplier
*  Forwarded unchanged to ihevce_compute_noise_term()
*
* @param[in] u1_blk_size
*  Used as both height and width of the square block
*
* @param[in] u1_is_hbd
*  1 if the data is in high bit depth; forwarded to the variance helpers
*
* @param[in] u1_enable_psyRDOPT
*  Non-zero disables the noise based scaling done here
*
* @param[in] e_chroma_plane
*  NULL_PLANE for luma, otherwise the chroma plane to analyse
*
* @return
*  The (possibly modified) distortion
*
*******************************************************************************
*/
LWORD64 ihevce_inject_stim_into_distortion(
    void *pv_src,
    WORD32 i4_src_stride,
    void *pv_pred,
    WORD32 i4_pred_stride,
    LWORD64 i8_distortion,
    WORD32 i4_alpha_stim_multiplier,
    UWORD8 u1_blk_size,
    UWORD8 u1_is_hbd,
    UWORD8 u1_enable_psyRDOPT,
    CHROMA_PLANE_ID_T e_chroma_plane)
{
    UWORD32 u4_var_of_src;
    UWORD32 u4_var_of_pred;
    WORD32 i4_mean_unused; /* the helpers require it, the value is never read */
    WORD32 i4_noise_term;

    /* nothing to inject when psy RD opt is in use */
    if(u1_enable_psyRDOPT)
    {
        return i8_distortion;
    }

    if(NULL_PLANE != e_chroma_plane)
    {
        ihevce_calc_chroma_variance(
            pv_src,
            i4_src_stride,
            &i4_mean_unused,
            &u4_var_of_src,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            e_chroma_plane);

        ihevce_calc_chroma_variance(
            pv_pred,
            i4_pred_stride,
            &i4_mean_unused,
            &u4_var_of_pred,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            e_chroma_plane);
    }
    else
    {
        /* luma: normalised variance (last argument 0 keeps normalization on) */
        ihevce_calc_variance(
            pv_src,
            i4_src_stride,
            &i4_mean_unused,
            &u4_var_of_src,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            0);

        ihevce_calc_variance(
            pv_pred,
            i4_pred_stride,
            &i4_mean_unused,
            &u4_var_of_pred,
            u1_blk_size,
            u1_blk_size,
            u1_is_hbd,
            0);
    }

    i4_noise_term =
        ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_var_of_src, u4_var_of_pred);

    MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);

    return i8_distortion;
}
437*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  Decides whether a CU is noisy from the per 8x8 block noise flags.
*
* @par Description:
*  For an 8x8 CU the stored flag at index (u1_cu_x_pos / 8) + u1_cu_y_pos is
*  returned as is. A larger CU is split into four quadrants of half the size,
*  each quadrant is evaluated recursively, and the CU is reported noisy when
*  at least two quadrants are noisy. The recursion stops only when the halved
*  size reaches exactly 8.
*
* @param[in] pu1_is_8x8Blk_noisy
*  Array of noise flags, one per 8x8 block
*
* @param[in] u1_cu_x_pos
*  Horizontal position of the CU
*
* @param[in] u1_cu_y_pos
*  Vertical position of the CU
*
* @param[in] u1_cu_size
*  Size of the CU
*
* @return
*  The stored flag for an 8x8 CU, otherwise 1 if two or more quadrants are
*  noisy and 0 if not
*
*******************************************************************************
*/
UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data(
    UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size)
{
    UWORD8 u1_noisy_quadrants = 0;
    UWORD8 u1_half_size;
    WORD32 quadrant;

    /* leaf of the recursion: look the flag up directly */
    if(8 == u1_cu_size)
    {
        const UWORD8 u1_flag_index = (u1_cu_x_pos / 8) + u1_cu_y_pos;

        return pu1_is_8x8Blk_noisy[u1_flag_index];
    }

    u1_half_size = u1_cu_size / 2;

    /* visiting order: top-left, top-right, bottom-left, bottom-right */
    for(quadrant = 0; quadrant < 4; quadrant++)
    {
        const UWORD8 u1_quad_x = u1_cu_x_pos + ((quadrant & 1) ? u1_half_size : 0);
        const UWORD8 u1_quad_y = u1_cu_y_pos + ((quadrant & 2) ? u1_half_size : 0);

        u1_noisy_quadrants += ihevce_determine_cu_noise_based_on_8x8Blk_data(
            pu1_is_8x8Blk_noisy, u1_quad_x, u1_quad_y, u1_half_size);
    }

    return (u1_noisy_quadrants >= 2);
}
466*c83a76b0SSuyog Pawar
467*c83a76b0SSuyog Pawar /*!
468*c83a76b0SSuyog Pawar ******************************************************************************
469*c83a76b0SSuyog Pawar * \if Function name : ihevce_psy_rd_cost_croma \endif
470*c83a76b0SSuyog Pawar *
471*c83a76b0SSuyog Pawar * \brief
472*c83a76b0SSuyog Pawar * Calculates the psyco visual cost for RD opt. This is
473*c83a76b0SSuyog Pawar *
474*c83a76b0SSuyog Pawar * \param[in] pui4_source_satd
475*c83a76b0SSuyog Pawar * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
476*c83a76b0SSuyog Pawar * \param[in] *pui1_recon
477*c83a76b0SSuyog Pawar * This si the pointer to the pred data.
478*c83a76b0SSuyog Pawar * \param[in] recon_stride
479*c83a76b0SSuyog Pawar * This si the pred stride
480*c83a76b0SSuyog Pawar * \param[in] pic_type
481*c83a76b0SSuyog Pawar * Picture type.
482*c83a76b0SSuyog Pawar * \param[in] layer_id
483*c83a76b0SSuyog Pawar * Indicates the temporal layer.
484*c83a76b0SSuyog Pawar * \param[in] lambda
485*c83a76b0SSuyog Pawar * This is the weighting factor for the cost.
486*c83a76b0SSuyog Pawar * \param[in] is_hbd
487*c83a76b0SSuyog Pawar * This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit.
488*c83a76b0SSuyog Pawar * \param[in] sub_sampling_type
489*c83a76b0SSuyog Pawar * This is the chroma subsampling type. 11 - for 420 and 13 for 422
490*c83a76b0SSuyog Pawar * \return
491*c83a76b0SSuyog Pawar * the cost for the psyRDopt
492*c83a76b0SSuyog Pawar *
493*c83a76b0SSuyog Pawar * \author
494*c83a76b0SSuyog Pawar * Ittiam
495*c83a76b0SSuyog Pawar *
496*c83a76b0SSuyog Pawar *****************************************************************************
497*c83a76b0SSuyog Pawar */
ihevce_psy_rd_cost_croma(LWORD64 * pui4_source_satd,void * p_recon,WORD32 recon_stride_vert,WORD32 recond_stride_horz,WORD32 cu_size_luma,WORD32 pic_type,WORD32 layer_id,WORD32 lambda,WORD32 start_index,WORD32 is_hbd,WORD32 sub_sampling_type,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list)498*c83a76b0SSuyog Pawar LWORD64 ihevce_psy_rd_cost_croma(
499*c83a76b0SSuyog Pawar LWORD64 *pui4_source_satd,
500*c83a76b0SSuyog Pawar void *p_recon,
501*c83a76b0SSuyog Pawar WORD32 recon_stride_vert,
502*c83a76b0SSuyog Pawar WORD32 recond_stride_horz,
503*c83a76b0SSuyog Pawar WORD32 cu_size_luma,
504*c83a76b0SSuyog Pawar WORD32 pic_type,
505*c83a76b0SSuyog Pawar WORD32 layer_id,
506*c83a76b0SSuyog Pawar WORD32 lambda,
507*c83a76b0SSuyog Pawar WORD32 start_index,
508*c83a76b0SSuyog Pawar WORD32 is_hbd,
509*c83a76b0SSuyog Pawar WORD32 sub_sampling_type,
510*c83a76b0SSuyog Pawar ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
511*c83a76b0SSuyog Pawar {
512*c83a76b0SSuyog Pawar /* declare local variables to store the SATD values for the pred for the current block. */
513*c83a76b0SSuyog Pawar LWORD64 psy_rd_cost;
514*c83a76b0SSuyog Pawar UWORD32 lambda_mod;
515*c83a76b0SSuyog Pawar WORD32 psy_factor;
516*c83a76b0SSuyog Pawar
517*c83a76b0SSuyog Pawar /* declare local variables */
518*c83a76b0SSuyog Pawar WORD32 i;
519*c83a76b0SSuyog Pawar WORD32 cu_total_size;
520*c83a76b0SSuyog Pawar WORD32 num_comp_had_blocks;
521*c83a76b0SSuyog Pawar
522*c83a76b0SSuyog Pawar UWORD8 *pu1_l0_block;
523*c83a76b0SSuyog Pawar UWORD8 *pu1_l0_block_prev;
524*c83a76b0SSuyog Pawar UWORD8 *pu1_recon;
525*c83a76b0SSuyog Pawar WORD32 ht_offset;
526*c83a76b0SSuyog Pawar WORD32 wd_offset;
527*c83a76b0SSuyog Pawar WORD32 cu_ht;
528*c83a76b0SSuyog Pawar WORD32 cu_wd;
529*c83a76b0SSuyog Pawar
530*c83a76b0SSuyog Pawar WORD32 num_horz_blocks;
531*c83a76b0SSuyog Pawar
532*c83a76b0SSuyog Pawar WORD16 pi2_residue_had[64];
533*c83a76b0SSuyog Pawar /* this is used as a buffer with all values equal to 0. This is emulate the case with
534*c83a76b0SSuyog Pawar pred being zero in HAD fucntion */
535*c83a76b0SSuyog Pawar UWORD8 ai1_zeros_buffer[64];
536*c83a76b0SSuyog Pawar
537*c83a76b0SSuyog Pawar WORD32 had_block_size;
538*c83a76b0SSuyog Pawar LWORD64 source_satd; // to hold source for current 8x8 block
539*c83a76b0SSuyog Pawar LWORD64 recon_satd; // holds the current recon 8x8 satd
540*c83a76b0SSuyog Pawar
541*c83a76b0SSuyog Pawar WORD32 index_for_src_satd;
542*c83a76b0SSuyog Pawar
543*c83a76b0SSuyog Pawar (void)recond_stride_horz;
544*c83a76b0SSuyog Pawar (void)pic_type;
545*c83a76b0SSuyog Pawar (void)layer_id;
546*c83a76b0SSuyog Pawar if(!is_hbd)
547*c83a76b0SSuyog Pawar {
548*c83a76b0SSuyog Pawar pu1_recon = (UWORD8 *)p_recon;
549*c83a76b0SSuyog Pawar }
550*c83a76b0SSuyog Pawar
551*c83a76b0SSuyog Pawar /**** initialize the variables ****/
552*c83a76b0SSuyog Pawar had_block_size = 4;
553*c83a76b0SSuyog Pawar
554*c83a76b0SSuyog Pawar if(sub_sampling_type == 1) // 420
555*c83a76b0SSuyog Pawar {
556*c83a76b0SSuyog Pawar cu_ht = cu_size_luma / 2;
557*c83a76b0SSuyog Pawar cu_wd = cu_size_luma / 2;
558*c83a76b0SSuyog Pawar }
559*c83a76b0SSuyog Pawar else
560*c83a76b0SSuyog Pawar {
561*c83a76b0SSuyog Pawar cu_ht = cu_size_luma;
562*c83a76b0SSuyog Pawar cu_wd = cu_size_luma / 2;
563*c83a76b0SSuyog Pawar }
564*c83a76b0SSuyog Pawar
565*c83a76b0SSuyog Pawar num_horz_blocks = 2 * cu_wd / had_block_size; //ctb_width / had_block_size;
566*c83a76b0SSuyog Pawar ht_offset = -had_block_size;
567*c83a76b0SSuyog Pawar wd_offset = 0; //-had_block_size;
568*c83a76b0SSuyog Pawar
569*c83a76b0SSuyog Pawar cu_total_size = cu_ht * cu_wd;
570*c83a76b0SSuyog Pawar num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size);
571*c83a76b0SSuyog Pawar
572*c83a76b0SSuyog Pawar index_for_src_satd = start_index;
573*c83a76b0SSuyog Pawar
574*c83a76b0SSuyog Pawar for(i = 0; i < 64; i++)
575*c83a76b0SSuyog Pawar {
576*c83a76b0SSuyog Pawar ai1_zeros_buffer[i] = 0;
577*c83a76b0SSuyog Pawar }
578*c83a76b0SSuyog Pawar
579*c83a76b0SSuyog Pawar psy_factor = PSY_STRENGTH_CHROMA;
580*c83a76b0SSuyog Pawar psy_rd_cost = 0;
581*c83a76b0SSuyog Pawar lambda_mod = lambda * psy_factor;
582*c83a76b0SSuyog Pawar
583*c83a76b0SSuyog Pawar /************************************************************/
584*c83a76b0SSuyog Pawar /* loop over for every 4x4 blocks in the CU for Cb */
585*c83a76b0SSuyog Pawar for(i = 0; i < num_comp_had_blocks; i++)
586*c83a76b0SSuyog Pawar {
587*c83a76b0SSuyog Pawar if(i % num_horz_blocks == 0)
588*c83a76b0SSuyog Pawar {
589*c83a76b0SSuyog Pawar wd_offset = -had_block_size;
590*c83a76b0SSuyog Pawar ht_offset += had_block_size;
591*c83a76b0SSuyog Pawar }
592*c83a76b0SSuyog Pawar wd_offset += had_block_size;
593*c83a76b0SSuyog Pawar
594*c83a76b0SSuyog Pawar /* source satd for the current 8x8 block */
595*c83a76b0SSuyog Pawar source_satd = pui4_source_satd[index_for_src_satd];
596*c83a76b0SSuyog Pawar
597*c83a76b0SSuyog Pawar if(i % 2 != 0)
598*c83a76b0SSuyog Pawar {
599*c83a76b0SSuyog Pawar if(!is_hbd)
600*c83a76b0SSuyog Pawar {
601*c83a76b0SSuyog Pawar pu1_l0_block = pu1_l0_block_prev + 1;
602*c83a76b0SSuyog Pawar }
603*c83a76b0SSuyog Pawar }
604*c83a76b0SSuyog Pawar else
605*c83a76b0SSuyog Pawar {
606*c83a76b0SSuyog Pawar if(!is_hbd)
607*c83a76b0SSuyog Pawar {
608*c83a76b0SSuyog Pawar /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
609*c83a76b0SSuyog Pawar pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
610*c83a76b0SSuyog Pawar pu1_l0_block_prev = pu1_l0_block;
611*c83a76b0SSuyog Pawar }
612*c83a76b0SSuyog Pawar }
613*c83a76b0SSuyog Pawar
614*c83a76b0SSuyog Pawar if(had_block_size == 4)
615*c83a76b0SSuyog Pawar {
616*c83a76b0SSuyog Pawar if(!is_hbd)
617*c83a76b0SSuyog Pawar {
618*c83a76b0SSuyog Pawar recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit(
619*c83a76b0SSuyog Pawar pu1_l0_block,
620*c83a76b0SSuyog Pawar recon_stride_vert,
621*c83a76b0SSuyog Pawar ai1_zeros_buffer,
622*c83a76b0SSuyog Pawar had_block_size,
623*c83a76b0SSuyog Pawar pi2_residue_had,
624*c83a76b0SSuyog Pawar had_block_size);
625*c83a76b0SSuyog Pawar }
626*c83a76b0SSuyog Pawar
627*c83a76b0SSuyog Pawar /* get the additional cost function based on the absolute SATD diff of source and recon. */
628*c83a76b0SSuyog Pawar psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
629*c83a76b0SSuyog Pawar
630*c83a76b0SSuyog Pawar index_for_src_satd++;
631*c83a76b0SSuyog Pawar
632*c83a76b0SSuyog Pawar if((i % num_horz_blocks) == (num_horz_blocks - 1))
633*c83a76b0SSuyog Pawar {
634*c83a76b0SSuyog Pawar index_for_src_satd -= num_horz_blocks;
635*c83a76b0SSuyog Pawar index_for_src_satd +=
636*c83a76b0SSuyog Pawar (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
637*c83a76b0SSuyog Pawar }
638*c83a76b0SSuyog Pawar
639*c83a76b0SSuyog Pawar } // if had block size ==4
640*c83a76b0SSuyog Pawar } // for loop for all 4x4 block in the cu
641*c83a76b0SSuyog Pawar
642*c83a76b0SSuyog Pawar psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT);
643*c83a76b0SSuyog Pawar /* reutrn the additional cost for the psy RD opt */
644*c83a76b0SSuyog Pawar return (psy_rd_cost);
645*c83a76b0SSuyog Pawar }
646*c83a76b0SSuyog Pawar
647*c83a76b0SSuyog Pawar /*!
648*c83a76b0SSuyog Pawar ******************************************************************************
649*c83a76b0SSuyog Pawar * \if Function name : ihevce_psy_rd_cost \endif
650*c83a76b0SSuyog Pawar *
651*c83a76b0SSuyog Pawar * \brief
652*c83a76b0SSuyog Pawar * Calculates the psyco visual cost for RD opt. This is
653*c83a76b0SSuyog Pawar *
654*c83a76b0SSuyog Pawar * \param[in] pui4_source_satd
655*c83a76b0SSuyog Pawar * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
656*c83a76b0SSuyog Pawar * \param[in] *pui1_recon
657*c83a76b0SSuyog Pawar * This si the pointer to the pred data.
658*c83a76b0SSuyog Pawar * \param[in] recon_stride
659*c83a76b0SSuyog Pawar * This si the pred stride
660*c83a76b0SSuyog Pawar * \param[in] pic_type
661*c83a76b0SSuyog Pawar * Picture type.
662*c83a76b0SSuyog Pawar * \param[in] layer_id
663*c83a76b0SSuyog Pawar * Indicates the temporal layer.
664*c83a76b0SSuyog Pawar * \param[in] lambda
665*c83a76b0SSuyog Pawar * This is the weighting factor for the cost.
666*c83a76b0SSuyog Pawar *
667*c83a76b0SSuyog Pawar * \return
668*c83a76b0SSuyog Pawar * the cost for the psyRDopt
669*c83a76b0SSuyog Pawar *
670*c83a76b0SSuyog Pawar * \author
671*c83a76b0SSuyog Pawar * Ittiam
672*c83a76b0SSuyog Pawar *
673*c83a76b0SSuyog Pawar *****************************************************************************
674*c83a76b0SSuyog Pawar */
ihevce_psy_rd_cost(LWORD64 * pui4_source_satd,void * pv_recon,WORD32 recon_stride_vert,WORD32 recond_stride_horz,WORD32 cu_size,WORD32 pic_type,WORD32 layer_id,WORD32 lambda,WORD32 start_index,WORD32 is_hbd,UWORD32 u4_psy_strength,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list)675*c83a76b0SSuyog Pawar LWORD64 ihevce_psy_rd_cost(
676*c83a76b0SSuyog Pawar LWORD64 *pui4_source_satd,
677*c83a76b0SSuyog Pawar void *pv_recon,
678*c83a76b0SSuyog Pawar WORD32 recon_stride_vert,
679*c83a76b0SSuyog Pawar WORD32 recond_stride_horz,
680*c83a76b0SSuyog Pawar WORD32 cu_size,
681*c83a76b0SSuyog Pawar WORD32 pic_type,
682*c83a76b0SSuyog Pawar WORD32 layer_id,
683*c83a76b0SSuyog Pawar WORD32 lambda,
684*c83a76b0SSuyog Pawar WORD32 start_index,
685*c83a76b0SSuyog Pawar WORD32 is_hbd,
686*c83a76b0SSuyog Pawar UWORD32 u4_psy_strength,
687*c83a76b0SSuyog Pawar ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
688*c83a76b0SSuyog Pawar {
689*c83a76b0SSuyog Pawar /* declare local variables to store the SATD values for the pred for the current block. */
690*c83a76b0SSuyog Pawar LWORD64 psy_rd_cost; // TODO : check if overflow is there.
691*c83a76b0SSuyog Pawar UWORD32 lambda_mod;
692*c83a76b0SSuyog Pawar WORD32 psy_factor;
693*c83a76b0SSuyog Pawar
694*c83a76b0SSuyog Pawar /* declare local variables */
695*c83a76b0SSuyog Pawar WORD32 i;
696*c83a76b0SSuyog Pawar WORD32 cu_total_size;
697*c83a76b0SSuyog Pawar WORD32 num_comp_had_blocks;
698*c83a76b0SSuyog Pawar
699*c83a76b0SSuyog Pawar UWORD8 *pu1_l0_block;
700*c83a76b0SSuyog Pawar UWORD8 *pu1_recon;
701*c83a76b0SSuyog Pawar
702*c83a76b0SSuyog Pawar WORD32 ht_offset;
703*c83a76b0SSuyog Pawar WORD32 wd_offset;
704*c83a76b0SSuyog Pawar WORD32 cu_ht;
705*c83a76b0SSuyog Pawar WORD32 cu_wd;
706*c83a76b0SSuyog Pawar
707*c83a76b0SSuyog Pawar WORD32 num_horz_blocks;
708*c83a76b0SSuyog Pawar
709*c83a76b0SSuyog Pawar //WORD16 pi2_residue_had[64];
710*c83a76b0SSuyog Pawar WORD16 pi2_residue_had_zscan[64];
711*c83a76b0SSuyog Pawar //WORD16 pi2_residue[64];
712*c83a76b0SSuyog Pawar /* this is used as a buffer with all values equal to 0. This is emulate the case with
713*c83a76b0SSuyog Pawar pred being zero in HAD fucntion */
714*c83a76b0SSuyog Pawar UWORD8 ai1_zeros_buffer[64];
715*c83a76b0SSuyog Pawar
716*c83a76b0SSuyog Pawar WORD32 had_block_size;
717*c83a76b0SSuyog Pawar LWORD64 source_satd; // to hold source for current 8x8 block
718*c83a76b0SSuyog Pawar LWORD64 recon_satd; // holds the current recon 8x8 satd
719*c83a76b0SSuyog Pawar
720*c83a76b0SSuyog Pawar WORD32 index_for_src_satd;
721*c83a76b0SSuyog Pawar
722*c83a76b0SSuyog Pawar (void)recond_stride_horz;
723*c83a76b0SSuyog Pawar (void)pic_type;
724*c83a76b0SSuyog Pawar (void)layer_id;
725*c83a76b0SSuyog Pawar /***** initialize the variables ****/
726*c83a76b0SSuyog Pawar had_block_size = 8;
727*c83a76b0SSuyog Pawar cu_ht = cu_size;
728*c83a76b0SSuyog Pawar cu_wd = cu_size;
729*c83a76b0SSuyog Pawar
730*c83a76b0SSuyog Pawar num_horz_blocks = cu_wd / had_block_size; //ctb_width / had_block_size;
731*c83a76b0SSuyog Pawar
732*c83a76b0SSuyog Pawar ht_offset = -had_block_size;
733*c83a76b0SSuyog Pawar wd_offset = 0 - had_block_size;
734*c83a76b0SSuyog Pawar
735*c83a76b0SSuyog Pawar cu_total_size = cu_ht * cu_wd;
736*c83a76b0SSuyog Pawar num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size);
737*c83a76b0SSuyog Pawar
738*c83a76b0SSuyog Pawar index_for_src_satd = start_index;
739*c83a76b0SSuyog Pawar
740*c83a76b0SSuyog Pawar for(i = 0; i < 64; i++)
741*c83a76b0SSuyog Pawar {
742*c83a76b0SSuyog Pawar ai1_zeros_buffer[i] = 0;
743*c83a76b0SSuyog Pawar }
744*c83a76b0SSuyog Pawar psy_factor = u4_psy_strength; //PSY_STRENGTH;
745*c83a76b0SSuyog Pawar psy_rd_cost = 0;
746*c83a76b0SSuyog Pawar lambda_mod = lambda * psy_factor;
747*c83a76b0SSuyog Pawar
748*c83a76b0SSuyog Pawar if(!is_hbd)
749*c83a76b0SSuyog Pawar {
750*c83a76b0SSuyog Pawar pu1_recon = (UWORD8 *)pv_recon;
751*c83a76b0SSuyog Pawar }
752*c83a76b0SSuyog Pawar
753*c83a76b0SSuyog Pawar /**************************************************************/
754*c83a76b0SSuyog Pawar /* loop over for every 8x8 blocks in the CU */
755*c83a76b0SSuyog Pawar for(i = 0; i < num_comp_had_blocks; i++)
756*c83a76b0SSuyog Pawar {
757*c83a76b0SSuyog Pawar if(i % num_horz_blocks == 0)
758*c83a76b0SSuyog Pawar {
759*c83a76b0SSuyog Pawar wd_offset = -had_block_size;
760*c83a76b0SSuyog Pawar ht_offset += had_block_size;
761*c83a76b0SSuyog Pawar }
762*c83a76b0SSuyog Pawar wd_offset += had_block_size;
763*c83a76b0SSuyog Pawar
764*c83a76b0SSuyog Pawar /* source satd for the current 8x8 block */
765*c83a76b0SSuyog Pawar source_satd = pui4_source_satd[index_for_src_satd];
766*c83a76b0SSuyog Pawar
767*c83a76b0SSuyog Pawar if(had_block_size == 8)
768*c83a76b0SSuyog Pawar {
769*c83a76b0SSuyog Pawar //WORD32 index;
770*c83a76b0SSuyog Pawar //WORD32 u4_satd;
771*c83a76b0SSuyog Pawar //WORD32 dst_strd = 8;
772*c83a76b0SSuyog Pawar //WORD32 i4_frm_qstep = 0;
773*c83a76b0SSuyog Pawar //WORD32 early_cbf;
774*c83a76b0SSuyog Pawar if(!is_hbd)
775*c83a76b0SSuyog Pawar {
776*c83a76b0SSuyog Pawar /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
777*c83a76b0SSuyog Pawar pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
778*c83a76b0SSuyog Pawar
779*c83a76b0SSuyog Pawar recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit(
780*c83a76b0SSuyog Pawar pu1_l0_block,
781*c83a76b0SSuyog Pawar recon_stride_vert,
782*c83a76b0SSuyog Pawar ai1_zeros_buffer,
783*c83a76b0SSuyog Pawar had_block_size,
784*c83a76b0SSuyog Pawar pi2_residue_had_zscan,
785*c83a76b0SSuyog Pawar had_block_size);
786*c83a76b0SSuyog Pawar }
787*c83a76b0SSuyog Pawar
788*c83a76b0SSuyog Pawar /* get the additional cost function based on the absolute SATD diff of source and recon. */
789*c83a76b0SSuyog Pawar psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
790*c83a76b0SSuyog Pawar
791*c83a76b0SSuyog Pawar index_for_src_satd++;
792*c83a76b0SSuyog Pawar if((i % num_horz_blocks) == (num_horz_blocks - 1))
793*c83a76b0SSuyog Pawar {
794*c83a76b0SSuyog Pawar index_for_src_satd -= num_horz_blocks;
795*c83a76b0SSuyog Pawar index_for_src_satd +=
796*c83a76b0SSuyog Pawar (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
797*c83a76b0SSuyog Pawar }
798*c83a76b0SSuyog Pawar } // if
799*c83a76b0SSuyog Pawar } // for loop
800*c83a76b0SSuyog Pawar psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT);
801*c83a76b0SSuyog Pawar
802*c83a76b0SSuyog Pawar /* reutrn the additional cost for the psy RD opt */
803*c83a76b0SSuyog Pawar return (psy_rd_cost);
804*c83a76b0SSuyog Pawar }
805*c83a76b0SSuyog Pawar
/*!
******************************************************************************
* \if Function name : ihevce_calc_stim_injected_variance \endif
*
* \brief
*    Computes pu8_sigmaXSquared[i4_part_id] - (pu8_sigmaX[i4_part_id])^2,
*    optionally rescales it by the square of the inverse prediction weight,
*    and writes the result to *u8_var reduced to at most 27 bits (as
*    reported by GETRANGE64).
*
* \param[in] pu8_sigmaX
*    Array of sums, indexed by partition id.
* \param[in] pu8_sigmaXSquared
*    Array of sums of squares, indexed by partition id.
* \param[out] u8_var
*    Receives the (possibly right-shifted) variance value.
* \param[in] i4_inv_wpred_wt
*    Inverse weight. When it equals the default weight derived from
*    WGHT_DEFAULT, no rescaling is done.
* \param[in] i4_inv_wt_shift_val
*    Right shift applied to i4_inv_wpred_wt before it is used as a multiplier.
* \param[in] i4_wpred_log_wdc
*    Enters the rescaling shift as (30 - 2 * i4_inv_wt_shift_val -
*    2 * i4_wpred_log_wdc).
* \param[in] i4_part_id
*    Index of the partition to process.
*
* \return
*    Number of bits by which *u8_var was right-shifted (0 if it was not).
*
*****************************************************************************
*/
unsigned long ihevce_calc_stim_injected_variance(
    ULWORD64 *pu8_sigmaX,
    ULWORD64 *pu8_sigmaXSquared,
    ULWORD64 *u8_var,
    WORD32 i4_inv_wpred_wt,
    WORD32 i4_inv_wt_shift_val,
    WORD32 i4_wpred_log_wdc,
    WORD32 i4_part_id)
{
    const WORD32 i4_unity_inv_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
    const ULWORD64 u8_sum = pu8_sigmaX[i4_part_id];
    ULWORD64 u8_variance = pu8_sigmaXSquared[i4_part_id] - (u8_sum * u8_sum);
    WORD32 i4_num_bits;

    if(i4_unity_inv_wt != i4_inv_wpred_wt)
    {
        /* Non-default weight: scale by the square of the reduced-precision weight */
        const WORD32 i4_scaled_wt = i4_inv_wpred_wt >> i4_inv_wt_shift_val;
        const WORD32 i4_rescale_shift = 30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2;
        const ULWORD64 u8_weighted = u8_variance * i4_scaled_wt * i4_scaled_wt;

        /* SHR_NEG is defined elsewhere; presumably it shifts left when the */
        /* shift amount is negative - confirm against its definition        */
        u8_variance = SHR_NEG(u8_weighted, i4_rescale_shift);
    }

    GETRANGE64(i4_num_bits, u8_variance);

    if(i4_num_bits <= 27)
    {
        /* Already within 27 bits: hand back unchanged */
        *u8_var = u8_variance;
        return 0;
    }

    /* Drop the excess low-order bits and tell the caller how many were dropped */
    *u8_var = u8_variance >> (i4_num_bits - 27);
    return (i4_num_bits - 27);
}
845*c83a76b0SSuyog Pawar
/*!
******************************************************************************
* \if Function name : ihevce_calc_variance_for_diff_weights \endif
*
* \brief
*    Combines the per-partition sums and sums of squares of a CU into one
*    value
*        z = N * (S2[0] + S2[1]) - (S[0] + S[1])^2
*    where N = u1_cu_size * u1_cu_size, S2[k] is the partition's sum of
*    squares divided by its pixel count and S[k] is the partition's sum.
*    When u1_is_for_src is set and a partition's inverse weight is not the
*    default one, S[k] and S2[k] are first rescaled by that weight.
*    The result is written to *u8_var reduced to at most 27 bits (as
*    reported by GETRANGE64).
*
* \param[in] pu8_sigmaX
*    Array of per-partition sums.
* \param[in] pu8_sigmaXSquared
*    Array of per-partition sums of squares.
* \param[out] u8_var
*    Receives the (possibly right-shifted) combined value.
* \param[in,out] pi4_inv_wt
*    Per-partition inverse weights. NOTE: when u1_is_for_src is set, every
*    non-default entry is overwritten in place with its value right-shifted
*    by pi4_inv_wt_shift_val[k].
* \param[in] pi4_inv_wt_shift_val
*    Per-partition shift applied to the inverse weight.
* \param[in] ps_result
*    Per-partition results; pu.b4_wd + 1 and pu.b4_ht + 1 give the partition
*    size in units of 4x4 base blocks.
* \param[in] i4_wpred_log_wdc
*    Enters the rescaling shifts (15 - shift - wdc, and twice that for the
*    squared term).
* \param[in] pe_part_id
*    Partition ids; used to index the sigma arrays when u1_is_for_src is set.
* \param[in] u1_cu_size
*    Width (and height) of the CU in pixels.
* \param[in] u1_num_parts
*    Number of partitions to process. The temporaries hold
*    MAX_NUM_INTER_PARTS entries and entries [0] and [1] are combined, so
*    this must not exceed MAX_NUM_INTER_PARTS - not checked here.
* \param[in] u1_is_for_src
*    Non-zero: index the sigma arrays through pe_part_id and apply the
*    inverse weights. Zero: index by partition number, no weighting.
*
* \return
*    Number of bits by which *u8_var was right-shifted (0 if it was not).
*
*****************************************************************************
*/
unsigned long ihevce_calc_variance_for_diff_weights(
    ULWORD64 *pu8_sigmaX,
    ULWORD64 *pu8_sigmaXSquared,
    ULWORD64 *u8_var,
    WORD32 *pi4_inv_wt,
    WORD32 *pi4_inv_wt_shift_val,
    pu_result_t *ps_result,
    WORD32 i4_wpred_log_wdc,
    PART_ID_T *pe_part_id,
    UWORD8 u1_cu_size,
    UWORD8 u1_num_parts,
    UWORD8 u1_is_for_src)
{
    WORD32 i4_k;
    UWORD32 u4_wd, u4_ht;
    /* Kept 32 bits wide: an 8-bit counter truncates a product of 256 to 0, */
    /* which would make the pixel count 0 and the division below undefined  */
    UWORD32 u4_num_base_blks;
    UWORD32 u4_num_pixels_in_part;
    UWORD8 u1_index;
    WORD32 i4_bits_req;

    const UWORD32 u4_base_blk_size = 4;
    UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size;
    ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 };
    ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 };
    ULWORD64 u8_sigmaX_total;
    ULWORD64 u8_z;

    const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

    for(i4_k = 0; i4_k < u1_num_parts; i4_k++)
    {
        /* Partition dimensions in units of 4x4 base blocks */
        u4_wd = ps_result[i4_k].pu.b4_wd + 1;
        u4_ht = ps_result[i4_k].pu.b4_ht + 1;
        u4_num_base_blks = u4_wd * u4_ht;
        u4_num_pixels_in_part = u4_num_base_blks * u4_base_blk_size * u4_base_blk_size;

        if(u1_is_for_src)
        {
            u1_index = pe_part_id[i4_k];
        }
        else
        {
            u1_index = i4_k;
        }

        u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part;
        u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index];

        if(u1_is_for_src && (pi4_inv_wt[i4_k] != i4_default_src_wt))
        {
            /* The reduced-precision weight is stored back into the caller's array */
            pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k];

            /* SHR_NEG is defined elsewhere; presumably it shifts left when the */
            /* shift amount is negative - confirm against its definition        */
            u8_temp_sigmaX[i4_k] = SHR_NEG(
                (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]),
                (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc));
            u8_temp_sigmaXsquared[i4_k] = SHR_NEG(
                (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]),
                (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2));
        }
    }

    u8_sigmaX_total = u8_temp_sigmaX[0] + u8_temp_sigmaX[1];

    u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) -
           (u8_sigmaX_total * u8_sigmaX_total);

    GETRANGE64(i4_bits_req, u8_z);

    if(i4_bits_req > 27)
    {
        /* Keep the top 27 bits and report how many low-order bits were dropped */
        *u8_var = u8_z >> (i4_bits_req - 27);
        return (i4_bits_req - 27);
    }
    else
    {
        *u8_var = u8_z;
        return 0;
    }
}
924