xref: /aosp_15_r20/external/libhevc/encoder/hme_interface.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar 
21*c83a76b0SSuyog Pawar /*****************************************************************************/
22*c83a76b0SSuyog Pawar /* File Includes                                                             */
23*c83a76b0SSuyog Pawar /*****************************************************************************/
24*c83a76b0SSuyog Pawar /* System include files */
25*c83a76b0SSuyog Pawar #include <stdio.h>
26*c83a76b0SSuyog Pawar #include <string.h>
27*c83a76b0SSuyog Pawar #include <stdlib.h>
28*c83a76b0SSuyog Pawar #include <assert.h>
29*c83a76b0SSuyog Pawar #include <stdarg.h>
30*c83a76b0SSuyog Pawar #include <math.h>
31*c83a76b0SSuyog Pawar #include <limits.h>
32*c83a76b0SSuyog Pawar 
33*c83a76b0SSuyog Pawar /* User include files */
34*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
35*c83a76b0SSuyog Pawar #include "itt_video_api.h"
36*c83a76b0SSuyog Pawar #include "ihevce_api.h"
37*c83a76b0SSuyog Pawar 
38*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
39*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
40*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
41*c83a76b0SSuyog Pawar 
42*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
43*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
44*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
45*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
46*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
47*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
48*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
49*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
50*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
51*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
52*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
53*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
54*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
55*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
56*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
57*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
58*c83a76b0SSuyog Pawar 
59*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
60*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
61*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
62*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
63*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
64*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
65*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
66*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
67*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
68*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
69*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
70*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
71*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
72*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
73*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
75*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
76*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
77*c83a76b0SSuyog Pawar #include "hme_datatype.h"
78*c83a76b0SSuyog Pawar #include "hme_interface.h"
79*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
80*c83a76b0SSuyog Pawar #include "hme_defs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
82*c83a76b0SSuyog Pawar #include "hme_globals.h"
83*c83a76b0SSuyog Pawar #include "hme_utils.h"
84*c83a76b0SSuyog Pawar #include "hme_coarse.h"
85*c83a76b0SSuyog Pawar #include "hme_refine.h"
86*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
87*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
88*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
89*c83a76b0SSuyog Pawar #include "ihevce_profile.h"
90*c83a76b0SSuyog Pawar 
91*c83a76b0SSuyog Pawar /*****************************************************************************/
92*c83a76b0SSuyog Pawar /* Function Definitions                                                      */
93*c83a76b0SSuyog Pawar /*****************************************************************************/
94*c83a76b0SSuyog Pawar 
hme_init_globals()95*c83a76b0SSuyog Pawar void hme_init_globals()
96*c83a76b0SSuyog Pawar {
97*c83a76b0SSuyog Pawar     GRID_PT_T id;
98*c83a76b0SSuyog Pawar     S32 i, j;
99*c83a76b0SSuyog Pawar     /*************************************************************************/
100*c83a76b0SSuyog Pawar     /* Initialize the lookup table for x offset, y offset, optimized mask    */
101*c83a76b0SSuyog Pawar     /* based on grid id. The design is as follows:                           */
102*c83a76b0SSuyog Pawar     /*                                                                       */
103*c83a76b0SSuyog Pawar     /*     a  b  c  d                                                        */
104*c83a76b0SSuyog Pawar     /*    TL  T TR  e                                                        */
105*c83a76b0SSuyog Pawar     /*     L  C  R  f                                                        */
106*c83a76b0SSuyog Pawar     /*    BL  B BR                                                           */
107*c83a76b0SSuyog Pawar     /*                                                                       */
108*c83a76b0SSuyog Pawar     /*  If a non-corner pt like T is the new minimum, then we need to        */
109*c83a76b0SSuyog Pawar     /*  evaluate only 3 new pts, in this case, a, b, c. So the optimal       */
110*c83a76b0SSuyog Pawar     /*  grid mask would reflect this. If a corner pt like TR is the new      */
111*c83a76b0SSuyog Pawar     /*  minimum, then we need to evaluate 5 new pts, in this case, b, c, d,  */
112*c83a76b0SSuyog Pawar     /*  e and f. So the grid mask will have 5 pts enabled.                   */
113*c83a76b0SSuyog Pawar     /*************************************************************************/
114*c83a76b0SSuyog Pawar 
115*c83a76b0SSuyog Pawar     id = PT_C;
116*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
117*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = 0;
118*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = 0;
119*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
120*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
121*c83a76b0SSuyog Pawar 
122*c83a76b0SSuyog Pawar     id = PT_L;
123*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
124*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = -1;
125*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = 0;
126*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
127*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
128*c83a76b0SSuyog Pawar 
129*c83a76b0SSuyog Pawar     id = PT_R;
130*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
131*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = 1;
132*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = 0;
133*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
134*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
135*c83a76b0SSuyog Pawar 
136*c83a76b0SSuyog Pawar     id = PT_T;
137*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
138*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = 0;
139*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = -1;
140*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
141*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
142*c83a76b0SSuyog Pawar 
143*c83a76b0SSuyog Pawar     id = PT_B;
144*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
145*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = 0;
146*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = 1;
147*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
148*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
149*c83a76b0SSuyog Pawar 
150*c83a76b0SSuyog Pawar     id = PT_TL;
151*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
152*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = -1;
153*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = -1;
154*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);
155*c83a76b0SSuyog Pawar 
156*c83a76b0SSuyog Pawar     id = PT_TR;
157*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
158*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = 1;
159*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = -1;
160*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);
161*c83a76b0SSuyog Pawar 
162*c83a76b0SSuyog Pawar     id = PT_BL;
163*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
164*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = -1;
165*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = 1;
166*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);
167*c83a76b0SSuyog Pawar 
168*c83a76b0SSuyog Pawar     id = PT_BR;
169*c83a76b0SSuyog Pawar     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
170*c83a76b0SSuyog Pawar     gai1_grid_id_to_x[id] = 1;
171*c83a76b0SSuyog Pawar     gai1_grid_id_to_y[id] = 1;
172*c83a76b0SSuyog Pawar     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);
173*c83a76b0SSuyog Pawar 
174*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
175*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
176*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
177*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
178*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
179*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
180*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
181*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
182*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
183*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
184*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
185*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
186*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
187*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
188*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
189*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
190*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;
191*c83a76b0SSuyog Pawar 
192*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
193*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
194*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
195*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
196*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
197*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
198*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
199*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
200*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
201*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
202*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
203*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
204*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
205*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
206*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
207*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
208*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;
209*c83a76b0SSuyog Pawar 
210*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
211*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
212*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
213*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
214*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
215*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
216*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
217*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
218*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
219*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
220*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
221*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
222*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
223*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
224*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
225*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
226*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;
227*c83a76b0SSuyog Pawar 
228*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
229*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
230*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
231*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
232*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
233*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
234*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
235*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
236*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
237*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
238*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
239*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
240*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
241*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
242*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
243*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
244*c83a76b0SSuyog Pawar     ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;
245*c83a76b0SSuyog Pawar 
246*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
247*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_2NxN] = 2;
248*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
249*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_NxN] = 4;
250*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
251*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
252*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
253*c83a76b0SSuyog Pawar     gau1_num_parts_in_part_type[PRT_nRx2N] = 2;
254*c83a76b0SSuyog Pawar 
255*c83a76b0SSuyog Pawar     for(i = 0; i < MAX_PART_TYPES; i++)
256*c83a76b0SSuyog Pawar         for(j = 0; j < MAX_NUM_PARTS; j++)
257*c83a76b0SSuyog Pawar             ge_part_type_to_part_id[i][j] = PART_ID_INVALID;
258*c83a76b0SSuyog Pawar 
259*c83a76b0SSuyog Pawar     /* 2Nx2N only one partition */
260*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;
261*c83a76b0SSuyog Pawar 
262*c83a76b0SSuyog Pawar     /* 2NxN 2 partitions */
263*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
264*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;
265*c83a76b0SSuyog Pawar 
266*c83a76b0SSuyog Pawar     /* Nx2N 2 partitions */
267*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
268*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;
269*c83a76b0SSuyog Pawar 
270*c83a76b0SSuyog Pawar     /* NxN 4 partitions */
271*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
272*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
273*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
274*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;
275*c83a76b0SSuyog Pawar 
276*c83a76b0SSuyog Pawar     /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
277*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
278*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;
279*c83a76b0SSuyog Pawar 
280*c83a76b0SSuyog Pawar     /* AMP 2Nx (3N/2 + N/2) 2 partitions */
281*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
282*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;
283*c83a76b0SSuyog Pawar 
284*c83a76b0SSuyog Pawar     /* AMP (N/2 + 3N/2) x 2N 2 partitions */
285*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
286*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;
287*c83a76b0SSuyog Pawar 
288*c83a76b0SSuyog Pawar     /* AMP (3N/2 + N/2) x 2N 2 partitions */
289*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
290*c83a76b0SSuyog Pawar     ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;
291*c83a76b0SSuyog Pawar 
292*c83a76b0SSuyog Pawar     /*************************************************************************/
293*c83a76b0SSuyog Pawar     /* initialize attributes for each partition id within the cu.            */
294*c83a76b0SSuyog Pawar     /*************************************************************************/
295*c83a76b0SSuyog Pawar     {
296*c83a76b0SSuyog Pawar         part_attr_t *ps_part_attr;
297*c83a76b0SSuyog Pawar 
298*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
299*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
300*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
301*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
302*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
303*c83a76b0SSuyog Pawar 
304*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
305*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
306*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
307*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
308*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 4;
309*c83a76b0SSuyog Pawar 
310*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
311*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
312*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 4;
313*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
314*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 4;
315*c83a76b0SSuyog Pawar 
316*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
317*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
318*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
319*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 4;
320*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
321*c83a76b0SSuyog Pawar 
322*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
323*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 4;
324*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
325*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 4;
326*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
327*c83a76b0SSuyog Pawar 
328*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
329*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
330*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
331*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 4;
332*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 4;
333*c83a76b0SSuyog Pawar 
334*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
335*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 4;
336*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
337*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 4;
338*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 4;
339*c83a76b0SSuyog Pawar 
340*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
341*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
342*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 4;
343*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 4;
344*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 4;
345*c83a76b0SSuyog Pawar 
346*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
347*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 4;
348*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 4;
349*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 4;
350*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 4;
351*c83a76b0SSuyog Pawar 
352*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
353*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
354*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
355*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
356*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 2;
357*c83a76b0SSuyog Pawar 
358*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
359*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
360*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 2;
361*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
362*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 6;
363*c83a76b0SSuyog Pawar 
364*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
365*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
366*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
367*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
368*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 6;
369*c83a76b0SSuyog Pawar 
370*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
371*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
372*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 6;
373*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 8;
374*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 2;
375*c83a76b0SSuyog Pawar 
376*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
377*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
378*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
379*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 2;
380*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
381*c83a76b0SSuyog Pawar 
382*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
383*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 2;
384*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
385*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 6;
386*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
387*c83a76b0SSuyog Pawar 
388*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
389*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 0;
390*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
391*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 6;
392*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
393*c83a76b0SSuyog Pawar 
394*c83a76b0SSuyog Pawar         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
395*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_start = 6;
396*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_start = 0;
397*c83a76b0SSuyog Pawar         ps_part_attr->u1_x_count = 2;
398*c83a76b0SSuyog Pawar         ps_part_attr->u1_y_count = 8;
399*c83a76b0SSuyog Pawar     }
400*c83a76b0SSuyog Pawar     for(i = 0; i < NUM_BLK_SIZES; i++)
401*c83a76b0SSuyog Pawar         ge_blk_size_to_cu_size[i] = CU_INVALID;
402*c83a76b0SSuyog Pawar 
403*c83a76b0SSuyog Pawar     ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
404*c83a76b0SSuyog Pawar     ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
405*c83a76b0SSuyog Pawar     ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
406*c83a76b0SSuyog Pawar     ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;
407*c83a76b0SSuyog Pawar 
408*c83a76b0SSuyog Pawar     /* This is the reverse mapping: given CU size, get blk size */
409*c83a76b0SSuyog Pawar     ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
410*c83a76b0SSuyog Pawar     ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
411*c83a76b0SSuyog Pawar     ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
412*c83a76b0SSuyog Pawar     ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;
413*c83a76b0SSuyog Pawar 
414*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_2Nx2N] = 0;
415*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_2NxN] = 0;
416*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_Nx2N] = 1;
417*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_NxN] = 1;
418*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_2NxnU] = 0;
419*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_2NxnD] = 0;
420*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_nLx2N] = 1;
421*c83a76b0SSuyog Pawar     gau1_is_vert_part[PRT_nRx2N] = 1;
422*c83a76b0SSuyog Pawar 
423*c83a76b0SSuyog Pawar     /* Initialise the number of best results for the full-pel refinement */
424*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
425*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
426*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
427*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
428*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
429*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
430*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
431*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
432*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
433*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
434*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
435*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
436*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
437*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
438*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
439*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
440*c83a76b0SSuyog Pawar     gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;
441*c83a76b0SSuyog Pawar 
442*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
443*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
444*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
445*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
446*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
447*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
448*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
449*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
450*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
451*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
452*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
453*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
454*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
455*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
456*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
457*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
458*c83a76b0SSuyog Pawar     gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;
459*c83a76b0SSuyog Pawar 
460*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
461*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
462*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
463*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
464*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
465*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
466*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
467*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
468*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
469*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
470*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
471*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
472*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
473*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
474*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
475*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
476*c83a76b0SSuyog Pawar     gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;
477*c83a76b0SSuyog Pawar 
478*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
479*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
480*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
481*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
482*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
483*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
484*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
485*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
486*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
487*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
488*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
489*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
490*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
491*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
492*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
493*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
494*c83a76b0SSuyog Pawar     gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;
495*c83a76b0SSuyog Pawar 
496*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
497*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
498*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
499*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
500*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
501*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
502*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
503*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
504*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
505*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
506*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
507*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
508*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
509*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
510*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
511*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
512*c83a76b0SSuyog Pawar     gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;
513*c83a76b0SSuyog Pawar 
514*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
515*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
516*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
517*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
518*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
519*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
520*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
521*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
522*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
523*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
524*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
525*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
526*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
527*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
528*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
529*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
530*c83a76b0SSuyog Pawar     gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;
531*c83a76b0SSuyog Pawar 
532*c83a76b0SSuyog Pawar     /* Top right validity for each part id */
533*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
534*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
535*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
536*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
537*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
538*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
539*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
540*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
541*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
542*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
543*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
544*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
545*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
546*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
547*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
548*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
549*c83a76b0SSuyog Pawar     gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;
550*c83a76b0SSuyog Pawar 
551*c83a76b0SSuyog Pawar     /* Bot Left validity for each part id */
552*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
553*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
554*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
555*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
556*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
557*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
558*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
559*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
560*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
561*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
562*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
563*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
564*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
565*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
566*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
567*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
568*c83a76b0SSuyog Pawar     gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;
569*c83a76b0SSuyog Pawar 
570*c83a76b0SSuyog Pawar     /*Part id to part num of this partition id in the CU */
571*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
572*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
573*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
574*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
575*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
576*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
577*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
578*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
579*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
580*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
581*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
582*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
583*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
584*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
585*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
586*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
587*c83a76b0SSuyog Pawar     gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;
588*c83a76b0SSuyog Pawar 
589*c83a76b0SSuyog Pawar     /*Which partition type does this partition id belong to */
590*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
591*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
592*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
593*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
594*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
595*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
596*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
597*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
598*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
599*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
600*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
601*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
602*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
603*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
604*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
605*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
606*c83a76b0SSuyog Pawar     ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;
607*c83a76b0SSuyog Pawar 
608*c83a76b0SSuyog Pawar     /*************************************************************************/
609*c83a76b0SSuyog Pawar     /* Set up the bits to be taken up for the part type. This is equally     */
610*c83a76b0SSuyog Pawar     /* divided up between the various partitions in the part-type.           */
611*c83a76b0SSuyog Pawar     /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as         */
612*c83a76b0SSuyog Pawar     /* partition 2Nx2N.                                                      */
613*c83a76b0SSuyog Pawar     /*************************************************************************/
614*c83a76b0SSuyog Pawar     /* 1 bit for 2Nx2N partition */
615*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;
616*c83a76b0SSuyog Pawar 
617*c83a76b0SSuyog Pawar     /* 3 bits for symmetric part types, so 1.5 bits per partition */
618*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
619*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
620*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
621*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;
622*c83a76b0SSuyog Pawar 
623*c83a76b0SSuyog Pawar     /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
624*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
625*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
626*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
627*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;
628*c83a76b0SSuyog Pawar 
629*c83a76b0SSuyog Pawar     /* 4 bits for AMP so 2 bits per partition */
630*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
631*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
632*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
633*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
634*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
635*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
636*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
637*c83a76b0SSuyog Pawar     gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
638*c83a76b0SSuyog Pawar }
639*c83a76b0SSuyog Pawar 
640*c83a76b0SSuyog Pawar /**
641*c83a76b0SSuyog Pawar ********************************************************************************
642*c83a76b0SSuyog Pawar *  @fn     hme_enc_num_alloc()
643*c83a76b0SSuyog Pawar *
644*c83a76b0SSuyog Pawar *  @brief  returns number of memtabs that is required by hme module
645*c83a76b0SSuyog Pawar *
646*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
647*c83a76b0SSuyog Pawar ********************************************************************************
648*c83a76b0SSuyog Pawar */
hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)649*c83a76b0SSuyog Pawar S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
650*c83a76b0SSuyog Pawar {
651*c83a76b0SSuyog Pawar     if(i4_num_me_frm_pllel > 1)
652*c83a76b0SSuyog Pawar     {
653*c83a76b0SSuyog Pawar         return ((S32)MAX_HME_ENC_TOT_MEMTABS);
654*c83a76b0SSuyog Pawar     }
655*c83a76b0SSuyog Pawar     else
656*c83a76b0SSuyog Pawar     {
657*c83a76b0SSuyog Pawar         return ((S32)MIN_HME_ENC_TOT_MEMTABS);
658*c83a76b0SSuyog Pawar     }
659*c83a76b0SSuyog Pawar }
660*c83a76b0SSuyog Pawar 
661*c83a76b0SSuyog Pawar /**
662*c83a76b0SSuyog Pawar ********************************************************************************
663*c83a76b0SSuyog Pawar *  @fn     hme_coarse_num_alloc()
664*c83a76b0SSuyog Pawar *
665*c83a76b0SSuyog Pawar *  @brief  returns number of memtabs that is required by hme module
666*c83a76b0SSuyog Pawar *
667*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
668*c83a76b0SSuyog Pawar ********************************************************************************
669*c83a76b0SSuyog Pawar */
hme_coarse_num_alloc()670*c83a76b0SSuyog Pawar S32 hme_coarse_num_alloc()
671*c83a76b0SSuyog Pawar {
672*c83a76b0SSuyog Pawar     return ((S32)HME_COARSE_TOT_MEMTABS);
673*c83a76b0SSuyog Pawar }
674*c83a76b0SSuyog Pawar 
675*c83a76b0SSuyog Pawar /**
676*c83a76b0SSuyog Pawar ********************************************************************************
677*c83a76b0SSuyog Pawar *  @fn     hme_coarse_dep_mngr_num_alloc()
678*c83a76b0SSuyog Pawar *
679*c83a76b0SSuyog Pawar *  @brief  returns number of memtabs that is required by Dep Mngr for hme module
680*c83a76b0SSuyog Pawar *
681*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
682*c83a76b0SSuyog Pawar ********************************************************************************
683*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_num_alloc()684*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_num_alloc()
685*c83a76b0SSuyog Pawar {
686*c83a76b0SSuyog Pawar     return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
687*c83a76b0SSuyog Pawar }
688*c83a76b0SSuyog Pawar 
hme_validate_init_prms(hme_init_prms_t * ps_prms)689*c83a76b0SSuyog Pawar S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
690*c83a76b0SSuyog Pawar {
691*c83a76b0SSuyog Pawar     S32 n_layers = ps_prms->num_simulcast_layers;
692*c83a76b0SSuyog Pawar 
693*c83a76b0SSuyog Pawar     /* The final layer has got to be a non encode coarse layer */
694*c83a76b0SSuyog Pawar     if(n_layers > (MAX_NUM_LAYERS - 1))
695*c83a76b0SSuyog Pawar         return (-1);
696*c83a76b0SSuyog Pawar 
697*c83a76b0SSuyog Pawar     if(n_layers < 1)
698*c83a76b0SSuyog Pawar         return (-1);
699*c83a76b0SSuyog Pawar 
700*c83a76b0SSuyog Pawar     /* Width of the coarsest encode layer got to be >= 2*min_wd where min_Wd */
701*c83a76b0SSuyog Pawar     /* represents the min allowed width in any layer. Ditto with ht          */
702*c83a76b0SSuyog Pawar     if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE))
703*c83a76b0SSuyog Pawar         return (-1);
704*c83a76b0SSuyog Pawar     if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE))
705*c83a76b0SSuyog Pawar         return (-1);
706*c83a76b0SSuyog Pawar     if(ps_prms->max_num_ref > MAX_NUM_REF)
707*c83a76b0SSuyog Pawar         return (-1);
708*c83a76b0SSuyog Pawar     if(ps_prms->max_num_ref < 0)
709*c83a76b0SSuyog Pawar         return (-1);
710*c83a76b0SSuyog Pawar 
711*c83a76b0SSuyog Pawar     return (0);
712*c83a76b0SSuyog Pawar }
/**
********************************************************************************
*  @fn     hme_set_layer_res_attrs()
*
*  @brief  Stores the resolution attributes of a layer and, for a non encode
*          layer, the geometry of its padded input buffer
*
*  @param[in,out] ps_layer : layer context to update; pu1_inp_base is read for
*                            non encode layers
*  @param[in] wd, ht           : layer width and height
*  @param[in] disp_wd, disp_ht : layer display width and height
*  @param[in] u1_enc           : 0 selects the non encode path that also sets up
*                                the input buffer attributes
*
*  @return   none
********************************************************************************
*/
void hme_set_layer_res_attrs(
    layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
{
    ps_layer->i4_disp_wd = disp_wd;
    ps_layer->i4_disp_ht = disp_ht;
    ps_layer->i4_wd = wd;
    ps_layer->i4_ht = ht;

    /* Input buffer attributes are only maintained for non encode layers */
    if(0 != u1_enc)
    {
        return;
    }

    /* 16 pixels of padding on each side */
    ps_layer->i4_pad_x_inp = 16;
    ps_layer->i4_pad_y_inp = 16;

    /* Stride covers the width plus 32 + 4 extra columns */
    ps_layer->i4_inp_stride = wd + 32 + 4;

    /* First picture pixel sits 16 rows down and 16 columns in from the base */
    ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16;
    ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset;
}
729*c83a76b0SSuyog Pawar 
730*c83a76b0SSuyog Pawar /**
731*c83a76b0SSuyog Pawar ********************************************************************************
732*c83a76b0SSuyog Pawar *  @fn     hme_coarse_get_layer1_mv_bank_ref_idx_size()
733*c83a76b0SSuyog Pawar *
734*c83a76b0SSuyog Pawar *  @brief  returns the MV bank and ref idx size of Layer 1 (penultimate)
735*c83a76b0SSuyog Pawar *
736*c83a76b0SSuyog Pawar *  @return   none
737*c83a76b0SSuyog Pawar ********************************************************************************
738*c83a76b0SSuyog Pawar */
hme_coarse_get_layer1_mv_bank_ref_idx_size(S32 n_tot_layers,S32 * a_wd,S32 * a_ht,S32 max_num_ref,S32 * pi4_mv_bank_size,S32 * pi4_ref_idx_size)739*c83a76b0SSuyog Pawar void hme_coarse_get_layer1_mv_bank_ref_idx_size(
740*c83a76b0SSuyog Pawar     S32 n_tot_layers,
741*c83a76b0SSuyog Pawar     S32 *a_wd,
742*c83a76b0SSuyog Pawar     S32 *a_ht,
743*c83a76b0SSuyog Pawar     S32 max_num_ref,
744*c83a76b0SSuyog Pawar     S32 *pi4_mv_bank_size,
745*c83a76b0SSuyog Pawar     S32 *pi4_ref_idx_size)
746*c83a76b0SSuyog Pawar {
747*c83a76b0SSuyog Pawar     S32 num_blks, num_mvs_per_blk, num_ref;
748*c83a76b0SSuyog Pawar     S32 num_cols, num_rows, num_mvs_per_row;
749*c83a76b0SSuyog Pawar     S32 is_explicit_store = 1;
750*c83a76b0SSuyog Pawar     S32 wd, ht, num_layers_explicit_search;
751*c83a76b0SSuyog Pawar     S32 num_results, use_4x4;
752*c83a76b0SSuyog Pawar     wd = a_wd[1];
753*c83a76b0SSuyog Pawar     ht = a_ht[1];
754*c83a76b0SSuyog Pawar 
755*c83a76b0SSuyog Pawar     /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
756*c83a76b0SSuyog Pawar     /* frames in all but final layer In final layer, it could be 1/2 */
757*c83a76b0SSuyog Pawar     //ps_hme_init_prms->num_layers_explicit_search = 3;
758*c83a76b0SSuyog Pawar     num_layers_explicit_search = 3;
759*c83a76b0SSuyog Pawar 
760*c83a76b0SSuyog Pawar     if(num_layers_explicit_search <= 0)
761*c83a76b0SSuyog Pawar         num_layers_explicit_search = n_tot_layers - 1;
762*c83a76b0SSuyog Pawar 
763*c83a76b0SSuyog Pawar     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
764*c83a76b0SSuyog Pawar 
765*c83a76b0SSuyog Pawar     /* Possibly implicit search for lower (finer) layers */
766*c83a76b0SSuyog Pawar     if(n_tot_layers - 1 > num_layers_explicit_search)
767*c83a76b0SSuyog Pawar         is_explicit_store = 0;
768*c83a76b0SSuyog Pawar 
769*c83a76b0SSuyog Pawar     /* coarsest layer alwasy uses 4x4 blks to store results */
770*c83a76b0SSuyog Pawar     if(1 == (n_tot_layers - 1))
771*c83a76b0SSuyog Pawar     {
772*c83a76b0SSuyog Pawar         /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
773*c83a76b0SSuyog Pawar         //ps_hme_init_prms->max_num_results_coarse = 4;
774*c83a76b0SSuyog Pawar         //vijay : with new algo in coarseset layer this has to be revisited
775*c83a76b0SSuyog Pawar         num_results = 4;
776*c83a76b0SSuyog Pawar     }
777*c83a76b0SSuyog Pawar     else
778*c83a76b0SSuyog Pawar     {
779*c83a76b0SSuyog Pawar         /* Every refinement layer stores a max of 2 results per partition */
780*c83a76b0SSuyog Pawar         //ps_hme_init_prms->max_num_results = 2;
781*c83a76b0SSuyog Pawar         num_results = 2;
782*c83a76b0SSuyog Pawar     }
783*c83a76b0SSuyog Pawar     use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0);
784*c83a76b0SSuyog Pawar 
785*c83a76b0SSuyog Pawar     num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
786*c83a76b0SSuyog Pawar     num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
787*c83a76b0SSuyog Pawar 
788*c83a76b0SSuyog Pawar     if(is_explicit_store)
789*c83a76b0SSuyog Pawar         num_ref = max_num_ref;
790*c83a76b0SSuyog Pawar     else
791*c83a76b0SSuyog Pawar         num_ref = 2;
792*c83a76b0SSuyog Pawar 
793*c83a76b0SSuyog Pawar     num_blks = num_cols * num_rows;
794*c83a76b0SSuyog Pawar     num_mvs_per_blk = num_ref * num_results;
795*c83a76b0SSuyog Pawar     num_mvs_per_row = num_mvs_per_blk * num_cols;
796*c83a76b0SSuyog Pawar 
797*c83a76b0SSuyog Pawar     /* stroe the sizes */
798*c83a76b0SSuyog Pawar     *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
799*c83a76b0SSuyog Pawar     *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
800*c83a76b0SSuyog Pawar 
801*c83a76b0SSuyog Pawar     return;
802*c83a76b0SSuyog Pawar }
803*c83a76b0SSuyog Pawar /**
804*c83a76b0SSuyog Pawar ********************************************************************************
805*c83a76b0SSuyog Pawar *  @fn     hme_alloc_init_layer_mv_bank()
806*c83a76b0SSuyog Pawar *
807*c83a76b0SSuyog Pawar *  @brief  memory alloc and init function for MV bank
808*c83a76b0SSuyog Pawar *
809*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
810*c83a76b0SSuyog Pawar ********************************************************************************
811*c83a76b0SSuyog Pawar */
hme_alloc_init_layer_mv_bank(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 is_explicit_store,hme_mv_t ** pps_mv_base,S08 ** pi1_ref_idx_base,S32 * pi4_num_mvs_per_row)812*c83a76b0SSuyog Pawar S32 hme_alloc_init_layer_mv_bank(
813*c83a76b0SSuyog Pawar     hme_memtab_t *ps_memtab,
814*c83a76b0SSuyog Pawar     S32 max_num_results,
815*c83a76b0SSuyog Pawar     S32 max_num_ref,
816*c83a76b0SSuyog Pawar     S32 use_4x4,
817*c83a76b0SSuyog Pawar     S32 mem_avail,
818*c83a76b0SSuyog Pawar     S32 u1_enc,
819*c83a76b0SSuyog Pawar     S32 wd,
820*c83a76b0SSuyog Pawar     S32 ht,
821*c83a76b0SSuyog Pawar     S32 is_explicit_store,
822*c83a76b0SSuyog Pawar     hme_mv_t **pps_mv_base,
823*c83a76b0SSuyog Pawar     S08 **pi1_ref_idx_base,
824*c83a76b0SSuyog Pawar     S32 *pi4_num_mvs_per_row)
825*c83a76b0SSuyog Pawar {
826*c83a76b0SSuyog Pawar     S32 count = 0;
827*c83a76b0SSuyog Pawar     S32 size;
828*c83a76b0SSuyog Pawar     S32 num_blks, num_mvs_per_blk;
829*c83a76b0SSuyog Pawar     S32 num_ref;
830*c83a76b0SSuyog Pawar     S32 num_cols, num_rows, num_mvs_per_row;
831*c83a76b0SSuyog Pawar 
832*c83a76b0SSuyog Pawar     if(is_explicit_store)
833*c83a76b0SSuyog Pawar         num_ref = max_num_ref;
834*c83a76b0SSuyog Pawar     else
835*c83a76b0SSuyog Pawar         num_ref = 2;
836*c83a76b0SSuyog Pawar 
837*c83a76b0SSuyog Pawar     /* MV Bank allocation takes into consideration following */
838*c83a76b0SSuyog Pawar     /* number of results per reference x max num refrences is the amount     */
839*c83a76b0SSuyog Pawar     /* bufffered up per blk. Numbero f blks in pic deps on the blk size,     */
840*c83a76b0SSuyog Pawar     /* which could be either 4x4 or 8x8.                                     */
841*c83a76b0SSuyog Pawar     num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
842*c83a76b0SSuyog Pawar     num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
843*c83a76b0SSuyog Pawar 
844*c83a76b0SSuyog Pawar     if(u1_enc)
845*c83a76b0SSuyog Pawar     {
846*c83a76b0SSuyog Pawar         /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
847*c83a76b0SSuyog Pawar         WORD32 num_ctb_cols = ((wd + 63) >> 6);
848*c83a76b0SSuyog Pawar         WORD32 num_ctb_rows = ((ht + 63) >> 6);
849*c83a76b0SSuyog Pawar 
850*c83a76b0SSuyog Pawar         num_cols = (num_ctb_cols << 3) + 2;
851*c83a76b0SSuyog Pawar         num_rows = (num_ctb_rows << 3) + 2;
852*c83a76b0SSuyog Pawar     }
853*c83a76b0SSuyog Pawar     num_blks = num_cols * num_rows;
854*c83a76b0SSuyog Pawar     num_mvs_per_blk = num_ref * max_num_results;
855*c83a76b0SSuyog Pawar     num_mvs_per_row = num_mvs_per_blk * num_cols;
856*c83a76b0SSuyog Pawar 
857*c83a76b0SSuyog Pawar     size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
858*c83a76b0SSuyog Pawar     if(mem_avail)
859*c83a76b0SSuyog Pawar     {
860*c83a76b0SSuyog Pawar         /* store this for run time verifications */
861*c83a76b0SSuyog Pawar         *pi4_num_mvs_per_row = num_mvs_per_row;
862*c83a76b0SSuyog Pawar         ASSERT(ps_memtab[count].size == size);
863*c83a76b0SSuyog Pawar         *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
864*c83a76b0SSuyog Pawar     }
865*c83a76b0SSuyog Pawar     else
866*c83a76b0SSuyog Pawar     {
867*c83a76b0SSuyog Pawar         ps_memtab[count].size = size;
868*c83a76b0SSuyog Pawar         ps_memtab[count].align = 4;
869*c83a76b0SSuyog Pawar         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
870*c83a76b0SSuyog Pawar     }
871*c83a76b0SSuyog Pawar 
872*c83a76b0SSuyog Pawar     count++;
873*c83a76b0SSuyog Pawar     /* Ref idx takes the same route as mvbase */
874*c83a76b0SSuyog Pawar 
875*c83a76b0SSuyog Pawar     size = num_blks * num_mvs_per_blk * sizeof(S08);
876*c83a76b0SSuyog Pawar     if(mem_avail)
877*c83a76b0SSuyog Pawar     {
878*c83a76b0SSuyog Pawar         ASSERT(ps_memtab[count].size == size);
879*c83a76b0SSuyog Pawar         *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
880*c83a76b0SSuyog Pawar     }
881*c83a76b0SSuyog Pawar     else
882*c83a76b0SSuyog Pawar     {
883*c83a76b0SSuyog Pawar         ps_memtab[count].size = size;
884*c83a76b0SSuyog Pawar         ps_memtab[count].align = 4;
885*c83a76b0SSuyog Pawar         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
886*c83a76b0SSuyog Pawar     }
887*c83a76b0SSuyog Pawar     count++;
888*c83a76b0SSuyog Pawar 
889*c83a76b0SSuyog Pawar     return (count);
890*c83a76b0SSuyog Pawar }
891*c83a76b0SSuyog Pawar /**
892*c83a76b0SSuyog Pawar ********************************************************************************
893*c83a76b0SSuyog Pawar *  @fn     hme_alloc_init_layer()
894*c83a76b0SSuyog Pawar *
895*c83a76b0SSuyog Pawar *  @brief  memory alloc and init function
896*c83a76b0SSuyog Pawar *
897*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
898*c83a76b0SSuyog Pawar ********************************************************************************
899*c83a76b0SSuyog Pawar */
hme_alloc_init_layer(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 disp_wd,S32 disp_ht,S32 segment_layer,S32 is_explicit_store,layer_ctxt_t ** pps_layer)900*c83a76b0SSuyog Pawar S32 hme_alloc_init_layer(
901*c83a76b0SSuyog Pawar     hme_memtab_t *ps_memtab,
902*c83a76b0SSuyog Pawar     S32 max_num_results,
903*c83a76b0SSuyog Pawar     S32 max_num_ref,
904*c83a76b0SSuyog Pawar     S32 use_4x4,
905*c83a76b0SSuyog Pawar     S32 mem_avail,
906*c83a76b0SSuyog Pawar     S32 u1_enc,
907*c83a76b0SSuyog Pawar     S32 wd,
908*c83a76b0SSuyog Pawar     S32 ht,
909*c83a76b0SSuyog Pawar     S32 disp_wd,
910*c83a76b0SSuyog Pawar     S32 disp_ht,
911*c83a76b0SSuyog Pawar     S32 segment_layer,
912*c83a76b0SSuyog Pawar     S32 is_explicit_store,
913*c83a76b0SSuyog Pawar     layer_ctxt_t **pps_layer)
914*c83a76b0SSuyog Pawar {
915*c83a76b0SSuyog Pawar     S32 count = 0;
916*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_layer = NULL;
917*c83a76b0SSuyog Pawar     S32 size;
918*c83a76b0SSuyog Pawar     S32 num_ref;
919*c83a76b0SSuyog Pawar 
920*c83a76b0SSuyog Pawar     ARG_NOT_USED(segment_layer);
921*c83a76b0SSuyog Pawar 
922*c83a76b0SSuyog Pawar     if(is_explicit_store)
923*c83a76b0SSuyog Pawar         num_ref = max_num_ref;
924*c83a76b0SSuyog Pawar     else
925*c83a76b0SSuyog Pawar         num_ref = 2;
926*c83a76b0SSuyog Pawar 
927*c83a76b0SSuyog Pawar     /* We do not store 4x4 results for encoding layers */
928*c83a76b0SSuyog Pawar     if(u1_enc)
929*c83a76b0SSuyog Pawar         use_4x4 = 0;
930*c83a76b0SSuyog Pawar 
931*c83a76b0SSuyog Pawar     size = sizeof(layer_ctxt_t);
932*c83a76b0SSuyog Pawar     if(mem_avail)
933*c83a76b0SSuyog Pawar     {
934*c83a76b0SSuyog Pawar         ASSERT(ps_memtab[count].size == size);
935*c83a76b0SSuyog Pawar         ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
936*c83a76b0SSuyog Pawar         *pps_layer = ps_layer;
937*c83a76b0SSuyog Pawar     }
938*c83a76b0SSuyog Pawar     else
939*c83a76b0SSuyog Pawar     {
940*c83a76b0SSuyog Pawar         ps_memtab[count].size = size;
941*c83a76b0SSuyog Pawar         ps_memtab[count].align = 8;
942*c83a76b0SSuyog Pawar         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
943*c83a76b0SSuyog Pawar     }
944*c83a76b0SSuyog Pawar 
945*c83a76b0SSuyog Pawar     count++;
946*c83a76b0SSuyog Pawar 
947*c83a76b0SSuyog Pawar     /* Input luma buffer allocated only for non encode case */
948*c83a76b0SSuyog Pawar     if(0 == u1_enc)
949*c83a76b0SSuyog Pawar     {
950*c83a76b0SSuyog Pawar         /* Allocate input with padding of 16 pixels */
951*c83a76b0SSuyog Pawar         size = (wd + 32 + 4) * (ht + 32 + 4);
952*c83a76b0SSuyog Pawar         if(mem_avail)
953*c83a76b0SSuyog Pawar         {
954*c83a76b0SSuyog Pawar             ASSERT(ps_memtab[count].size == size);
955*c83a76b0SSuyog Pawar             ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
956*c83a76b0SSuyog Pawar         }
957*c83a76b0SSuyog Pawar         else
958*c83a76b0SSuyog Pawar         {
959*c83a76b0SSuyog Pawar             ps_memtab[count].size = size;
960*c83a76b0SSuyog Pawar             ps_memtab[count].align = 16;
961*c83a76b0SSuyog Pawar             ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
962*c83a76b0SSuyog Pawar         }
963*c83a76b0SSuyog Pawar         count++;
964*c83a76b0SSuyog Pawar     }
965*c83a76b0SSuyog Pawar 
966*c83a76b0SSuyog Pawar     /* Allocate memory or just the layer mvbank strcture. */
967*c83a76b0SSuyog Pawar     /* TODO : see if this can be removed by moving it to layer_ctxt */
968*c83a76b0SSuyog Pawar     size = sizeof(layer_mv_t);
969*c83a76b0SSuyog Pawar 
970*c83a76b0SSuyog Pawar     if(mem_avail)
971*c83a76b0SSuyog Pawar     {
972*c83a76b0SSuyog Pawar         ASSERT(ps_memtab[count].size == size);
973*c83a76b0SSuyog Pawar         ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
974*c83a76b0SSuyog Pawar     }
975*c83a76b0SSuyog Pawar     else
976*c83a76b0SSuyog Pawar     {
977*c83a76b0SSuyog Pawar         ps_memtab[count].size = size;
978*c83a76b0SSuyog Pawar         ps_memtab[count].align = 8;
979*c83a76b0SSuyog Pawar         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
980*c83a76b0SSuyog Pawar     }
981*c83a76b0SSuyog Pawar 
982*c83a76b0SSuyog Pawar     count++;
983*c83a76b0SSuyog Pawar 
984*c83a76b0SSuyog Pawar     if(mem_avail)
985*c83a76b0SSuyog Pawar     {
986*c83a76b0SSuyog Pawar         hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
987*c83a76b0SSuyog Pawar     }
988*c83a76b0SSuyog Pawar 
989*c83a76b0SSuyog Pawar     return (count);
990*c83a76b0SSuyog Pawar }
991*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_alloc_init_search_nodes()
*
*  @brief  Requests, or carves up, the single memtab that backs the search
*          node arrays of one search_results_t.
*
*  @param[in,out] ps_search_results : search results whose aps_part_results
*                 pointers are filled in (touched only when mem_avail != 0)
*
*  @param[in,out] ps_memtabs : the one memtab used by this function. When
*                 mem_avail == 0 its size/align/attr are written; otherwise
*                 its pu1_mem is distributed
*
*  @param[in] mem_avail : 0 => fill in the memory request,
*                         non zero => memory is available, assign pointers
*
*  @param[in] max_num_ref : number of references to provide node arrays for
*
*  @param[in] max_num_results : number of search nodes per (ref, partition)
*
*  @return   number of memtabs used, always 1
********************************************************************************
*/
S32 hme_alloc_init_search_nodes(
    search_results_t *ps_search_results,
    hme_memtab_t *ps_memtabs,
    S32 mem_avail,
    S32 max_num_ref,
    S32 max_num_results)
{
    /* One memtab holds max_num_results nodes for every (ref, partition) pair */
    S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;

    if(mem_avail)
    {
        S32 ref_id, part_id;
        search_node_t *ps_next_node = (search_node_t *)ps_memtabs->pu1_mem;

        /* Memory handed back must match what was requested earlier */
        ASSERT(ps_memtabs->size == size);

        /**********************************************************************/
        /* For each CU, N best results are stored per partition, per ref.     */
        /* The memtab is sliced into num_refs * num_parts consecutive arrays  */
        /* of max_num_results nodes each, in [ref][part] order.               */
        /**********************************************************************/
        for(ref_id = 0; ref_id < max_num_ref; ref_id++)
        {
            for(part_id = 0; part_id < TOT_NUM_PARTS; part_id++)
            {
                ps_search_results->aps_part_results[ref_id][part_id] = ps_next_node;
                ps_next_node += max_num_results;
            }
        }
    }
    else
    {
        /* Request phase: only describe the memory that is needed */
        ps_memtabs->size = size;
        ps_memtabs->align = 4;
        ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    return (1);
}
1027*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_derive_num_layers()
*
*  @brief  Validates the resolutions of the encode (simulcast) layers and
*          derives the resolutions of the remaining non-encode HME layers by
*          successive downscaling by 2.
*
*  @param[in] n_enc_layers : number of encode layers; entries
*             [0, n_enc_layers) of p_wd and p_ht must be filled by the caller
*
*  @param[in,out] p_wd : per layer width. Entries from n_enc_layers onwards
*                 are written here (CEIL16 of half the previous layer)
*
*  @param[in,out] p_ht : per layer height, handled like p_wd
*
*  @param[out] p_disp_wd : per layer display width. Equal to p_wd for encode
*              layers, half of the previous layer for derived layers
*
*  @param[out] p_disp_ht : per layer display height, handled like p_disp_wd
*
*  @remarks All four arrays may be written up to index MAX_NUM_LAYERS - 1.
*
*  @return   total number of layers (encode + derived)
********************************************************************************
*/
S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
{
    S32 i;
    /* We keep downscaling by 2 till we hit one of the conditions:           */
    /* 1. MAX_NUM_LAYERS reached.                                            */
    /* 2. Width or ht goes below min width and ht allowed at coarsest layer  */
    ASSERT(n_enc_layers < MAX_NUM_LAYERS);
    ASSERT(n_enc_layers > 0);
    ASSERT(p_wd[0] <= HME_MAX_WIDTH);
    ASSERT(p_ht[0] <= HME_MAX_HEIGHT);

    p_disp_wd[0] = p_wd[0];
    p_disp_ht[0] = p_ht[0];
    /*************************************************************************/
    /* Verify that for simulcast, lower layer to higher layer ratio is bet   */
    /* 2 (dyadic) and 1.33. Typically it should be 1.5.                      */
    /* TODO : for interlace, we may choose to have additional downscaling for*/
    /* width alone in coarsest layer to next layer.                          */
    /*************************************************************************/
    for(i = 1; i < n_enc_layers; i++)
    {
        S32 wd1, wd2, ht1, ht2;
        wd1 = FLOOR16(p_wd[i - 1] >> 1);
        wd2 = CEIL16((p_wd[i - 1] * 3) >> 2);
        ASSERT(p_wd[i] >= wd1);
        ASSERT(p_wd[i] <= wd2);
        ht1 = FLOOR16(p_ht[i - 1] >> 1);
        ht2 = CEIL16((p_ht[i - 1] * 3) >> 2);
        ASSERT(p_ht[i] >= ht1);
        ASSERT(p_ht[i] <= ht2);

        /* Encode layers take their display size from the coded size, same   */
        /* as layer 0. Without this, the downscale loop below would read an  */
        /* unwritten p_disp_wd/ht[n_enc_layers - 1] whenever n_enc_layers > 1 */
        p_disp_wd[i] = p_wd[i];
        p_disp_ht[i] = p_ht[i];
    }
    ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
    ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);

    for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++)
    {
        /* Stop once the previous layer is too small to be halved again */
        if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE))
        {
            return (i);
        }
        /* Use CEIL16 to facilitate 16x16 searches in future, or to do       */
        /* segmentation study in future                                      */
        p_wd[i] = CEIL16(p_wd[i - 1] >> 1);
        p_ht[i] = CEIL16(p_ht[i - 1] >> 1);

        p_disp_wd[i] = p_disp_wd[i - 1] >> 1;
        p_disp_ht[i] = p_disp_ht[i - 1] >> 1;
    }
    return (i);
}
1078*c83a76b0SSuyog Pawar 
1079*c83a76b0SSuyog Pawar /**
1080*c83a76b0SSuyog Pawar ********************************************************************************
1081*c83a76b0SSuyog Pawar *  @fn     hme_get_mv_blk_size()
1082*c83a76b0SSuyog Pawar *
1083*c83a76b0SSuyog Pawar *  @brief  returns whether blk uses 4x4 size or something else.
1084*c83a76b0SSuyog Pawar *
1085*c83a76b0SSuyog Pawar *  @param[in] enable_4x4 : input param from application to enable 4x4
1086*c83a76b0SSuyog Pawar *
1087*c83a76b0SSuyog Pawar *  @param[in] layer_id : id of current layer (0 finest)
1088*c83a76b0SSuyog Pawar *
1089*c83a76b0SSuyog Pawar *  @param[in] num_layeers : total num layers
1090*c83a76b0SSuyog Pawar *
1091*c83a76b0SSuyog Pawar *  @param[in] is_enc : Whether encoding enabled for layer
1092*c83a76b0SSuyog Pawar *
1093*c83a76b0SSuyog Pawar *  @return   1 for 4x4 blks, 0 for 8x8
1094*c83a76b0SSuyog Pawar ********************************************************************************
1095*c83a76b0SSuyog Pawar */
hme_get_mv_blk_size(S32 enable_4x4,S32 layer_id,S32 num_layers,S32 is_enc)1096*c83a76b0SSuyog Pawar S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
1097*c83a76b0SSuyog Pawar {
1098*c83a76b0SSuyog Pawar     S32 use_4x4 = enable_4x4;
1099*c83a76b0SSuyog Pawar 
1100*c83a76b0SSuyog Pawar     if((layer_id <= 1) && (num_layers >= 4))
1101*c83a76b0SSuyog Pawar         use_4x4 = USE_4x4_IN_L1;
1102*c83a76b0SSuyog Pawar     if(layer_id == num_layers - 1)
1103*c83a76b0SSuyog Pawar         use_4x4 = 1;
1104*c83a76b0SSuyog Pawar     if(is_enc)
1105*c83a76b0SSuyog Pawar         use_4x4 = 0;
1106*c83a76b0SSuyog Pawar 
1107*c83a76b0SSuyog Pawar     return (use_4x4);
1108*c83a76b0SSuyog Pawar }
1109*c83a76b0SSuyog Pawar 
1110*c83a76b0SSuyog Pawar /**
1111*c83a76b0SSuyog Pawar ********************************************************************************
1112*c83a76b0SSuyog Pawar *  @fn     hme_enc_alloc_init_mem()
1113*c83a76b0SSuyog Pawar *
1114*c83a76b0SSuyog Pawar *  @brief  Requests/ assign memory based on mem avail
1115*c83a76b0SSuyog Pawar *
1116*c83a76b0SSuyog Pawar *  @param[in] ps_memtabs : memtab array
1117*c83a76b0SSuyog Pawar *
1118*c83a76b0SSuyog Pawar *  @param[in] ps_prms : init prms
1119*c83a76b0SSuyog Pawar *
1120*c83a76b0SSuyog Pawar *  @param[in] pv_ctxt : ME ctxt
1121*c83a76b0SSuyog Pawar *
1122*c83a76b0SSuyog Pawar *  @param[in] mem_avail : request/assign flag
1123*c83a76b0SSuyog Pawar *
1124*c83a76b0SSuyog Pawar *  @return   number of memtabs requested or assigned
1125*c83a76b0SSuyog Pawar ********************************************************************************
1126*c83a76b0SSuyog Pawar */
hme_enc_alloc_init_mem(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,void * pv_ctxt,S32 mem_avail,S32 i4_num_me_frm_pllel)1127*c83a76b0SSuyog Pawar S32 hme_enc_alloc_init_mem(
1128*c83a76b0SSuyog Pawar     hme_memtab_t *ps_memtabs,
1129*c83a76b0SSuyog Pawar     hme_init_prms_t *ps_prms,
1130*c83a76b0SSuyog Pawar     void *pv_ctxt,
1131*c83a76b0SSuyog Pawar     S32 mem_avail,
1132*c83a76b0SSuyog Pawar     S32 i4_num_me_frm_pllel)
1133*c83a76b0SSuyog Pawar {
1134*c83a76b0SSuyog Pawar     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
1135*c83a76b0SSuyog Pawar     me_ctxt_t *ps_ctxt;
1136*c83a76b0SSuyog Pawar     S32 count = 0, size, i, j, use_4x4;
1137*c83a76b0SSuyog Pawar     S32 n_tot_layers, n_enc_layers;
1138*c83a76b0SSuyog Pawar     S32 num_layers_explicit_search;
1139*c83a76b0SSuyog Pawar     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1140*c83a76b0SSuyog Pawar     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1141*c83a76b0SSuyog Pawar     S32 num_results;
1142*c83a76b0SSuyog Pawar     S32 num_thrds;
1143*c83a76b0SSuyog Pawar     S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1144*c83a76b0SSuyog Pawar 
1145*c83a76b0SSuyog Pawar     /* MV bank changes */
1146*c83a76b0SSuyog Pawar     hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1147*c83a76b0SSuyog Pawar     S32 i4_num_mvs_per_row = 0;
1148*c83a76b0SSuyog Pawar     S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1149*c83a76b0SSuyog Pawar 
1150*c83a76b0SSuyog Pawar     n_enc_layers = ps_prms->num_simulcast_layers;
1151*c83a76b0SSuyog Pawar 
1152*c83a76b0SSuyog Pawar     /* Memtab 0: handle */
1153*c83a76b0SSuyog Pawar     size = sizeof(me_master_ctxt_t);
1154*c83a76b0SSuyog Pawar     if(mem_avail)
1155*c83a76b0SSuyog Pawar     {
1156*c83a76b0SSuyog Pawar         /* store the number of processing threads */
1157*c83a76b0SSuyog Pawar         ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1158*c83a76b0SSuyog Pawar     }
1159*c83a76b0SSuyog Pawar     else
1160*c83a76b0SSuyog Pawar     {
1161*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1162*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 8;
1163*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1164*c83a76b0SSuyog Pawar     }
1165*c83a76b0SSuyog Pawar 
1166*c83a76b0SSuyog Pawar     count++;
1167*c83a76b0SSuyog Pawar 
1168*c83a76b0SSuyog Pawar     /* Memtab 1: ME threads ctxt */
1169*c83a76b0SSuyog Pawar     size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
1170*c83a76b0SSuyog Pawar     if(mem_avail)
1171*c83a76b0SSuyog Pawar     {
1172*c83a76b0SSuyog Pawar         me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;
1173*c83a76b0SSuyog Pawar 
1174*c83a76b0SSuyog Pawar         /* store the indivisual thread ctxt pointers */
1175*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1176*c83a76b0SSuyog Pawar         {
1177*c83a76b0SSuyog Pawar             ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1178*c83a76b0SSuyog Pawar         }
1179*c83a76b0SSuyog Pawar     }
1180*c83a76b0SSuyog Pawar     else
1181*c83a76b0SSuyog Pawar     {
1182*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1183*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 8;
1184*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1185*c83a76b0SSuyog Pawar     }
1186*c83a76b0SSuyog Pawar 
1187*c83a76b0SSuyog Pawar     count++;
1188*c83a76b0SSuyog Pawar 
1189*c83a76b0SSuyog Pawar     /* Memtab 2: ME frame ctxts */
1190*c83a76b0SSuyog Pawar     size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
1191*c83a76b0SSuyog Pawar     if(mem_avail)
1192*c83a76b0SSuyog Pawar     {
1193*c83a76b0SSuyog Pawar         me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;
1194*c83a76b0SSuyog Pawar 
1195*c83a76b0SSuyog Pawar         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1196*c83a76b0SSuyog Pawar         {
1197*c83a76b0SSuyog Pawar             /* store the indivisual thread ctxt pointers */
1198*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1199*c83a76b0SSuyog Pawar             {
1200*c83a76b0SSuyog Pawar                 ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;
1201*c83a76b0SSuyog Pawar 
1202*c83a76b0SSuyog Pawar                 ps_me_frm_tmp_ctxt++;
1203*c83a76b0SSuyog Pawar             }
1204*c83a76b0SSuyog Pawar         }
1205*c83a76b0SSuyog Pawar     }
1206*c83a76b0SSuyog Pawar     else
1207*c83a76b0SSuyog Pawar     {
1208*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1209*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 8;
1210*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1211*c83a76b0SSuyog Pawar     }
1212*c83a76b0SSuyog Pawar 
1213*c83a76b0SSuyog Pawar     count++;
1214*c83a76b0SSuyog Pawar 
1215*c83a76b0SSuyog Pawar     memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1216*c83a76b0SSuyog Pawar     memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1217*c83a76b0SSuyog Pawar     /*************************************************************************/
1218*c83a76b0SSuyog Pawar     /* Derive the number of HME layers, including both encoded and non encode*/
1219*c83a76b0SSuyog Pawar     /* This function also derives the width and ht of each layer.            */
1220*c83a76b0SSuyog Pawar     /*************************************************************************/
1221*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
1222*c83a76b0SSuyog Pawar     num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1223*c83a76b0SSuyog Pawar     if(num_layers_explicit_search <= 0)
1224*c83a76b0SSuyog Pawar         num_layers_explicit_search = n_tot_layers - 1;
1225*c83a76b0SSuyog Pawar 
1226*c83a76b0SSuyog Pawar     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1227*c83a76b0SSuyog Pawar 
1228*c83a76b0SSuyog Pawar     if(mem_avail)
1229*c83a76b0SSuyog Pawar     {
1230*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1231*c83a76b0SSuyog Pawar         {
1232*c83a76b0SSuyog Pawar             me_frm_ctxt_t *ps_frm_ctxt;
1233*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1234*c83a76b0SSuyog Pawar 
1235*c83a76b0SSuyog Pawar             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1236*c83a76b0SSuyog Pawar             {
1237*c83a76b0SSuyog Pawar                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1238*c83a76b0SSuyog Pawar 
1239*c83a76b0SSuyog Pawar                 memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
1240*c83a76b0SSuyog Pawar                 memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
1241*c83a76b0SSuyog Pawar 
1242*c83a76b0SSuyog Pawar                 /* only one enocde layer is used */
1243*c83a76b0SSuyog Pawar                 ps_frm_ctxt->num_layers = 1;
1244*c83a76b0SSuyog Pawar 
1245*c83a76b0SSuyog Pawar                 ps_frm_ctxt->i4_wd = a_wd[0];
1246*c83a76b0SSuyog Pawar                 ps_frm_ctxt->i4_ht = a_ht[0];
1247*c83a76b0SSuyog Pawar                 /*
1248*c83a76b0SSuyog Pawar             memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
1249*c83a76b0SSuyog Pawar             memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
1250*c83a76b0SSuyog Pawar */
1251*c83a76b0SSuyog Pawar                 ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1252*c83a76b0SSuyog Pawar                 ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
1253*c83a76b0SSuyog Pawar                 ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1254*c83a76b0SSuyog Pawar                 ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
1255*c83a76b0SSuyog Pawar             }
1256*c83a76b0SSuyog Pawar         }
1257*c83a76b0SSuyog Pawar     }
1258*c83a76b0SSuyog Pawar 
1259*c83a76b0SSuyog Pawar     /* Memtabs : Layers MV bank for encode layer */
1260*c83a76b0SSuyog Pawar     /* Each ref_desr in master ctxt will have seperate layer ctxt */
1261*c83a76b0SSuyog Pawar 
1262*c83a76b0SSuyog Pawar     for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1263*c83a76b0SSuyog Pawar     {
1264*c83a76b0SSuyog Pawar         for(j = 0; j < 1; j++)
1265*c83a76b0SSuyog Pawar         {
1266*c83a76b0SSuyog Pawar             S32 is_explicit_store = 1;
1267*c83a76b0SSuyog Pawar             S32 wd, ht;
1268*c83a76b0SSuyog Pawar             U08 u1_enc = 1;
1269*c83a76b0SSuyog Pawar             wd = a_wd[j];
1270*c83a76b0SSuyog Pawar             ht = a_ht[j];
1271*c83a76b0SSuyog Pawar 
1272*c83a76b0SSuyog Pawar             /* Possibly implicit search for lower (finer) layers */
1273*c83a76b0SSuyog Pawar             if(n_tot_layers - j > num_layers_explicit_search)
1274*c83a76b0SSuyog Pawar                 is_explicit_store = 0;
1275*c83a76b0SSuyog Pawar 
1276*c83a76b0SSuyog Pawar             /* Even if explicit search, we store only 2 results (L0 and L1) */
1277*c83a76b0SSuyog Pawar             /* in finest layer */
1278*c83a76b0SSuyog Pawar             if(j == 0)
1279*c83a76b0SSuyog Pawar             {
1280*c83a76b0SSuyog Pawar                 is_explicit_store = 0;
1281*c83a76b0SSuyog Pawar             }
1282*c83a76b0SSuyog Pawar 
1283*c83a76b0SSuyog Pawar             /* coarsest layer alwasy uses 4x4 blks to store results */
1284*c83a76b0SSuyog Pawar             if(j == n_tot_layers - 1)
1285*c83a76b0SSuyog Pawar             {
1286*c83a76b0SSuyog Pawar                 num_results = ps_prms->max_num_results_coarse;
1287*c83a76b0SSuyog Pawar             }
1288*c83a76b0SSuyog Pawar             else
1289*c83a76b0SSuyog Pawar             {
1290*c83a76b0SSuyog Pawar                 num_results = ps_prms->max_num_results;
1291*c83a76b0SSuyog Pawar                 if(j == 0)
1292*c83a76b0SSuyog Pawar                     num_results = 1;
1293*c83a76b0SSuyog Pawar             }
1294*c83a76b0SSuyog Pawar             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1295*c83a76b0SSuyog Pawar 
1296*c83a76b0SSuyog Pawar             count += hme_alloc_init_layer_mv_bank(
1297*c83a76b0SSuyog Pawar                 &ps_memtabs[count],
1298*c83a76b0SSuyog Pawar                 num_results,
1299*c83a76b0SSuyog Pawar                 ps_prms->max_num_ref,
1300*c83a76b0SSuyog Pawar                 use_4x4,
1301*c83a76b0SSuyog Pawar                 mem_avail,
1302*c83a76b0SSuyog Pawar                 u1_enc,
1303*c83a76b0SSuyog Pawar                 wd,
1304*c83a76b0SSuyog Pawar                 ht,
1305*c83a76b0SSuyog Pawar                 is_explicit_store,
1306*c83a76b0SSuyog Pawar                 &aps_mv_bank[i],
1307*c83a76b0SSuyog Pawar                 &api1_ref_idx[i],
1308*c83a76b0SSuyog Pawar                 &i4_num_mvs_per_row);
1309*c83a76b0SSuyog Pawar         }
1310*c83a76b0SSuyog Pawar     }
1311*c83a76b0SSuyog Pawar 
1312*c83a76b0SSuyog Pawar     /* Memtabs : Layers * num-ref + 1 */
1313*c83a76b0SSuyog Pawar     for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1314*c83a76b0SSuyog Pawar     {
1315*c83a76b0SSuyog Pawar         /* layer memory allocated only for enocde layer */
1316*c83a76b0SSuyog Pawar         for(j = 0; j < 1; j++)
1317*c83a76b0SSuyog Pawar         {
1318*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
1319*c83a76b0SSuyog Pawar             S32 is_explicit_store = 1;
1320*c83a76b0SSuyog Pawar             S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1321*c83a76b0SSuyog Pawar             S32 wd, ht;
1322*c83a76b0SSuyog Pawar             U08 u1_enc = 1;
1323*c83a76b0SSuyog Pawar             wd = a_wd[j];
1324*c83a76b0SSuyog Pawar             ht = a_ht[j];
1325*c83a76b0SSuyog Pawar 
1326*c83a76b0SSuyog Pawar             /* Possibly implicit search for lower (finer) layers */
1327*c83a76b0SSuyog Pawar             if(n_tot_layers - j > num_layers_explicit_search)
1328*c83a76b0SSuyog Pawar                 is_explicit_store = 0;
1329*c83a76b0SSuyog Pawar 
1330*c83a76b0SSuyog Pawar             /* Even if explicit search, we store only 2 results (L0 and L1) */
1331*c83a76b0SSuyog Pawar             /* in finest layer */
1332*c83a76b0SSuyog Pawar             if(j == 0)
1333*c83a76b0SSuyog Pawar             {
1334*c83a76b0SSuyog Pawar                 is_explicit_store = 0;
1335*c83a76b0SSuyog Pawar             }
1336*c83a76b0SSuyog Pawar 
1337*c83a76b0SSuyog Pawar             /* coarsest layer alwasy uses 4x4 blks to store results */
1338*c83a76b0SSuyog Pawar             if(j == n_tot_layers - 1)
1339*c83a76b0SSuyog Pawar             {
1340*c83a76b0SSuyog Pawar                 num_results = ps_prms->max_num_results_coarse;
1341*c83a76b0SSuyog Pawar             }
1342*c83a76b0SSuyog Pawar             else
1343*c83a76b0SSuyog Pawar             {
1344*c83a76b0SSuyog Pawar                 num_results = ps_prms->max_num_results;
1345*c83a76b0SSuyog Pawar                 if(j == 0)
1346*c83a76b0SSuyog Pawar                     num_results = 1;
1347*c83a76b0SSuyog Pawar             }
1348*c83a76b0SSuyog Pawar             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1349*c83a76b0SSuyog Pawar 
1350*c83a76b0SSuyog Pawar             count += hme_alloc_init_layer(
1351*c83a76b0SSuyog Pawar                 &ps_memtabs[count],
1352*c83a76b0SSuyog Pawar                 num_results,
1353*c83a76b0SSuyog Pawar                 ps_prms->max_num_ref,
1354*c83a76b0SSuyog Pawar                 use_4x4,
1355*c83a76b0SSuyog Pawar                 mem_avail,
1356*c83a76b0SSuyog Pawar                 u1_enc,
1357*c83a76b0SSuyog Pawar                 wd,
1358*c83a76b0SSuyog Pawar                 ht,
1359*c83a76b0SSuyog Pawar                 a_disp_wd[j],
1360*c83a76b0SSuyog Pawar                 a_disp_ht[j],
1361*c83a76b0SSuyog Pawar                 segment_this_layer,
1362*c83a76b0SSuyog Pawar                 is_explicit_store,
1363*c83a76b0SSuyog Pawar                 &ps_layer);
1364*c83a76b0SSuyog Pawar             if(mem_avail)
1365*c83a76b0SSuyog Pawar             {
1366*c83a76b0SSuyog Pawar                 /* same ps_layer memory pointer is stored in all the threads */
1367*c83a76b0SSuyog Pawar                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1368*c83a76b0SSuyog Pawar                 {
1369*c83a76b0SSuyog Pawar                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1370*c83a76b0SSuyog Pawar                     ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1371*c83a76b0SSuyog Pawar                 }
1372*c83a76b0SSuyog Pawar 
1373*c83a76b0SSuyog Pawar                 /* store the MV bank pointers */
1374*c83a76b0SSuyog Pawar                 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
1375*c83a76b0SSuyog Pawar                 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
1376*c83a76b0SSuyog Pawar                 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
1377*c83a76b0SSuyog Pawar             }
1378*c83a76b0SSuyog Pawar         }
1379*c83a76b0SSuyog Pawar     }
1380*c83a76b0SSuyog Pawar 
1381*c83a76b0SSuyog Pawar     /* Memtabs : Buf Mgr for predictor bufs and working mem */
1382*c83a76b0SSuyog Pawar     /* TODO : Parameterise this appropriately */
1383*c83a76b0SSuyog Pawar     size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;
1384*c83a76b0SSuyog Pawar 
1385*c83a76b0SSuyog Pawar     if(mem_avail)
1386*c83a76b0SSuyog Pawar     {
1387*c83a76b0SSuyog Pawar         U08 *pu1_mem = ps_memtabs[count].pu1_mem;
1388*c83a76b0SSuyog Pawar 
1389*c83a76b0SSuyog Pawar         ASSERT(ps_memtabs[count].size == size);
1390*c83a76b0SSuyog Pawar 
1391*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1392*c83a76b0SSuyog Pawar         {
1393*c83a76b0SSuyog Pawar             me_frm_ctxt_t *ps_frm_ctxt;
1394*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1395*c83a76b0SSuyog Pawar 
1396*c83a76b0SSuyog Pawar             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1397*c83a76b0SSuyog Pawar             {
1398*c83a76b0SSuyog Pawar                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1399*c83a76b0SSuyog Pawar 
1400*c83a76b0SSuyog Pawar                 hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);
1401*c83a76b0SSuyog Pawar 
1402*c83a76b0SSuyog Pawar                 if(i4_num_me_frm_pllel != 1)
1403*c83a76b0SSuyog Pawar                 {
1404*c83a76b0SSuyog Pawar                     /* update the memory buffer pointer */
1405*c83a76b0SSuyog Pawar                     pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1406*c83a76b0SSuyog Pawar                 }
1407*c83a76b0SSuyog Pawar             }
1408*c83a76b0SSuyog Pawar             if(i4_num_me_frm_pllel == 1)
1409*c83a76b0SSuyog Pawar             {
1410*c83a76b0SSuyog Pawar                 pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1411*c83a76b0SSuyog Pawar             }
1412*c83a76b0SSuyog Pawar         }
1413*c83a76b0SSuyog Pawar     }
1414*c83a76b0SSuyog Pawar     else
1415*c83a76b0SSuyog Pawar     {
1416*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1417*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 4;
1418*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1419*c83a76b0SSuyog Pawar     }
1420*c83a76b0SSuyog Pawar     count++;
1421*c83a76b0SSuyog Pawar 
1422*c83a76b0SSuyog Pawar     /*************************************************************************/
1423*c83a76b0SSuyog Pawar     /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
1424*c83a76b0SSuyog Pawar     /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred  */
1425*c83a76b0SSuyog Pawar     /*************************************************************************/
1426*c83a76b0SSuyog Pawar     size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
1427*c83a76b0SSuyog Pawar            i4_num_me_frm_pllel;
1428*c83a76b0SSuyog Pawar 
1429*c83a76b0SSuyog Pawar     if(mem_avail)
1430*c83a76b0SSuyog Pawar     {
1431*c83a76b0SSuyog Pawar         S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;
1432*c83a76b0SSuyog Pawar 
1433*c83a76b0SSuyog Pawar         ASSERT(ps_memtabs[count].size == size);
1434*c83a76b0SSuyog Pawar 
1435*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1436*c83a76b0SSuyog Pawar         {
1437*c83a76b0SSuyog Pawar             me_frm_ctxt_t *ps_frm_ctxt;
1438*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1439*c83a76b0SSuyog Pawar 
1440*c83a76b0SSuyog Pawar             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1441*c83a76b0SSuyog Pawar             {
1442*c83a76b0SSuyog Pawar                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1443*c83a76b0SSuyog Pawar 
1444*c83a76b0SSuyog Pawar                 ps_frm_ctxt->pi2_inp_bck = pi2_mem;
1445*c83a76b0SSuyog Pawar                 /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1446*c83a76b0SSuyog Pawar                 if(i4_num_me_frm_pllel != 1)
1447*c83a76b0SSuyog Pawar                 {
1448*c83a76b0SSuyog Pawar                     pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1449*c83a76b0SSuyog Pawar                 }
1450*c83a76b0SSuyog Pawar             }
1451*c83a76b0SSuyog Pawar             if(i4_num_me_frm_pllel == 1)
1452*c83a76b0SSuyog Pawar             {
1453*c83a76b0SSuyog Pawar                 pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1454*c83a76b0SSuyog Pawar             }
1455*c83a76b0SSuyog Pawar         }
1456*c83a76b0SSuyog Pawar     }
1457*c83a76b0SSuyog Pawar     else
1458*c83a76b0SSuyog Pawar     {
1459*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1460*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 16;
1461*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1462*c83a76b0SSuyog Pawar     }
1463*c83a76b0SSuyog Pawar 
1464*c83a76b0SSuyog Pawar     count++;
1465*c83a76b0SSuyog Pawar 
1466*c83a76b0SSuyog Pawar     /* Allocate a memtab for each histogram. As many as num ref and number of threads */
1467*c83a76b0SSuyog Pawar     /* Loop across for each ME_FRM in PARALLEL */
1468*c83a76b0SSuyog Pawar     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1469*c83a76b0SSuyog Pawar     {
1470*c83a76b0SSuyog Pawar         for(i = 0; i < ps_prms->max_num_ref; i++)
1471*c83a76b0SSuyog Pawar         {
1472*c83a76b0SSuyog Pawar             size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
1473*c83a76b0SSuyog Pawar             if(mem_avail)
1474*c83a76b0SSuyog Pawar             {
1475*c83a76b0SSuyog Pawar                 mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
1476*c83a76b0SSuyog Pawar 
1477*c83a76b0SSuyog Pawar                 ASSERT(size == ps_memtabs[count].size);
1478*c83a76b0SSuyog Pawar 
1479*c83a76b0SSuyog Pawar                 /* divide the memory accross the threads */
1480*c83a76b0SSuyog Pawar                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1481*c83a76b0SSuyog Pawar                 {
1482*c83a76b0SSuyog Pawar                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1483*c83a76b0SSuyog Pawar 
1484*c83a76b0SSuyog Pawar                     ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
1485*c83a76b0SSuyog Pawar                     ps_mv_hist++;
1486*c83a76b0SSuyog Pawar                 }
1487*c83a76b0SSuyog Pawar             }
1488*c83a76b0SSuyog Pawar             else
1489*c83a76b0SSuyog Pawar             {
1490*c83a76b0SSuyog Pawar                 ps_memtabs[count].size = size;
1491*c83a76b0SSuyog Pawar                 ps_memtabs[count].align = 8;
1492*c83a76b0SSuyog Pawar                 ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1493*c83a76b0SSuyog Pawar             }
1494*c83a76b0SSuyog Pawar             count++;
1495*c83a76b0SSuyog Pawar         }
1496*c83a76b0SSuyog Pawar         if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1497*c83a76b0SSuyog Pawar         {
1498*c83a76b0SSuyog Pawar             /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1499*c83a76b0SSuyog Pawar             /** bring the count back to earlier value if there are no me frames in parallel. don't decrement for last loop **/
1500*c83a76b0SSuyog Pawar             count -= ps_prms->max_num_ref;
1501*c83a76b0SSuyog Pawar         }
1502*c83a76b0SSuyog Pawar     }
1503*c83a76b0SSuyog Pawar 
1504*c83a76b0SSuyog Pawar     /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
1505*c83a76b0SSuyog Pawar     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1506*c83a76b0SSuyog Pawar     {
1507*c83a76b0SSuyog Pawar         S32 count_cpy = count;
1508*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1509*c83a76b0SSuyog Pawar         {
1510*c83a76b0SSuyog Pawar             if(mem_avail)
1511*c83a76b0SSuyog Pawar             {
1512*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1513*c83a76b0SSuyog Pawar             }
1514*c83a76b0SSuyog Pawar 
1515*c83a76b0SSuyog Pawar             for(i = 0; i < 21; i++)
1516*c83a76b0SSuyog Pawar             {
1517*c83a76b0SSuyog Pawar                 search_results_t *ps_search_results = NULL;
1518*c83a76b0SSuyog Pawar                 if(mem_avail)
1519*c83a76b0SSuyog Pawar                 {
1520*c83a76b0SSuyog Pawar                     if(i < 16)
1521*c83a76b0SSuyog Pawar                     {
1522*c83a76b0SSuyog Pawar                         ps_search_results =
1523*c83a76b0SSuyog Pawar                             &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
1524*c83a76b0SSuyog Pawar                     }
1525*c83a76b0SSuyog Pawar                     else if(i < 20)
1526*c83a76b0SSuyog Pawar                     {
1527*c83a76b0SSuyog Pawar                         ps_search_results =
1528*c83a76b0SSuyog Pawar                             &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
1529*c83a76b0SSuyog Pawar                         ps_search_results->ps_cu_results =
1530*c83a76b0SSuyog Pawar                             &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
1531*c83a76b0SSuyog Pawar                     }
1532*c83a76b0SSuyog Pawar                     else if(i == 20)
1533*c83a76b0SSuyog Pawar                     {
1534*c83a76b0SSuyog Pawar                         ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
1535*c83a76b0SSuyog Pawar                         ps_search_results->ps_cu_results =
1536*c83a76b0SSuyog Pawar                             &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
1537*c83a76b0SSuyog Pawar                     }
1538*c83a76b0SSuyog Pawar                     else
1539*c83a76b0SSuyog Pawar                     {
1540*c83a76b0SSuyog Pawar                         /* 8x8 search results are not required in LO ME */
1541*c83a76b0SSuyog Pawar                         ASSERT(0);
1542*c83a76b0SSuyog Pawar                     }
1543*c83a76b0SSuyog Pawar                 }
1544*c83a76b0SSuyog Pawar                 count += hme_alloc_init_search_nodes(
1545*c83a76b0SSuyog Pawar                     ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
1546*c83a76b0SSuyog Pawar             }
1547*c83a76b0SSuyog Pawar         }
1548*c83a76b0SSuyog Pawar 
1549*c83a76b0SSuyog Pawar         if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1550*c83a76b0SSuyog Pawar         {
1551*c83a76b0SSuyog Pawar             count = count_cpy;
1552*c83a76b0SSuyog Pawar         }
1553*c83a76b0SSuyog Pawar     }
1554*c83a76b0SSuyog Pawar 
1555*c83a76b0SSuyog Pawar     /* Weighted inputs, one for each ref + one non weighted */
1556*c83a76b0SSuyog Pawar     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1557*c83a76b0SSuyog Pawar     {
1558*c83a76b0SSuyog Pawar         size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
1559*c83a76b0SSuyog Pawar         if(mem_avail)
1560*c83a76b0SSuyog Pawar         {
1561*c83a76b0SSuyog Pawar             U08 *pu1_mem;
1562*c83a76b0SSuyog Pawar             ASSERT(ps_memtabs[count].size == size);
1563*c83a76b0SSuyog Pawar             pu1_mem = ps_memtabs[count].pu1_mem;
1564*c83a76b0SSuyog Pawar 
1565*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1566*c83a76b0SSuyog Pawar             {
1567*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1568*c83a76b0SSuyog Pawar 
1569*c83a76b0SSuyog Pawar                 for(i = 0; i < ps_prms->max_num_ref + 1; i++)
1570*c83a76b0SSuyog Pawar                 {
1571*c83a76b0SSuyog Pawar                     ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
1572*c83a76b0SSuyog Pawar                     pu1_mem += (ctb_wd * ctb_wd);
1573*c83a76b0SSuyog Pawar                 }
1574*c83a76b0SSuyog Pawar             }
1575*c83a76b0SSuyog Pawar         }
1576*c83a76b0SSuyog Pawar         else
1577*c83a76b0SSuyog Pawar         {
1578*c83a76b0SSuyog Pawar             ps_memtabs[count].size = size;
1579*c83a76b0SSuyog Pawar             ps_memtabs[count].align = 16;
1580*c83a76b0SSuyog Pawar             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1581*c83a76b0SSuyog Pawar         }
1582*c83a76b0SSuyog Pawar         if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
1583*c83a76b0SSuyog Pawar         {
1584*c83a76b0SSuyog Pawar             count++;
1585*c83a76b0SSuyog Pawar         }
1586*c83a76b0SSuyog Pawar     }
1587*c83a76b0SSuyog Pawar 
1588*c83a76b0SSuyog Pawar     /* if memory is allocated then initialise the frm prms ptrs of each thrd */
1589*c83a76b0SSuyog Pawar     if(mem_avail)
1590*c83a76b0SSuyog Pawar     {
1591*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1592*c83a76b0SSuyog Pawar         {
1593*c83a76b0SSuyog Pawar             me_frm_ctxt_t *ps_frm_ctxt;
1594*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1595*c83a76b0SSuyog Pawar 
1596*c83a76b0SSuyog Pawar             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1597*c83a76b0SSuyog Pawar             {
1598*c83a76b0SSuyog Pawar                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1599*c83a76b0SSuyog Pawar 
1600*c83a76b0SSuyog Pawar                 ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
1601*c83a76b0SSuyog Pawar                 ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
1602*c83a76b0SSuyog Pawar             }
1603*c83a76b0SSuyog Pawar         }
1604*c83a76b0SSuyog Pawar     }
1605*c83a76b0SSuyog Pawar 
1606*c83a76b0SSuyog Pawar     /* Memory allocation for use in Clustering */
1607*c83a76b0SSuyog Pawar     if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
1608*c83a76b0SSuyog Pawar     {
1609*c83a76b0SSuyog Pawar         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1610*c83a76b0SSuyog Pawar         {
1611*c83a76b0SSuyog Pawar             size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
1612*c83a76b0SSuyog Pawar                    sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
1613*c83a76b0SSuyog Pawar             size *= ps_prms->i4_num_proc_thrds;
1614*c83a76b0SSuyog Pawar 
1615*c83a76b0SSuyog Pawar             if(mem_avail)
1616*c83a76b0SSuyog Pawar             {
1617*c83a76b0SSuyog Pawar                 U08 *pu1_mem;
1618*c83a76b0SSuyog Pawar 
1619*c83a76b0SSuyog Pawar                 ASSERT(ps_memtabs[count].size == size);
1620*c83a76b0SSuyog Pawar                 pu1_mem = ps_memtabs[count].pu1_mem;
1621*c83a76b0SSuyog Pawar 
1622*c83a76b0SSuyog Pawar                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1623*c83a76b0SSuyog Pawar                 {
1624*c83a76b0SSuyog Pawar                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1625*c83a76b0SSuyog Pawar 
1626*c83a76b0SSuyog Pawar                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
1627*c83a76b0SSuyog Pawar                     pu1_mem += (16 * sizeof(cluster_16x16_blk_t));
1628*c83a76b0SSuyog Pawar 
1629*c83a76b0SSuyog Pawar                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
1630*c83a76b0SSuyog Pawar                     pu1_mem += (4 * sizeof(cluster_32x32_blk_t));
1631*c83a76b0SSuyog Pawar 
1632*c83a76b0SSuyog Pawar                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
1633*c83a76b0SSuyog Pawar                     pu1_mem += (sizeof(cluster_64x64_blk_t));
1634*c83a76b0SSuyog Pawar 
1635*c83a76b0SSuyog Pawar                     ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
1636*c83a76b0SSuyog Pawar                         (ctb_cluster_info_t *)pu1_mem;
1637*c83a76b0SSuyog Pawar                     pu1_mem += (sizeof(ctb_cluster_info_t));
1638*c83a76b0SSuyog Pawar                 }
1639*c83a76b0SSuyog Pawar             }
1640*c83a76b0SSuyog Pawar             else
1641*c83a76b0SSuyog Pawar             {
1642*c83a76b0SSuyog Pawar                 ps_memtabs[count].size = size;
1643*c83a76b0SSuyog Pawar                 ps_memtabs[count].align = 16;
1644*c83a76b0SSuyog Pawar                 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1645*c83a76b0SSuyog Pawar             }
1646*c83a76b0SSuyog Pawar 
1647*c83a76b0SSuyog Pawar             if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1648*c83a76b0SSuyog Pawar             {
1649*c83a76b0SSuyog Pawar                 count++;
1650*c83a76b0SSuyog Pawar             }
1651*c83a76b0SSuyog Pawar         }
1652*c83a76b0SSuyog Pawar     }
1653*c83a76b0SSuyog Pawar     else if(mem_avail)
1654*c83a76b0SSuyog Pawar     {
1655*c83a76b0SSuyog Pawar         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1656*c83a76b0SSuyog Pawar         {
1657*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1658*c83a76b0SSuyog Pawar             {
1659*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1660*c83a76b0SSuyog Pawar 
1661*c83a76b0SSuyog Pawar                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;
1662*c83a76b0SSuyog Pawar 
1663*c83a76b0SSuyog Pawar                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;
1664*c83a76b0SSuyog Pawar 
1665*c83a76b0SSuyog Pawar                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;
1666*c83a76b0SSuyog Pawar 
1667*c83a76b0SSuyog Pawar                 ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
1668*c83a76b0SSuyog Pawar             }
1669*c83a76b0SSuyog Pawar         }
1670*c83a76b0SSuyog Pawar     }
1671*c83a76b0SSuyog Pawar 
1672*c83a76b0SSuyog Pawar     for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1673*c83a76b0SSuyog Pawar     {
1674*c83a76b0SSuyog Pawar         size = sizeof(fullpel_refine_ctxt_t);
1675*c83a76b0SSuyog Pawar         size *= ps_prms->i4_num_proc_thrds;
1676*c83a76b0SSuyog Pawar 
1677*c83a76b0SSuyog Pawar         if(mem_avail)
1678*c83a76b0SSuyog Pawar         {
1679*c83a76b0SSuyog Pawar             U08 *pu1_mem;
1680*c83a76b0SSuyog Pawar 
1681*c83a76b0SSuyog Pawar             ASSERT(ps_memtabs[count].size == size);
1682*c83a76b0SSuyog Pawar             pu1_mem = ps_memtabs[count].pu1_mem;
1683*c83a76b0SSuyog Pawar 
1684*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1685*c83a76b0SSuyog Pawar             {
1686*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1687*c83a76b0SSuyog Pawar 
1688*c83a76b0SSuyog Pawar                 ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
1689*c83a76b0SSuyog Pawar                     (fullpel_refine_ctxt_t *)pu1_mem;
1690*c83a76b0SSuyog Pawar                 pu1_mem += (sizeof(fullpel_refine_ctxt_t));
1691*c83a76b0SSuyog Pawar             }
1692*c83a76b0SSuyog Pawar         }
1693*c83a76b0SSuyog Pawar         else
1694*c83a76b0SSuyog Pawar         {
1695*c83a76b0SSuyog Pawar             ps_memtabs[count].size = size;
1696*c83a76b0SSuyog Pawar             ps_memtabs[count].align = 16;
1697*c83a76b0SSuyog Pawar             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1698*c83a76b0SSuyog Pawar         }
1699*c83a76b0SSuyog Pawar 
1700*c83a76b0SSuyog Pawar         if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1701*c83a76b0SSuyog Pawar         {
1702*c83a76b0SSuyog Pawar             count++;
1703*c83a76b0SSuyog Pawar         }
1704*c83a76b0SSuyog Pawar     }
1705*c83a76b0SSuyog Pawar 
1706*c83a76b0SSuyog Pawar     /* Memory for ihevce_me_optimised_function_list_t struct  */
1707*c83a76b0SSuyog Pawar     if(mem_avail)
1708*c83a76b0SSuyog Pawar     {
1709*c83a76b0SSuyog Pawar         ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
1710*c83a76b0SSuyog Pawar     }
1711*c83a76b0SSuyog Pawar     else
1712*c83a76b0SSuyog Pawar     {
1713*c83a76b0SSuyog Pawar         ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
1714*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 16;
1715*c83a76b0SSuyog Pawar         ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1716*c83a76b0SSuyog Pawar     }
1717*c83a76b0SSuyog Pawar 
1718*c83a76b0SSuyog Pawar     ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
1719*c83a76b0SSuyog Pawar     return (count);
1720*c83a76b0SSuyog Pawar }
1721*c83a76b0SSuyog Pawar 
1722*c83a76b0SSuyog Pawar /**
1723*c83a76b0SSuyog Pawar ********************************************************************************
1724*c83a76b0SSuyog Pawar *  @fn     hme_coarse_alloc_init_mem()
1725*c83a76b0SSuyog Pawar *
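1726*c83a76b0SSuyog Pawar *  @brief  Requests memory (fills memtab attributes) or assigns allocated memory to the coarse ME ctxt, based on mem_avail
1727*c83a76b0SSuyog Pawar *
1728*c83a76b0SSuyog Pawar *  @param[in,out] ps_memtabs : memtab array; size/align/e_mem_attr are filled when mem_avail is 0, pu1_mem is read otherwise
1729*c83a76b0SSuyog Pawar *
1730*c83a76b0SSuyog Pawar *  @param[in] ps_prms : init prms
1731*c83a76b0SSuyog Pawar *
1732*c83a76b0SSuyog Pawar *  @param[in,out] pv_ctxt : coarse ME master ctxt (coarse_me_master_ctxt_t *), written when mem_avail is non-zero
1733*c83a76b0SSuyog Pawar *
1734*c83a76b0SSuyog Pawar *  @param[in] mem_avail : request/assign flag (0 : request memtab attributes, non-zero : assign allocated memory)
1735*c83a76b0SSuyog Pawar *
1736*c83a76b0SSuyog Pawar *  @return  number of memtabs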
1737*c83a76b0SSuyog Pawar ********************************************************************************
1738*c83a76b0SSuyog Pawar */
hme_coarse_alloc_init_mem(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,void * pv_ctxt,S32 mem_avail)1739*c83a76b0SSuyog Pawar S32 hme_coarse_alloc_init_mem(
1740*c83a76b0SSuyog Pawar     hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
1741*c83a76b0SSuyog Pawar {
1742*c83a76b0SSuyog Pawar     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
1743*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt;
1744*c83a76b0SSuyog Pawar     S32 count = 0, size, i, j, use_4x4, wd;
1745*c83a76b0SSuyog Pawar     S32 n_tot_layers;
1746*c83a76b0SSuyog Pawar     S32 num_layers_explicit_search;
1747*c83a76b0SSuyog Pawar     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1748*c83a76b0SSuyog Pawar     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1749*c83a76b0SSuyog Pawar     S32 num_results;
1750*c83a76b0SSuyog Pawar     S32 num_thrds;
1751*c83a76b0SSuyog Pawar     //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1752*c83a76b0SSuyog Pawar     S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
1753*c83a76b0SSuyog Pawar     S32 layer1_blk_width = 8;  // 8x8 search
1754*c83a76b0SSuyog Pawar     S32 blk_shift;
1755*c83a76b0SSuyog Pawar 
1756*c83a76b0SSuyog Pawar     /* MV bank changes */
1757*c83a76b0SSuyog Pawar     hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
1758*c83a76b0SSuyog Pawar     S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
1759*c83a76b0SSuyog Pawar     S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };
1760*c83a76b0SSuyog Pawar 
1761*c83a76b0SSuyog Pawar     /* Memtab 0: handle */
1762*c83a76b0SSuyog Pawar     size = sizeof(coarse_me_master_ctxt_t);
1763*c83a76b0SSuyog Pawar     if(mem_avail)
1764*c83a76b0SSuyog Pawar     {
1765*c83a76b0SSuyog Pawar         /* store the number of processing threads */
1766*c83a76b0SSuyog Pawar         ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1767*c83a76b0SSuyog Pawar     }
1768*c83a76b0SSuyog Pawar     else
1769*c83a76b0SSuyog Pawar     {
1770*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1771*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 8;
1772*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1773*c83a76b0SSuyog Pawar     }
1774*c83a76b0SSuyog Pawar 
1775*c83a76b0SSuyog Pawar     count++;
1776*c83a76b0SSuyog Pawar 
1777*c83a76b0SSuyog Pawar     /* Memtab 1: ME threads ctxt */
1778*c83a76b0SSuyog Pawar     size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
1779*c83a76b0SSuyog Pawar     if(mem_avail)
1780*c83a76b0SSuyog Pawar     {
1781*c83a76b0SSuyog Pawar         coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;
1782*c83a76b0SSuyog Pawar 
1783*c83a76b0SSuyog Pawar         /* store the individual thread ctxt pointers */
1784*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1785*c83a76b0SSuyog Pawar         {
1786*c83a76b0SSuyog Pawar             ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1787*c83a76b0SSuyog Pawar         }
1788*c83a76b0SSuyog Pawar     }
1789*c83a76b0SSuyog Pawar     else
1790*c83a76b0SSuyog Pawar     {
1791*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
1792*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 8;
1793*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1794*c83a76b0SSuyog Pawar     }
1795*c83a76b0SSuyog Pawar 
1796*c83a76b0SSuyog Pawar     count++;
1797*c83a76b0SSuyog Pawar 
1798*c83a76b0SSuyog Pawar     memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1799*c83a76b0SSuyog Pawar     memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1800*c83a76b0SSuyog Pawar     /*************************************************************************/
1801*c83a76b0SSuyog Pawar     /* Derive the number of HME layers, including both encoded and non encode*/
1802*c83a76b0SSuyog Pawar     /* This function also derives the width and ht of each layer.            */
1803*c83a76b0SSuyog Pawar     /*************************************************************************/
1804*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
1805*c83a76b0SSuyog Pawar 
1806*c83a76b0SSuyog Pawar     num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1807*c83a76b0SSuyog Pawar 
1808*c83a76b0SSuyog Pawar     if(num_layers_explicit_search <= 0)
1809*c83a76b0SSuyog Pawar         num_layers_explicit_search = n_tot_layers - 1;
1810*c83a76b0SSuyog Pawar 
1811*c83a76b0SSuyog Pawar     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1812*c83a76b0SSuyog Pawar 
1813*c83a76b0SSuyog Pawar     if(mem_avail)
1814*c83a76b0SSuyog Pawar     {
1815*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1816*c83a76b0SSuyog Pawar         {
1817*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1818*c83a76b0SSuyog Pawar             memset(ps_ctxt->u1_encode, 0, n_tot_layers);
1819*c83a76b0SSuyog Pawar 
1820*c83a76b0SSuyog Pawar             /* encode layer should be excluded during processing */
1821*c83a76b0SSuyog Pawar             ps_ctxt->num_layers = n_tot_layers;
1822*c83a76b0SSuyog Pawar 
1823*c83a76b0SSuyog Pawar             memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
1824*c83a76b0SSuyog Pawar             memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
1825*c83a76b0SSuyog Pawar 
1826*c83a76b0SSuyog Pawar             ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1827*c83a76b0SSuyog Pawar             ps_ctxt->max_num_results = ps_prms->max_num_results;
1828*c83a76b0SSuyog Pawar             ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1829*c83a76b0SSuyog Pawar             ps_ctxt->max_num_ref = ps_prms->max_num_ref;
1830*c83a76b0SSuyog Pawar         }
1831*c83a76b0SSuyog Pawar     }
1832*c83a76b0SSuyog Pawar 
1833*c83a76b0SSuyog Pawar     /* Memtabs : Layers MV bank for total layers - 2  */
1834*c83a76b0SSuyog Pawar     /* for penultimate layer MV bank will be initialised at every frame level */
1835*c83a76b0SSuyog Pawar     for(j = 1; j < n_tot_layers; j++)
1836*c83a76b0SSuyog Pawar     {
1837*c83a76b0SSuyog Pawar         S32 is_explicit_store = 1;
1838*c83a76b0SSuyog Pawar         S32 wd, ht;
1839*c83a76b0SSuyog Pawar         U08 u1_enc = 0;
1840*c83a76b0SSuyog Pawar         wd = a_wd[j];
1841*c83a76b0SSuyog Pawar         ht = a_ht[j];
1842*c83a76b0SSuyog Pawar 
1843*c83a76b0SSuyog Pawar         /* Possibly implicit search for lower (finer) layers */
1844*c83a76b0SSuyog Pawar         if(n_tot_layers - j > num_layers_explicit_search)
1845*c83a76b0SSuyog Pawar             is_explicit_store = 0;
1846*c83a76b0SSuyog Pawar 
1847*c83a76b0SSuyog Pawar         /* Even if explicit search, we store only 2 results (L0 and L1) */
1848*c83a76b0SSuyog Pawar         /* in finest layer */
1849*c83a76b0SSuyog Pawar         if(j == 0)
1850*c83a76b0SSuyog Pawar         {
1851*c83a76b0SSuyog Pawar             is_explicit_store = 0;
1852*c83a76b0SSuyog Pawar         }
1853*c83a76b0SSuyog Pawar 
1854*c83a76b0SSuyog Pawar         /* coarsest layer always uses 4x4 blks to store results */
1855*c83a76b0SSuyog Pawar         if(j == n_tot_layers - 1)
1856*c83a76b0SSuyog Pawar         {
1857*c83a76b0SSuyog Pawar             num_results = ps_prms->max_num_results_coarse;
1858*c83a76b0SSuyog Pawar         }
1859*c83a76b0SSuyog Pawar         else
1860*c83a76b0SSuyog Pawar         {
1861*c83a76b0SSuyog Pawar             num_results = ps_prms->max_num_results;
1862*c83a76b0SSuyog Pawar             if(j == 0)
1863*c83a76b0SSuyog Pawar                 num_results = 1;
1864*c83a76b0SSuyog Pawar         }
1865*c83a76b0SSuyog Pawar         use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1866*c83a76b0SSuyog Pawar 
1867*c83a76b0SSuyog Pawar         /* for penultimate compute the parameters and store */
1868*c83a76b0SSuyog Pawar         if(j == 1)
1869*c83a76b0SSuyog Pawar         {
1870*c83a76b0SSuyog Pawar             S32 num_blks, num_mvs_per_blk, num_ref;
1871*c83a76b0SSuyog Pawar             S32 num_cols, num_rows, num_mvs_per_row;
1872*c83a76b0SSuyog Pawar 
1873*c83a76b0SSuyog Pawar             num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
1874*c83a76b0SSuyog Pawar             num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
1875*c83a76b0SSuyog Pawar 
1876*c83a76b0SSuyog Pawar             if(is_explicit_store)
1877*c83a76b0SSuyog Pawar                 num_ref = ps_prms->max_num_ref;
1878*c83a76b0SSuyog Pawar             else
1879*c83a76b0SSuyog Pawar                 num_ref = 2;
1880*c83a76b0SSuyog Pawar 
1881*c83a76b0SSuyog Pawar             num_blks = num_cols * num_rows;
1882*c83a76b0SSuyog Pawar             num_mvs_per_blk = num_ref * num_results;
1883*c83a76b0SSuyog Pawar             num_mvs_per_row = num_mvs_per_blk * num_cols;
1884*c83a76b0SSuyog Pawar 
1885*c83a76b0SSuyog Pawar             ai4_num_mvs_per_row[j] = num_mvs_per_row;
1886*c83a76b0SSuyog Pawar             aps_mv_bank[j] = NULL;
1887*c83a76b0SSuyog Pawar             api1_ref_idx[j] = NULL;
1888*c83a76b0SSuyog Pawar         }
1889*c83a76b0SSuyog Pawar         else
1890*c83a76b0SSuyog Pawar         {
1891*c83a76b0SSuyog Pawar             count += hme_alloc_init_layer_mv_bank(
1892*c83a76b0SSuyog Pawar                 &ps_memtabs[count],
1893*c83a76b0SSuyog Pawar                 num_results,
1894*c83a76b0SSuyog Pawar                 ps_prms->max_num_ref,
1895*c83a76b0SSuyog Pawar                 use_4x4,
1896*c83a76b0SSuyog Pawar                 mem_avail,
1897*c83a76b0SSuyog Pawar                 u1_enc,
1898*c83a76b0SSuyog Pawar                 wd,
1899*c83a76b0SSuyog Pawar                 ht,
1900*c83a76b0SSuyog Pawar                 is_explicit_store,
1901*c83a76b0SSuyog Pawar                 &aps_mv_bank[j],
1902*c83a76b0SSuyog Pawar                 &api1_ref_idx[j],
1903*c83a76b0SSuyog Pawar                 &ai4_num_mvs_per_row[j]);
1904*c83a76b0SSuyog Pawar         }
1905*c83a76b0SSuyog Pawar     }
1906*c83a76b0SSuyog Pawar 
1907*c83a76b0SSuyog Pawar     /* Memtabs : Layers * num-ref + 1 */
1908*c83a76b0SSuyog Pawar     for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1909*c83a76b0SSuyog Pawar     {
1910*c83a76b0SSuyog Pawar         /* for all layer except encode layer */
1911*c83a76b0SSuyog Pawar         for(j = 1; j < n_tot_layers; j++)
1912*c83a76b0SSuyog Pawar         {
1913*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
1914*c83a76b0SSuyog Pawar             S32 is_explicit_store = 1;
1915*c83a76b0SSuyog Pawar             S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1916*c83a76b0SSuyog Pawar             S32 wd, ht;
1917*c83a76b0SSuyog Pawar             U08 u1_enc = 0;
1918*c83a76b0SSuyog Pawar             wd = a_wd[j];
1919*c83a76b0SSuyog Pawar             ht = a_ht[j];
1920*c83a76b0SSuyog Pawar 
1921*c83a76b0SSuyog Pawar             /* Possibly implicit search for lower (finer) layers */
1922*c83a76b0SSuyog Pawar             if(n_tot_layers - j > num_layers_explicit_search)
1923*c83a76b0SSuyog Pawar                 is_explicit_store = 0;
1924*c83a76b0SSuyog Pawar 
1925*c83a76b0SSuyog Pawar             /* Even if explicit search, we store only 2 results (L0 and L1) */
1926*c83a76b0SSuyog Pawar             /* in finest layer */
1927*c83a76b0SSuyog Pawar             if(j == 0)
1928*c83a76b0SSuyog Pawar             {
1929*c83a76b0SSuyog Pawar                 is_explicit_store = 0;
1930*c83a76b0SSuyog Pawar             }
1931*c83a76b0SSuyog Pawar 
1932*c83a76b0SSuyog Pawar             /* coarsest layer always uses 4x4 blks to store results */
1933*c83a76b0SSuyog Pawar             if(j == n_tot_layers - 1)
1934*c83a76b0SSuyog Pawar             {
1935*c83a76b0SSuyog Pawar                 num_results = ps_prms->max_num_results_coarse;
1936*c83a76b0SSuyog Pawar             }
1937*c83a76b0SSuyog Pawar             else
1938*c83a76b0SSuyog Pawar             {
1939*c83a76b0SSuyog Pawar                 num_results = ps_prms->max_num_results;
1940*c83a76b0SSuyog Pawar                 if(j == 0)
1941*c83a76b0SSuyog Pawar                     num_results = 1;
1942*c83a76b0SSuyog Pawar             }
1943*c83a76b0SSuyog Pawar             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1944*c83a76b0SSuyog Pawar 
1945*c83a76b0SSuyog Pawar             count += hme_alloc_init_layer(
1946*c83a76b0SSuyog Pawar                 &ps_memtabs[count],
1947*c83a76b0SSuyog Pawar                 num_results,
1948*c83a76b0SSuyog Pawar                 ps_prms->max_num_ref,
1949*c83a76b0SSuyog Pawar                 use_4x4,
1950*c83a76b0SSuyog Pawar                 mem_avail,
1951*c83a76b0SSuyog Pawar                 u1_enc,
1952*c83a76b0SSuyog Pawar                 wd,
1953*c83a76b0SSuyog Pawar                 ht,
1954*c83a76b0SSuyog Pawar                 a_disp_wd[j],
1955*c83a76b0SSuyog Pawar                 a_disp_ht[j],
1956*c83a76b0SSuyog Pawar                 segment_this_layer,
1957*c83a76b0SSuyog Pawar                 is_explicit_store,
1958*c83a76b0SSuyog Pawar                 &ps_layer);
1959*c83a76b0SSuyog Pawar             if(mem_avail)
1960*c83a76b0SSuyog Pawar             {
1961*c83a76b0SSuyog Pawar                 /* same ps_layer memory pointer is stored in all the threads */
1962*c83a76b0SSuyog Pawar                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1963*c83a76b0SSuyog Pawar                 {
1964*c83a76b0SSuyog Pawar                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1965*c83a76b0SSuyog Pawar                     ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1966*c83a76b0SSuyog Pawar                 }
1967*c83a76b0SSuyog Pawar 
1968*c83a76b0SSuyog Pawar                 /* store the MV bank pointers */
1969*c83a76b0SSuyog Pawar                 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
1970*c83a76b0SSuyog Pawar                 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
1971*c83a76b0SSuyog Pawar                 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
1972*c83a76b0SSuyog Pawar             }
1973*c83a76b0SSuyog Pawar         }
1974*c83a76b0SSuyog Pawar     }
1975*c83a76b0SSuyog Pawar 
1976*c83a76b0SSuyog Pawar     /* Memtabs : Prev Row search node at coarsest layer */
1977*c83a76b0SSuyog Pawar     wd = a_wd[n_tot_layers - 1];
1978*c83a76b0SSuyog Pawar 
1979*c83a76b0SSuyog Pawar     /* Allocate a memtab for storing 4x4 SADs for n rows. As many as num ref and number of threads */
1980*c83a76b0SSuyog Pawar     num_rows = ps_prms->i4_num_proc_thrds + 1;
1981*c83a76b0SSuyog Pawar     if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
1982*c83a76b0SSuyog Pawar         search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
1983*c83a76b0SSuyog Pawar     else
1984*c83a76b0SSuyog Pawar         search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
1985*c83a76b0SSuyog Pawar 
1986*c83a76b0SSuyog Pawar     /*shift factor*/
1987*c83a76b0SSuyog Pawar     blk_shift = 2; /*4x4*/
1988*c83a76b0SSuyog Pawar     search_step >>= 1;
1989*c83a76b0SSuyog Pawar 
1990*c83a76b0SSuyog Pawar     sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
1991*c83a76b0SSuyog Pawar                          ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
1992*c83a76b0SSuyog Pawar     sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;
1993*c83a76b0SSuyog Pawar 
1994*c83a76b0SSuyog Pawar     size = num_rows * sad_4x4_block_stride * sizeof(S16);
1995*c83a76b0SSuyog Pawar     for(i = 0; i < ps_prms->max_num_ref; i++)
1996*c83a76b0SSuyog Pawar     {
1997*c83a76b0SSuyog Pawar         if(mem_avail)
1998*c83a76b0SSuyog Pawar         {
1999*c83a76b0SSuyog Pawar             ASSERT(size == ps_memtabs[count].size);
2000*c83a76b0SSuyog Pawar 
2001*c83a76b0SSuyog Pawar             /* same row memory pointer is stored in all the threads */
2002*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2003*c83a76b0SSuyog Pawar             {
2004*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2005*c83a76b0SSuyog Pawar                 ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
2006*c83a76b0SSuyog Pawar             }
2007*c83a76b0SSuyog Pawar         }
2008*c83a76b0SSuyog Pawar         else
2009*c83a76b0SSuyog Pawar         {
2010*c83a76b0SSuyog Pawar             ps_memtabs[count].size = size;
2011*c83a76b0SSuyog Pawar             ps_memtabs[count].align = 4;
2012*c83a76b0SSuyog Pawar             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2013*c83a76b0SSuyog Pawar         }
2014*c83a76b0SSuyog Pawar         count++;
2015*c83a76b0SSuyog Pawar     }
2016*c83a76b0SSuyog Pawar 
2017*c83a76b0SSuyog Pawar     /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2018*c83a76b0SSuyog Pawar     size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2019*c83a76b0SSuyog Pawar     for(i = 0; i < ps_prms->max_num_ref; i++)
2020*c83a76b0SSuyog Pawar     {
2021*c83a76b0SSuyog Pawar         if(mem_avail)
2022*c83a76b0SSuyog Pawar         {
2023*c83a76b0SSuyog Pawar             ASSERT(size == ps_memtabs[count].size);
2024*c83a76b0SSuyog Pawar 
2025*c83a76b0SSuyog Pawar             /* same row memory pointer is stored in all the threads */
2026*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2027*c83a76b0SSuyog Pawar             {
2028*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2029*c83a76b0SSuyog Pawar                 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
2030*c83a76b0SSuyog Pawar                     (search_node_t *)ps_memtabs[count].pu1_mem;
2031*c83a76b0SSuyog Pawar             }
2032*c83a76b0SSuyog Pawar         }
2033*c83a76b0SSuyog Pawar         else
2034*c83a76b0SSuyog Pawar         {
2035*c83a76b0SSuyog Pawar             ps_memtabs[count].size = size;
2036*c83a76b0SSuyog Pawar             ps_memtabs[count].align = 4;
2037*c83a76b0SSuyog Pawar             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2038*c83a76b0SSuyog Pawar         }
2039*c83a76b0SSuyog Pawar         count++;
2040*c83a76b0SSuyog Pawar     }
2041*c83a76b0SSuyog Pawar     /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2042*c83a76b0SSuyog Pawar     size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2043*c83a76b0SSuyog Pawar     for(i = 0; i < ps_prms->max_num_ref; i++)
2044*c83a76b0SSuyog Pawar     {
2045*c83a76b0SSuyog Pawar         if(mem_avail)
2046*c83a76b0SSuyog Pawar         {
2047*c83a76b0SSuyog Pawar             ASSERT(size == ps_memtabs[count].size);
2048*c83a76b0SSuyog Pawar 
2049*c83a76b0SSuyog Pawar             /* same row memory pointer is stored in all the threads */
2050*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2051*c83a76b0SSuyog Pawar             {
2052*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2053*c83a76b0SSuyog Pawar                 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
2054*c83a76b0SSuyog Pawar                     (search_node_t *)ps_memtabs[count].pu1_mem;
2055*c83a76b0SSuyog Pawar             }
2056*c83a76b0SSuyog Pawar         }
2057*c83a76b0SSuyog Pawar         else
2058*c83a76b0SSuyog Pawar         {
2059*c83a76b0SSuyog Pawar             ps_memtabs[count].size = size;
2060*c83a76b0SSuyog Pawar             ps_memtabs[count].align = 4;
2061*c83a76b0SSuyog Pawar             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2062*c83a76b0SSuyog Pawar         }
2063*c83a76b0SSuyog Pawar         count++;
2064*c83a76b0SSuyog Pawar     }
2065*c83a76b0SSuyog Pawar 
2066*c83a76b0SSuyog Pawar     /* Allocate a memtab for each histogram. As many as num ref and number of threads */
2067*c83a76b0SSuyog Pawar     for(i = 0; i < ps_prms->max_num_ref; i++)
2068*c83a76b0SSuyog Pawar     {
2069*c83a76b0SSuyog Pawar         size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
2070*c83a76b0SSuyog Pawar         if(mem_avail)
2071*c83a76b0SSuyog Pawar         {
2072*c83a76b0SSuyog Pawar             mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
2073*c83a76b0SSuyog Pawar 
2074*c83a76b0SSuyog Pawar             ASSERT(size == ps_memtabs[count].size);
2075*c83a76b0SSuyog Pawar 
2076*c83a76b0SSuyog Pawar             /* divide the memory accross the threads */
2077*c83a76b0SSuyog Pawar             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2078*c83a76b0SSuyog Pawar             {
2079*c83a76b0SSuyog Pawar                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2080*c83a76b0SSuyog Pawar                 ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
2081*c83a76b0SSuyog Pawar                 ps_mv_hist++;
2082*c83a76b0SSuyog Pawar             }
2083*c83a76b0SSuyog Pawar         }
2084*c83a76b0SSuyog Pawar         else
2085*c83a76b0SSuyog Pawar         {
2086*c83a76b0SSuyog Pawar             ps_memtabs[count].size = size;
2087*c83a76b0SSuyog Pawar             ps_memtabs[count].align = 8;
2088*c83a76b0SSuyog Pawar             ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
2089*c83a76b0SSuyog Pawar         }
2090*c83a76b0SSuyog Pawar         count++;
2091*c83a76b0SSuyog Pawar     }
2092*c83a76b0SSuyog Pawar 
2093*c83a76b0SSuyog Pawar     /* Memtabs : Search nodes for 8x8 blks */
2094*c83a76b0SSuyog Pawar     for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2095*c83a76b0SSuyog Pawar     {
2096*c83a76b0SSuyog Pawar         search_results_t *ps_search_results = NULL;
2097*c83a76b0SSuyog Pawar 
2098*c83a76b0SSuyog Pawar         if(mem_avail)
2099*c83a76b0SSuyog Pawar         {
2100*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2101*c83a76b0SSuyog Pawar         }
2102*c83a76b0SSuyog Pawar 
2103*c83a76b0SSuyog Pawar         if(mem_avail)
2104*c83a76b0SSuyog Pawar         {
2105*c83a76b0SSuyog Pawar             ps_search_results = &ps_ctxt->s_search_results_8x8;
2106*c83a76b0SSuyog Pawar         }
2107*c83a76b0SSuyog Pawar         count += hme_alloc_init_search_nodes(
2108*c83a76b0SSuyog Pawar             ps_search_results,
2109*c83a76b0SSuyog Pawar             &ps_memtabs[count],
2110*c83a76b0SSuyog Pawar             mem_avail,
2111*c83a76b0SSuyog Pawar             ps_prms->max_num_ref,
2112*c83a76b0SSuyog Pawar             ps_prms->max_num_results);
2113*c83a76b0SSuyog Pawar     }
2114*c83a76b0SSuyog Pawar 
2115*c83a76b0SSuyog Pawar     /* Weighted inputs, one for each ref  */
2116*c83a76b0SSuyog Pawar     size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
2117*c83a76b0SSuyog Pawar            ps_prms->i4_num_proc_thrds;
2118*c83a76b0SSuyog Pawar     if(mem_avail)
2119*c83a76b0SSuyog Pawar     {
2120*c83a76b0SSuyog Pawar         U08 *pu1_mem;
2121*c83a76b0SSuyog Pawar         ASSERT(ps_memtabs[count].size == size);
2122*c83a76b0SSuyog Pawar         pu1_mem = ps_memtabs[count].pu1_mem;
2123*c83a76b0SSuyog Pawar 
2124*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2125*c83a76b0SSuyog Pawar         {
2126*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2127*c83a76b0SSuyog Pawar 
2128*c83a76b0SSuyog Pawar             for(i = 0; i < ps_prms->max_num_ref + 1; i++)
2129*c83a76b0SSuyog Pawar             {
2130*c83a76b0SSuyog Pawar                 ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
2131*c83a76b0SSuyog Pawar                 pu1_mem += (layer1_blk_width * layer1_blk_width);
2132*c83a76b0SSuyog Pawar             }
2133*c83a76b0SSuyog Pawar         }
2134*c83a76b0SSuyog Pawar     }
2135*c83a76b0SSuyog Pawar     else
2136*c83a76b0SSuyog Pawar     {
2137*c83a76b0SSuyog Pawar         ps_memtabs[count].size = size;
2138*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 16;
2139*c83a76b0SSuyog Pawar         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2140*c83a76b0SSuyog Pawar     }
2141*c83a76b0SSuyog Pawar     count++;
2142*c83a76b0SSuyog Pawar 
2143*c83a76b0SSuyog Pawar     /* if memory is allocated the intislaise the frm prms ptr to each thrd */
2144*c83a76b0SSuyog Pawar     if(mem_avail)
2145*c83a76b0SSuyog Pawar     {
2146*c83a76b0SSuyog Pawar         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2147*c83a76b0SSuyog Pawar         {
2148*c83a76b0SSuyog Pawar             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2149*c83a76b0SSuyog Pawar 
2150*c83a76b0SSuyog Pawar             ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
2151*c83a76b0SSuyog Pawar             ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
2152*c83a76b0SSuyog Pawar         }
2153*c83a76b0SSuyog Pawar     }
2154*c83a76b0SSuyog Pawar 
2155*c83a76b0SSuyog Pawar     /* Memory for ihevce_me_optimised_function_list_t struct  */
2156*c83a76b0SSuyog Pawar     if(mem_avail)
2157*c83a76b0SSuyog Pawar     {
2158*c83a76b0SSuyog Pawar         ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
2159*c83a76b0SSuyog Pawar     }
2160*c83a76b0SSuyog Pawar     else
2161*c83a76b0SSuyog Pawar     {
2162*c83a76b0SSuyog Pawar         ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
2163*c83a76b0SSuyog Pawar         ps_memtabs[count].align = 16;
2164*c83a76b0SSuyog Pawar         ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2165*c83a76b0SSuyog Pawar     }
2166*c83a76b0SSuyog Pawar 
2167*c83a76b0SSuyog Pawar     //ASSERT(count < hme_enc_num_alloc());
2168*c83a76b0SSuyog Pawar     ASSERT(count < hme_coarse_num_alloc());
2169*c83a76b0SSuyog Pawar     return (count);
2170*c83a76b0SSuyog Pawar }
2171*c83a76b0SSuyog Pawar 
2172*c83a76b0SSuyog Pawar /*!
2173*c83a76b0SSuyog Pawar ******************************************************************************
2174*c83a76b0SSuyog Pawar * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
2175*c83a76b0SSuyog Pawar *
2176*c83a76b0SSuyog Pawar * \brief Returns to the caller key attributes relevant for dependency manager,
2177*c83a76b0SSuyog Pawar *        ie, the number of vertical units in each layer
2178*c83a76b0SSuyog Pawar *
2179*c83a76b0SSuyog Pawar * \par Description:
2180*c83a76b0SSuyog Pawar *    This function requires the precondition that the width and ht of encode
2181*c83a76b0SSuyog Pawar *    layer is known.
2182*c83a76b0SSuyog Pawar *    The number of layers, number of vertical units in each layer, and for
2183*c83a76b0SSuyog Pawar *    each vertial unit in each layer, its dependency on previous layer's units
2184*c83a76b0SSuyog Pawar *    From ME's perspective, a vertical unit is one which is smallest min size
2185*c83a76b0SSuyog Pawar *    vertically (and spans the entire row horizontally). This is CTB for encode
2186*c83a76b0SSuyog Pawar *    layer, and 8x8 / 4x4 for non encode layers.
2187*c83a76b0SSuyog Pawar *
2188*c83a76b0SSuyog Pawar * \param[in] num_layers : Number of ME Layers
2189*c83a76b0SSuyog Pawar * \param[in] pai4_ht    : Array storing ht at each layer
2190*c83a76b0SSuyog Pawar * \param[in] pai4_wd    : Array storing wd at each layer
2191*c83a76b0SSuyog Pawar * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
2192*c83a76b0SSuyog Pawar *                     entry has num vertical units in that particular layer
2193*c83a76b0SSuyog Pawar *
2194*c83a76b0SSuyog Pawar * \return
2195*c83a76b0SSuyog Pawar *    None
2196*c83a76b0SSuyog Pawar *
2197*c83a76b0SSuyog Pawar * \author
2198*c83a76b0SSuyog Pawar *  Ittiam
2199*c83a76b0SSuyog Pawar *
2200*c83a76b0SSuyog Pawar *****************************************************************************
2201*c83a76b0SSuyog Pawar */
ihevce_coarse_me_get_lyr_prms_dep_mngr(WORD32 num_layers,WORD32 * pai4_ht,WORD32 * pai4_wd,WORD32 * pai4_num_vert_units_in_lyr)2202*c83a76b0SSuyog Pawar void ihevce_coarse_me_get_lyr_prms_dep_mngr(
2203*c83a76b0SSuyog Pawar     WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
2204*c83a76b0SSuyog Pawar {
2205*c83a76b0SSuyog Pawar     /* Height of current and next layers */
2206*c83a76b0SSuyog Pawar     WORD32 ht_c, ht_n;
2207*c83a76b0SSuyog Pawar     /* Blk ht at a given layer and next layer*/
2208*c83a76b0SSuyog Pawar     WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
2209*c83a76b0SSuyog Pawar     /* Number of vertical units in current and next layer */
2210*c83a76b0SSuyog Pawar     WORD32 num_vert_c, num_vert_n;
2211*c83a76b0SSuyog Pawar 
2212*c83a76b0SSuyog Pawar     WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i;
2213*c83a76b0SSuyog Pawar     UWORD8 au1_encode[MAX_NUM_LAYERS];
2214*c83a76b0SSuyog Pawar 
2215*c83a76b0SSuyog Pawar     memset(au1_encode, 0, num_layers);
2216*c83a76b0SSuyog Pawar     memset(au1_encode, 1, num_enc_layers);
2217*c83a76b0SSuyog Pawar 
2218*c83a76b0SSuyog Pawar     ht_n = pai4_ht[num_layers - 2];
2219*c83a76b0SSuyog Pawar     ht_c = pai4_ht[num_layers - 1];
2220*c83a76b0SSuyog Pawar 
2221*c83a76b0SSuyog Pawar     /* compute blk ht and unit ht for c and n */
2222*c83a76b0SSuyog Pawar     if(au1_encode[num_layers - 1])
2223*c83a76b0SSuyog Pawar     {
2224*c83a76b0SSuyog Pawar         blk_ht_c = 16;
2225*c83a76b0SSuyog Pawar         unit_ht_c = ctb_size;
2226*c83a76b0SSuyog Pawar     }
2227*c83a76b0SSuyog Pawar     else
2228*c83a76b0SSuyog Pawar     {
2229*c83a76b0SSuyog Pawar         blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
2230*c83a76b0SSuyog Pawar         unit_ht_c = blk_ht_c;
2231*c83a76b0SSuyog Pawar     }
2232*c83a76b0SSuyog Pawar 
2233*c83a76b0SSuyog Pawar     num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
2234*c83a76b0SSuyog Pawar     /* For new design in Coarsest HME layer we need */
2235*c83a76b0SSuyog Pawar     /* one additional row extra at the end of frame */
2236*c83a76b0SSuyog Pawar     /* hence num_vert_c is incremented by 1         */
2237*c83a76b0SSuyog Pawar     num_vert_c++;
2238*c83a76b0SSuyog Pawar 
2239*c83a76b0SSuyog Pawar     /*************************************************************************/
2240*c83a76b0SSuyog Pawar     /* Run through each layer, set the number of vertical units              */
2241*c83a76b0SSuyog Pawar     /*************************************************************************/
2242*c83a76b0SSuyog Pawar     for(i = num_layers - 1; i > 0; i--)
2243*c83a76b0SSuyog Pawar     {
2244*c83a76b0SSuyog Pawar         pai4_num_vert_units_in_lyr[i] = num_vert_c;
2245*c83a76b0SSuyog Pawar 
2246*c83a76b0SSuyog Pawar         /* "n" is computed for first time */
2247*c83a76b0SSuyog Pawar         ht_n = pai4_ht[i - 1];
2248*c83a76b0SSuyog Pawar         blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0);
2249*c83a76b0SSuyog Pawar         unit_ht_n = blk_ht_n;
2250*c83a76b0SSuyog Pawar         if(au1_encode[i - 1])
2251*c83a76b0SSuyog Pawar             unit_ht_n = ctb_size;
2252*c83a76b0SSuyog Pawar 
2253*c83a76b0SSuyog Pawar         num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
2254*c83a76b0SSuyog Pawar 
2255*c83a76b0SSuyog Pawar         /* Compute the blk size and vert unit size in each layer             */
2256*c83a76b0SSuyog Pawar         /* "c" denotes curr layer, and "n" denotes the layer to which result */
2257*c83a76b0SSuyog Pawar         /* is projected to                                                   */
2258*c83a76b0SSuyog Pawar         ht_c = ht_n;
2259*c83a76b0SSuyog Pawar         blk_ht_c = blk_ht_n;
2260*c83a76b0SSuyog Pawar         unit_ht_c = unit_ht_n;
2261*c83a76b0SSuyog Pawar         num_vert_c = num_vert_n;
2262*c83a76b0SSuyog Pawar     }
2263*c83a76b0SSuyog Pawar 
2264*c83a76b0SSuyog Pawar     /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
2265*c83a76b0SSuyog Pawar     /* set the numebr of vertical units */
2266*c83a76b0SSuyog Pawar     pai4_num_vert_units_in_lyr[0] = num_vert_c;
2267*c83a76b0SSuyog Pawar }
2268*c83a76b0SSuyog Pawar 
2269*c83a76b0SSuyog Pawar /**
2270*c83a76b0SSuyog Pawar ********************************************************************************
2271*c83a76b0SSuyog Pawar *  @fn     hme_coarse_dep_mngr_alloc_mem()
2272*c83a76b0SSuyog Pawar *
2273*c83a76b0SSuyog Pawar *  @brief  Requests memory for HME Dep Mngr
2274*c83a76b0SSuyog Pawar *
2275*c83a76b0SSuyog Pawar * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2276*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2277*c83a76b0SSuyog Pawar * \param[in] i4_mem_space : memspace in whihc memory request should be done
2278*c83a76b0SSuyog Pawar *
2279*c83a76b0SSuyog Pawar *  @return  number of memtabs
2280*c83a76b0SSuyog Pawar ********************************************************************************
2281*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_alloc_mem(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_mem_space,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2282*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_alloc_mem(
2283*c83a76b0SSuyog Pawar     iv_mem_rec_t *ps_mem_tab,
2284*c83a76b0SSuyog Pawar     ihevce_static_cfg_params_t *ps_init_prms,
2285*c83a76b0SSuyog Pawar     WORD32 i4_mem_space,
2286*c83a76b0SSuyog Pawar     WORD32 i4_num_proc_thrds,
2287*c83a76b0SSuyog Pawar     WORD32 i4_resolution_id)
2288*c83a76b0SSuyog Pawar {
2289*c83a76b0SSuyog Pawar     WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2290*c83a76b0SSuyog Pawar     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2291*c83a76b0SSuyog Pawar     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2292*c83a76b0SSuyog Pawar     WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2293*c83a76b0SSuyog Pawar     WORD32 min_cu_size;
2294*c83a76b0SSuyog Pawar 
2295*c83a76b0SSuyog Pawar     /* get the min cu size from config params */
2296*c83a76b0SSuyog Pawar     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2297*c83a76b0SSuyog Pawar 
2298*c83a76b0SSuyog Pawar     min_cu_size = 1 << min_cu_size;
2299*c83a76b0SSuyog Pawar 
2300*c83a76b0SSuyog Pawar     /* Get the width and heights of different decomp layers */
2301*c83a76b0SSuyog Pawar     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2302*c83a76b0SSuyog Pawar             SET_CTB_ALIGN(
2303*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2304*c83a76b0SSuyog Pawar 
2305*c83a76b0SSuyog Pawar     *a_ht =
2306*c83a76b0SSuyog Pawar         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2307*c83a76b0SSuyog Pawar         SET_CTB_ALIGN(
2308*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2309*c83a76b0SSuyog Pawar 
2310*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2311*c83a76b0SSuyog Pawar     ASSERT(n_tot_layers >= 3);
2312*c83a76b0SSuyog Pawar 
2313*c83a76b0SSuyog Pawar     /* --- Get the number of vartical units in each layer for dep. mngr -- */
2314*c83a76b0SSuyog Pawar     ihevce_coarse_me_get_lyr_prms_dep_mngr(
2315*c83a76b0SSuyog Pawar         n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2316*c83a76b0SSuyog Pawar 
2317*c83a76b0SSuyog Pawar     /* Fill memtabs for HME layers,except for L0 layer */
2318*c83a76b0SSuyog Pawar     for(i = 1; i < n_tot_layers; i++)
2319*c83a76b0SSuyog Pawar     {
2320*c83a76b0SSuyog Pawar         n_dep_tabs += ihevce_dmgr_get_mem_recs(
2321*c83a76b0SSuyog Pawar             &ps_mem_tab[n_dep_tabs],
2322*c83a76b0SSuyog Pawar             DEP_MNGR_ROW_ROW_SYNC,
2323*c83a76b0SSuyog Pawar             ai4_num_vert_units_in_lyr[i],
2324*c83a76b0SSuyog Pawar             1, /* Number of Col Tiles :  Not supported in PreEnc */
2325*c83a76b0SSuyog Pawar             i4_num_proc_thrds,
2326*c83a76b0SSuyog Pawar             i4_mem_space);
2327*c83a76b0SSuyog Pawar     }
2328*c83a76b0SSuyog Pawar 
2329*c83a76b0SSuyog Pawar     ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
2330*c83a76b0SSuyog Pawar 
2331*c83a76b0SSuyog Pawar     return (n_dep_tabs);
2332*c83a76b0SSuyog Pawar }
2333*c83a76b0SSuyog Pawar 
2334*c83a76b0SSuyog Pawar /**
2335*c83a76b0SSuyog Pawar ********************************************************************************
2336*c83a76b0SSuyog Pawar *  @fn     hme_coarse_dep_mngr_init()
2337*c83a76b0SSuyog Pawar *
2338*c83a76b0SSuyog Pawar *  @brief  Assign memory for HME Dep Mngr
2339*c83a76b0SSuyog Pawar *
2340*c83a76b0SSuyog Pawar * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2341*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2342*c83a76b0SSuyog Pawar *  @param[in] pv_ctxt : ME ctxt
2343*c83a76b0SSuyog Pawar * \param[in] pv_osal_handle : Osal handle
2344*c83a76b0SSuyog Pawar *
2345*c83a76b0SSuyog Pawar *  @return  number of memtabs
2346*c83a76b0SSuyog Pawar ********************************************************************************
2347*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,void * pv_ctxt,void * pv_osal_handle,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2348*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_init(
2349*c83a76b0SSuyog Pawar     iv_mem_rec_t *ps_mem_tab,
2350*c83a76b0SSuyog Pawar     ihevce_static_cfg_params_t *ps_init_prms,
2351*c83a76b0SSuyog Pawar     void *pv_ctxt,
2352*c83a76b0SSuyog Pawar     void *pv_osal_handle,
2353*c83a76b0SSuyog Pawar     WORD32 i4_num_proc_thrds,
2354*c83a76b0SSuyog Pawar     WORD32 i4_resolution_id)
2355*c83a76b0SSuyog Pawar {
2356*c83a76b0SSuyog Pawar     WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2357*c83a76b0SSuyog Pawar     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2358*c83a76b0SSuyog Pawar     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2359*c83a76b0SSuyog Pawar     WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2360*c83a76b0SSuyog Pawar     WORD32 min_cu_size;
2361*c83a76b0SSuyog Pawar 
2362*c83a76b0SSuyog Pawar     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2363*c83a76b0SSuyog Pawar 
2364*c83a76b0SSuyog Pawar     /* get the min cu size from config params */
2365*c83a76b0SSuyog Pawar     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2366*c83a76b0SSuyog Pawar 
2367*c83a76b0SSuyog Pawar     min_cu_size = 1 << min_cu_size;
2368*c83a76b0SSuyog Pawar 
2369*c83a76b0SSuyog Pawar     /* Get the width and heights of different decomp layers */
2370*c83a76b0SSuyog Pawar     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2371*c83a76b0SSuyog Pawar             SET_CTB_ALIGN(
2372*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2373*c83a76b0SSuyog Pawar     *a_ht =
2374*c83a76b0SSuyog Pawar         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2375*c83a76b0SSuyog Pawar         SET_CTB_ALIGN(
2376*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2377*c83a76b0SSuyog Pawar 
2378*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2379*c83a76b0SSuyog Pawar     ASSERT(n_tot_layers >= 3);
2380*c83a76b0SSuyog Pawar 
2381*c83a76b0SSuyog Pawar     /* --- Get the number of vartical units in each layer for dep. mngr -- */
2382*c83a76b0SSuyog Pawar     ihevce_coarse_me_get_lyr_prms_dep_mngr(
2383*c83a76b0SSuyog Pawar         n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2384*c83a76b0SSuyog Pawar 
2385*c83a76b0SSuyog Pawar     /* --- HME sync Dep Mngr Mem init --    */
2386*c83a76b0SSuyog Pawar     for(i = 1; i < n_tot_layers; i++)
2387*c83a76b0SSuyog Pawar     {
2388*c83a76b0SSuyog Pawar         WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift;
2389*c83a76b0SSuyog Pawar 
2390*c83a76b0SSuyog Pawar         if(i == (n_tot_layers - 1)) /* coarsest layer */
2391*c83a76b0SSuyog Pawar             blk_size_shift = 2;
2392*c83a76b0SSuyog Pawar         else
2393*c83a76b0SSuyog Pawar             blk_size_shift = 3; /* refine layers */
2394*c83a76b0SSuyog Pawar 
2395*c83a76b0SSuyog Pawar         GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic);
2396*c83a76b0SSuyog Pawar 
2397*c83a76b0SSuyog Pawar         /* Coarsest layer : 1 block extra, since the last block */
2398*c83a76b0SSuyog Pawar         if(i == (n_tot_layers - 1)) /*  in a row needs East block */
2399*c83a76b0SSuyog Pawar             num_blks_in_row += 1;
2400*c83a76b0SSuyog Pawar 
2401*c83a76b0SSuyog Pawar         /* Note : i-1, only for HME layers, L0 is separate */
2402*c83a76b0SSuyog Pawar         ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init(
2403*c83a76b0SSuyog Pawar             &ps_mem_tab[n_dep_tabs],
2404*c83a76b0SSuyog Pawar             pv_osal_handle,
2405*c83a76b0SSuyog Pawar             DEP_MNGR_ROW_ROW_SYNC,
2406*c83a76b0SSuyog Pawar             ai4_num_vert_units_in_lyr[i],
2407*c83a76b0SSuyog Pawar             num_blks_in_row,
2408*c83a76b0SSuyog Pawar             1, /* Number of Col Tiles : Not supported in PreEnc */
2409*c83a76b0SSuyog Pawar             i4_num_proc_thrds,
2410*c83a76b0SSuyog Pawar             1 /*Sem disabled*/
2411*c83a76b0SSuyog Pawar         );
2412*c83a76b0SSuyog Pawar 
2413*c83a76b0SSuyog Pawar         n_dep_tabs += ihevce_dmgr_get_num_mem_recs();
2414*c83a76b0SSuyog Pawar     }
2415*c83a76b0SSuyog Pawar 
2416*c83a76b0SSuyog Pawar     return n_dep_tabs;
2417*c83a76b0SSuyog Pawar }
2418*c83a76b0SSuyog Pawar 
2419*c83a76b0SSuyog Pawar /**
2420*c83a76b0SSuyog Pawar ********************************************************************************
2421*c83a76b0SSuyog Pawar *  @fn     hme_coarse_dep_mngr_reg_sem()
2422*c83a76b0SSuyog Pawar *
2423*c83a76b0SSuyog Pawar *  @brief  Assign semaphores for HME Dep Mngr
2424*c83a76b0SSuyog Pawar *
2425*c83a76b0SSuyog Pawar * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2426*c83a76b0SSuyog Pawar * \param[in] ppv_sem_hdls : Arry of semaphore handles
2427*c83a76b0SSuyog Pawar * \param[in] i4_num_proc_thrds : Number of processing threads
2428*c83a76b0SSuyog Pawar *
2429*c83a76b0SSuyog Pawar *  @return  number of memtabs
2430*c83a76b0SSuyog Pawar ********************************************************************************
2431*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_reg_sem(void * pv_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)2432*c83a76b0SSuyog Pawar void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
2433*c83a76b0SSuyog Pawar {
2434*c83a76b0SSuyog Pawar     WORD32 i;
2435*c83a76b0SSuyog Pawar     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2436*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt = ps_me_ctxt->aps_me_ctxt[0];
2437*c83a76b0SSuyog Pawar 
2438*c83a76b0SSuyog Pawar     /* --- HME sync Dep Mngr semaphore init --    */
2439*c83a76b0SSuyog Pawar     for(i = 1; i < ps_ctxt->num_layers; i++)
2440*c83a76b0SSuyog Pawar     {
2441*c83a76b0SSuyog Pawar         ihevce_dmgr_reg_sem_hdls(
2442*c83a76b0SSuyog Pawar             ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1], ppv_sem_hdls, i4_num_proc_thrds);
2443*c83a76b0SSuyog Pawar     }
2444*c83a76b0SSuyog Pawar 
2445*c83a76b0SSuyog Pawar     return;
2446*c83a76b0SSuyog Pawar }
2447*c83a76b0SSuyog Pawar 
2448*c83a76b0SSuyog Pawar /**
2449*c83a76b0SSuyog Pawar ********************************************************************************
2450*c83a76b0SSuyog Pawar *  @fn     hme_coarse_dep_mngr_delete()
2451*c83a76b0SSuyog Pawar *
2452*c83a76b0SSuyog Pawar *    Destroy Coarse ME Dep Mngr module
2453*c83a76b0SSuyog Pawar *   Note : Only Destroys the resources allocated in the module like
2454*c83a76b0SSuyog Pawar *   semaphore,etc. Memory free is done Separately using memtabs
2455*c83a76b0SSuyog Pawar *
2456*c83a76b0SSuyog Pawar * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2457*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2458*c83a76b0SSuyog Pawar *
2459*c83a76b0SSuyog Pawar *  @return  none
2460*c83a76b0SSuyog Pawar ********************************************************************************
2461*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_delete(void * pv_me_ctxt,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_resolution_id)2462*c83a76b0SSuyog Pawar void hme_coarse_dep_mngr_delete(
2463*c83a76b0SSuyog Pawar     void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
2464*c83a76b0SSuyog Pawar {
2465*c83a76b0SSuyog Pawar     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2466*c83a76b0SSuyog Pawar     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2467*c83a76b0SSuyog Pawar     WORD32 n_enc_layers = 1, n_tot_layers, i;
2468*c83a76b0SSuyog Pawar     WORD32 min_cu_size;
2469*c83a76b0SSuyog Pawar 
2470*c83a76b0SSuyog Pawar     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
2471*c83a76b0SSuyog Pawar 
2472*c83a76b0SSuyog Pawar     /* get the min cu size from config params */
2473*c83a76b0SSuyog Pawar     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2474*c83a76b0SSuyog Pawar 
2475*c83a76b0SSuyog Pawar     min_cu_size = 1 << min_cu_size;
2476*c83a76b0SSuyog Pawar 
2477*c83a76b0SSuyog Pawar     /* Get the width and heights of different decomp layers */
2478*c83a76b0SSuyog Pawar     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2479*c83a76b0SSuyog Pawar             SET_CTB_ALIGN(
2480*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2481*c83a76b0SSuyog Pawar     *a_ht =
2482*c83a76b0SSuyog Pawar         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2483*c83a76b0SSuyog Pawar         SET_CTB_ALIGN(
2484*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2485*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2486*c83a76b0SSuyog Pawar     ASSERT(n_tot_layers >= 3);
2487*c83a76b0SSuyog Pawar 
2488*c83a76b0SSuyog Pawar     /* --- HME sync Dep Mngr Delete --    */
2489*c83a76b0SSuyog Pawar     for(i = 1; i < n_tot_layers; i++)
2490*c83a76b0SSuyog Pawar     {
2491*c83a76b0SSuyog Pawar         /* Note : i-1, only for HME layers, L0 is separate */
2492*c83a76b0SSuyog Pawar         ihevce_dmgr_del(ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1]);
2493*c83a76b0SSuyog Pawar     }
2494*c83a76b0SSuyog Pawar }
2495*c83a76b0SSuyog Pawar 
2496*c83a76b0SSuyog Pawar /**
2497*c83a76b0SSuyog Pawar *******************************************************************************
2498*c83a76b0SSuyog Pawar *  @fn     S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2499*c83a76b0SSuyog Pawar *
2500*c83a76b0SSuyog Pawar *  @brief  Fills up memtabs with memory information details required by HME
2501*c83a76b0SSuyog Pawar *
2502*c83a76b0SSuyog Pawar *  @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2503*c83a76b0SSuyog Pawar *              up its requirements of memory
2504*c83a76b0SSuyog Pawar *
2505*c83a76b0SSuyog Pawar *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2506*c83a76b0SSuyog Pawar *                       amt of memory
2507*c83a76b0SSuyog Pawar *
2508*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
2509*c83a76b0SSuyog Pawar *******************************************************************************
2510*c83a76b0SSuyog Pawar */
hme_enc_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,WORD32 i4_num_me_frm_pllel)2511*c83a76b0SSuyog Pawar S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
2512*c83a76b0SSuyog Pawar {
2513*c83a76b0SSuyog Pawar     S32 num, tot, i;
2514*c83a76b0SSuyog Pawar 
2515*c83a76b0SSuyog Pawar     /* Validation of init params */
2516*c83a76b0SSuyog Pawar     if(-1 == hme_validate_init_prms(ps_prms))
2517*c83a76b0SSuyog Pawar         return (-1);
2518*c83a76b0SSuyog Pawar 
2519*c83a76b0SSuyog Pawar     num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
2520*c83a76b0SSuyog Pawar     tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2521*c83a76b0SSuyog Pawar     for(i = num; i < tot; i++)
2522*c83a76b0SSuyog Pawar     {
2523*c83a76b0SSuyog Pawar         ps_memtabs[i].size = 4;
2524*c83a76b0SSuyog Pawar         ps_memtabs[i].align = 4;
2525*c83a76b0SSuyog Pawar         ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2526*c83a76b0SSuyog Pawar     }
2527*c83a76b0SSuyog Pawar     return (tot);
2528*c83a76b0SSuyog Pawar }
2529*c83a76b0SSuyog Pawar 
2530*c83a76b0SSuyog Pawar /**
2531*c83a76b0SSuyog Pawar *******************************************************************************
2532*c83a76b0SSuyog Pawar *  @fn     S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2533*c83a76b0SSuyog Pawar *
2534*c83a76b0SSuyog Pawar *  @brief  Fills up memtabs with memory information details required by Coarse HME
2535*c83a76b0SSuyog Pawar *
2536*c83a76b0SSuyog Pawar *  @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2537*c83a76b0SSuyog Pawar *              up its requirements of memory
2538*c83a76b0SSuyog Pawar *
2539*c83a76b0SSuyog Pawar *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2540*c83a76b0SSuyog Pawar *                       amt of memory
2541*c83a76b0SSuyog Pawar *
2542*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
2543*c83a76b0SSuyog Pawar *******************************************************************************
2544*c83a76b0SSuyog Pawar */
hme_coarse_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2545*c83a76b0SSuyog Pawar S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2546*c83a76b0SSuyog Pawar {
2547*c83a76b0SSuyog Pawar     S32 num, tot, i;
2548*c83a76b0SSuyog Pawar 
2549*c83a76b0SSuyog Pawar     /* Validation of init params */
2550*c83a76b0SSuyog Pawar     if(-1 == hme_validate_init_prms(ps_prms))
2551*c83a76b0SSuyog Pawar         return (-1);
2552*c83a76b0SSuyog Pawar 
2553*c83a76b0SSuyog Pawar     num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
2554*c83a76b0SSuyog Pawar     tot = hme_coarse_num_alloc();
2555*c83a76b0SSuyog Pawar     for(i = num; i < tot; i++)
2556*c83a76b0SSuyog Pawar     {
2557*c83a76b0SSuyog Pawar         ps_memtabs[i].size = 4;
2558*c83a76b0SSuyog Pawar         ps_memtabs[i].align = 4;
2559*c83a76b0SSuyog Pawar         ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2560*c83a76b0SSuyog Pawar     }
2561*c83a76b0SSuyog Pawar     return (tot);
2562*c83a76b0SSuyog Pawar }
2563*c83a76b0SSuyog Pawar 
2564*c83a76b0SSuyog Pawar /**
2565*c83a76b0SSuyog Pawar *******************************************************************************
2566*c83a76b0SSuyog Pawar *  @fn hme_coarse_dep_mngr_alloc
2567*c83a76b0SSuyog Pawar *
2568*c83a76b0SSuyog Pawar *  @brief  Fills up memtabs with memory information details required by Coarse HME
2569*c83a76b0SSuyog Pawar *
2570*c83a76b0SSuyog Pawar * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2571*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2572*c83a76b0SSuyog Pawar * \param[in] i4_mem_space : memspace in whihc memory request should be done
2573*c83a76b0SSuyog Pawar *
2574*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
2575*c83a76b0SSuyog Pawar *******************************************************************************
2576*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_alloc(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_mem_space,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2577*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_alloc(
2578*c83a76b0SSuyog Pawar     iv_mem_rec_t *ps_mem_tab,
2579*c83a76b0SSuyog Pawar     ihevce_static_cfg_params_t *ps_init_prms,
2580*c83a76b0SSuyog Pawar     WORD32 i4_mem_space,
2581*c83a76b0SSuyog Pawar     WORD32 i4_num_proc_thrds,
2582*c83a76b0SSuyog Pawar     WORD32 i4_resolution_id)
2583*c83a76b0SSuyog Pawar {
2584*c83a76b0SSuyog Pawar     S32 num, tot, i;
2585*c83a76b0SSuyog Pawar 
2586*c83a76b0SSuyog Pawar     num = hme_coarse_dep_mngr_alloc_mem(
2587*c83a76b0SSuyog Pawar         ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
2588*c83a76b0SSuyog Pawar     tot = hme_coarse_dep_mngr_num_alloc();
2589*c83a76b0SSuyog Pawar     for(i = num; i < tot; i++)
2590*c83a76b0SSuyog Pawar     {
2591*c83a76b0SSuyog Pawar         ps_mem_tab[i].i4_mem_size = 4;
2592*c83a76b0SSuyog Pawar         ps_mem_tab[i].i4_mem_alignment = 4;
2593*c83a76b0SSuyog Pawar         ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
2594*c83a76b0SSuyog Pawar     }
2595*c83a76b0SSuyog Pawar     return (tot);
2596*c83a76b0SSuyog Pawar }
2597*c83a76b0SSuyog Pawar 
2598*c83a76b0SSuyog Pawar /**
2599*c83a76b0SSuyog Pawar ********************************************************************************
2600*c83a76b0SSuyog Pawar *  @fn     hme_coarse_init_ctxt()
2601*c83a76b0SSuyog Pawar *
2602*c83a76b0SSuyog Pawar *  @brief  initialise context memory
2603*c83a76b0SSuyog Pawar *
2604*c83a76b0SSuyog Pawar *  @param[in] ps_prms : init prms
2605*c83a76b0SSuyog Pawar *
2606*c83a76b0SSuyog Pawar *  @param[in] pv_ctxt : ME ctxt
2607*c83a76b0SSuyog Pawar *
2608*c83a76b0SSuyog Pawar *  @return  number of memtabs
2609*c83a76b0SSuyog Pawar ********************************************************************************
2610*c83a76b0SSuyog Pawar */
hme_coarse_init_ctxt(coarse_me_master_ctxt_t * ps_master_ctxt,hme_init_prms_t * ps_prms)2611*c83a76b0SSuyog Pawar void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
2612*c83a76b0SSuyog Pawar {
2613*c83a76b0SSuyog Pawar     S32 i, j, num_thrds;
2614*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt;
2615*c83a76b0SSuyog Pawar     S32 num_rows_coarse;
2616*c83a76b0SSuyog Pawar 
2617*c83a76b0SSuyog Pawar     /* initialise the parameters inot context of all threads */
2618*c83a76b0SSuyog Pawar     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2619*c83a76b0SSuyog Pawar     {
2620*c83a76b0SSuyog Pawar         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2621*c83a76b0SSuyog Pawar 
2622*c83a76b0SSuyog Pawar         /* Copy the init prms to context */
2623*c83a76b0SSuyog Pawar         ps_ctxt->s_init_prms = *ps_prms;
2624*c83a76b0SSuyog Pawar 
2625*c83a76b0SSuyog Pawar         /* Initialize some other variables in ctxt */
2626*c83a76b0SSuyog Pawar         ps_ctxt->i4_prev_poc = -1;
2627*c83a76b0SSuyog Pawar 
2628*c83a76b0SSuyog Pawar         ps_ctxt->num_b_frms = ps_prms->num_b_frms;
2629*c83a76b0SSuyog Pawar 
2630*c83a76b0SSuyog Pawar         ps_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_ctxt->au1_ref_bits_tlu_lc[0][0];
2631*c83a76b0SSuyog Pawar         ps_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_ctxt->au1_ref_bits_tlu_lc[1][0];
2632*c83a76b0SSuyog Pawar 
2633*c83a76b0SSuyog Pawar         /* Initialize num rows lookuptable */
2634*c83a76b0SSuyog Pawar         ps_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
2635*c83a76b0SSuyog Pawar         num_rows_coarse = ps_ctxt->i4_num_row_bufs;
2636*c83a76b0SSuyog Pawar         for(i = 0; i < ((HEVCE_MAX_HEIGHT >> 1) >> 2); i++)
2637*c83a76b0SSuyog Pawar         {
2638*c83a76b0SSuyog Pawar             ps_ctxt->ai4_row_index[i] = (i % num_rows_coarse);
2639*c83a76b0SSuyog Pawar         }
2640*c83a76b0SSuyog Pawar     }
2641*c83a76b0SSuyog Pawar 
2642*c83a76b0SSuyog Pawar     /* since same layer desc pointer is stored in all the threads ctxt */
2643*c83a76b0SSuyog Pawar     /* layer init is done only using 0th thread ctxt                   */
2644*c83a76b0SSuyog Pawar     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2645*c83a76b0SSuyog Pawar 
2646*c83a76b0SSuyog Pawar     /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2647*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2648*c83a76b0SSuyog Pawar     {
2649*c83a76b0SSuyog Pawar         for(j = 1; j < ps_ctxt->num_layers; j++)
2650*c83a76b0SSuyog Pawar         {
2651*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
2652*c83a76b0SSuyog Pawar             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2653*c83a76b0SSuyog Pawar             ps_layer->i4_poc = -1;
2654*c83a76b0SSuyog Pawar             ps_layer->ppu1_list_inp = &ps_ctxt->apu1_list_inp[j][0];
2655*c83a76b0SSuyog Pawar             memset(
2656*c83a76b0SSuyog Pawar                 ps_layer->s_global_mv, 0, sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
2657*c83a76b0SSuyog Pawar         }
2658*c83a76b0SSuyog Pawar     }
2659*c83a76b0SSuyog Pawar }
2660*c83a76b0SSuyog Pawar 
2661*c83a76b0SSuyog Pawar /**
2662*c83a76b0SSuyog Pawar ********************************************************************************
2663*c83a76b0SSuyog Pawar *  @fn     hme_enc_init_ctxt()
2664*c83a76b0SSuyog Pawar *
2665*c83a76b0SSuyog Pawar *  @brief  initialise context memory
2666*c83a76b0SSuyog Pawar *
2667*c83a76b0SSuyog Pawar *  @param[in] ps_prms : init prms
2668*c83a76b0SSuyog Pawar *
2669*c83a76b0SSuyog Pawar *  @param[in] pv_ctxt : ME ctxt
2670*c83a76b0SSuyog Pawar *
2671*c83a76b0SSuyog Pawar *  @return  number of memtabs
2672*c83a76b0SSuyog Pawar ********************************************************************************
2673*c83a76b0SSuyog Pawar */
hme_enc_init_ctxt(me_master_ctxt_t * ps_master_ctxt,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt)2674*c83a76b0SSuyog Pawar void hme_enc_init_ctxt(
2675*c83a76b0SSuyog Pawar     me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
2676*c83a76b0SSuyog Pawar {
2677*c83a76b0SSuyog Pawar     S32 i, j, num_thrds;
2678*c83a76b0SSuyog Pawar     me_ctxt_t *ps_ctxt;
2679*c83a76b0SSuyog Pawar     me_frm_ctxt_t *ps_frm_ctxt;
2680*c83a76b0SSuyog Pawar 
2681*c83a76b0SSuyog Pawar     /* initialise the parameters in context of all threads */
2682*c83a76b0SSuyog Pawar     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2683*c83a76b0SSuyog Pawar     {
2684*c83a76b0SSuyog Pawar         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2685*c83a76b0SSuyog Pawar         /* Store Tile params base into ME context */
2686*c83a76b0SSuyog Pawar         ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;
2687*c83a76b0SSuyog Pawar 
2688*c83a76b0SSuyog Pawar         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
2689*c83a76b0SSuyog Pawar         {
2690*c83a76b0SSuyog Pawar             ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
2691*c83a76b0SSuyog Pawar 
2692*c83a76b0SSuyog Pawar             /* Copy the init prms to context */
2693*c83a76b0SSuyog Pawar             ps_ctxt->s_init_prms = *ps_prms;
2694*c83a76b0SSuyog Pawar 
2695*c83a76b0SSuyog Pawar             /* Initialize some other variables in ctxt */
2696*c83a76b0SSuyog Pawar             ps_frm_ctxt->i4_prev_poc = INVALID_POC;
2697*c83a76b0SSuyog Pawar 
2698*c83a76b0SSuyog Pawar             ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;
2699*c83a76b0SSuyog Pawar 
2700*c83a76b0SSuyog Pawar             ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;
2701*c83a76b0SSuyog Pawar 
2702*c83a76b0SSuyog Pawar             ps_frm_ctxt->i4_is_prev_frame_reference = 0;
2703*c83a76b0SSuyog Pawar 
2704*c83a76b0SSuyog Pawar             ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
2705*c83a76b0SSuyog Pawar 
2706*c83a76b0SSuyog Pawar             /* Initialize mv grids for L0 and L1 used in final refinement layer */
2707*c83a76b0SSuyog Pawar             {
2708*c83a76b0SSuyog Pawar                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
2709*c83a76b0SSuyog Pawar                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
2710*c83a76b0SSuyog Pawar                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
2711*c83a76b0SSuyog Pawar                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
2712*c83a76b0SSuyog Pawar                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
2713*c83a76b0SSuyog Pawar                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
2714*c83a76b0SSuyog Pawar             }
2715*c83a76b0SSuyog Pawar 
2716*c83a76b0SSuyog Pawar             ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
2717*c83a76b0SSuyog Pawar             ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
2718*c83a76b0SSuyog Pawar         }
2719*c83a76b0SSuyog Pawar     }
2720*c83a76b0SSuyog Pawar 
2721*c83a76b0SSuyog Pawar     /* since same layer desc pointer is stored in all the threads ctxt */
2722*c83a76b0SSuyog Pawar     /* layer init is done only using 0th thread ctxt                   */
2723*c83a76b0SSuyog Pawar     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2724*c83a76b0SSuyog Pawar 
2725*c83a76b0SSuyog Pawar     ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];
2726*c83a76b0SSuyog Pawar 
2727*c83a76b0SSuyog Pawar     /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2728*c83a76b0SSuyog Pawar     for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
2729*c83a76b0SSuyog Pawar     {
2730*c83a76b0SSuyog Pawar         /* only enocde layer is processed */
2731*c83a76b0SSuyog Pawar         for(j = 0; j < 1; j++)
2732*c83a76b0SSuyog Pawar         {
2733*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
2734*c83a76b0SSuyog Pawar             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2735*c83a76b0SSuyog Pawar             ps_layer->i4_poc = INVALID_POC;
2736*c83a76b0SSuyog Pawar             ps_layer->i4_is_free = 1;
2737*c83a76b0SSuyog Pawar             ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
2738*c83a76b0SSuyog Pawar             ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
2739*c83a76b0SSuyog Pawar             ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
2740*c83a76b0SSuyog Pawar             ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
2741*c83a76b0SSuyog Pawar             ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
2742*c83a76b0SSuyog Pawar             ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];
2743*c83a76b0SSuyog Pawar 
2744*c83a76b0SSuyog Pawar             memset(
2745*c83a76b0SSuyog Pawar                 ps_layer->s_global_mv,
2746*c83a76b0SSuyog Pawar                 0,
2747*c83a76b0SSuyog Pawar                 sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
2748*c83a76b0SSuyog Pawar         }
2749*c83a76b0SSuyog Pawar     }
2750*c83a76b0SSuyog Pawar }
2751*c83a76b0SSuyog Pawar 
2752*c83a76b0SSuyog Pawar /**
2753*c83a76b0SSuyog Pawar *******************************************************************************
2754*c83a76b0SSuyog Pawar *  @fn     S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt)
2755*c83a76b0SSuyog Pawar *
2756*c83a76b0SSuyog Pawar *  @brief  Initialises the Encode Layer HME ctxt
2757*c83a76b0SSuyog Pawar *
2758*c83a76b0SSuyog Pawar *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2759*c83a76b0SSuyog Pawar *              up its requirements of memory
2760*c83a76b0SSuyog Pawar *
2761*c83a76b0SSuyog Pawar *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2762*c83a76b0SSuyog Pawar *                       amt of memory
2763*c83a76b0SSuyog Pawar *
2764*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
2765*c83a76b0SSuyog Pawar *******************************************************************************
2766*c83a76b0SSuyog Pawar */
hme_enc_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt,WORD32 i4_num_me_frm_pllel)2767*c83a76b0SSuyog Pawar S32 hme_enc_init(
2768*c83a76b0SSuyog Pawar     void *pv_ctxt,
2769*c83a76b0SSuyog Pawar     hme_memtab_t *ps_memtabs,
2770*c83a76b0SSuyog Pawar     hme_init_prms_t *ps_prms,
2771*c83a76b0SSuyog Pawar     rc_quant_t *ps_rc_quant_ctxt,
2772*c83a76b0SSuyog Pawar     WORD32 i4_num_me_frm_pllel)
2773*c83a76b0SSuyog Pawar {
2774*c83a76b0SSuyog Pawar     S32 num, tot;
2775*c83a76b0SSuyog Pawar     me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt;
2776*c83a76b0SSuyog Pawar 
2777*c83a76b0SSuyog Pawar     tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2778*c83a76b0SSuyog Pawar     /* Validation of init params */
2779*c83a76b0SSuyog Pawar     if(-1 == hme_validate_init_prms(ps_prms))
2780*c83a76b0SSuyog Pawar         return (-1);
2781*c83a76b0SSuyog Pawar 
2782*c83a76b0SSuyog Pawar     num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
2783*c83a76b0SSuyog Pawar     if(num > tot)
2784*c83a76b0SSuyog Pawar         return (-1);
2785*c83a76b0SSuyog Pawar 
2786*c83a76b0SSuyog Pawar     /* Initialize all enumerations based globals */
2787*c83a76b0SSuyog Pawar     //hme_init_globals(); /* done as part of coarse me */
2788*c83a76b0SSuyog Pawar 
2789*c83a76b0SSuyog Pawar     /* Copy the memtabs into the context for returning during free */
2790*c83a76b0SSuyog Pawar     memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2791*c83a76b0SSuyog Pawar 
2792*c83a76b0SSuyog Pawar     /* initialize the context and related buffers */
2793*c83a76b0SSuyog Pawar     hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt);
2794*c83a76b0SSuyog Pawar     return (0);
2795*c83a76b0SSuyog Pawar }
2796*c83a76b0SSuyog Pawar 
2797*c83a76b0SSuyog Pawar /**
2798*c83a76b0SSuyog Pawar *******************************************************************************
2799*c83a76b0SSuyog Pawar *  @fn     S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2800*c83a76b0SSuyog Pawar *
2801*c83a76b0SSuyog Pawar *  @brief  Initialises the Coarse HME ctxt
2802*c83a76b0SSuyog Pawar *
2803*c83a76b0SSuyog Pawar *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2804*c83a76b0SSuyog Pawar *              up its requirements of memory
2805*c83a76b0SSuyog Pawar *
2806*c83a76b0SSuyog Pawar *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2807*c83a76b0SSuyog Pawar *                       amt of memory
2808*c83a76b0SSuyog Pawar *
2809*c83a76b0SSuyog Pawar *  @return   Number of memtabs required
2810*c83a76b0SSuyog Pawar *******************************************************************************
2811*c83a76b0SSuyog Pawar */
hme_coarse_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2812*c83a76b0SSuyog Pawar S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2813*c83a76b0SSuyog Pawar {
2814*c83a76b0SSuyog Pawar     S32 num, tot;
2815*c83a76b0SSuyog Pawar     coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2816*c83a76b0SSuyog Pawar 
2817*c83a76b0SSuyog Pawar     tot = hme_coarse_num_alloc();
2818*c83a76b0SSuyog Pawar     /* Validation of init params */
2819*c83a76b0SSuyog Pawar     if(-1 == hme_validate_init_prms(ps_prms))
2820*c83a76b0SSuyog Pawar         return (-1);
2821*c83a76b0SSuyog Pawar 
2822*c83a76b0SSuyog Pawar     num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
2823*c83a76b0SSuyog Pawar     if(num > tot)
2824*c83a76b0SSuyog Pawar         return (-1);
2825*c83a76b0SSuyog Pawar 
2826*c83a76b0SSuyog Pawar     /* Initialize all enumerations based globals */
2827*c83a76b0SSuyog Pawar     hme_init_globals();
2828*c83a76b0SSuyog Pawar 
2829*c83a76b0SSuyog Pawar     /* Copy the memtabs into the context for returning during free */
2830*c83a76b0SSuyog Pawar     memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2831*c83a76b0SSuyog Pawar 
2832*c83a76b0SSuyog Pawar     /* initialize the context and related buffers */
2833*c83a76b0SSuyog Pawar     hme_coarse_init_ctxt(ps_ctxt, ps_prms);
2834*c83a76b0SSuyog Pawar 
2835*c83a76b0SSuyog Pawar     return (0);
2836*c83a76b0SSuyog Pawar }
2837*c83a76b0SSuyog Pawar 
2838*c83a76b0SSuyog Pawar /**
2839*c83a76b0SSuyog Pawar *******************************************************************************
2840*c83a76b0SSuyog Pawar *  @fn     S32 hme_set_resolution(void *pv_me_ctxt,
2841*c83a76b0SSuyog Pawar *                                   S32 n_enc_layers,
2842*c83a76b0SSuyog Pawar *                                   S32 *p_wd,
2843*c83a76b0SSuyog Pawar *                                   S32 *p_ht
2844*c83a76b0SSuyog Pawar *
2845*c83a76b0SSuyog Pawar *  @brief  Sets up the layers based on resolution information.
2846*c83a76b0SSuyog Pawar *
2847*c83a76b0SSuyog Pawar *  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2848*c83a76b0SSuyog Pawar *
2849*c83a76b0SSuyog Pawar *  @param[in] n_enc_layers : Number of layers encoded
2850*c83a76b0SSuyog Pawar *
2851*c83a76b0SSuyog Pawar *  @param[in] p_wd : Pointer to an array having widths for each encode layer
2852*c83a76b0SSuyog Pawar *
2853*c83a76b0SSuyog Pawar *  @param[in] p_ht : Pointer to an array having heights for each encode layer
2854*c83a76b0SSuyog Pawar *
2855*c83a76b0SSuyog Pawar *  @return   void
2856*c83a76b0SSuyog Pawar *******************************************************************************
2857*c83a76b0SSuyog Pawar */
2858*c83a76b0SSuyog Pawar 
hme_set_resolution(void * pv_me_ctxt,S32 n_enc_layers,S32 * p_wd,S32 * p_ht,S32 me_frm_id)2859*c83a76b0SSuyog Pawar void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
2860*c83a76b0SSuyog Pawar {
2861*c83a76b0SSuyog Pawar     S32 n_tot_layers, num_layers_explicit_search, i, j;
2862*c83a76b0SSuyog Pawar     me_ctxt_t *ps_thrd_ctxt;
2863*c83a76b0SSuyog Pawar     me_frm_ctxt_t *ps_ctxt;
2864*c83a76b0SSuyog Pawar 
2865*c83a76b0SSuyog Pawar     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2866*c83a76b0SSuyog Pawar     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2867*c83a76b0SSuyog Pawar     memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2868*c83a76b0SSuyog Pawar     memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2869*c83a76b0SSuyog Pawar 
2870*c83a76b0SSuyog Pawar     ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
2871*c83a76b0SSuyog Pawar 
2872*c83a76b0SSuyog Pawar     ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
2873*c83a76b0SSuyog Pawar 
2874*c83a76b0SSuyog Pawar     /*************************************************************************/
2875*c83a76b0SSuyog Pawar     /* Derive the number of HME layers, including both encoded and non encode*/
2876*c83a76b0SSuyog Pawar     /* This function also derives the width and ht of each layer.            */
2877*c83a76b0SSuyog Pawar     /*************************************************************************/
2878*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2879*c83a76b0SSuyog Pawar     num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
2880*c83a76b0SSuyog Pawar     if(num_layers_explicit_search <= 0)
2881*c83a76b0SSuyog Pawar         num_layers_explicit_search = n_tot_layers - 1;
2882*c83a76b0SSuyog Pawar 
2883*c83a76b0SSuyog Pawar     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2884*c83a76b0SSuyog Pawar     ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2885*c83a76b0SSuyog Pawar     memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2886*c83a76b0SSuyog Pawar     memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2887*c83a76b0SSuyog Pawar 
2888*c83a76b0SSuyog Pawar     /* only encode layer should be processed */
2889*c83a76b0SSuyog Pawar     ps_ctxt->num_layers = n_tot_layers;
2890*c83a76b0SSuyog Pawar 
2891*c83a76b0SSuyog Pawar     ps_ctxt->i4_wd = a_wd[0];
2892*c83a76b0SSuyog Pawar     ps_ctxt->i4_ht = a_ht[0];
2893*c83a76b0SSuyog Pawar 
2894*c83a76b0SSuyog Pawar     /* Memtabs : Layers * num-ref + 1 */
2895*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ctxt->max_num_ref + 1; i++)
2896*c83a76b0SSuyog Pawar     {
2897*c83a76b0SSuyog Pawar         for(j = 0; j < 1; j++)
2898*c83a76b0SSuyog Pawar         {
2899*c83a76b0SSuyog Pawar             S32 wd, ht;
2900*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
2901*c83a76b0SSuyog Pawar             U08 u1_enc = ps_ctxt->u1_encode[j];
2902*c83a76b0SSuyog Pawar             wd = a_wd[j];
2903*c83a76b0SSuyog Pawar             ht = a_ht[j];
2904*c83a76b0SSuyog Pawar             ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j];
2905*c83a76b0SSuyog Pawar             hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2906*c83a76b0SSuyog Pawar         }
2907*c83a76b0SSuyog Pawar     }
2908*c83a76b0SSuyog Pawar }
2909*c83a76b0SSuyog Pawar 
2910*c83a76b0SSuyog Pawar /**
2911*c83a76b0SSuyog Pawar *******************************************************************************
2912*c83a76b0SSuyog Pawar *  @fn     S32 hme_coarse_set_resolution(void *pv_me_ctxt,
2913*c83a76b0SSuyog Pawar *                                   S32 n_enc_layers,
2914*c83a76b0SSuyog Pawar *                                   S32 *p_wd,
2915*c83a76b0SSuyog Pawar *                                   S32 *p_ht
2916*c83a76b0SSuyog Pawar *
2917*c83a76b0SSuyog Pawar *  @brief  Sets up the layers based on resolution information.
2918*c83a76b0SSuyog Pawar *
2919*c83a76b0SSuyog Pawar *  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2920*c83a76b0SSuyog Pawar *
2921*c83a76b0SSuyog Pawar *  @param[in] n_enc_layers : Number of layers encoded
2922*c83a76b0SSuyog Pawar *
2923*c83a76b0SSuyog Pawar *  @param[in] p_wd : Pointer to an array having widths for each encode layer
2924*c83a76b0SSuyog Pawar *
2925*c83a76b0SSuyog Pawar *  @param[in] p_ht : Pointer to an array having heights for each encode layer
2926*c83a76b0SSuyog Pawar *
2927*c83a76b0SSuyog Pawar *  @return   void
2928*c83a76b0SSuyog Pawar *******************************************************************************
2929*c83a76b0SSuyog Pawar */
2930*c83a76b0SSuyog Pawar 
hme_coarse_set_resolution(void * pv_me_ctxt,S32 n_enc_layers,S32 * p_wd,S32 * p_ht)2931*c83a76b0SSuyog Pawar void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
2932*c83a76b0SSuyog Pawar {
2933*c83a76b0SSuyog Pawar     S32 n_tot_layers, num_layers_explicit_search, i, j;
2934*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt;
2935*c83a76b0SSuyog Pawar     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2936*c83a76b0SSuyog Pawar     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2937*c83a76b0SSuyog Pawar     memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2938*c83a76b0SSuyog Pawar     memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2939*c83a76b0SSuyog Pawar 
2940*c83a76b0SSuyog Pawar     ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
2941*c83a76b0SSuyog Pawar     /*************************************************************************/
2942*c83a76b0SSuyog Pawar     /* Derive the number of HME layers, including both encoded and non encode*/
2943*c83a76b0SSuyog Pawar     /* This function also derives the width and ht of each layer.            */
2944*c83a76b0SSuyog Pawar     /*************************************************************************/
2945*c83a76b0SSuyog Pawar     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2946*c83a76b0SSuyog Pawar     num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search;
2947*c83a76b0SSuyog Pawar     if(num_layers_explicit_search <= 0)
2948*c83a76b0SSuyog Pawar         num_layers_explicit_search = n_tot_layers - 1;
2949*c83a76b0SSuyog Pawar 
2950*c83a76b0SSuyog Pawar     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2951*c83a76b0SSuyog Pawar     ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2952*c83a76b0SSuyog Pawar     memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2953*c83a76b0SSuyog Pawar     memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2954*c83a76b0SSuyog Pawar 
2955*c83a76b0SSuyog Pawar     /* encode layer should be excluded */
2956*c83a76b0SSuyog Pawar     ps_ctxt->num_layers = n_tot_layers;
2957*c83a76b0SSuyog Pawar 
2958*c83a76b0SSuyog Pawar     memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
2959*c83a76b0SSuyog Pawar     memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
2960*c83a76b0SSuyog Pawar 
2961*c83a76b0SSuyog Pawar     /* Memtabs : Layers * num-ref + 1 */
2962*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2963*c83a76b0SSuyog Pawar     {
2964*c83a76b0SSuyog Pawar         for(j = 1; j < n_tot_layers; j++)
2965*c83a76b0SSuyog Pawar         {
2966*c83a76b0SSuyog Pawar             S32 wd, ht;
2967*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
2968*c83a76b0SSuyog Pawar             U08 u1_enc = ps_ctxt->u1_encode[j];
2969*c83a76b0SSuyog Pawar             wd = a_wd[j];
2970*c83a76b0SSuyog Pawar             ht = a_ht[j];
2971*c83a76b0SSuyog Pawar             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2972*c83a76b0SSuyog Pawar             hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2973*c83a76b0SSuyog Pawar         }
2974*c83a76b0SSuyog Pawar     }
2975*c83a76b0SSuyog Pawar }
2976*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*  @fn     S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc,
*                                 S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
*
*  @brief  Searches the layer descriptors for the one whose layer 0 matches
*          both the given poc and the given idr gop number
*
*  @param[in] ps_ctxt : ME ctxt holding the layer descriptors
*
*  @param[in] i4_poc : poc to look for
*
*  @param[in] i4_idr_gop_num : idr gop number to look for
*
*  @param[in] i4_num_me_frm_pllel : multiplied with max_num_ref of frame ctxt 0
*              to get the number of descriptors searched (plus one)
*
*  @return   Index of the matching descriptor; -1 (after ASSERT) if none
*******************************************************************************
*/
S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    S32 num_descr = (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1;
    S32 idx = 0;

    while(idx < num_descr)
    {
        layer_ctxt_t *ps_layer = ps_ctxt->as_ref_descr[idx].aps_layers[0];

        if((ps_layer->i4_poc == i4_poc) && (ps_layer->i4_idr_gop_num == i4_idr_gop_num))
        {
            return idx;
        }
        idx++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
2991*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*  @fn     S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
*
*  @brief  Searches the coarse layer descriptors for the one whose layer 1
*          carries the given poc
*
*  @param[in] ps_ctxt : Coarse ME ctxt holding the layer descriptors
*
*  @param[in] i4_poc : poc to look for
*
*  @return   Index of the matching descriptor; -1 (after ASSERT) if none
*******************************************************************************
*/
S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
{
    S32 num_descr = ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME;
    S32 idx = 0;

    while(idx < num_descr)
    {
        if(i4_poc == ps_ctxt->as_ref_descr[idx].aps_layers[1]->i4_poc)
        {
            return idx;
        }
        idx++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3005*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*  @fn     S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt,
*                                      S32 i4_num_me_frm_pllel)
*
*  @brief  Finds the first layer descriptor whose layer 0 is marked free and
*          claims it by clearing its i4_is_free flag
*
*  @param[in,out] ps_ctxt : ME ctxt holding the layer descriptors
*
*  @param[in] i4_num_me_frm_pllel : multiplied with max_num_ref of frame ctxt 0
*              to get the number of descriptors searched (plus one)
*
*  @return   Index of the claimed descriptor; -1 (after ASSERT) if none is free
*******************************************************************************
*/
S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
{
    S32 num_descr = (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1;
    S32 idx = 0;

    while(idx < num_descr)
    {
        layer_ctxt_t *ps_layer = ps_ctxt->as_ref_descr[idx].aps_layers[0];

        if(1 == ps_layer->i4_is_free)
        {
            /* Claim the descriptor before handing out its index */
            ps_layer->i4_is_free = 0;
            return idx;
        }
        idx++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3022*c83a76b0SSuyog Pawar 
hme_coarse_find_free_descr_idx(void * pv_ctxt)3023*c83a76b0SSuyog Pawar S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
3024*c83a76b0SSuyog Pawar {
3025*c83a76b0SSuyog Pawar     S32 i;
3026*c83a76b0SSuyog Pawar 
3027*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
3028*c83a76b0SSuyog Pawar 
3029*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
3030*c83a76b0SSuyog Pawar     {
3031*c83a76b0SSuyog Pawar         if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1)
3032*c83a76b0SSuyog Pawar             return i;
3033*c83a76b0SSuyog Pawar     }
3034*c83a76b0SSuyog Pawar     /* Should not come here */
3035*c83a76b0SSuyog Pawar     ASSERT(0);
3036*c83a76b0SSuyog Pawar     return (-1);
3037*c83a76b0SSuyog Pawar }
3038*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*  @fn     void hme_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove,
*                               S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
*
*  @brief  Marks the layer descriptors of the listed pocs as free so that they
*          can be used for a fresh picture
*
*  @param[in,out] pv_me_ctxt : ME ctxt (me_ctxt_t *) holding the descriptors
*
*  @param[in] p_pocs_to_remove : List of pocs terminated by INVALID_POC. The
*              code asserts that only the first entry is ever processed.
*
*  @param[in] i4_idr_gop_num : idr gop number used for descriptor lookup
*
*  @param[in] i4_num_me_frm_pllel : forwarded to hme_find_descr_idx()
*
*  @return   void
*******************************************************************************
*/
void hme_discard_frm(
    void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
    S32 count = 0, idx;

    /* Search for the id of the layer descriptor that has this poc */
    while(p_pocs_to_remove[count] != INVALID_POC)
    {
        ASSERT(count == 0);
        idx = hme_find_descr_idx(
            ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);

        /*********************************************************************/
        /* hme_find_descr_idx returns -1 when no descriptor matches. Using   */
        /* that as an array index would access memory before as_ref_descr,   */
        /* so such an entry is skipped.                                      */
        /*********************************************************************/
        if(idx >= 0)
        {
            /*****************************************************************/
            /* Setting i4_is_free = 1 in layer 0 invalidates this layer ctxt */
            /* Now this can be used for a fresh picture.                     */
            /*****************************************************************/
            ps_ctxt->as_ref_descr[idx].aps_layers[0]->i4_is_free = 1;
        }
        count++;
    }
}
3064*c83a76b0SSuyog Pawar 
/**
 * Coarse ME version of frame discard: invalidates the non-encode layers
 * (1 .. num_layers - 1) of the descriptors holding the listed POCs.
 *
 * @param pv_me_ctxt        opaque handle, cast to coarse_me_ctxt_t *
 * @param p_pocs_to_remove  list of POCs, terminated by -1
 */
void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 count;

    for(count = 0; p_pocs_to_remove[count] != -1; count++)
    {
        layers_descr_t *ps_descr;
        S32 idx, i;

        /* Search for the id of the layer descriptor that has this poc */
        idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]);

        /*********************************************************************/
        /* A negative index would address memory in front of as_ref_descr.   */
        /* Skip such an entry rather than writing through an out-of-bounds   */
        /* descriptor when ASSERT is compiled out.                           */
        /*********************************************************************/
        if(idx < 0)
        {
            continue;
        }
        ps_descr = &ps_ctxt->as_ref_descr[idx];

        /*********************************************************************/
        /* Setting poc = -1 in layers 1 and above invalidates this layer     */
        /* ctxt. Now this can be used for a fresh picture.                   */
        /*********************************************************************/
        for(i = 1; i < ps_ctxt->num_layers; i++)
        {
            ps_descr->aps_layers[i]->i4_poc = -1;
        }
    }
}
3087*c83a76b0SSuyog Pawar 
/**
 * Copies the per-layer attributes of a reference picture into a layer
 * descriptor, for layers start_lyr_id .. num_layers - 1.
 *
 * @param ps_layers_desc  descriptor that receives POC, GOP number, recon
 *                        plane pointers, stride, offset and padding
 * @param ps_ref_desc     reference picture description to copy from
 * @param start_lyr_id    first layer index to update
 * @param num_layers      one past the last layer index to update
 * @param ps_curr_desc    descriptor of the current picture; only its recon
 *                        stride and padding are refreshed
 */
void hme_update_layer_desc(
    layers_descr_t *ps_layers_desc,
    hme_ref_desc_t *ps_ref_desc,
    S32 start_lyr_id,
    S32 num_layers,
    layers_descr_t *ps_curr_desc)
{
    S32 i4_lyr = start_lyr_id;

    while(i4_lyr < num_layers)
    {
        layer_ctxt_t *ps_dst_layer = ps_layers_desc->aps_layers[i4_lyr];
        layer_ctxt_t *ps_cur_layer = ps_curr_desc->aps_layers[i4_lyr];

        /* Picture identity */
        ps_dst_layer->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;
        ps_dst_layer->i4_poc = ps_ref_desc->i4_poc;

        /* Recon planes of this reference picture at this layer */
        ps_dst_layer->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxfy;
        ps_dst_layer->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxfy;
        ps_dst_layer->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxhy;
        ps_dst_layer->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxhy;

        /*********************************************************************/
        /* Recon geometry (stride, offset, padding) of the reference pic.    */
        /* It is assumed to be identical across pictures, which is why the   */
        /* current picture gets the stride and padding as well even though   */
        /* its recon pointers are not valid yet.                             */
        /*********************************************************************/
        ps_dst_layer->i4_rec_offset = ps_ref_desc->as_ref_info[i4_lyr].luma_offset;
        ps_dst_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_cur_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;

        ps_dst_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_cur_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;

        ps_dst_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;
        ps_cur_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;

        i4_lyr++;
    }
}
3127*c83a76b0SSuyog Pawar 
/**
 * Registers a new input picture with the encode-layer (layer 0) ME.
 *
 * Steps performed:
 *  - obtains a free layer descriptor index through hme_find_free_descr_idx,
 *    using the context of thread i4_thrd_id;
 *  - for every processing thread, points the frame context at me_frm_id to
 *    that descriptor and records previous / current POC;
 *  - fills the layer-0 context of the descriptor from ps_inp_desc.
 *
 * @param pv_me_ctxt   opaque handle, cast to me_master_ctxt_t *
 * @param ps_inp_desc  description of the input picture
 * @param me_frm_id    index of the ME frame context to set up
 * @param i4_thrd_id   thread whose context is used for the descriptor lookup
 */
void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_ctxt;

    hme_inp_buf_attr_t *ps_attr;
    S32 i4_poc, idx, i, i4_prev_poc;
    S32 num_thrds, prev_me_frm_id;
    S32 i4_idr_gop_num, i4_is_reference;

    /* The descriptor lookup and the layer setup below use the context of */
    /* the thread selected by i4_thrd_id                                  */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];

    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /*************************************************************************/
    /* Picture attributes are the same for every thread. They are read once  */
    /* here, so they hold defined values for the layer setup further down    */
    /* even if the per-thread loop does not execute a single iteration.      */
    /*************************************************************************/
    i4_poc = ps_inp_desc->i4_poc;
    i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
    i4_is_reference = ps_inp_desc->i4_is_reference;

    /* Deriving the previous poc from previous frames context */
    if(me_frm_id == 0)
        prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
    else
        prev_me_frm_id = me_frm_id - 1;

    /* Read before the loop below overwrites i4_curr_poc */
    i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;

    /* Obtain an empty layer descriptor */
    idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
    ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        me_frm_ctxt_t *ps_tmp_frm_ctxt;

        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

        ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
        ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
    }

    /* only encode layer */
    for(i = 0; i < 1; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
        ps_layer_ctxt->i4_is_reference = i4_is_reference;
        ps_layer_ctxt->i4_non_ref_free = 0;

        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.*/
            S32 wd, dst_stride;

            /* Layer 0 is the only layer visited here, so reaching this */
            /* branch trips the ASSERT                                   */
            ASSERT(i != 0);

            wd = ps_ctxt->i4_wd;

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }

    return;
}
3219*c83a76b0SSuyog Pawar 
/**
 * Registers a new input picture with the coarse (non-encode layer) ME.
 *
 * For every processing thread the current descriptor is pointed at entry
 * i4_curr_idx of that thread's as_ref_descr and previous / current POC are
 * updated. After that, layers 1 .. num_layers - 1 of the descriptor are
 * filled in using the 0th thread context.
 *
 * @param pv_me_ctxt   opaque handle, cast to coarse_me_master_ctxt_t *
 * @param ps_inp_desc  description of the input picture
 * @param i4_curr_idx  index of the layer descriptor to use for this picture
 */
void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    hme_inp_buf_attr_t *ps_attr;
    S32 i4_poc, i;
    S32 num_thrds;

    /*************************************************************************/
    /* The POC is the same for every thread. It is read once here, so it     */
    /* holds a defined value for the layer setup below even if the           */
    /* per-thread loop does not execute a single iteration.                  */
    /*************************************************************************/
    i4_poc = ps_inp_desc->i4_poc;

    /* since same layer desc pointer is stored in all thread ctxt */
    /* the descriptor is taken from the 0th thread ctxt pointer   */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc;
        ps_ctxt->i4_curr_poc = i4_poc;
    }

    /* since same layer desc pointer is stored in all thread ctxt */
    /* following processing is done using 0th thread ctxt pointer */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* only non encode layer */
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.           */
            /* decomp of lower layers happens on a separate pass  */
            /* Coarse Me should export the pointers to the caller */
            S32 wd, dst_stride;

            ASSERT(i != 0);

            /* stride is derived from the width stored for layer i - 1 */
            wd = ps_ctxt->a_wd[i - 1];

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }
}
3284*c83a76b0SSuyog Pawar 
/**
 * Returns how many results are kept per partition for a layer / preset.
 *
 * The answer is 1 when the matching RESTRICT_NUM_PARTITION_LEVEL_LxME_
 * RESULTS_TO_1 switch is non-zero and the preset is in that layer's list:
 *   layer 0                      : all six presets handled below
 *   layer 1                      : XS25, HQ, PQ
 *   layer 2 (u1_num_layers > 3)  : XS25, XS, HS, MS
 * In every other case MAX_RESULTS_PER_PART is returned.
 */
static __inline U08 hme_determine_num_results_per_part(
    U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
{
    /* XS25, XS, HS and MS */
    const int is_speed_preset =
        (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
        (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset);
    /* HQ and PQ */
    const int is_quality_preset =
        (ME_HIGH_QUALITY == e_quality_preset) || (ME_PRISTINE_QUALITY == e_quality_preset);
    int restrict_to_one = 0;

    if((0 == u1_layer_id) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
    {
        restrict_to_one = is_speed_preset || is_quality_preset;
    }
    else if((1 == u1_layer_id) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
    {
        restrict_to_one = (ME_XTREME_SPEED_25 == e_quality_preset) || is_quality_preset;
    }
    else if((2 == u1_layer_id) && (u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
    {
        restrict_to_one = is_speed_preset;
    }

    if(restrict_to_one)
    {
        return 1;
    }

    return (U08)MAX_RESULTS_PER_PART;
}
3357*c83a76b0SSuyog Pawar 
/**
 * Fills pu1_num_fpel_search_cands[0 .. NUM_SEARCH_CAND_LOCATIONS - 1] with
 * the maximum number of full-pel search candidates per candidate location.
 * Nothing is written unless u1_layer_id is 0.
 *
 * When RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC is enabled and the
 * preset is XS25, XS, HS or MS, every entry is 1. Otherwise an entry is
 * max(2, max(active L0 refs, active L1 refs) * w), where w is 2 for the
 * COLOCATED location and 1 for the others.
 */
static __inline void hme_max_search_cands_per_search_cand_loc_populator(
    hme_frm_prms_t *ps_frm_prms,
    U08 *pu1_num_fpel_search_cands,
    U08 u1_layer_id,
    ME_QUALITY_PRESETS_T e_quality_preset)
{
    S32 i4_loc;
    S32 i4_single_cand_only = 0;

    if(0 != u1_layer_id)
    {
        return;
    }

#if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
    i4_single_cand_only =
        (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
        (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset);
#endif

    for(i4_loc = 0; i4_loc < NUM_SEARCH_CAND_LOCATIONS; i4_loc++)
    {
        if(i4_single_cand_only)
        {
            pu1_num_fpel_search_cands[i4_loc] = 1;
        }
        else
        {
            S32 i4_max_active_ref =
                MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1);
            S32 i4_loc_weight = (COLOCATED == (SEARCH_CAND_LOCATIONS_T)i4_loc) + 1;

            pu1_num_fpel_search_cands[i4_loc] = MAX(2, i4_max_active_ref * i4_loc_weight);
        }
    }
}
3396*c83a76b0SSuyog Pawar 
3397*c83a76b0SSuyog Pawar static __inline U08
hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3398*c83a76b0SSuyog Pawar     hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3399*c83a76b0SSuyog Pawar {
3400*c83a76b0SSuyog Pawar     U08 u1_num_cands = 2;
3401*c83a76b0SSuyog Pawar 
3402*c83a76b0SSuyog Pawar     if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
3403*c83a76b0SSuyog Pawar     {
3404*c83a76b0SSuyog Pawar         switch(e_quality_preset)
3405*c83a76b0SSuyog Pawar         {
3406*c83a76b0SSuyog Pawar         case ME_XTREME_SPEED_25:
3407*c83a76b0SSuyog Pawar         case ME_XTREME_SPEED:
3408*c83a76b0SSuyog Pawar         case ME_HIGH_SPEED:
3409*c83a76b0SSuyog Pawar         case ME_MEDIUM_SPEED:
3410*c83a76b0SSuyog Pawar         {
3411*c83a76b0SSuyog Pawar             u1_num_cands = 1;
3412*c83a76b0SSuyog Pawar 
3413*c83a76b0SSuyog Pawar             break;
3414*c83a76b0SSuyog Pawar         }
3415*c83a76b0SSuyog Pawar         default:
3416*c83a76b0SSuyog Pawar         {
3417*c83a76b0SSuyog Pawar             u1_num_cands = 2;
3418*c83a76b0SSuyog Pawar 
3419*c83a76b0SSuyog Pawar             break;
3420*c83a76b0SSuyog Pawar         }
3421*c83a76b0SSuyog Pawar         }
3422*c83a76b0SSuyog Pawar     }
3423*c83a76b0SSuyog Pawar 
3424*c83a76b0SSuyog Pawar     return u1_num_cands;
3425*c83a76b0SSuyog Pawar }
3426*c83a76b0SSuyog Pawar 
3427*c83a76b0SSuyog Pawar static __inline U08
hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3428*c83a76b0SSuyog Pawar     hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3429*c83a76b0SSuyog Pawar {
3430*c83a76b0SSuyog Pawar     U08 i;
3431*c83a76b0SSuyog Pawar 
3432*c83a76b0SSuyog Pawar     U08 u1_num_centers = 0;
3433*c83a76b0SSuyog Pawar 
3434*c83a76b0SSuyog Pawar     if(0 == u1_layer_id)
3435*c83a76b0SSuyog Pawar     {
3436*c83a76b0SSuyog Pawar         switch(e_quality_preset)
3437*c83a76b0SSuyog Pawar         {
3438*c83a76b0SSuyog Pawar         case ME_XTREME_SPEED_25:
3439*c83a76b0SSuyog Pawar         {
3440*c83a76b0SSuyog Pawar             for(i = 0; i < TOT_NUM_PARTS; i++)
3441*c83a76b0SSuyog Pawar             {
3442*c83a76b0SSuyog Pawar                 u1_num_centers += gau1_num_best_results_XS25[i];
3443*c83a76b0SSuyog Pawar             }
3444*c83a76b0SSuyog Pawar 
3445*c83a76b0SSuyog Pawar             break;
3446*c83a76b0SSuyog Pawar         }
3447*c83a76b0SSuyog Pawar         case ME_XTREME_SPEED:
3448*c83a76b0SSuyog Pawar         {
3449*c83a76b0SSuyog Pawar             for(i = 0; i < TOT_NUM_PARTS; i++)
3450*c83a76b0SSuyog Pawar             {
3451*c83a76b0SSuyog Pawar                 u1_num_centers += gau1_num_best_results_XS[i];
3452*c83a76b0SSuyog Pawar             }
3453*c83a76b0SSuyog Pawar 
3454*c83a76b0SSuyog Pawar             break;
3455*c83a76b0SSuyog Pawar         }
3456*c83a76b0SSuyog Pawar         case ME_HIGH_SPEED:
3457*c83a76b0SSuyog Pawar         {
3458*c83a76b0SSuyog Pawar             for(i = 0; i < TOT_NUM_PARTS; i++)
3459*c83a76b0SSuyog Pawar             {
3460*c83a76b0SSuyog Pawar                 u1_num_centers += gau1_num_best_results_HS[i];
3461*c83a76b0SSuyog Pawar             }
3462*c83a76b0SSuyog Pawar 
3463*c83a76b0SSuyog Pawar             break;
3464*c83a76b0SSuyog Pawar         }
3465*c83a76b0SSuyog Pawar         case ME_MEDIUM_SPEED:
3466*c83a76b0SSuyog Pawar         {
3467*c83a76b0SSuyog Pawar             for(i = 0; i < TOT_NUM_PARTS; i++)
3468*c83a76b0SSuyog Pawar             {
3469*c83a76b0SSuyog Pawar                 u1_num_centers += gau1_num_best_results_MS[i];
3470*c83a76b0SSuyog Pawar             }
3471*c83a76b0SSuyog Pawar 
3472*c83a76b0SSuyog Pawar             break;
3473*c83a76b0SSuyog Pawar         }
3474*c83a76b0SSuyog Pawar         case ME_HIGH_QUALITY:
3475*c83a76b0SSuyog Pawar         {
3476*c83a76b0SSuyog Pawar             for(i = 0; i < TOT_NUM_PARTS; i++)
3477*c83a76b0SSuyog Pawar             {
3478*c83a76b0SSuyog Pawar                 u1_num_centers += gau1_num_best_results_HQ[i];
3479*c83a76b0SSuyog Pawar             }
3480*c83a76b0SSuyog Pawar 
3481*c83a76b0SSuyog Pawar             break;
3482*c83a76b0SSuyog Pawar         }
3483*c83a76b0SSuyog Pawar         case ME_PRISTINE_QUALITY:
3484*c83a76b0SSuyog Pawar         {
3485*c83a76b0SSuyog Pawar             for(i = 0; i < TOT_NUM_PARTS; i++)
3486*c83a76b0SSuyog Pawar             {
3487*c83a76b0SSuyog Pawar                 u1_num_centers += gau1_num_best_results_PQ[i];
3488*c83a76b0SSuyog Pawar             }
3489*c83a76b0SSuyog Pawar 
3490*c83a76b0SSuyog Pawar             break;
3491*c83a76b0SSuyog Pawar         }
3492*c83a76b0SSuyog Pawar         }
3493*c83a76b0SSuyog Pawar     }
3494*c83a76b0SSuyog Pawar 
3495*c83a76b0SSuyog Pawar     return u1_num_centers;
3496*c83a76b0SSuyog Pawar }
3497*c83a76b0SSuyog Pawar 
/**
 * Returns the maximum number of sub-pel refinement centres for a layer:
 * u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands (truncated to
 * U08) for layer 0, and 0 for every other layer.
 */
static __inline U08 hme_determine_max_num_subpel_refine_centers(
    U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
{
    if(0 != u1_layer_id)
    {
        return 0;
    }

    return (U08)(u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands);
}
3510*c83a76b0SSuyog Pawar 
hme_set_refine_prms(void * pv_refine_prms,U08 u1_encode,S32 num_ref,S32 layer_id,S32 num_layers,S32 num_layers_explicit_search,S32 use_4x4,hme_frm_prms_t * ps_frm_prms,double ** ppd_intra_costs,me_coding_params_t * ps_me_coding_tools)3511*c83a76b0SSuyog Pawar void hme_set_refine_prms(
3512*c83a76b0SSuyog Pawar     void *pv_refine_prms,
3513*c83a76b0SSuyog Pawar     U08 u1_encode,
3514*c83a76b0SSuyog Pawar     S32 num_ref,
3515*c83a76b0SSuyog Pawar     S32 layer_id,
3516*c83a76b0SSuyog Pawar     S32 num_layers,
3517*c83a76b0SSuyog Pawar     S32 num_layers_explicit_search,
3518*c83a76b0SSuyog Pawar     S32 use_4x4,
3519*c83a76b0SSuyog Pawar     hme_frm_prms_t *ps_frm_prms,
3520*c83a76b0SSuyog Pawar     double **ppd_intra_costs,
3521*c83a76b0SSuyog Pawar     me_coding_params_t *ps_me_coding_tools)
3522*c83a76b0SSuyog Pawar {
3523*c83a76b0SSuyog Pawar     refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms;
3524*c83a76b0SSuyog Pawar 
3525*c83a76b0SSuyog Pawar     ps_refine_prms->i4_encode = u1_encode;
3526*c83a76b0SSuyog Pawar     ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled;
3527*c83a76b0SSuyog Pawar     ps_refine_prms->i4_layer_id = layer_id;
3528*c83a76b0SSuyog Pawar     /*************************************************************************/
3529*c83a76b0SSuyog Pawar     /* Refinement layers have two lambdas, one for closed loop, another for  */
3530*c83a76b0SSuyog Pawar     /* open loop. Non encode layers use only open loop lambda.               */
3531*c83a76b0SSuyog Pawar     /*************************************************************************/
3532*c83a76b0SSuyog Pawar     ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
3533*c83a76b0SSuyog Pawar     ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
3534*c83a76b0SSuyog Pawar     ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;
3535*c83a76b0SSuyog Pawar     ps_refine_prms->lambda_inp =
3536*c83a76b0SSuyog Pawar         ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
3537*c83a76b0SSuyog Pawar     ps_refine_prms->lambda_recon =
3538*c83a76b0SSuyog Pawar         ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
3539*c83a76b0SSuyog Pawar 
3540*c83a76b0SSuyog Pawar     if((u1_encode) && (NULL != ppd_intra_costs))
3541*c83a76b0SSuyog Pawar     {
3542*c83a76b0SSuyog Pawar         ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id];
3543*c83a76b0SSuyog Pawar     }
3544*c83a76b0SSuyog Pawar 
3545*c83a76b0SSuyog Pawar     /* Explicit or implicit depends on number of layers having eplicit search */
3546*c83a76b0SSuyog Pawar     if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search))
3547*c83a76b0SSuyog Pawar     {
3548*c83a76b0SSuyog Pawar         ps_refine_prms->explicit_ref = 0;
3549*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref);
3550*c83a76b0SSuyog Pawar     }
3551*c83a76b0SSuyog Pawar     else
3552*c83a76b0SSuyog Pawar     {
3553*c83a76b0SSuyog Pawar         ps_refine_prms->explicit_ref = 1;
3554*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_ref_fpel = num_ref;
3555*c83a76b0SSuyog Pawar     }
3556*c83a76b0SSuyog Pawar 
3557*c83a76b0SSuyog Pawar     ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH;
3558*c83a76b0SSuyog Pawar 
3559*c83a76b0SSuyog Pawar     ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
3560*c83a76b0SSuyog Pawar     ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;
3561*c83a76b0SSuyog Pawar 
3562*c83a76b0SSuyog Pawar     if(u1_encode)
3563*c83a76b0SSuyog Pawar     {
3564*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_mvbank_results = 1;
3565*c83a76b0SSuyog Pawar         ps_refine_prms->i4_use_rec_in_fpel = 1;
3566*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_steps_fpel_refine = 1;
3567*c83a76b0SSuyog Pawar 
3568*c83a76b0SSuyog Pawar         if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
3569*c83a76b0SSuyog Pawar         {
3570*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_fpel_results = 4;
3571*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_32x32_merge_results = 4;
3572*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_64x64_merge_results = 4;
3573*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
3574*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3575*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3576*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3577*c83a76b0SSuyog Pawar             ps_refine_prms->u1_subpel_candt_threshold = 1;
3578*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3579*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
3580*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 0;
3581*c83a76b0SSuyog Pawar         }
3582*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
3583*c83a76b0SSuyog Pawar         {
3584*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_fpel_results = 4;
3585*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_32x32_merge_results = 4;
3586*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_64x64_merge_results = 4;
3587*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
3588*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3589*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3590*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3591*c83a76b0SSuyog Pawar             ps_refine_prms->u1_subpel_candt_threshold = 2;
3592*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3593*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
3594*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 0;
3595*c83a76b0SSuyog Pawar         }
3596*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
3597*c83a76b0SSuyog Pawar         {
3598*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_fpel_results = 1;
3599*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_32x32_merge_results = 2;
3600*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_64x64_merge_results = 2;
3601*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3602*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3603*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3604*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3605*c83a76b0SSuyog Pawar             ps_refine_prms->u1_subpel_candt_threshold = 3;
3606*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3607*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
3608*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 1;
3609*c83a76b0SSuyog Pawar         }
3610*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
3611*c83a76b0SSuyog Pawar         {
3612*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_fpel_results = 1;
3613*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_32x32_merge_results = 2;
3614*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_64x64_merge_results = 2;
3615*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3616*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3617*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3618*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 0;
3619*c83a76b0SSuyog Pawar             ps_refine_prms->u1_subpel_candt_threshold = 0;
3620*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3621*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
3622*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 1;
3623*c83a76b0SSuyog Pawar         }
3624*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
3625*c83a76b0SSuyog Pawar         {
3626*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_fpel_results = 1;
3627*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_32x32_merge_results = 2;
3628*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_64x64_merge_results = 2;
3629*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3630*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 0;
3631*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3632*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_NxN = 0;
3633*c83a76b0SSuyog Pawar             ps_refine_prms->u1_subpel_candt_threshold = 0;
3634*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3635*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
3636*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 1;
3637*c83a76b0SSuyog Pawar         }
3638*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
3639*c83a76b0SSuyog Pawar         {
3640*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_fpel_results = 1;
3641*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_32x32_merge_results = 2;
3642*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_64x64_merge_results = 2;
3643*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3644*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 0;
3645*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3646*c83a76b0SSuyog Pawar             ps_refine_prms->u1_max_subpel_candts_NxN = 0;
3647*c83a76b0SSuyog Pawar             ps_refine_prms->u1_subpel_candt_threshold = 0;
3648*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3649*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
3650*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 1;
3651*c83a76b0SSuyog Pawar         }
3652*c83a76b0SSuyog Pawar     }
3653*c83a76b0SSuyog Pawar     else
3654*c83a76b0SSuyog Pawar     {
3655*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_fpel_results = 2;
3656*c83a76b0SSuyog Pawar         ps_refine_prms->i4_use_rec_in_fpel = 0;
3657*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_steps_fpel_refine = 1;
3658*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_steps_hpel_refine = 0;
3659*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_steps_qpel_refine = 0;
3660*c83a76b0SSuyog Pawar 
3661*c83a76b0SSuyog Pawar         if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
3662*c83a76b0SSuyog Pawar         {
3663*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3664*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3665*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3666*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
3667*c83a76b0SSuyog Pawar         }
3668*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
3669*c83a76b0SSuyog Pawar         {
3670*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3671*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 0;
3672*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3673*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
3674*c83a76b0SSuyog Pawar         }
3675*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
3676*c83a76b0SSuyog Pawar         {
3677*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3678*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 0;
3679*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3680*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
3681*c83a76b0SSuyog Pawar         }
3682*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
3683*c83a76b0SSuyog Pawar         {
3684*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
3685*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3686*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3687*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
3688*c83a76b0SSuyog Pawar         }
3689*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
3690*c83a76b0SSuyog Pawar         {
3691*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
3692*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3693*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3694*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
3695*c83a76b0SSuyog Pawar         }
3696*c83a76b0SSuyog Pawar         else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
3697*c83a76b0SSuyog Pawar         {
3698*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3699*c83a76b0SSuyog Pawar             ps_refine_prms->i4_use_satd_subpel = 1;
3700*c83a76b0SSuyog Pawar             ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3701*c83a76b0SSuyog Pawar             ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
3702*c83a76b0SSuyog Pawar         }
3703*c83a76b0SSuyog Pawar 
3704*c83a76b0SSuyog Pawar         /* Following fields unused in the non-encode layers */
3705*c83a76b0SSuyog Pawar         /* But setting the same to default values           */
3706*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_32x32_merge_results = 4;
3707*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_64x64_merge_results = 4;
3708*c83a76b0SSuyog Pawar 
3709*c83a76b0SSuyog Pawar         if(!ps_frm_prms->bidir_enabled)
3710*c83a76b0SSuyog Pawar         {
3711*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 0;
3712*c83a76b0SSuyog Pawar         }
3713*c83a76b0SSuyog Pawar         else
3714*c83a76b0SSuyog Pawar         {
3715*c83a76b0SSuyog Pawar             ps_refine_prms->limit_active_partitions = 1;
3716*c83a76b0SSuyog Pawar         }
3717*c83a76b0SSuyog Pawar     }
3718*c83a76b0SSuyog Pawar 
3719*c83a76b0SSuyog Pawar     ps_refine_prms->i4_enable_4x4_part =
3720*c83a76b0SSuyog Pawar         hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);
3721*c83a76b0SSuyog Pawar 
3722*c83a76b0SSuyog Pawar     if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
3723*c83a76b0SSuyog Pawar     {
3724*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
3725*c83a76b0SSuyog Pawar             layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
3726*c83a76b0SSuyog Pawar 
3727*c83a76b0SSuyog Pawar         hme_max_search_cands_per_search_cand_loc_populator(
3728*c83a76b0SSuyog Pawar             ps_frm_prms,
3729*c83a76b0SSuyog Pawar             ps_refine_prms->au1_num_fpel_search_cands,
3730*c83a76b0SSuyog Pawar             layer_id,
3731*c83a76b0SSuyog Pawar             ps_me_coding_tools->e_me_quality_presets);
3732*c83a76b0SSuyog Pawar 
3733*c83a76b0SSuyog Pawar         ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands(
3734*c83a76b0SSuyog Pawar             layer_id, ps_me_coding_tools->e_me_quality_presets);
3735*c83a76b0SSuyog Pawar 
3736*c83a76b0SSuyog Pawar         ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers(
3737*c83a76b0SSuyog Pawar             layer_id, ps_me_coding_tools->e_me_quality_presets);
3738*c83a76b0SSuyog Pawar 
3739*c83a76b0SSuyog Pawar         ps_refine_prms->u1_max_num_subpel_refine_centers =
3740*c83a76b0SSuyog Pawar             hme_determine_max_num_subpel_refine_centers(
3741*c83a76b0SSuyog Pawar                 layer_id,
3742*c83a76b0SSuyog Pawar                 ps_refine_prms->u1_max_subpel_candts_2Nx2N,
3743*c83a76b0SSuyog Pawar                 ps_refine_prms->u1_max_subpel_candts_NxN);
3744*c83a76b0SSuyog Pawar     }
3745*c83a76b0SSuyog Pawar     else
3746*c83a76b0SSuyog Pawar     {
3747*c83a76b0SSuyog Pawar         if(0 == layer_id)
3748*c83a76b0SSuyog Pawar         {
3749*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_results_per_part =
3750*c83a76b0SSuyog Pawar                 ps_me_coding_tools->u1_num_results_per_part_in_l0me;
3751*c83a76b0SSuyog Pawar         }
3752*c83a76b0SSuyog Pawar         else if(1 == layer_id)
3753*c83a76b0SSuyog Pawar         {
3754*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_results_per_part =
3755*c83a76b0SSuyog Pawar                 ps_me_coding_tools->u1_num_results_per_part_in_l1me;
3756*c83a76b0SSuyog Pawar         }
3757*c83a76b0SSuyog Pawar         else if((2 == layer_id) && (num_layers > 3))
3758*c83a76b0SSuyog Pawar         {
3759*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_results_per_part =
3760*c83a76b0SSuyog Pawar                 ps_me_coding_tools->u1_num_results_per_part_in_l2me;
3761*c83a76b0SSuyog Pawar         }
3762*c83a76b0SSuyog Pawar         else
3763*c83a76b0SSuyog Pawar         {
3764*c83a76b0SSuyog Pawar             ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
3765*c83a76b0SSuyog Pawar                 layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
3766*c83a76b0SSuyog Pawar         }
3767*c83a76b0SSuyog Pawar 
3768*c83a76b0SSuyog Pawar         memset(
3769*c83a76b0SSuyog Pawar             ps_refine_prms->au1_num_fpel_search_cands,
3770*c83a76b0SSuyog Pawar             ps_me_coding_tools->u1_max_num_coloc_cands,
3771*c83a76b0SSuyog Pawar             sizeof(ps_refine_prms->au1_num_fpel_search_cands));
3772*c83a76b0SSuyog Pawar 
3773*c83a76b0SSuyog Pawar         ps_refine_prms->u1_max_2nx2n_tu_recur_cands =
3774*c83a76b0SSuyog Pawar             ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;
3775*c83a76b0SSuyog Pawar 
3776*c83a76b0SSuyog Pawar         ps_refine_prms->u1_max_num_fpel_refine_centers =
3777*c83a76b0SSuyog Pawar             ps_me_coding_tools->u1_max_num_fpel_refine_centers;
3778*c83a76b0SSuyog Pawar 
3779*c83a76b0SSuyog Pawar         ps_refine_prms->u1_max_num_subpel_refine_centers =
3780*c83a76b0SSuyog Pawar             ps_me_coding_tools->u1_max_num_subpel_refine_centers;
3781*c83a76b0SSuyog Pawar     }
3782*c83a76b0SSuyog Pawar 
3783*c83a76b0SSuyog Pawar     if(layer_id != 0)
3784*c83a76b0SSuyog Pawar     {
3785*c83a76b0SSuyog Pawar         ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part;
3786*c83a76b0SSuyog Pawar     }
3787*c83a76b0SSuyog Pawar 
3788*c83a76b0SSuyog Pawar     /* 4 * lambda */
3789*c83a76b0SSuyog Pawar     ps_refine_prms->sdi_threshold =
3790*c83a76b0SSuyog Pawar         (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
3791*c83a76b0SSuyog Pawar         (ps_frm_prms->lambda_q_shift - 2);
3792*c83a76b0SSuyog Pawar 
3793*c83a76b0SSuyog Pawar     ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
3794*c83a76b0SSuyog Pawar         MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
3795*c83a76b0SSuyog Pawar }
3796*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_set_ctb_boundary_attrs
*
*  @brief  Fills the attributes of one CTB type from the number of 8x8 blks of
*          the CTB that are present horizontally and vertically: the list of
*          valid 16x16 blks, their 8x8 masks and the 32x32 / 64x64 merge flags
*
*  @param[out] ps_attrs : attributes of the CTB type being described
*
*  @param[in] num_8x8_horz : number of 8x8 blks present along the width
*
*  @param[in] num_8x8_vert : number of 8x8 blks present along the height
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
{
    /* 16x16 blks needed to cover the 8x8 extent, partial blks included */
    const S32 num_16x16_horz = (num_8x8_horz + 1) >> 1;
    const S32 num_16x16_vert = (num_8x8_vert + 1) >> 1;

    /* Next free entry in the compacted list of valid 16x16 blks */
    blk_ctb_attrs_t *ps_next_blk = &ps_attrs->as_blk_attrs[0];

    S32 valid_16x16_bits = 0;
    S32 horz_merge_bits, vert_merge_bits;
    S32 enc_idx;

    ps_attrs->u1_num_blks_in_ctb = (U08)(num_16x16_horz * num_16x16_vert);

    /*************************************************************************/
    /* Visit all 16 blks of a full CTB in encode order, which is             */
    /* 0   1   4   5                                                         */
    /* 2   3   6   7                                                         */
    /* 8   9   12  13                                                        */
    /* 10  11  14  15                                                        */
    /* Blks outside the available area are dropped, the rest are stored      */
    /* back to back. E.g. with 2 valid columns and 4 valid rows, the right   */
    /* 2 columns are dropped, so blks 8-11 land in list entries 4-7, bits    */
    /* 0-3 and 8-11 of cu_16x16_valid_flag are set, u1_merge_to_32x32_flag   */
    /* has bits 0 and 2 set, and u1_merge_to_64x64_flag is 0                 */
    /*************************************************************************/
    for(enc_idx = 0; enc_idx < 16; enc_idx++)
    {
        const S32 col = gau1_encode_to_raster_x[enc_idx];
        const S32 row = gau1_encode_to_raster_y[enc_idx];

        if((col < num_16x16_horz) && (row < num_16x16_vert))
        {
            U08 u1_mask = 0xF;

            /* Right half (8x8 blks 1 and 3) falls beyond the right border */
            if(((col + 1) << 1) > num_8x8_horz)
            {
                u1_mask &= 0x5;
            }

            /* Bottom half (8x8 blks 2 and 3) falls beyond the bottom border */
            if(((row + 1) << 1) > num_8x8_vert)
            {
                u1_mask &= 0x3;
            }

            valid_16x16_bits |= (1 << enc_idx);

            ps_next_blk->u1_blk_id_in_full_ctb = enc_idx;
            ps_next_blk->u1_blk_x = col;
            ps_next_blk->u1_blk_y = row;
            ps_next_blk->u1_blk_8x8_mask = u1_mask;
            ps_next_blk++;
        }
    }

    ps_attrs->cu_16x16_valid_flag = valid_16x16_bits;

    /* A 32x32 merge needs 4 8x8 blks in a direction; both 32x32 columns     */
    /* (or rows) need all 8. Bits 0 and 2 form the left column, bits 0 and 1 */
    /* the top row                                                           */
    horz_merge_bits = (num_8x8_horz >= 8) ? 0xF : ((num_8x8_horz >= 4) ? 0x5 : 0x0);
    vert_merge_bits = (num_8x8_vert >= 8) ? 0xF : ((num_8x8_vert >= 4) ? 0x3 : 0x0);

    ps_attrs->u1_merge_to_32x32_flag = (U08)(horz_merge_bits & vert_merge_bits);

    /* 64x64 merge is attempted only when all four 32x32 merges are possible */
    ps_attrs->u1_merge_to_64x64_flag = (0xF == ps_attrs->u1_merge_to_32x32_flag) ? 1 : 0;
}
3872*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_set_ctb_attrs
*
*  @brief  Populates the CTB attributes for every CTB type of a picture: the
*          centre (complete) CTB always, and the right, bottom and
*          bottom-right boundary CTBs when the picture dimension is not a
*          multiple of 64
*
*  @param[out] ps_attrs : array of CTB attributes, indexed by CTB type
*
*  @param[in] wd : picture width in pixels
*
*  @param[in] ht : picture height in pixels
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
{
    /* Pixels of the picture that spill into the last, partial 64x64 CTB */
    const S32 rem_wd = wd & 63;
    const S32 rem_ht = ht & 63;

    /* Partial CTB column on the right edge */
    if(rem_wd != 0)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], rem_wd >> 3, 8);
    }

    /* Partial CTB row on the bottom edge */
    if(rem_ht != 0)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, rem_ht >> 3);
    }

    /* Corner CTB that is partial in both directions */
    if((rem_wd != 0) && (rem_ht != 0))
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], rem_wd >> 3, rem_ht >> 3);
    }

    /* Complete 64x64 CTB: 8 8x8 blks in each direction */
    hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
}
3895*c83a76b0SSuyog Pawar 
3896*c83a76b0SSuyog Pawar /**
3897*c83a76b0SSuyog Pawar ********************************************************************************
3898*c83a76b0SSuyog Pawar *  @fn     hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3899*c83a76b0SSuyog Pawar *
3900*c83a76b0SSuyog Pawar *  @brief  When we have an mv with ref id "poc_to" for which predictor to be
3901*c83a76b0SSuyog Pawar *          computed, and predictor is ref id "poc_from", this funciton returns
3902*c83a76b0SSuyog Pawar *          scale factor in Q8 for such a purpose
3903*c83a76b0SSuyog Pawar *
3904*c83a76b0SSuyog Pawar *  @param[in] curr_poc : input picture poc
3905*c83a76b0SSuyog Pawar *
3906*c83a76b0SSuyog Pawar *  @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
3907*c83a76b0SSuyog Pawar *
3908*c83a76b0SSuyog Pawar *  @param[in] poc_to : POC of hte pic, pointed to by ref id to be scaled to
3909*c83a76b0SSuyog Pawar *
3910*c83a76b0SSuyog Pawar *  @return Scale factor in Q8 format
3911*c83a76b0SSuyog Pawar ********************************************************************************
3912*c83a76b0SSuyog Pawar */
hme_scale_for_ref_idx(S32 curr_poc,S32 poc_from,S32 poc_to)3913*c83a76b0SSuyog Pawar S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3914*c83a76b0SSuyog Pawar {
3915*c83a76b0SSuyog Pawar     S32 td, tx, tb;
3916*c83a76b0SSuyog Pawar     S16 i2_scf;
3917*c83a76b0SSuyog Pawar     /*************************************************************************/
3918*c83a76b0SSuyog Pawar     /* Approximate scale factor: 256 * num / denom                           */
3919*c83a76b0SSuyog Pawar     /* num = curr_poc - poc_to, denom = curr_poc - poc_from                  */
3920*c83a76b0SSuyog Pawar     /* Exact implementation as per standard.                                 */
3921*c83a76b0SSuyog Pawar     /*************************************************************************/
3922*c83a76b0SSuyog Pawar 
3923*c83a76b0SSuyog Pawar     tb = HME_CLIP((curr_poc - poc_to), -128, 127);
3924*c83a76b0SSuyog Pawar     td = HME_CLIP((curr_poc - poc_from), -128, 127);
3925*c83a76b0SSuyog Pawar 
3926*c83a76b0SSuyog Pawar     tx = (16384 + (ABS(td) >> 1)) / td;
3927*c83a76b0SSuyog Pawar     //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127);
3928*c83a76b0SSuyog Pawar     i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
3929*c83a76b0SSuyog Pawar 
3930*c83a76b0SSuyog Pawar     return (i2_scf);
3931*c83a76b0SSuyog Pawar }
3932*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_process_frm_init
*
*  @brief  HME frame level initialisation processing function
*
*  @param[in] pv_me_ctxt : ME ctxt pointer
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ps_frm_prms : Pointer to frame params
*
*  @param[in] i4_me_frm_id : Index of the ME frame ctxt to be initialised
*
*  @param[in] i4_num_me_frm_pllel : Passed on to the ref descriptor search
*
*  called only for encode layer
*
*  @return None
********************************************************************************
*/
hme_process_frm_init(void * pv_me_ctxt,hme_ref_map_t * ps_ref_map,hme_frm_prms_t * ps_frm_prms,WORD32 i4_me_frm_id,WORD32 i4_num_me_frm_pllel)3950*c83a76b0SSuyog Pawar void hme_process_frm_init(
3951*c83a76b0SSuyog Pawar     void *pv_me_ctxt,
3952*c83a76b0SSuyog Pawar     hme_ref_map_t *ps_ref_map,
3953*c83a76b0SSuyog Pawar     hme_frm_prms_t *ps_frm_prms,
3954*c83a76b0SSuyog Pawar     WORD32 i4_me_frm_id,
3955*c83a76b0SSuyog Pawar     WORD32 i4_num_me_frm_pllel)
3956*c83a76b0SSuyog Pawar {
3957*c83a76b0SSuyog Pawar     me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
3958*c83a76b0SSuyog Pawar     me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
3959*c83a76b0SSuyog Pawar 
3960*c83a76b0SSuyog Pawar     S32 i, j, desc_idx;
3961*c83a76b0SSuyog Pawar     S16 i2_max_x = 0, i2_max_y = 0;
3962*c83a76b0SSuyog Pawar 
3963*c83a76b0SSuyog Pawar     /* Set the Qp of current frm passed by caller. Required for intra cost */
3964*c83a76b0SSuyog Pawar     ps_ctxt->frm_qstep = ps_frm_prms->qstep;
3965*c83a76b0SSuyog Pawar     ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;
3966*c83a76b0SSuyog Pawar 
3967*c83a76b0SSuyog Pawar     /* Bidir enabled or not */
3968*c83a76b0SSuyog Pawar     ps_ctxt->s_frm_prms = *ps_frm_prms;
3969*c83a76b0SSuyog Pawar 
3970*c83a76b0SSuyog Pawar     /*************************************************************************/
3971*c83a76b0SSuyog Pawar     /* Set up the ref pic parameters across all layers. For this, we do the  */
3972*c83a76b0SSuyog Pawar     /* following: the application has given us a ref pic list, we go index   */
3973*c83a76b0SSuyog Pawar     /* by index and pick up the picture. A picture can be uniquely be mapped */
3974*c83a76b0SSuyog Pawar     /* to a POC. So we search all layer descriptor array to find the POC     */
3975*c83a76b0SSuyog Pawar     /* Once found, we update all attributes in this descriptor.              */
3976*c83a76b0SSuyog Pawar     /* During this updation process we also create an index of descriptor id */
3977*c83a76b0SSuyog Pawar     /* to ref id mapping. It is important to find the same POC in the layers */
3978*c83a76b0SSuyog Pawar     /* descr strcture since it holds the pyramid inputs for non encode layers*/
3979*c83a76b0SSuyog Pawar     /* Apart from this, e also update array containing the index of the descr*/
3980*c83a76b0SSuyog Pawar     /* During processing for ease of access, each layer has a pointer to aray*/
3981*c83a76b0SSuyog Pawar     /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
3982*c83a76b0SSuyog Pawar     /* we update this too.                                                   */
3983*c83a76b0SSuyog Pawar     /*************************************************************************/
3984*c83a76b0SSuyog Pawar     ps_ctxt->num_ref_past = 0;
3985*c83a76b0SSuyog Pawar     ps_ctxt->num_ref_future = 0;
3986*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
3987*c83a76b0SSuyog Pawar     {
3988*c83a76b0SSuyog Pawar         S32 ref_id_lc, idx;
3989*c83a76b0SSuyog Pawar         hme_ref_desc_t *ps_ref_desc;
3990*c83a76b0SSuyog Pawar 
3991*c83a76b0SSuyog Pawar         ps_ref_desc = &ps_ref_map->as_ref_desc[i];
3992*c83a76b0SSuyog Pawar         ref_id_lc = ps_ref_desc->i1_ref_id_lc;
3993*c83a76b0SSuyog Pawar         /* Obtain the id of descriptor that contains this POC */
3994*c83a76b0SSuyog Pawar         idx = hme_find_descr_idx(
3995*c83a76b0SSuyog Pawar             ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);
3996*c83a76b0SSuyog Pawar 
3997*c83a76b0SSuyog Pawar         /* Update all layers in this descr with the reference attributes */
3998*c83a76b0SSuyog Pawar         hme_update_layer_desc(
3999*c83a76b0SSuyog Pawar             &ps_thrd_ctxt->as_ref_descr[idx],
4000*c83a76b0SSuyog Pawar             ps_ref_desc,
4001*c83a76b0SSuyog Pawar             0,
4002*c83a76b0SSuyog Pawar             1,  //ps_ctxt->num_layers,
4003*c83a76b0SSuyog Pawar             ps_ctxt->ps_curr_descr);
4004*c83a76b0SSuyog Pawar 
4005*c83a76b0SSuyog Pawar         /* Update the pointer holder for the recon planes */
4006*c83a76b0SSuyog Pawar         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
4007*c83a76b0SSuyog Pawar         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
4008*c83a76b0SSuyog Pawar             &ps_ctxt->apu1_list_rec_fxfy[0][0];
4009*c83a76b0SSuyog Pawar         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
4010*c83a76b0SSuyog Pawar             &ps_ctxt->apu1_list_rec_hxfy[0][0];
4011*c83a76b0SSuyog Pawar         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
4012*c83a76b0SSuyog Pawar             &ps_ctxt->apu1_list_rec_fxhy[0][0];
4013*c83a76b0SSuyog Pawar         ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
4014*c83a76b0SSuyog Pawar             &ps_ctxt->apu1_list_rec_hxhy[0][0];
4015*c83a76b0SSuyog Pawar         ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
4016*c83a76b0SSuyog Pawar             &ps_ctxt->apv_list_dep_mngr[0][0];
4017*c83a76b0SSuyog Pawar 
4018*c83a76b0SSuyog Pawar         /* Update the array having ref id lc to descr id mapping */
4019*c83a76b0SSuyog Pawar         ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
4020*c83a76b0SSuyog Pawar 
4021*c83a76b0SSuyog Pawar         /* From ref id lc we need to work out the POC, So update this array */
4022*c83a76b0SSuyog Pawar         ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
4023*c83a76b0SSuyog Pawar 
4024*c83a76b0SSuyog Pawar         /* When computing costs in L0 and L1 directions, we need the */
4025*c83a76b0SSuyog Pawar         /* respective ref id L0 and L1, so update this mapping */
4026*c83a76b0SSuyog Pawar         ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
4027*c83a76b0SSuyog Pawar         ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
4028*c83a76b0SSuyog Pawar         if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
4029*c83a76b0SSuyog Pawar         {
4030*c83a76b0SSuyog Pawar             ps_ctxt->au1_is_past[ref_id_lc] = 1;
4031*c83a76b0SSuyog Pawar             ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
4032*c83a76b0SSuyog Pawar             ps_ctxt->num_ref_past++;
4033*c83a76b0SSuyog Pawar         }
4034*c83a76b0SSuyog Pawar         else
4035*c83a76b0SSuyog Pawar         {
4036*c83a76b0SSuyog Pawar             ps_ctxt->au1_is_past[ref_id_lc] = 0;
4037*c83a76b0SSuyog Pawar             ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
4038*c83a76b0SSuyog Pawar             ps_ctxt->num_ref_future++;
4039*c83a76b0SSuyog Pawar         }
4040*c83a76b0SSuyog Pawar 
4041*c83a76b0SSuyog Pawar         if(1 == ps_ctxt->i4_wt_pred_enable_flag)
4042*c83a76b0SSuyog Pawar         {
4043*c83a76b0SSuyog Pawar             /* copy the weight and offsets from current ref desc */
4044*c83a76b0SSuyog Pawar             ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
4045*c83a76b0SSuyog Pawar 
4046*c83a76b0SSuyog Pawar             /* inv weight is stored in Q15 format */
4047*c83a76b0SSuyog Pawar             ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4048*c83a76b0SSuyog Pawar                 ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
4049*c83a76b0SSuyog Pawar             ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
4050*c83a76b0SSuyog Pawar         }
4051*c83a76b0SSuyog Pawar         else
4052*c83a76b0SSuyog Pawar         {
4053*c83a76b0SSuyog Pawar             /* store default wt and offset*/
4054*c83a76b0SSuyog Pawar             ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
4055*c83a76b0SSuyog Pawar 
4056*c83a76b0SSuyog Pawar             /* inv weight is stored in Q15 format */
4057*c83a76b0SSuyog Pawar             ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4058*c83a76b0SSuyog Pawar                 ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
4059*c83a76b0SSuyog Pawar 
4060*c83a76b0SSuyog Pawar             ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
4061*c83a76b0SSuyog Pawar         }
4062*c83a76b0SSuyog Pawar     }
4063*c83a76b0SSuyog Pawar 
4064*c83a76b0SSuyog Pawar     ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
4065*c83a76b0SSuyog Pawar     ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
4066*c83a76b0SSuyog Pawar 
4067*c83a76b0SSuyog Pawar     /*************************************************************************/
4068*c83a76b0SSuyog Pawar     /* Preparation of the TLU for bits for reference indices.                */
4069*c83a76b0SSuyog Pawar     /* Special case is that of numref = 2. (TEV)                             */
4070*c83a76b0SSuyog Pawar     /* Other cases uses UEV                                                  */
4071*c83a76b0SSuyog Pawar     /*************************************************************************/
4072*c83a76b0SSuyog Pawar     for(i = 0; i < MAX_NUM_REF; i++)
4073*c83a76b0SSuyog Pawar     {
4074*c83a76b0SSuyog Pawar         ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
4075*c83a76b0SSuyog Pawar         ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
4076*c83a76b0SSuyog Pawar     }
4077*c83a76b0SSuyog Pawar 
4078*c83a76b0SSuyog Pawar     if(ps_ref_map->i4_num_ref == 2)
4079*c83a76b0SSuyog Pawar     {
4080*c83a76b0SSuyog Pawar         ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
4081*c83a76b0SSuyog Pawar         ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
4082*c83a76b0SSuyog Pawar         ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
4083*c83a76b0SSuyog Pawar         ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
4084*c83a76b0SSuyog Pawar     }
4085*c83a76b0SSuyog Pawar     else if(ps_ref_map->i4_num_ref > 2)
4086*c83a76b0SSuyog Pawar     {
4087*c83a76b0SSuyog Pawar         for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4088*c83a76b0SSuyog Pawar         {
4089*c83a76b0SSuyog Pawar             S32 l0, l1;
4090*c83a76b0SSuyog Pawar             l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
4091*c83a76b0SSuyog Pawar             l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
4092*c83a76b0SSuyog Pawar             ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
4093*c83a76b0SSuyog Pawar             ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
4094*c83a76b0SSuyog Pawar         }
4095*c83a76b0SSuyog Pawar     }
4096*c83a76b0SSuyog Pawar 
4097*c83a76b0SSuyog Pawar     /*************************************************************************/
4098*c83a76b0SSuyog Pawar     /* Preparation of the scaling factors for reference indices. The scale   */
4099*c83a76b0SSuyog Pawar     /* factor depends on distance of the two ref indices from current input  */
4100*c83a76b0SSuyog Pawar     /* in terms of poc delta.                                                */
4101*c83a76b0SSuyog Pawar     /*************************************************************************/
4102*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4103*c83a76b0SSuyog Pawar     {
4104*c83a76b0SSuyog Pawar         for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4105*c83a76b0SSuyog Pawar         {
4106*c83a76b0SSuyog Pawar             S16 i2_scf_q8;
4107*c83a76b0SSuyog Pawar             S32 poc_from, poc_to;
4108*c83a76b0SSuyog Pawar 
4109*c83a76b0SSuyog Pawar             poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
4110*c83a76b0SSuyog Pawar             poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
4111*c83a76b0SSuyog Pawar 
4112*c83a76b0SSuyog Pawar             i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
4113*c83a76b0SSuyog Pawar             ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
4114*c83a76b0SSuyog Pawar         }
4115*c83a76b0SSuyog Pawar     }
4116*c83a76b0SSuyog Pawar 
4117*c83a76b0SSuyog Pawar     /*************************************************************************/
4118*c83a76b0SSuyog Pawar     /* We store simplified look ups for 4 hpel planes and inp y plane for    */
4119*c83a76b0SSuyog Pawar     /* every layer and for every ref id in the layer. So update these lookups*/
4120*c83a76b0SSuyog Pawar     /*************************************************************************/
4121*c83a76b0SSuyog Pawar     for(i = 0; i < 1; i++)
4122*c83a76b0SSuyog Pawar     {
4123*c83a76b0SSuyog Pawar         U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
4124*c83a76b0SSuyog Pawar         U08 **ppu1_inp;
4125*c83a76b0SSuyog Pawar         void **ppvlist_dep_mngr;
4126*c83a76b0SSuyog Pawar         layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4127*c83a76b0SSuyog Pawar 
4128*c83a76b0SSuyog Pawar         ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
4129*c83a76b0SSuyog Pawar         ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
4130*c83a76b0SSuyog Pawar         ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
4131*c83a76b0SSuyog Pawar         ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
4132*c83a76b0SSuyog Pawar         ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
4133*c83a76b0SSuyog Pawar         ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
4134*c83a76b0SSuyog Pawar         for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4135*c83a76b0SSuyog Pawar         {
4136*c83a76b0SSuyog Pawar             hme_ref_desc_t *ps_ref_desc;
4137*c83a76b0SSuyog Pawar             hme_ref_buf_info_t *ps_buf_info;
4138*c83a76b0SSuyog Pawar             layer_ctxt_t *ps_layer;
4139*c83a76b0SSuyog Pawar             S32 ref_id_lc;
4140*c83a76b0SSuyog Pawar 
4141*c83a76b0SSuyog Pawar             ps_ref_desc = &ps_ref_map->as_ref_desc[j];
4142*c83a76b0SSuyog Pawar             ps_buf_info = &ps_ref_desc->as_ref_info[i];
4143*c83a76b0SSuyog Pawar             ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4144*c83a76b0SSuyog Pawar 
4145*c83a76b0SSuyog Pawar             desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
4146*c83a76b0SSuyog Pawar             ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i];
4147*c83a76b0SSuyog Pawar 
4148*c83a76b0SSuyog Pawar             ppu1_inp[j] = ps_buf_info->pu1_ref_src;
4149*c83a76b0SSuyog Pawar             ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
4150*c83a76b0SSuyog Pawar             ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
4151*c83a76b0SSuyog Pawar             ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
4152*c83a76b0SSuyog Pawar             ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
4153*c83a76b0SSuyog Pawar             ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;
4154*c83a76b0SSuyog Pawar 
4155*c83a76b0SSuyog Pawar             /* Update the curr descriptors reference pointers here */
4156*c83a76b0SSuyog Pawar             ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
4157*c83a76b0SSuyog Pawar             ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
4158*c83a76b0SSuyog Pawar             ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
4159*c83a76b0SSuyog Pawar             ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
4160*c83a76b0SSuyog Pawar             ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
4161*c83a76b0SSuyog Pawar         }
4162*c83a76b0SSuyog Pawar     }
4163*c83a76b0SSuyog Pawar     /*************************************************************************/
4164*c83a76b0SSuyog Pawar     /* The mv range for each layer is computed. For dyadic layers it will    */
4165*c83a76b0SSuyog Pawar     /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
4166*c83a76b0SSuyog Pawar     /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
4167*c83a76b0SSuyog Pawar     /*************************************************************************/
4168*c83a76b0SSuyog Pawar     for(i = 0; i < 1; i++)
4169*c83a76b0SSuyog Pawar     {
4170*c83a76b0SSuyog Pawar         layer_ctxt_t *ps_layer_ctxt;
4171*c83a76b0SSuyog Pawar         if(i == 0)
4172*c83a76b0SSuyog Pawar         {
4173*c83a76b0SSuyog Pawar             i2_max_x = ps_frm_prms->i2_mv_range_x;
4174*c83a76b0SSuyog Pawar             i2_max_y = ps_frm_prms->i2_mv_range_y;
4175*c83a76b0SSuyog Pawar         }
4176*c83a76b0SSuyog Pawar         else
4177*c83a76b0SSuyog Pawar         {
4178*c83a76b0SSuyog Pawar             i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
4179*c83a76b0SSuyog Pawar             i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
4180*c83a76b0SSuyog Pawar         }
4181*c83a76b0SSuyog Pawar         ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4182*c83a76b0SSuyog Pawar         ps_layer_ctxt->i2_max_mv_x = i2_max_x;
4183*c83a76b0SSuyog Pawar         ps_layer_ctxt->i2_max_mv_y = i2_max_y;
4184*c83a76b0SSuyog Pawar 
4185*c83a76b0SSuyog Pawar         /*********************************************************************/
4186*c83a76b0SSuyog Pawar         /* Every layer maintains a reference id lc to POC mapping. This is   */
4187*c83a76b0SSuyog Pawar         /* because the mapping is unique for every frm. Also, in next frm,   */
4188*c83a76b0SSuyog Pawar         /* we require colocated mvs which means scaling according to temporal*/
4189*c83a76b0SSuyog Pawar         /*distance. Hence this mapping needs to be maintained in every       */
4190*c83a76b0SSuyog Pawar         /* layer ctxt                                                        */
4191*c83a76b0SSuyog Pawar         /*********************************************************************/
4192*c83a76b0SSuyog Pawar         memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
4193*c83a76b0SSuyog Pawar         if(ps_ref_map->i4_num_ref)
4194*c83a76b0SSuyog Pawar         {
4195*c83a76b0SSuyog Pawar             memcpy(
4196*c83a76b0SSuyog Pawar                 ps_layer_ctxt->ai4_ref_id_to_poc_lc,
4197*c83a76b0SSuyog Pawar                 ps_ctxt->ai4_ref_idx_to_poc_lc,
4198*c83a76b0SSuyog Pawar                 ps_ref_map->i4_num_ref * sizeof(S32));
4199*c83a76b0SSuyog Pawar         }
4200*c83a76b0SSuyog Pawar     }
4201*c83a76b0SSuyog Pawar 
4202*c83a76b0SSuyog Pawar     return;
4203*c83a76b0SSuyog Pawar }
4204*c83a76b0SSuyog Pawar 
4205*c83a76b0SSuyog Pawar /**
4206*c83a76b0SSuyog Pawar ********************************************************************************
4207*c83a76b0SSuyog Pawar *  @fn     hme_coarse_process_frm_init
4208*c83a76b0SSuyog Pawar *
4209*c83a76b0SSuyog Pawar *  @brief  HME frame level initialisation processing function (coarse layers)
4210*c83a76b0SSuyog Pawar *
4211*c83a76b0SSuyog Pawar *  @param[in] pv_me_ctxt : ME ctxt pointer
4212*c83a76b0SSuyog Pawar *
4213*c83a76b0SSuyog Pawar *  @param[in] ps_ref_map : Reference map prms pointer
4214*c83a76b0SSuyog Pawar *
4215*c83a76b0SSuyog Pawar *  @param[in] ps_frm_prms :Pointer to frame params
4216*c83a76b0SSuyog Pawar *
4217*c83a76b0SSuyog Pawar *  @return None
4218*c83a76b0SSuyog Pawar ********************************************************************************
4219*c83a76b0SSuyog Pawar */
void hme_coarse_process_frm_init(
    void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
{
    /*************************************************************************/
    /* Frame level init of the coarse ME ctxt. Nothing is returned; every    */
    /* result is written into the ctxt passed as pv_me_ctxt (or into the     */
    /* layer ctxts reachable from it):                                       */
    /*   - qstep and a copy of the frame params                              */
    /*   - per ref id (lc) look ups: descr id, POC, display num, L0/L1 ids,  */
    /*     past/future classification, weighted pred params                  */
    /*   - ref idx bit cost table and ref to ref scale factors               */
    /*   - for layers 1 .. num_layers - 1: input plane pointers of every     */
    /*     ref, max mv range and ref id to POC / display num mapping         */
    /* Layer 0 is not touched by the per layer loops of this function.       */
    /*************************************************************************/
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 i, j;
    S16 i2_max_x, i2_max_y;

    /* Set the Qp of current frm passed by caller. Required for intra cost */
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;

    /* Keep a private copy of all the frame level params given by caller */
    ps_ctxt->s_frm_prms = *ps_frm_prms;

    /*************************************************************************/
    /* Set up the ref pic parameters across all layers. For this, we do the  */
    /* following: the application has given us a ref pic list, we go index   */
    /* by index and pick up the picture. A picture can be uniquely mapped    */
    /* to a POC. So we search the layer descriptor array to find the POC.    */
    /* Once found, we update all attributes in this descriptor.              */
    /* During this update we also create the ref id to descriptor id         */
    /* mapping. It is important to find the same POC in the layers descr     */
    /* structure since it holds the pyramid inputs for non encode layers.    */
    /*************************************************************************/
    ps_ctxt->num_ref_past = 0;
    ps_ctxt->num_ref_future = 0;
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        hme_ref_desc_t *ps_ref_desc = &ps_ref_map->as_ref_desc[i];
        S32 ref_id_lc = ps_ref_desc->i1_ref_id_lc;
        S32 idx;

        /* Obtain the id of descriptor that contains this POC */
        idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);

        /* Update layers 1 .. num_layers - 1 of this descr with the ref attributes */
        hme_update_layer_desc(
            &ps_ctxt->as_ref_descr[idx],
            ps_ref_desc,
            1,
            ps_ctxt->num_layers - 1,
            ps_ctxt->ps_curr_descr);

        /* ref id lc -> descr id, POC and display num look ups */
        ps_ctxt->a_ref_to_descr_id[ref_id_lc] = idx;
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
        ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;

        /* When computing costs in L0 and L1 directions, we need the */
        /* respective ref id L0 and L1, so update this mapping */
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;

        /* A ref is "past" if its POC is below the current POC; when the    */
        /* current POC is 0 every ref is put in the past list               */
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
            ps_ctxt->num_ref_past++;
        }
        else
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
            ps_ctxt->num_ref_future++;
        }

        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
        {
            /* copy the weight and offsets from current ref desc */
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;

            /* inv weight is stored in Q15 format (rounded division) */
            /* NOTE(review): a zero i2_weight divides by zero here; confirm */
            /* the caller never supplies a zero weight                      */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
        }
        else
        {
            /* store default wt and offset*/
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;

            /* inv weight is stored in Q15 format */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
        }
    }

    /* Terminate both lists with -1 */
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;

    /*************************************************************************/
    /* Preparation of the TLU for bits for reference indices.                */
    /* Special case is that of numref = 2. (TEV)                             */
    /* Other cases uses UEV                                                  */
    /*************************************************************************/
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
    }

    if(ps_ref_map->i4_num_ref == 2)
    {
        /* Both indices cost one bit in either direction */
        for(i = 0; i < 2; i++)
        {
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 1;
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 1;
        }
    }
    else if(ps_ref_map->i4_num_ref > 2)
    {
        /* NOTE(review): indexed by position i, not by i1_ref_id_lc; this  */
        /* presumably relies on ref ids lc being 0 .. num_ref - 1 - confirm */
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
        {
            S32 l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
            S32 l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];

            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
        }
    }

    /*************************************************************************/
    /* Preparation of the scaling factors for reference indices. The scale   */
    /* factor depends on distance of the two ref indices from current input  */
    /* in terms of poc delta. Entry [j + i * MAX_NUM_REF] holds the factor   */
    /* from ref j to ref i.                                                  */
    /*************************************************************************/
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        S32 poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];

        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            S32 poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];

            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] =
                hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
        }
    }

    /*************************************************************************/
    /* We store simplified look ups for inp y plane for                      */
    /* every layer (other than layer 0) and for every ref in the layer.      */
    /*************************************************************************/
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        U08 **ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];

        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            S32 ref_id_lc = ps_ref_map->as_ref_desc[j].i1_ref_id_lc;
            S32 desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
            layer_ctxt_t *ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];

            ppu1_inp[j] = ps_layer->pu1_inp;
        }
    }

    /*************************************************************************/
    /* The mv range for each layer is computed. For dyadic layers it will    */
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
    /*************************************************************************/

    /* set to layer 0 search range params */
    i2_max_x = ps_frm_prms->i2_mv_range_x;
    i2_max_y = ps_frm_prms->i2_mv_range_y;

    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];

        /* Range of layer i is derived from the range of layer i - 1 */
        i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
        i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));

        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;

        /*********************************************************************/
        /* Every layer maintains a reference id lc to POC mapping. This is   */
        /* because the mapping is unique for every frm. Also, in next frm,   */
        /* we require colocated mvs which means scaling according to temporal*/
        /* distance. Hence this mapping needs to be maintained in every      */
        /* layer ctxt. Unused POC entries are left as -1 (all bytes 0xFF).   */
        /*********************************************************************/
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
        if(ps_ref_map->i4_num_ref)
        {
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
                ps_ctxt->ai4_ref_idx_to_poc_lc,
                ps_ref_map->i4_num_ref * sizeof(S32));
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_disp_num,
                ps_ctxt->ai4_ref_idx_to_disp_num,
                ps_ref_map->i4_num_ref * sizeof(S32));
        }
    }
}
4440*c83a76b0SSuyog Pawar 
4441*c83a76b0SSuyog Pawar /**
4442*c83a76b0SSuyog Pawar ********************************************************************************
4443*c83a76b0SSuyog Pawar *  @fn     hme_process_frm
4444*c83a76b0SSuyog Pawar *
4445*c83a76b0SSuyog Pawar *  @brief  HME frame level processing function
4446*c83a76b0SSuyog Pawar *
4447*c83a76b0SSuyog Pawar *  @param[in] pv_me_ctxt : ME ctxt pointer
4448*c83a76b0SSuyog Pawar *
4449*c83a76b0SSuyog Pawar *  @param[in] ps_ref_map : Reference map prms pointer
4450*c83a76b0SSuyog Pawar *
4451*c83a76b0SSuyog Pawar *  @param[in] ppd_intra_costs : pointer to array of intra cost cost buffers for each layer
4452*c83a76b0SSuyog Pawar *
4453*c83a76b0SSuyog Pawar *  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4454*c83a76b0SSuyog Pawar *
4455*c83a76b0SSuyog Pawar *  @param[in] pf_ext_update_fxn : function pointer to update CTb results
4456*c83a76b0SSuyog Pawar *
4457*c83a76b0SSuyog Pawar *  @param[in] pf_get_intra_cu_and_cost :function pointer to get intra cu size and cost
4458*c83a76b0SSuyog Pawar *
4459*c83a76b0SSuyog Pawar *  @param[in] pv_multi_thrd_ctxt : pointer to the multi thread ctxt (multi_thrd_ctxt_t)
4460*c83a76b0SSuyog Pawar *
4461*c83a76b0SSuyog Pawar *  @return None
4462*c83a76b0SSuyog Pawar ********************************************************************************
4463*c83a76b0SSuyog Pawar */
4464*c83a76b0SSuyog Pawar 
hme_process_frm(void * pv_me_ctxt,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input,hme_ref_map_t * ps_ref_map,double ** ppd_intra_costs,hme_frm_prms_t * ps_frm_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,void * pv_coarse_layer,void * pv_multi_thrd_ctxt,S32 i4_frame_parallelism_level,S32 thrd_id,S32 i4_me_frm_id)4465*c83a76b0SSuyog Pawar void hme_process_frm(
4466*c83a76b0SSuyog Pawar     void *pv_me_ctxt,
4467*c83a76b0SSuyog Pawar     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
4468*c83a76b0SSuyog Pawar     hme_ref_map_t *ps_ref_map,
4469*c83a76b0SSuyog Pawar     double **ppd_intra_costs,
4470*c83a76b0SSuyog Pawar     hme_frm_prms_t *ps_frm_prms,
4471*c83a76b0SSuyog Pawar     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
4472*c83a76b0SSuyog Pawar     void *pv_coarse_layer,
4473*c83a76b0SSuyog Pawar     void *pv_multi_thrd_ctxt,
4474*c83a76b0SSuyog Pawar     S32 i4_frame_parallelism_level,
4475*c83a76b0SSuyog Pawar     S32 thrd_id,
4476*c83a76b0SSuyog Pawar     S32 i4_me_frm_id)
4477*c83a76b0SSuyog Pawar {
4478*c83a76b0SSuyog Pawar     refine_prms_t s_refine_prms;
4479*c83a76b0SSuyog Pawar     me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
4480*c83a76b0SSuyog Pawar     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
4481*c83a76b0SSuyog Pawar 
4482*c83a76b0SSuyog Pawar     S32 lyr_job_type;
4483*c83a76b0SSuyog Pawar     multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4484*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
4485*c83a76b0SSuyog Pawar 
4486*c83a76b0SSuyog Pawar     ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4487*c83a76b0SSuyog Pawar 
4488*c83a76b0SSuyog Pawar     lyr_job_type = ME_JOB_ENC_LYR;
4489*c83a76b0SSuyog Pawar     /*************************************************************************/
4490*c83a76b0SSuyog Pawar     /* Final L0 layer ME call                                                */
4491*c83a76b0SSuyog Pawar     /*************************************************************************/
4492*c83a76b0SSuyog Pawar     {
4493*c83a76b0SSuyog Pawar         /* Set the CTB attributes dependin on corner/rt edge/bot edge/center*/
4494*c83a76b0SSuyog Pawar         hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht);
4495*c83a76b0SSuyog Pawar 
4496*c83a76b0SSuyog Pawar         hme_set_refine_prms(
4497*c83a76b0SSuyog Pawar             &s_refine_prms,
4498*c83a76b0SSuyog Pawar             ps_ctxt->u1_encode[0],
4499*c83a76b0SSuyog Pawar             ps_ref_map->i4_num_ref,
4500*c83a76b0SSuyog Pawar             0,
4501*c83a76b0SSuyog Pawar             ps_ctxt->num_layers,
4502*c83a76b0SSuyog Pawar             ps_ctxt->num_layers_explicit_search,
4503*c83a76b0SSuyog Pawar             ps_thrd_ctxt->s_init_prms.use_4x4,
4504*c83a76b0SSuyog Pawar             ps_frm_prms,
4505*c83a76b0SSuyog Pawar             ppd_intra_costs,
4506*c83a76b0SSuyog Pawar             &ps_thrd_ctxt->s_init_prms.s_me_coding_tools);
4507*c83a76b0SSuyog Pawar 
4508*c83a76b0SSuyog Pawar         hme_refine(
4509*c83a76b0SSuyog Pawar             ps_thrd_ctxt,
4510*c83a76b0SSuyog Pawar             &s_refine_prms,
4511*c83a76b0SSuyog Pawar             pf_ext_update_fxn,
4512*c83a76b0SSuyog Pawar             ps_coarse_layer,
4513*c83a76b0SSuyog Pawar             ps_multi_thrd_ctxt,
4514*c83a76b0SSuyog Pawar             lyr_job_type,
4515*c83a76b0SSuyog Pawar             thrd_id,
4516*c83a76b0SSuyog Pawar             i4_me_frm_id,
4517*c83a76b0SSuyog Pawar             ps_l0_ipe_input);
4518*c83a76b0SSuyog Pawar 
4519*c83a76b0SSuyog Pawar         /* Set current ref pic status, which will be used as the prev frame ref pic status */
4520*c83a76b0SSuyog Pawar         if(i4_frame_parallelism_level)
4521*c83a76b0SSuyog Pawar         {
4522*c83a76b0SSuyog Pawar             ps_ctxt->i4_is_prev_frame_reference = 0;
4523*c83a76b0SSuyog Pawar         }
4524*c83a76b0SSuyog Pawar         else
4525*c83a76b0SSuyog Pawar         {
4526*c83a76b0SSuyog Pawar             ps_ctxt->i4_is_prev_frame_reference =
4527*c83a76b0SSuyog Pawar                 ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
4528*c83a76b0SSuyog Pawar                     ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
4529*c83a76b0SSuyog Pawar         }
4530*c83a76b0SSuyog Pawar     }
4531*c83a76b0SSuyog Pawar 
4532*c83a76b0SSuyog Pawar     return;
4533*c83a76b0SSuyog Pawar }
4534*c83a76b0SSuyog Pawar 
4535*c83a76b0SSuyog Pawar /**
4536*c83a76b0SSuyog Pawar ********************************************************************************
4537*c83a76b0SSuyog Pawar *  @fn     hme_coarse_process_frm
4538*c83a76b0SSuyog Pawar *
4539*c83a76b0SSuyog Pawar *  @brief  HME frame level processing function (coarse + refine)
4540*c83a76b0SSuyog Pawar *
4541*c83a76b0SSuyog Pawar *  @param[in] pv_me_ctxt : ME ctxt pointer
4542*c83a76b0SSuyog Pawar *
4543*c83a76b0SSuyog Pawar *  @param[in] ps_ref_map : Reference map prms pointer
4544*c83a76b0SSuyog Pawar *
4545*c83a76b0SSuyog Pawar *  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4546*c83a76b0SSuyog Pawar *
4547*c83a76b0SSuyog Pawar *  @param[in] ps_multi_thrd_ctxt :Multi thread related ctxt
4548*c83a76b0SSuyog Pawar *
4549*c83a76b0SSuyog Pawar *  @return Scale factor in Q8 format
4550*c83a76b0SSuyog Pawar ********************************************************************************
4551*c83a76b0SSuyog Pawar */
4552*c83a76b0SSuyog Pawar 
hme_coarse_process_frm(void * pv_me_ctxt,hme_ref_map_t * ps_ref_map,hme_frm_prms_t * ps_frm_prms,void * pv_multi_thrd_ctxt,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)4553*c83a76b0SSuyog Pawar void hme_coarse_process_frm(
4554*c83a76b0SSuyog Pawar     void *pv_me_ctxt,
4555*c83a76b0SSuyog Pawar     hme_ref_map_t *ps_ref_map,
4556*c83a76b0SSuyog Pawar     hme_frm_prms_t *ps_frm_prms,
4557*c83a76b0SSuyog Pawar     void *pv_multi_thrd_ctxt,
4558*c83a76b0SSuyog Pawar     WORD32 i4_ping_pong,
4559*c83a76b0SSuyog Pawar     void **ppv_dep_mngr_hme_sync)
4560*c83a76b0SSuyog Pawar {
4561*c83a76b0SSuyog Pawar     S16 i2_max;
4562*c83a76b0SSuyog Pawar     S32 layer_id;
4563*c83a76b0SSuyog Pawar     coarse_prms_t s_coarse_prms;
4564*c83a76b0SSuyog Pawar     refine_prms_t s_refine_prms;
4565*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4566*c83a76b0SSuyog Pawar     S32 lyr_job_type;
4567*c83a76b0SSuyog Pawar     multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4568*c83a76b0SSuyog Pawar 
4569*c83a76b0SSuyog Pawar     ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4570*c83a76b0SSuyog Pawar     /*************************************************************************/
4571*c83a76b0SSuyog Pawar     /* Fire processing of all layers, starting with coarsest layer.          */
4572*c83a76b0SSuyog Pawar     /*************************************************************************/
4573*c83a76b0SSuyog Pawar     layer_id = ps_ctxt->num_layers - 1;
4574*c83a76b0SSuyog Pawar     i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
4575*c83a76b0SSuyog Pawar     i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
4576*c83a76b0SSuyog Pawar     s_coarse_prms.i4_layer_id = layer_id;
4577*c83a76b0SSuyog Pawar     {
4578*c83a76b0SSuyog Pawar         S32 log_start_step;
4579*c83a76b0SSuyog Pawar         /* Based on Preset, set the starting step size for Refinement */
4580*c83a76b0SSuyog Pawar         if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
4581*c83a76b0SSuyog Pawar         {
4582*c83a76b0SSuyog Pawar             log_start_step = 0;
4583*c83a76b0SSuyog Pawar         }
4584*c83a76b0SSuyog Pawar         else
4585*c83a76b0SSuyog Pawar         {
4586*c83a76b0SSuyog Pawar             log_start_step = 1;
4587*c83a76b0SSuyog Pawar         }
4588*c83a76b0SSuyog Pawar 
4589*c83a76b0SSuyog Pawar         s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
4590*c83a76b0SSuyog Pawar         s_coarse_prms.i4_start_step = 1 << log_start_step;
4591*c83a76b0SSuyog Pawar     }
4592*c83a76b0SSuyog Pawar     s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
4593*c83a76b0SSuyog Pawar     s_coarse_prms.do_full_search = 1;
4594*c83a76b0SSuyog Pawar     if(s_coarse_prms.do_full_search)
4595*c83a76b0SSuyog Pawar     {
4596*c83a76b0SSuyog Pawar         /* Set to 2 or 4 */
4597*c83a76b0SSuyog Pawar         if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
4598*c83a76b0SSuyog Pawar             s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
4599*c83a76b0SSuyog Pawar         else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED)
4600*c83a76b0SSuyog Pawar             s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
4601*c83a76b0SSuyog Pawar     }
4602*c83a76b0SSuyog Pawar     s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;
4603*c83a76b0SSuyog Pawar 
4604*c83a76b0SSuyog Pawar     /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME */
4605*c83a76b0SSuyog Pawar     s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
4606*c83a76b0SSuyog Pawar     s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
4607*c83a76b0SSuyog Pawar     s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);
4608*c83a76b0SSuyog Pawar 
4609*c83a76b0SSuyog Pawar     hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);
4610*c83a76b0SSuyog Pawar 
4611*c83a76b0SSuyog Pawar     /* all refinement layer processed in the loop below */
4612*c83a76b0SSuyog Pawar     layer_id--;
4613*c83a76b0SSuyog Pawar     lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1;
4614*c83a76b0SSuyog Pawar 
4615*c83a76b0SSuyog Pawar     /*************************************************************************/
4616*c83a76b0SSuyog Pawar     /* This loop will run for all refine layers (non- encode layers)          */
4617*c83a76b0SSuyog Pawar     /*************************************************************************/
4618*c83a76b0SSuyog Pawar     while(layer_id > 0)
4619*c83a76b0SSuyog Pawar     {
4620*c83a76b0SSuyog Pawar         hme_set_refine_prms(
4621*c83a76b0SSuyog Pawar             &s_refine_prms,
4622*c83a76b0SSuyog Pawar             ps_ctxt->u1_encode[layer_id],
4623*c83a76b0SSuyog Pawar             ps_ref_map->i4_num_ref,
4624*c83a76b0SSuyog Pawar             layer_id,
4625*c83a76b0SSuyog Pawar             ps_ctxt->num_layers,
4626*c83a76b0SSuyog Pawar             ps_ctxt->num_layers_explicit_search,
4627*c83a76b0SSuyog Pawar             ps_ctxt->s_init_prms.use_4x4,
4628*c83a76b0SSuyog Pawar             ps_frm_prms,
4629*c83a76b0SSuyog Pawar             NULL,
4630*c83a76b0SSuyog Pawar             &ps_ctxt->s_init_prms.s_me_coding_tools);
4631*c83a76b0SSuyog Pawar 
4632*c83a76b0SSuyog Pawar         hme_refine_no_encode(
4633*c83a76b0SSuyog Pawar             ps_ctxt,
4634*c83a76b0SSuyog Pawar             &s_refine_prms,
4635*c83a76b0SSuyog Pawar             ps_multi_thrd_ctxt,
4636*c83a76b0SSuyog Pawar             lyr_job_type,
4637*c83a76b0SSuyog Pawar             i4_ping_pong,
4638*c83a76b0SSuyog Pawar             ppv_dep_mngr_hme_sync);
4639*c83a76b0SSuyog Pawar 
4640*c83a76b0SSuyog Pawar         layer_id--;
4641*c83a76b0SSuyog Pawar         lyr_job_type++;
4642*c83a76b0SSuyog Pawar     }
4643*c83a76b0SSuyog Pawar }
4644*c83a76b0SSuyog Pawar /**
4645*c83a76b0SSuyog Pawar ********************************************************************************
4646*c83a76b0SSuyog Pawar *  @fn     hme_fill_neighbour_mvs
4647*c83a76b0SSuyog Pawar *
4648*c83a76b0SSuyog Pawar *  @brief  HME neighbour MV population function
4649*c83a76b0SSuyog Pawar *
4650*c83a76b0SSuyog Pawar *  @param[in] pps_mv_grid : MV grid array pointer
4651*c83a76b0SSuyog Pawar *
4652*c83a76b0SSuyog Pawar *  @param[in] i4_ctb_x : CTB pos X
4653*c83a76b0SSuyog Pawar 
4654*c83a76b0SSuyog Pawar *  @param[in] i4_ctb_y : CTB pos Y
4655*c83a76b0SSuyog Pawar *
4656*c83a76b0SSuyog Pawar *  @remarks :  Needs to be populated for proper implementation of cost fxn
4657*c83a76b0SSuyog Pawar *
4658*c83a76b0SSuyog Pawar *  @return Scale factor in Q8 format
4659*c83a76b0SSuyog Pawar ********************************************************************************
4660*c83a76b0SSuyog Pawar */
hme_fill_neighbour_mvs(mv_grid_t ** pps_mv_grid,S32 i4_ctb_x,S32 i4_ctb_y,S32 i4_num_ref,void * pv_ctxt)4661*c83a76b0SSuyog Pawar void hme_fill_neighbour_mvs(
4662*c83a76b0SSuyog Pawar     mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
4663*c83a76b0SSuyog Pawar {
4664*c83a76b0SSuyog Pawar     /* TODO : Needs to be populated for proper implementation of cost fxn */
4665*c83a76b0SSuyog Pawar     ARG_NOT_USED(pps_mv_grid);
4666*c83a76b0SSuyog Pawar     ARG_NOT_USED(i4_ctb_x);
4667*c83a76b0SSuyog Pawar     ARG_NOT_USED(i4_ctb_y);
4668*c83a76b0SSuyog Pawar     ARG_NOT_USED(i4_num_ref);
4669*c83a76b0SSuyog Pawar     ARG_NOT_USED(pv_ctxt);
4670*c83a76b0SSuyog Pawar }
4671*c83a76b0SSuyog Pawar 
4672*c83a76b0SSuyog Pawar /**
4673*c83a76b0SSuyog Pawar *******************************************************************************
4674*c83a76b0SSuyog Pawar *  @fn     void hme_get_active_pocs_list(void *pv_me_ctxt,
4675*c83a76b0SSuyog Pawar *                                       S32 *p_pocs_buffered_in_me)
4676*c83a76b0SSuyog Pawar *
4677*c83a76b0SSuyog Pawar *  @brief  Returns the list of active POCs in ME ctxt
4678*c83a76b0SSuyog Pawar *
4679*c83a76b0SSuyog Pawar *  @param[in] pv_me_ctxt : handle to ME context
4680*c83a76b0SSuyog Pawar *
4681*c83a76b0SSuyog Pawar *  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4682*c83a76b0SSuyog Pawar *                                      populates with pocs active
4683*c83a76b0SSuyog Pawar *
4684*c83a76b0SSuyog Pawar *  @return   void
4685*c83a76b0SSuyog Pawar *******************************************************************************
4686*c83a76b0SSuyog Pawar */
hme_get_active_pocs_list(void * pv_me_ctxt,S32 i4_num_me_frm_pllel)4687*c83a76b0SSuyog Pawar WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
4688*c83a76b0SSuyog Pawar {
4689*c83a76b0SSuyog Pawar     me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
4690*c83a76b0SSuyog Pawar     S32 i, count = 0;
4691*c83a76b0SSuyog Pawar 
4692*c83a76b0SSuyog Pawar     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
4693*c83a76b0SSuyog Pawar     {
4694*c83a76b0SSuyog Pawar         S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
4695*c83a76b0SSuyog Pawar         S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
4696*c83a76b0SSuyog Pawar 
4697*c83a76b0SSuyog Pawar         if((i4_is_free == 0) && (poc != INVALID_POC))
4698*c83a76b0SSuyog Pawar         {
4699*c83a76b0SSuyog Pawar             count++;
4700*c83a76b0SSuyog Pawar         }
4701*c83a76b0SSuyog Pawar     }
4702*c83a76b0SSuyog Pawar     if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
4703*c83a76b0SSuyog Pawar     {
4704*c83a76b0SSuyog Pawar         return 1;
4705*c83a76b0SSuyog Pawar     }
4706*c83a76b0SSuyog Pawar     else
4707*c83a76b0SSuyog Pawar     {
4708*c83a76b0SSuyog Pawar         return 0;
4709*c83a76b0SSuyog Pawar     }
4710*c83a76b0SSuyog Pawar }
4711*c83a76b0SSuyog Pawar 
4712*c83a76b0SSuyog Pawar /**
4713*c83a76b0SSuyog Pawar *******************************************************************************
4714*c83a76b0SSuyog Pawar *  @fn     void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
4715*c83a76b0SSuyog Pawar *                                       S32 *p_pocs_buffered_in_me)
4716*c83a76b0SSuyog Pawar *
4717*c83a76b0SSuyog Pawar *  @brief  Returns the list of active POCs in ME ctxt
4718*c83a76b0SSuyog Pawar *
4719*c83a76b0SSuyog Pawar *  @param[in] pv_me_ctxt : handle to ME context
4720*c83a76b0SSuyog Pawar *
4721*c83a76b0SSuyog Pawar *  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4722*c83a76b0SSuyog Pawar *                                      populates with pocs active
4723*c83a76b0SSuyog Pawar *
4724*c83a76b0SSuyog Pawar *  @return   void
4725*c83a76b0SSuyog Pawar *******************************************************************************
4726*c83a76b0SSuyog Pawar */
hme_coarse_get_active_pocs_list(void * pv_me_ctxt,S32 * p_pocs_buffered_in_me)4727*c83a76b0SSuyog Pawar void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
4728*c83a76b0SSuyog Pawar {
4729*c83a76b0SSuyog Pawar     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4730*c83a76b0SSuyog Pawar     S32 i, count = 0;
4731*c83a76b0SSuyog Pawar 
4732*c83a76b0SSuyog Pawar     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
4733*c83a76b0SSuyog Pawar     {
4734*c83a76b0SSuyog Pawar         S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc;
4735*c83a76b0SSuyog Pawar 
4736*c83a76b0SSuyog Pawar         if(poc != -1)
4737*c83a76b0SSuyog Pawar         {
4738*c83a76b0SSuyog Pawar             p_pocs_buffered_in_me[count] = poc;
4739*c83a76b0SSuyog Pawar             count++;
4740*c83a76b0SSuyog Pawar         }
4741*c83a76b0SSuyog Pawar     }
4742*c83a76b0SSuyog Pawar     p_pocs_buffered_in_me[count] = -1;
4743*c83a76b0SSuyog Pawar }
4744*c83a76b0SSuyog Pawar 
/**
*******************************************************************************
*  @fn     S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers,
*                               S32 encode)
*
*  @brief  Returns the block size used for a given layer
*
*  @param[in] use_4x4 : not used by this function
*
*  @param[in] layer_id : id of the layer being queried
*
*  @param[in] n_layers : total number of layers; layer id (n_layers - 1) is
*                        the coarsest layer
*
*  @param[in] encode : non zero if the layer is an encode layer
*
*  @return   4 for the coarsest layer, 16 for layer 0 or any encode layer,
*            8 for the remaining (intermediate non encode) layers
*******************************************************************************
*/
S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
{
    S32 blk_size;

    (void)use_4x4;

    if(layer_id == n_layers - 1)
    {
        /* The coarsest layer check takes priority over the other two */
        blk_size = 4;
    }
    else if((layer_id == 0) || (encode))
    {
        /* Lowermost layer / encode layer */
        blk_size = 16;
    }
    else
    {
        /* Intermediate non encode layers */
        blk_size = 8;
    }

    return blk_size;
}
4756