1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar
21*c83a76b0SSuyog Pawar /*****************************************************************************/
22*c83a76b0SSuyog Pawar /* File Includes */
23*c83a76b0SSuyog Pawar /*****************************************************************************/
24*c83a76b0SSuyog Pawar /* System include files */
25*c83a76b0SSuyog Pawar #include <stdio.h>
26*c83a76b0SSuyog Pawar #include <string.h>
27*c83a76b0SSuyog Pawar #include <stdlib.h>
28*c83a76b0SSuyog Pawar #include <assert.h>
29*c83a76b0SSuyog Pawar #include <stdarg.h>
30*c83a76b0SSuyog Pawar #include <math.h>
31*c83a76b0SSuyog Pawar #include <limits.h>
32*c83a76b0SSuyog Pawar
33*c83a76b0SSuyog Pawar /* User include files */
34*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
35*c83a76b0SSuyog Pawar #include "itt_video_api.h"
36*c83a76b0SSuyog Pawar #include "ihevce_api.h"
37*c83a76b0SSuyog Pawar
38*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
39*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
40*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
41*c83a76b0SSuyog Pawar
42*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
43*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
44*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
45*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
46*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
47*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
48*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
49*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
50*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
51*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
52*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
53*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
54*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
55*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
56*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
57*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
58*c83a76b0SSuyog Pawar
59*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
60*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
61*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
62*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
63*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
64*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
65*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
66*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
67*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
68*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
69*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
70*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
71*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
72*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
73*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
75*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
76*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
77*c83a76b0SSuyog Pawar #include "hme_datatype.h"
78*c83a76b0SSuyog Pawar #include "hme_interface.h"
79*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
80*c83a76b0SSuyog Pawar #include "hme_defs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
82*c83a76b0SSuyog Pawar #include "hme_globals.h"
83*c83a76b0SSuyog Pawar #include "hme_utils.h"
84*c83a76b0SSuyog Pawar #include "hme_coarse.h"
85*c83a76b0SSuyog Pawar #include "hme_refine.h"
86*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
87*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
88*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
89*c83a76b0SSuyog Pawar #include "ihevce_profile.h"
90*c83a76b0SSuyog Pawar
91*c83a76b0SSuyog Pawar /*****************************************************************************/
92*c83a76b0SSuyog Pawar /* Function Definitions */
93*c83a76b0SSuyog Pawar /*****************************************************************************/
94*c83a76b0SSuyog Pawar
hme_init_globals()95*c83a76b0SSuyog Pawar void hme_init_globals()
96*c83a76b0SSuyog Pawar {
97*c83a76b0SSuyog Pawar GRID_PT_T id;
98*c83a76b0SSuyog Pawar S32 i, j;
99*c83a76b0SSuyog Pawar /*************************************************************************/
100*c83a76b0SSuyog Pawar /* Initialize the lookup table for x offset, y offset, optimized mask */
101*c83a76b0SSuyog Pawar /* based on grid id. The design is as follows: */
102*c83a76b0SSuyog Pawar /* */
103*c83a76b0SSuyog Pawar /* a b c d */
104*c83a76b0SSuyog Pawar /* TL T TR e */
105*c83a76b0SSuyog Pawar /* L C R f */
106*c83a76b0SSuyog Pawar /* BL B BR */
107*c83a76b0SSuyog Pawar /* */
108*c83a76b0SSuyog Pawar /* If a non-corner pt, like T, is the new minimum, then we need to */
109*c83a76b0SSuyog Pawar /* evaluate only 3 new pts, in this case, a, b, c. So the optimal */
110*c83a76b0SSuyog Pawar /* grid mask would reflect this. If a corner pt, like TR, is the new */
111*c83a76b0SSuyog Pawar /* minimum, then we need to evaluate 5 new pts, in this case, b, c, d, */
112*c83a76b0SSuyog Pawar /* e and f. So the grid mask will have 5 pts enabled. */
113*c83a76b0SSuyog Pawar /*************************************************************************/
114*c83a76b0SSuyog Pawar
115*c83a76b0SSuyog Pawar id = PT_C;
116*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
117*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = 0;
118*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = 0;
119*c83a76b0SSuyog Pawar gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
120*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
121*c83a76b0SSuyog Pawar
122*c83a76b0SSuyog Pawar id = PT_L;
123*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
124*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = -1;
125*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = 0;
126*c83a76b0SSuyog Pawar gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
127*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
128*c83a76b0SSuyog Pawar
129*c83a76b0SSuyog Pawar id = PT_R;
130*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
131*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = 1;
132*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = 0;
133*c83a76b0SSuyog Pawar gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
134*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
135*c83a76b0SSuyog Pawar
136*c83a76b0SSuyog Pawar id = PT_T;
137*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
138*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = 0;
139*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = -1;
140*c83a76b0SSuyog Pawar gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
141*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
142*c83a76b0SSuyog Pawar
143*c83a76b0SSuyog Pawar id = PT_B;
144*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
145*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = 0;
146*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = 1;
147*c83a76b0SSuyog Pawar gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
148*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
149*c83a76b0SSuyog Pawar
150*c83a76b0SSuyog Pawar id = PT_TL;
151*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
152*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = -1;
153*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = -1;
154*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);
155*c83a76b0SSuyog Pawar
156*c83a76b0SSuyog Pawar id = PT_TR;
157*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
158*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = 1;
159*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = -1;
160*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);
161*c83a76b0SSuyog Pawar
162*c83a76b0SSuyog Pawar id = PT_BL;
163*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
164*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = -1;
165*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = 1;
166*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);
167*c83a76b0SSuyog Pawar
168*c83a76b0SSuyog Pawar id = PT_BR;
169*c83a76b0SSuyog Pawar gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
170*c83a76b0SSuyog Pawar gai1_grid_id_to_x[id] = 1;
171*c83a76b0SSuyog Pawar gai1_grid_id_to_y[id] = 1;
172*c83a76b0SSuyog Pawar gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);
173*c83a76b0SSuyog Pawar
174*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
175*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
176*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
177*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
178*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
179*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
180*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
181*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
182*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
183*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
184*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
185*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
186*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
187*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
188*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
189*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
190*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;
191*c83a76b0SSuyog Pawar
192*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
193*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
194*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
195*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
196*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
197*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
198*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
199*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
200*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
201*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
202*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
203*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
204*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
205*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
206*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
207*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
208*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;
209*c83a76b0SSuyog Pawar
210*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
211*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
212*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
213*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
214*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
215*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
216*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
217*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
218*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
219*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
220*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
221*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
222*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
223*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
224*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
225*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
226*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;
227*c83a76b0SSuyog Pawar
228*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
229*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
230*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
231*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
232*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
233*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
234*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
235*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
236*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
237*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
238*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
239*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
240*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
241*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
242*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
243*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
244*c83a76b0SSuyog Pawar ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;
245*c83a76b0SSuyog Pawar
246*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
247*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_2NxN] = 2;
248*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
249*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_NxN] = 4;
250*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
251*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
252*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
253*c83a76b0SSuyog Pawar gau1_num_parts_in_part_type[PRT_nRx2N] = 2;
254*c83a76b0SSuyog Pawar
255*c83a76b0SSuyog Pawar for(i = 0; i < MAX_PART_TYPES; i++)
256*c83a76b0SSuyog Pawar for(j = 0; j < MAX_NUM_PARTS; j++)
257*c83a76b0SSuyog Pawar ge_part_type_to_part_id[i][j] = PART_ID_INVALID;
258*c83a76b0SSuyog Pawar
259*c83a76b0SSuyog Pawar /* 2Nx2N only one partition */
260*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;
261*c83a76b0SSuyog Pawar
262*c83a76b0SSuyog Pawar /* 2NxN 2 partitions */
263*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
264*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;
265*c83a76b0SSuyog Pawar
266*c83a76b0SSuyog Pawar /* Nx2N 2 partitions */
267*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
268*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;
269*c83a76b0SSuyog Pawar
270*c83a76b0SSuyog Pawar /* NxN 4 partitions */
271*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
272*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
273*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
274*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;
275*c83a76b0SSuyog Pawar
276*c83a76b0SSuyog Pawar /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
277*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
278*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;
279*c83a76b0SSuyog Pawar
280*c83a76b0SSuyog Pawar /* AMP 2Nx (3N/2 + N/2) 2 partitions */
281*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
282*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;
283*c83a76b0SSuyog Pawar
284*c83a76b0SSuyog Pawar /* AMP (N/2 + 3N/2) x 2N 2 partitions */
285*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
286*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;
287*c83a76b0SSuyog Pawar
288*c83a76b0SSuyog Pawar /* AMP (3N/2 + N/2) x 2N 2 partitions */
289*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
290*c83a76b0SSuyog Pawar ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;
291*c83a76b0SSuyog Pawar
292*c83a76b0SSuyog Pawar /*************************************************************************/
293*c83a76b0SSuyog Pawar /* initialize attributes for each partition id within the cu. */
294*c83a76b0SSuyog Pawar /*************************************************************************/
295*c83a76b0SSuyog Pawar {
296*c83a76b0SSuyog Pawar part_attr_t *ps_part_attr;
297*c83a76b0SSuyog Pawar
298*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
299*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
300*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
301*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
302*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
303*c83a76b0SSuyog Pawar
304*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
305*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
306*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
307*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
308*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 4;
309*c83a76b0SSuyog Pawar
310*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
311*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
312*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 4;
313*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
314*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 4;
315*c83a76b0SSuyog Pawar
316*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
317*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
318*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
319*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 4;
320*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
321*c83a76b0SSuyog Pawar
322*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
323*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 4;
324*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
325*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 4;
326*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
327*c83a76b0SSuyog Pawar
328*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
329*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
330*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
331*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 4;
332*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 4;
333*c83a76b0SSuyog Pawar
334*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
335*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 4;
336*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
337*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 4;
338*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 4;
339*c83a76b0SSuyog Pawar
340*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
341*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
342*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 4;
343*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 4;
344*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 4;
345*c83a76b0SSuyog Pawar
346*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
347*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 4;
348*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 4;
349*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 4;
350*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 4;
351*c83a76b0SSuyog Pawar
352*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
353*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
354*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
355*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
356*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 2;
357*c83a76b0SSuyog Pawar
358*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
359*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
360*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 2;
361*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
362*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 6;
363*c83a76b0SSuyog Pawar
364*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
365*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
366*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
367*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
368*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 6;
369*c83a76b0SSuyog Pawar
370*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
371*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
372*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 6;
373*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 8;
374*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 2;
375*c83a76b0SSuyog Pawar
376*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
377*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
378*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
379*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 2;
380*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
381*c83a76b0SSuyog Pawar
382*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
383*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 2;
384*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
385*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 6;
386*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
387*c83a76b0SSuyog Pawar
388*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
389*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 0;
390*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
391*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 6;
392*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
393*c83a76b0SSuyog Pawar
394*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
395*c83a76b0SSuyog Pawar ps_part_attr->u1_x_start = 6;
396*c83a76b0SSuyog Pawar ps_part_attr->u1_y_start = 0;
397*c83a76b0SSuyog Pawar ps_part_attr->u1_x_count = 2;
398*c83a76b0SSuyog Pawar ps_part_attr->u1_y_count = 8;
399*c83a76b0SSuyog Pawar }
400*c83a76b0SSuyog Pawar for(i = 0; i < NUM_BLK_SIZES; i++)
401*c83a76b0SSuyog Pawar ge_blk_size_to_cu_size[i] = CU_INVALID;
402*c83a76b0SSuyog Pawar
403*c83a76b0SSuyog Pawar ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
404*c83a76b0SSuyog Pawar ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
405*c83a76b0SSuyog Pawar ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
406*c83a76b0SSuyog Pawar ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;
407*c83a76b0SSuyog Pawar
408*c83a76b0SSuyog Pawar /* This is the reverse: given CU size, get blk size */
409*c83a76b0SSuyog Pawar ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
410*c83a76b0SSuyog Pawar ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
411*c83a76b0SSuyog Pawar ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
412*c83a76b0SSuyog Pawar ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;
413*c83a76b0SSuyog Pawar
414*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_2Nx2N] = 0;
415*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_2NxN] = 0;
416*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_Nx2N] = 1;
417*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_NxN] = 1;
418*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_2NxnU] = 0;
419*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_2NxnD] = 0;
420*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_nLx2N] = 1;
421*c83a76b0SSuyog Pawar gau1_is_vert_part[PRT_nRx2N] = 1;
422*c83a76b0SSuyog Pawar
423*c83a76b0SSuyog Pawar /* Initialise the number of best results for the full-pel refinement */
424*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
425*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
426*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
427*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
428*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
429*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
430*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
431*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
432*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
433*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
434*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
435*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
436*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
437*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
438*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
439*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
440*c83a76b0SSuyog Pawar gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;
441*c83a76b0SSuyog Pawar
442*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
443*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
444*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
445*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
446*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
447*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
448*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
449*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
450*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
451*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
452*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
453*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
454*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
455*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
456*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
457*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
458*c83a76b0SSuyog Pawar gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;
459*c83a76b0SSuyog Pawar
460*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
461*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
462*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
463*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
464*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
465*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
466*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
467*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
468*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
469*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
470*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
471*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
472*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
473*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
474*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
475*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
476*c83a76b0SSuyog Pawar gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;
477*c83a76b0SSuyog Pawar
478*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
479*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
480*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
481*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
482*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
483*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
484*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
485*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
486*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
487*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
488*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
489*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
490*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
491*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
492*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
493*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
494*c83a76b0SSuyog Pawar gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;
495*c83a76b0SSuyog Pawar
496*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
497*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
498*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
499*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
500*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
501*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
502*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
503*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
504*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
505*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
506*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
507*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
508*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
509*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
510*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
511*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
512*c83a76b0SSuyog Pawar gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;
513*c83a76b0SSuyog Pawar
514*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
515*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
516*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
517*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
518*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
519*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
520*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
521*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
522*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
523*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
524*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
525*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
526*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
527*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
528*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
529*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
530*c83a76b0SSuyog Pawar gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;
531*c83a76b0SSuyog Pawar
532*c83a76b0SSuyog Pawar /* Top right validity for each part id */
533*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
534*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
535*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
536*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
537*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
538*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
539*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
540*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
541*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
542*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
543*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
544*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
545*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
546*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
547*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
548*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
549*c83a76b0SSuyog Pawar gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;
550*c83a76b0SSuyog Pawar
551*c83a76b0SSuyog Pawar /* Bot Left validity for each part id */
552*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
553*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
554*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
555*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
556*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
557*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
558*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
559*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
560*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
561*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
562*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
563*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
564*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
565*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
566*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
567*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
568*c83a76b0SSuyog Pawar gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;
569*c83a76b0SSuyog Pawar
570*c83a76b0SSuyog Pawar /*Part id to part num of this partition id in the CU */
571*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
572*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
573*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
574*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
575*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
576*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
577*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
578*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
579*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
580*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
581*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
582*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
583*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
584*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
585*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
586*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
587*c83a76b0SSuyog Pawar gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;
588*c83a76b0SSuyog Pawar
589*c83a76b0SSuyog Pawar /*Which partition type does this partition id belong to */
590*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
591*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
592*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
593*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
594*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
595*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
596*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
597*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
598*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
599*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
600*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
601*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
602*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
603*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
604*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
605*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
606*c83a76b0SSuyog Pawar ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;
607*c83a76b0SSuyog Pawar
608*c83a76b0SSuyog Pawar /*************************************************************************/
609*c83a76b0SSuyog Pawar /* Set up the bits to be taken up for the part type. This is equally */
610*c83a76b0SSuyog Pawar /* divided up between the various partitions in the part-type. */
611*c83a76b0SSuyog Pawar /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as */
612*c83a76b0SSuyog Pawar /* partition 2Nx2N. */
613*c83a76b0SSuyog Pawar /*************************************************************************/
614*c83a76b0SSuyog Pawar /* 1 bit for 2Nx2N partition */
615*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;
616*c83a76b0SSuyog Pawar
617*c83a76b0SSuyog Pawar /* 3 bits for symmetric part types, so 1.5 bits per partition */
618*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
619*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
620*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
621*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;
622*c83a76b0SSuyog Pawar
623*c83a76b0SSuyog Pawar /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
624*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
625*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
626*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
627*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;
628*c83a76b0SSuyog Pawar
629*c83a76b0SSuyog Pawar /* 4 bits for AMP so 2 bits per partition */
630*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
631*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
632*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
633*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
634*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
635*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
636*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
637*c83a76b0SSuyog Pawar gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
638*c83a76b0SSuyog Pawar }
639*c83a76b0SSuyog Pawar
640*c83a76b0SSuyog Pawar /**
641*c83a76b0SSuyog Pawar ********************************************************************************
642*c83a76b0SSuyog Pawar * @fn hme_enc_num_alloc()
643*c83a76b0SSuyog Pawar *
644*c83a76b0SSuyog Pawar * @brief returns number of memtabs that is required by hme module
645*c83a76b0SSuyog Pawar *
646*c83a76b0SSuyog Pawar * @return Number of memtabs required
647*c83a76b0SSuyog Pawar ********************************************************************************
648*c83a76b0SSuyog Pawar */
hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)649*c83a76b0SSuyog Pawar S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
650*c83a76b0SSuyog Pawar {
651*c83a76b0SSuyog Pawar if(i4_num_me_frm_pllel > 1)
652*c83a76b0SSuyog Pawar {
653*c83a76b0SSuyog Pawar return ((S32)MAX_HME_ENC_TOT_MEMTABS);
654*c83a76b0SSuyog Pawar }
655*c83a76b0SSuyog Pawar else
656*c83a76b0SSuyog Pawar {
657*c83a76b0SSuyog Pawar return ((S32)MIN_HME_ENC_TOT_MEMTABS);
658*c83a76b0SSuyog Pawar }
659*c83a76b0SSuyog Pawar }
660*c83a76b0SSuyog Pawar
661*c83a76b0SSuyog Pawar /**
662*c83a76b0SSuyog Pawar ********************************************************************************
663*c83a76b0SSuyog Pawar * @fn hme_coarse_num_alloc()
664*c83a76b0SSuyog Pawar *
665*c83a76b0SSuyog Pawar * @brief returns number of memtabs that is required by hme module
666*c83a76b0SSuyog Pawar *
667*c83a76b0SSuyog Pawar * @return Number of memtabs required
668*c83a76b0SSuyog Pawar ********************************************************************************
669*c83a76b0SSuyog Pawar */
hme_coarse_num_alloc()670*c83a76b0SSuyog Pawar S32 hme_coarse_num_alloc()
671*c83a76b0SSuyog Pawar {
672*c83a76b0SSuyog Pawar return ((S32)HME_COARSE_TOT_MEMTABS);
673*c83a76b0SSuyog Pawar }
674*c83a76b0SSuyog Pawar
675*c83a76b0SSuyog Pawar /**
676*c83a76b0SSuyog Pawar ********************************************************************************
677*c83a76b0SSuyog Pawar * @fn hme_coarse_dep_mngr_num_alloc()
678*c83a76b0SSuyog Pawar *
679*c83a76b0SSuyog Pawar * @brief returns number of memtabs that is required by Dep Mngr for hme module
680*c83a76b0SSuyog Pawar *
681*c83a76b0SSuyog Pawar * @return Number of memtabs required
682*c83a76b0SSuyog Pawar ********************************************************************************
683*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_num_alloc()684*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_num_alloc()
685*c83a76b0SSuyog Pawar {
686*c83a76b0SSuyog Pawar return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
687*c83a76b0SSuyog Pawar }
688*c83a76b0SSuyog Pawar
hme_validate_init_prms(hme_init_prms_t * ps_prms)689*c83a76b0SSuyog Pawar S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
690*c83a76b0SSuyog Pawar {
691*c83a76b0SSuyog Pawar S32 n_layers = ps_prms->num_simulcast_layers;
692*c83a76b0SSuyog Pawar
693*c83a76b0SSuyog Pawar /* The final layer has got to be a non encode coarse layer */
694*c83a76b0SSuyog Pawar if(n_layers > (MAX_NUM_LAYERS - 1))
695*c83a76b0SSuyog Pawar return (-1);
696*c83a76b0SSuyog Pawar
697*c83a76b0SSuyog Pawar if(n_layers < 1)
698*c83a76b0SSuyog Pawar return (-1);
699*c83a76b0SSuyog Pawar
700*c83a76b0SSuyog Pawar /* Width of the coarsest encode layer got to be >= 2*min_wd where min_Wd */
701*c83a76b0SSuyog Pawar /* represents the min allowed width in any layer. Ditto with ht */
702*c83a76b0SSuyog Pawar if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE))
703*c83a76b0SSuyog Pawar return (-1);
704*c83a76b0SSuyog Pawar if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE))
705*c83a76b0SSuyog Pawar return (-1);
706*c83a76b0SSuyog Pawar if(ps_prms->max_num_ref > MAX_NUM_REF)
707*c83a76b0SSuyog Pawar return (-1);
708*c83a76b0SSuyog Pawar if(ps_prms->max_num_ref < 0)
709*c83a76b0SSuyog Pawar return (-1);
710*c83a76b0SSuyog Pawar
711*c83a76b0SSuyog Pawar return (0);
712*c83a76b0SSuyog Pawar }
/**
********************************************************************************
*  @fn     hme_set_layer_res_attrs()
*
*  @brief  Stores the resolution attributes of a layer in its context. When
*          u1_enc is 0 the input buffer stride, offset, padding and the
*          pointer to the first input pixel are derived as well.
*
*  @param[in,out] ps_layer : layer context to update; pu1_inp_base is read
*                            when u1_enc is 0
*  @param[in] wd, ht : layer width and height
*  @param[in] disp_wd, disp_ht : display width and height of the layer
*  @param[in] u1_enc : 0 selects the additional input buffer setup
*
*  @return none
********************************************************************************
*/
void hme_set_layer_res_attrs(
    layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
{
    /* Padding (in pixels) applied on the left and on the top of the input */
    const S32 i4_pad = 16;

    ps_layer->i4_wd = wd;
    ps_layer->i4_ht = ht;
    ps_layer->i4_disp_wd = disp_wd;
    ps_layer->i4_disp_ht = disp_ht;

    /* Input buffer attributes are maintained only when u1_enc is 0 */
    if(0 != u1_enc)
    {
        return;
    }

    ps_layer->i4_pad_x_inp = i4_pad;
    ps_layer->i4_pad_y_inp = i4_pad;

    /* Row pitch: width plus 16 pixels of padding on either side plus 4 extra */
    ps_layer->i4_inp_stride = wd + 32 + 4;

    /* Skip 16 padded rows and 16 padded columns to reach the first pixel */
    ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16;
    ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset;
}
729*c83a76b0SSuyog Pawar
730*c83a76b0SSuyog Pawar /**
731*c83a76b0SSuyog Pawar ********************************************************************************
732*c83a76b0SSuyog Pawar * @fn hme_coarse_get_layer1_mv_bank_ref_idx_size()
733*c83a76b0SSuyog Pawar *
734*c83a76b0SSuyog Pawar * @brief returns the MV bank and ref idx size of Layer 1 (penultimate)
735*c83a76b0SSuyog Pawar *
736*c83a76b0SSuyog Pawar * @return none
737*c83a76b0SSuyog Pawar ********************************************************************************
738*c83a76b0SSuyog Pawar */
hme_coarse_get_layer1_mv_bank_ref_idx_size(S32 n_tot_layers,S32 * a_wd,S32 * a_ht,S32 max_num_ref,S32 * pi4_mv_bank_size,S32 * pi4_ref_idx_size)739*c83a76b0SSuyog Pawar void hme_coarse_get_layer1_mv_bank_ref_idx_size(
740*c83a76b0SSuyog Pawar S32 n_tot_layers,
741*c83a76b0SSuyog Pawar S32 *a_wd,
742*c83a76b0SSuyog Pawar S32 *a_ht,
743*c83a76b0SSuyog Pawar S32 max_num_ref,
744*c83a76b0SSuyog Pawar S32 *pi4_mv_bank_size,
745*c83a76b0SSuyog Pawar S32 *pi4_ref_idx_size)
746*c83a76b0SSuyog Pawar {
747*c83a76b0SSuyog Pawar S32 num_blks, num_mvs_per_blk, num_ref;
748*c83a76b0SSuyog Pawar S32 num_cols, num_rows, num_mvs_per_row;
749*c83a76b0SSuyog Pawar S32 is_explicit_store = 1;
750*c83a76b0SSuyog Pawar S32 wd, ht, num_layers_explicit_search;
751*c83a76b0SSuyog Pawar S32 num_results, use_4x4;
752*c83a76b0SSuyog Pawar wd = a_wd[1];
753*c83a76b0SSuyog Pawar ht = a_ht[1];
754*c83a76b0SSuyog Pawar
755*c83a76b0SSuyog Pawar /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
756*c83a76b0SSuyog Pawar /* frames in all but final layer In final layer, it could be 1/2 */
757*c83a76b0SSuyog Pawar //ps_hme_init_prms->num_layers_explicit_search = 3;
758*c83a76b0SSuyog Pawar num_layers_explicit_search = 3;
759*c83a76b0SSuyog Pawar
760*c83a76b0SSuyog Pawar if(num_layers_explicit_search <= 0)
761*c83a76b0SSuyog Pawar num_layers_explicit_search = n_tot_layers - 1;
762*c83a76b0SSuyog Pawar
763*c83a76b0SSuyog Pawar num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
764*c83a76b0SSuyog Pawar
765*c83a76b0SSuyog Pawar /* Possibly implicit search for lower (finer) layers */
766*c83a76b0SSuyog Pawar if(n_tot_layers - 1 > num_layers_explicit_search)
767*c83a76b0SSuyog Pawar is_explicit_store = 0;
768*c83a76b0SSuyog Pawar
769*c83a76b0SSuyog Pawar /* coarsest layer alwasy uses 4x4 blks to store results */
770*c83a76b0SSuyog Pawar if(1 == (n_tot_layers - 1))
771*c83a76b0SSuyog Pawar {
772*c83a76b0SSuyog Pawar /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
773*c83a76b0SSuyog Pawar //ps_hme_init_prms->max_num_results_coarse = 4;
774*c83a76b0SSuyog Pawar //vijay : with new algo in coarseset layer this has to be revisited
775*c83a76b0SSuyog Pawar num_results = 4;
776*c83a76b0SSuyog Pawar }
777*c83a76b0SSuyog Pawar else
778*c83a76b0SSuyog Pawar {
779*c83a76b0SSuyog Pawar /* Every refinement layer stores a max of 2 results per partition */
780*c83a76b0SSuyog Pawar //ps_hme_init_prms->max_num_results = 2;
781*c83a76b0SSuyog Pawar num_results = 2;
782*c83a76b0SSuyog Pawar }
783*c83a76b0SSuyog Pawar use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0);
784*c83a76b0SSuyog Pawar
785*c83a76b0SSuyog Pawar num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
786*c83a76b0SSuyog Pawar num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
787*c83a76b0SSuyog Pawar
788*c83a76b0SSuyog Pawar if(is_explicit_store)
789*c83a76b0SSuyog Pawar num_ref = max_num_ref;
790*c83a76b0SSuyog Pawar else
791*c83a76b0SSuyog Pawar num_ref = 2;
792*c83a76b0SSuyog Pawar
793*c83a76b0SSuyog Pawar num_blks = num_cols * num_rows;
794*c83a76b0SSuyog Pawar num_mvs_per_blk = num_ref * num_results;
795*c83a76b0SSuyog Pawar num_mvs_per_row = num_mvs_per_blk * num_cols;
796*c83a76b0SSuyog Pawar
797*c83a76b0SSuyog Pawar /* stroe the sizes */
798*c83a76b0SSuyog Pawar *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
799*c83a76b0SSuyog Pawar *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
800*c83a76b0SSuyog Pawar
801*c83a76b0SSuyog Pawar return;
802*c83a76b0SSuyog Pawar }
803*c83a76b0SSuyog Pawar /**
804*c83a76b0SSuyog Pawar ********************************************************************************
805*c83a76b0SSuyog Pawar * @fn hme_alloc_init_layer_mv_bank()
806*c83a76b0SSuyog Pawar *
807*c83a76b0SSuyog Pawar * @brief memory alloc and init function for MV bank
808*c83a76b0SSuyog Pawar *
809*c83a76b0SSuyog Pawar * @return Number of memtabs required
810*c83a76b0SSuyog Pawar ********************************************************************************
811*c83a76b0SSuyog Pawar */
hme_alloc_init_layer_mv_bank(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 is_explicit_store,hme_mv_t ** pps_mv_base,S08 ** pi1_ref_idx_base,S32 * pi4_num_mvs_per_row)812*c83a76b0SSuyog Pawar S32 hme_alloc_init_layer_mv_bank(
813*c83a76b0SSuyog Pawar hme_memtab_t *ps_memtab,
814*c83a76b0SSuyog Pawar S32 max_num_results,
815*c83a76b0SSuyog Pawar S32 max_num_ref,
816*c83a76b0SSuyog Pawar S32 use_4x4,
817*c83a76b0SSuyog Pawar S32 mem_avail,
818*c83a76b0SSuyog Pawar S32 u1_enc,
819*c83a76b0SSuyog Pawar S32 wd,
820*c83a76b0SSuyog Pawar S32 ht,
821*c83a76b0SSuyog Pawar S32 is_explicit_store,
822*c83a76b0SSuyog Pawar hme_mv_t **pps_mv_base,
823*c83a76b0SSuyog Pawar S08 **pi1_ref_idx_base,
824*c83a76b0SSuyog Pawar S32 *pi4_num_mvs_per_row)
825*c83a76b0SSuyog Pawar {
826*c83a76b0SSuyog Pawar S32 count = 0;
827*c83a76b0SSuyog Pawar S32 size;
828*c83a76b0SSuyog Pawar S32 num_blks, num_mvs_per_blk;
829*c83a76b0SSuyog Pawar S32 num_ref;
830*c83a76b0SSuyog Pawar S32 num_cols, num_rows, num_mvs_per_row;
831*c83a76b0SSuyog Pawar
832*c83a76b0SSuyog Pawar if(is_explicit_store)
833*c83a76b0SSuyog Pawar num_ref = max_num_ref;
834*c83a76b0SSuyog Pawar else
835*c83a76b0SSuyog Pawar num_ref = 2;
836*c83a76b0SSuyog Pawar
837*c83a76b0SSuyog Pawar /* MV Bank allocation takes into consideration following */
838*c83a76b0SSuyog Pawar /* number of results per reference x max num refrences is the amount */
839*c83a76b0SSuyog Pawar /* bufffered up per blk. Numbero f blks in pic deps on the blk size, */
840*c83a76b0SSuyog Pawar /* which could be either 4x4 or 8x8. */
841*c83a76b0SSuyog Pawar num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
842*c83a76b0SSuyog Pawar num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
843*c83a76b0SSuyog Pawar
844*c83a76b0SSuyog Pawar if(u1_enc)
845*c83a76b0SSuyog Pawar {
846*c83a76b0SSuyog Pawar /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
847*c83a76b0SSuyog Pawar WORD32 num_ctb_cols = ((wd + 63) >> 6);
848*c83a76b0SSuyog Pawar WORD32 num_ctb_rows = ((ht + 63) >> 6);
849*c83a76b0SSuyog Pawar
850*c83a76b0SSuyog Pawar num_cols = (num_ctb_cols << 3) + 2;
851*c83a76b0SSuyog Pawar num_rows = (num_ctb_rows << 3) + 2;
852*c83a76b0SSuyog Pawar }
853*c83a76b0SSuyog Pawar num_blks = num_cols * num_rows;
854*c83a76b0SSuyog Pawar num_mvs_per_blk = num_ref * max_num_results;
855*c83a76b0SSuyog Pawar num_mvs_per_row = num_mvs_per_blk * num_cols;
856*c83a76b0SSuyog Pawar
857*c83a76b0SSuyog Pawar size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
858*c83a76b0SSuyog Pawar if(mem_avail)
859*c83a76b0SSuyog Pawar {
860*c83a76b0SSuyog Pawar /* store this for run time verifications */
861*c83a76b0SSuyog Pawar *pi4_num_mvs_per_row = num_mvs_per_row;
862*c83a76b0SSuyog Pawar ASSERT(ps_memtab[count].size == size);
863*c83a76b0SSuyog Pawar *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
864*c83a76b0SSuyog Pawar }
865*c83a76b0SSuyog Pawar else
866*c83a76b0SSuyog Pawar {
867*c83a76b0SSuyog Pawar ps_memtab[count].size = size;
868*c83a76b0SSuyog Pawar ps_memtab[count].align = 4;
869*c83a76b0SSuyog Pawar ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
870*c83a76b0SSuyog Pawar }
871*c83a76b0SSuyog Pawar
872*c83a76b0SSuyog Pawar count++;
873*c83a76b0SSuyog Pawar /* Ref idx takes the same route as mvbase */
874*c83a76b0SSuyog Pawar
875*c83a76b0SSuyog Pawar size = num_blks * num_mvs_per_blk * sizeof(S08);
876*c83a76b0SSuyog Pawar if(mem_avail)
877*c83a76b0SSuyog Pawar {
878*c83a76b0SSuyog Pawar ASSERT(ps_memtab[count].size == size);
879*c83a76b0SSuyog Pawar *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
880*c83a76b0SSuyog Pawar }
881*c83a76b0SSuyog Pawar else
882*c83a76b0SSuyog Pawar {
883*c83a76b0SSuyog Pawar ps_memtab[count].size = size;
884*c83a76b0SSuyog Pawar ps_memtab[count].align = 4;
885*c83a76b0SSuyog Pawar ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
886*c83a76b0SSuyog Pawar }
887*c83a76b0SSuyog Pawar count++;
888*c83a76b0SSuyog Pawar
889*c83a76b0SSuyog Pawar return (count);
890*c83a76b0SSuyog Pawar }
891*c83a76b0SSuyog Pawar /**
892*c83a76b0SSuyog Pawar ********************************************************************************
893*c83a76b0SSuyog Pawar * @fn hme_alloc_init_layer()
894*c83a76b0SSuyog Pawar *
895*c83a76b0SSuyog Pawar * @brief memory alloc and init function
896*c83a76b0SSuyog Pawar *
897*c83a76b0SSuyog Pawar * @return Number of memtabs required
898*c83a76b0SSuyog Pawar ********************************************************************************
899*c83a76b0SSuyog Pawar */
hme_alloc_init_layer(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 disp_wd,S32 disp_ht,S32 segment_layer,S32 is_explicit_store,layer_ctxt_t ** pps_layer)900*c83a76b0SSuyog Pawar S32 hme_alloc_init_layer(
901*c83a76b0SSuyog Pawar hme_memtab_t *ps_memtab,
902*c83a76b0SSuyog Pawar S32 max_num_results,
903*c83a76b0SSuyog Pawar S32 max_num_ref,
904*c83a76b0SSuyog Pawar S32 use_4x4,
905*c83a76b0SSuyog Pawar S32 mem_avail,
906*c83a76b0SSuyog Pawar S32 u1_enc,
907*c83a76b0SSuyog Pawar S32 wd,
908*c83a76b0SSuyog Pawar S32 ht,
909*c83a76b0SSuyog Pawar S32 disp_wd,
910*c83a76b0SSuyog Pawar S32 disp_ht,
911*c83a76b0SSuyog Pawar S32 segment_layer,
912*c83a76b0SSuyog Pawar S32 is_explicit_store,
913*c83a76b0SSuyog Pawar layer_ctxt_t **pps_layer)
914*c83a76b0SSuyog Pawar {
915*c83a76b0SSuyog Pawar S32 count = 0;
916*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer = NULL;
917*c83a76b0SSuyog Pawar S32 size;
918*c83a76b0SSuyog Pawar S32 num_ref;
919*c83a76b0SSuyog Pawar
920*c83a76b0SSuyog Pawar ARG_NOT_USED(segment_layer);
921*c83a76b0SSuyog Pawar
922*c83a76b0SSuyog Pawar if(is_explicit_store)
923*c83a76b0SSuyog Pawar num_ref = max_num_ref;
924*c83a76b0SSuyog Pawar else
925*c83a76b0SSuyog Pawar num_ref = 2;
926*c83a76b0SSuyog Pawar
927*c83a76b0SSuyog Pawar /* We do not store 4x4 results for encoding layers */
928*c83a76b0SSuyog Pawar if(u1_enc)
929*c83a76b0SSuyog Pawar use_4x4 = 0;
930*c83a76b0SSuyog Pawar
931*c83a76b0SSuyog Pawar size = sizeof(layer_ctxt_t);
932*c83a76b0SSuyog Pawar if(mem_avail)
933*c83a76b0SSuyog Pawar {
934*c83a76b0SSuyog Pawar ASSERT(ps_memtab[count].size == size);
935*c83a76b0SSuyog Pawar ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
936*c83a76b0SSuyog Pawar *pps_layer = ps_layer;
937*c83a76b0SSuyog Pawar }
938*c83a76b0SSuyog Pawar else
939*c83a76b0SSuyog Pawar {
940*c83a76b0SSuyog Pawar ps_memtab[count].size = size;
941*c83a76b0SSuyog Pawar ps_memtab[count].align = 8;
942*c83a76b0SSuyog Pawar ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
943*c83a76b0SSuyog Pawar }
944*c83a76b0SSuyog Pawar
945*c83a76b0SSuyog Pawar count++;
946*c83a76b0SSuyog Pawar
947*c83a76b0SSuyog Pawar /* Input luma buffer allocated only for non encode case */
948*c83a76b0SSuyog Pawar if(0 == u1_enc)
949*c83a76b0SSuyog Pawar {
950*c83a76b0SSuyog Pawar /* Allocate input with padding of 16 pixels */
951*c83a76b0SSuyog Pawar size = (wd + 32 + 4) * (ht + 32 + 4);
952*c83a76b0SSuyog Pawar if(mem_avail)
953*c83a76b0SSuyog Pawar {
954*c83a76b0SSuyog Pawar ASSERT(ps_memtab[count].size == size);
955*c83a76b0SSuyog Pawar ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
956*c83a76b0SSuyog Pawar }
957*c83a76b0SSuyog Pawar else
958*c83a76b0SSuyog Pawar {
959*c83a76b0SSuyog Pawar ps_memtab[count].size = size;
960*c83a76b0SSuyog Pawar ps_memtab[count].align = 16;
961*c83a76b0SSuyog Pawar ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
962*c83a76b0SSuyog Pawar }
963*c83a76b0SSuyog Pawar count++;
964*c83a76b0SSuyog Pawar }
965*c83a76b0SSuyog Pawar
966*c83a76b0SSuyog Pawar /* Allocate memory or just the layer mvbank strcture. */
967*c83a76b0SSuyog Pawar /* TODO : see if this can be removed by moving it to layer_ctxt */
968*c83a76b0SSuyog Pawar size = sizeof(layer_mv_t);
969*c83a76b0SSuyog Pawar
970*c83a76b0SSuyog Pawar if(mem_avail)
971*c83a76b0SSuyog Pawar {
972*c83a76b0SSuyog Pawar ASSERT(ps_memtab[count].size == size);
973*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
974*c83a76b0SSuyog Pawar }
975*c83a76b0SSuyog Pawar else
976*c83a76b0SSuyog Pawar {
977*c83a76b0SSuyog Pawar ps_memtab[count].size = size;
978*c83a76b0SSuyog Pawar ps_memtab[count].align = 8;
979*c83a76b0SSuyog Pawar ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
980*c83a76b0SSuyog Pawar }
981*c83a76b0SSuyog Pawar
982*c83a76b0SSuyog Pawar count++;
983*c83a76b0SSuyog Pawar
984*c83a76b0SSuyog Pawar if(mem_avail)
985*c83a76b0SSuyog Pawar {
986*c83a76b0SSuyog Pawar hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
987*c83a76b0SSuyog Pawar }
988*c83a76b0SSuyog Pawar
989*c83a76b0SSuyog Pawar return (count);
990*c83a76b0SSuyog Pawar }
991*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_alloc_init_search_nodes()
*
*  @brief  Memory request / init function for the search nodes of a search
*          results structure. With mem_avail equal to 0 the memtab is filled
*          with size, alignment and memory attribute. Otherwise the memtab
*          memory is split into runs of max_num_results nodes, one run per
*          (reference, partition) pair.
*
*  @param[in,out] ps_search_results : aps_part_results is set up when
*                                     mem_avail is non zero
*  @param[in,out] ps_memtabs : the single memtab used by this function
*  @param[in]  mem_avail : 0 to fill the memtab, non zero to bind pointers
*  @param[in]  max_num_ref : number of references
*  @param[in]  max_num_results : results stored per partition per reference
*
*  @return Number of memtabs used, always 1
********************************************************************************
*/
S32 hme_alloc_init_search_nodes(
    search_results_t *ps_search_results,
    hme_memtab_t *ps_memtabs,
    S32 mem_avail,
    S32 max_num_ref,
    S32 max_num_results)
{
    S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;

    if(mem_avail != 0)
    {
        search_node_t *ps_next_node = (search_node_t *)ps_memtabs->pu1_mem;
        S32 ref_id, part_id;

        ASSERT(ps_memtabs->size == size);

        /* For each CU, N best results are searched and stored per partition */
        /* per ref, so the memory is carved into num_refs * num_parts runs   */
        /* of max_num_results nodes each                                     */
        for(ref_id = 0; ref_id < max_num_ref; ref_id++)
        {
            for(part_id = 0; part_id < TOT_NUM_PARTS; part_id++)
            {
                ps_search_results->aps_part_results[ref_id][part_id] = ps_next_node;
                ps_next_node += max_num_results;
            }
        }
    }
    else
    {
        ps_memtabs->size = size;
        ps_memtabs->align = 4;
        ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    return (1);
}
1027*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_derive_num_layers()
*
*  @brief  Derives the total number of layers by repeatedly downscaling the
*          coarsest encode layer by 2, and fills in the dimensions of the
*          layers derived this way
*
*  @param[in]  n_enc_layers : number of encode layers whose dimensions are
*                             already present in p_wd / p_ht
*  @param[in,out] p_wd, p_ht : layer widths / heights; entries from index
*                              n_enc_layers onwards are written
*  @param[in,out] p_disp_wd, p_disp_ht : display widths / heights; entry 0 and
*                              entries from index n_enc_layers onwards are
*                              written
*
*  @return Total number of layers
********************************************************************************
*/
S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
{
    S32 layer_id;

    ASSERT(n_enc_layers < MAX_NUM_LAYERS);
    ASSERT(n_enc_layers > 0);
    ASSERT(p_wd[0] <= HME_MAX_WIDTH);
    ASSERT(p_ht[0] <= HME_MAX_HEIGHT);

    p_disp_wd[0] = p_wd[0];
    p_disp_ht[0] = p_ht[0];

    /*************************************************************************/
    /* Verify that for simulcast, the ratio between a layer and the next     */
    /* coarser one lies between 2 (dyadic) and 1.33. Typically it is 1.5.    */
    /* TODO : for interlace, we may choose to have additional downscaling for*/
    /* width alone in coarsest layer to next layer.                          */
    /*************************************************************************/
    for(layer_id = 1; layer_id < n_enc_layers; layer_id++)
    {
        ASSERT(p_wd[layer_id] >= FLOOR16(p_wd[layer_id - 1] >> 1));
        ASSERT(p_wd[layer_id] <= CEIL16((p_wd[layer_id - 1] * 3) >> 2));
        ASSERT(p_ht[layer_id] >= FLOOR16(p_ht[layer_id - 1] >> 1));
        ASSERT(p_ht[layer_id] <= CEIL16((p_ht[layer_id - 1] * 3) >> 2));
    }
    ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
    ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);

    /* NOTE(review): p_disp_wd / p_disp_ht of layers 1..n_enc_layers-1 are   */
    /* not written in this function, yet entry n_enc_layers-1 is read below. */
    /* Presumably the caller fills them when n_enc_layers > 1 - confirm.     */

    /* Keep downscaling by 2 till one of these conditions is hit:            */
    /* 1. MAX_NUM_LAYERS reached.                                            */
    /* 2. Width or ht of the previous layer is below twice the min width and */
    /*    ht allowed at the coarsest layer.                                  */
    layer_id = n_enc_layers;
    while(layer_id < MAX_NUM_LAYERS)
    {
        if((p_wd[layer_id - 1] < 2 * MIN_WD_COARSE) || (p_ht[layer_id - 1] < 2 * MIN_HT_COARSE))
        {
            break;
        }

        /* Use CEIL16 to facilitate 16x16 searches in future, or to do */
        /* segmentation study in future                                */
        p_wd[layer_id] = CEIL16(p_wd[layer_id - 1] >> 1);
        p_ht[layer_id] = CEIL16(p_ht[layer_id - 1] >> 1);

        p_disp_wd[layer_id] = p_disp_wd[layer_id - 1] >> 1;
        p_disp_ht[layer_id] = p_disp_ht[layer_id - 1] >> 1;

        layer_id++;
    }

    return (layer_id);
}
1078*c83a76b0SSuyog Pawar
1079*c83a76b0SSuyog Pawar /**
1080*c83a76b0SSuyog Pawar ********************************************************************************
1081*c83a76b0SSuyog Pawar * @fn hme_get_mv_blk_size()
1082*c83a76b0SSuyog Pawar *
1083*c83a76b0SSuyog Pawar * @brief returns whether blk uses 4x4 size or something else.
1084*c83a76b0SSuyog Pawar *
1085*c83a76b0SSuyog Pawar * @param[in] enable_4x4 : input param from application to enable 4x4
1086*c83a76b0SSuyog Pawar *
1087*c83a76b0SSuyog Pawar * @param[in] layer_id : id of current layer (0 finest)
1088*c83a76b0SSuyog Pawar *
1089*c83a76b0SSuyog Pawar * @param[in] num_layeers : total num layers
1090*c83a76b0SSuyog Pawar *
1091*c83a76b0SSuyog Pawar * @param[in] is_enc : Whether encoding enabled for layer
1092*c83a76b0SSuyog Pawar *
1093*c83a76b0SSuyog Pawar * @return 1 for 4x4 blks, 0 for 8x8
1094*c83a76b0SSuyog Pawar ********************************************************************************
1095*c83a76b0SSuyog Pawar */
hme_get_mv_blk_size(S32 enable_4x4,S32 layer_id,S32 num_layers,S32 is_enc)1096*c83a76b0SSuyog Pawar S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
1097*c83a76b0SSuyog Pawar {
1098*c83a76b0SSuyog Pawar S32 use_4x4 = enable_4x4;
1099*c83a76b0SSuyog Pawar
1100*c83a76b0SSuyog Pawar if((layer_id <= 1) && (num_layers >= 4))
1101*c83a76b0SSuyog Pawar use_4x4 = USE_4x4_IN_L1;
1102*c83a76b0SSuyog Pawar if(layer_id == num_layers - 1)
1103*c83a76b0SSuyog Pawar use_4x4 = 1;
1104*c83a76b0SSuyog Pawar if(is_enc)
1105*c83a76b0SSuyog Pawar use_4x4 = 0;
1106*c83a76b0SSuyog Pawar
1107*c83a76b0SSuyog Pawar return (use_4x4);
1108*c83a76b0SSuyog Pawar }
1109*c83a76b0SSuyog Pawar
1110*c83a76b0SSuyog Pawar /**
1111*c83a76b0SSuyog Pawar ********************************************************************************
1112*c83a76b0SSuyog Pawar * @fn hme_enc_alloc_init_mem()
1113*c83a76b0SSuyog Pawar *
1114*c83a76b0SSuyog Pawar * @brief Requests/ assign memory based on mem avail
1115*c83a76b0SSuyog Pawar *
1116*c83a76b0SSuyog Pawar * @param[in] ps_memtabs : memtab array
1117*c83a76b0SSuyog Pawar *
1118*c83a76b0SSuyog Pawar * @param[in] ps_prms : init prms
1119*c83a76b0SSuyog Pawar *
1120*c83a76b0SSuyog Pawar * @param[in] pv_ctxt : ME ctxt
1121*c83a76b0SSuyog Pawar *
1122*c83a76b0SSuyog Pawar * @param[in] mem_avail : request/assign flag
1123*c83a76b0SSuyog Pawar *
1124*c83a76b0SSuyog Pawar * @return 1 for 4x4 blks, 0 for 8x8
1125*c83a76b0SSuyog Pawar ********************************************************************************
1126*c83a76b0SSuyog Pawar */
hme_enc_alloc_init_mem(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,void * pv_ctxt,S32 mem_avail,S32 i4_num_me_frm_pllel)1127*c83a76b0SSuyog Pawar S32 hme_enc_alloc_init_mem(
1128*c83a76b0SSuyog Pawar hme_memtab_t *ps_memtabs,
1129*c83a76b0SSuyog Pawar hme_init_prms_t *ps_prms,
1130*c83a76b0SSuyog Pawar void *pv_ctxt,
1131*c83a76b0SSuyog Pawar S32 mem_avail,
1132*c83a76b0SSuyog Pawar S32 i4_num_me_frm_pllel)
1133*c83a76b0SSuyog Pawar {
1134*c83a76b0SSuyog Pawar me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
1135*c83a76b0SSuyog Pawar me_ctxt_t *ps_ctxt;
1136*c83a76b0SSuyog Pawar S32 count = 0, size, i, j, use_4x4;
1137*c83a76b0SSuyog Pawar S32 n_tot_layers, n_enc_layers;
1138*c83a76b0SSuyog Pawar S32 num_layers_explicit_search;
1139*c83a76b0SSuyog Pawar S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1140*c83a76b0SSuyog Pawar S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1141*c83a76b0SSuyog Pawar S32 num_results;
1142*c83a76b0SSuyog Pawar S32 num_thrds;
1143*c83a76b0SSuyog Pawar S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1144*c83a76b0SSuyog Pawar
1145*c83a76b0SSuyog Pawar /* MV bank changes */
1146*c83a76b0SSuyog Pawar hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1147*c83a76b0SSuyog Pawar S32 i4_num_mvs_per_row = 0;
1148*c83a76b0SSuyog Pawar S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1149*c83a76b0SSuyog Pawar
1150*c83a76b0SSuyog Pawar n_enc_layers = ps_prms->num_simulcast_layers;
1151*c83a76b0SSuyog Pawar
1152*c83a76b0SSuyog Pawar /* Memtab 0: handle */
1153*c83a76b0SSuyog Pawar size = sizeof(me_master_ctxt_t);
1154*c83a76b0SSuyog Pawar if(mem_avail)
1155*c83a76b0SSuyog Pawar {
1156*c83a76b0SSuyog Pawar /* store the number of processing threads */
1157*c83a76b0SSuyog Pawar ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1158*c83a76b0SSuyog Pawar }
1159*c83a76b0SSuyog Pawar else
1160*c83a76b0SSuyog Pawar {
1161*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1162*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
1163*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1164*c83a76b0SSuyog Pawar }
1165*c83a76b0SSuyog Pawar
1166*c83a76b0SSuyog Pawar count++;
1167*c83a76b0SSuyog Pawar
1168*c83a76b0SSuyog Pawar /* Memtab 1: ME threads ctxt */
1169*c83a76b0SSuyog Pawar size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
1170*c83a76b0SSuyog Pawar if(mem_avail)
1171*c83a76b0SSuyog Pawar {
1172*c83a76b0SSuyog Pawar me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;
1173*c83a76b0SSuyog Pawar
1174*c83a76b0SSuyog Pawar /* store the indivisual thread ctxt pointers */
1175*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1176*c83a76b0SSuyog Pawar {
1177*c83a76b0SSuyog Pawar ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1178*c83a76b0SSuyog Pawar }
1179*c83a76b0SSuyog Pawar }
1180*c83a76b0SSuyog Pawar else
1181*c83a76b0SSuyog Pawar {
1182*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1183*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
1184*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1185*c83a76b0SSuyog Pawar }
1186*c83a76b0SSuyog Pawar
1187*c83a76b0SSuyog Pawar count++;
1188*c83a76b0SSuyog Pawar
1189*c83a76b0SSuyog Pawar /* Memtab 2: ME frame ctxts */
1190*c83a76b0SSuyog Pawar size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
1191*c83a76b0SSuyog Pawar if(mem_avail)
1192*c83a76b0SSuyog Pawar {
1193*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;
1194*c83a76b0SSuyog Pawar
1195*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1196*c83a76b0SSuyog Pawar {
1197*c83a76b0SSuyog Pawar /* store the indivisual thread ctxt pointers */
1198*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1199*c83a76b0SSuyog Pawar {
1200*c83a76b0SSuyog Pawar ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;
1201*c83a76b0SSuyog Pawar
1202*c83a76b0SSuyog Pawar ps_me_frm_tmp_ctxt++;
1203*c83a76b0SSuyog Pawar }
1204*c83a76b0SSuyog Pawar }
1205*c83a76b0SSuyog Pawar }
1206*c83a76b0SSuyog Pawar else
1207*c83a76b0SSuyog Pawar {
1208*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1209*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
1210*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1211*c83a76b0SSuyog Pawar }
1212*c83a76b0SSuyog Pawar
1213*c83a76b0SSuyog Pawar count++;
1214*c83a76b0SSuyog Pawar
1215*c83a76b0SSuyog Pawar memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1216*c83a76b0SSuyog Pawar memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1217*c83a76b0SSuyog Pawar /*************************************************************************/
1218*c83a76b0SSuyog Pawar /* Derive the number of HME layers, including both encoded and non encode*/
1219*c83a76b0SSuyog Pawar /* This function also derives the width and ht of each layer. */
1220*c83a76b0SSuyog Pawar /*************************************************************************/
1221*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
1222*c83a76b0SSuyog Pawar num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1223*c83a76b0SSuyog Pawar if(num_layers_explicit_search <= 0)
1224*c83a76b0SSuyog Pawar num_layers_explicit_search = n_tot_layers - 1;
1225*c83a76b0SSuyog Pawar
1226*c83a76b0SSuyog Pawar num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1227*c83a76b0SSuyog Pawar
1228*c83a76b0SSuyog Pawar if(mem_avail)
1229*c83a76b0SSuyog Pawar {
1230*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1231*c83a76b0SSuyog Pawar {
1232*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_frm_ctxt;
1233*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1234*c83a76b0SSuyog Pawar
1235*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1236*c83a76b0SSuyog Pawar {
1237*c83a76b0SSuyog Pawar ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1238*c83a76b0SSuyog Pawar
1239*c83a76b0SSuyog Pawar memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
1240*c83a76b0SSuyog Pawar memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
1241*c83a76b0SSuyog Pawar
1242*c83a76b0SSuyog Pawar /* only one enocde layer is used */
1243*c83a76b0SSuyog Pawar ps_frm_ctxt->num_layers = 1;
1244*c83a76b0SSuyog Pawar
1245*c83a76b0SSuyog Pawar ps_frm_ctxt->i4_wd = a_wd[0];
1246*c83a76b0SSuyog Pawar ps_frm_ctxt->i4_ht = a_ht[0];
1247*c83a76b0SSuyog Pawar /*
1248*c83a76b0SSuyog Pawar memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
1249*c83a76b0SSuyog Pawar memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
1250*c83a76b0SSuyog Pawar */
1251*c83a76b0SSuyog Pawar ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1252*c83a76b0SSuyog Pawar ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
1253*c83a76b0SSuyog Pawar ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1254*c83a76b0SSuyog Pawar ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
1255*c83a76b0SSuyog Pawar }
1256*c83a76b0SSuyog Pawar }
1257*c83a76b0SSuyog Pawar }
1258*c83a76b0SSuyog Pawar
1259*c83a76b0SSuyog Pawar /* Memtabs : Layers MV bank for encode layer */
1260*c83a76b0SSuyog Pawar /* Each ref_desr in master ctxt will have seperate layer ctxt */
1261*c83a76b0SSuyog Pawar
1262*c83a76b0SSuyog Pawar for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1263*c83a76b0SSuyog Pawar {
1264*c83a76b0SSuyog Pawar for(j = 0; j < 1; j++)
1265*c83a76b0SSuyog Pawar {
1266*c83a76b0SSuyog Pawar S32 is_explicit_store = 1;
1267*c83a76b0SSuyog Pawar S32 wd, ht;
1268*c83a76b0SSuyog Pawar U08 u1_enc = 1;
1269*c83a76b0SSuyog Pawar wd = a_wd[j];
1270*c83a76b0SSuyog Pawar ht = a_ht[j];
1271*c83a76b0SSuyog Pawar
1272*c83a76b0SSuyog Pawar /* Possibly implicit search for lower (finer) layers */
1273*c83a76b0SSuyog Pawar if(n_tot_layers - j > num_layers_explicit_search)
1274*c83a76b0SSuyog Pawar is_explicit_store = 0;
1275*c83a76b0SSuyog Pawar
1276*c83a76b0SSuyog Pawar /* Even if explicit search, we store only 2 results (L0 and L1) */
1277*c83a76b0SSuyog Pawar /* in finest layer */
1278*c83a76b0SSuyog Pawar if(j == 0)
1279*c83a76b0SSuyog Pawar {
1280*c83a76b0SSuyog Pawar is_explicit_store = 0;
1281*c83a76b0SSuyog Pawar }
1282*c83a76b0SSuyog Pawar
1283*c83a76b0SSuyog Pawar /* coarsest layer alwasy uses 4x4 blks to store results */
1284*c83a76b0SSuyog Pawar if(j == n_tot_layers - 1)
1285*c83a76b0SSuyog Pawar {
1286*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results_coarse;
1287*c83a76b0SSuyog Pawar }
1288*c83a76b0SSuyog Pawar else
1289*c83a76b0SSuyog Pawar {
1290*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results;
1291*c83a76b0SSuyog Pawar if(j == 0)
1292*c83a76b0SSuyog Pawar num_results = 1;
1293*c83a76b0SSuyog Pawar }
1294*c83a76b0SSuyog Pawar use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1295*c83a76b0SSuyog Pawar
1296*c83a76b0SSuyog Pawar count += hme_alloc_init_layer_mv_bank(
1297*c83a76b0SSuyog Pawar &ps_memtabs[count],
1298*c83a76b0SSuyog Pawar num_results,
1299*c83a76b0SSuyog Pawar ps_prms->max_num_ref,
1300*c83a76b0SSuyog Pawar use_4x4,
1301*c83a76b0SSuyog Pawar mem_avail,
1302*c83a76b0SSuyog Pawar u1_enc,
1303*c83a76b0SSuyog Pawar wd,
1304*c83a76b0SSuyog Pawar ht,
1305*c83a76b0SSuyog Pawar is_explicit_store,
1306*c83a76b0SSuyog Pawar &aps_mv_bank[i],
1307*c83a76b0SSuyog Pawar &api1_ref_idx[i],
1308*c83a76b0SSuyog Pawar &i4_num_mvs_per_row);
1309*c83a76b0SSuyog Pawar }
1310*c83a76b0SSuyog Pawar }
1311*c83a76b0SSuyog Pawar
1312*c83a76b0SSuyog Pawar /* Memtabs : Layers * num-ref + 1 */
1313*c83a76b0SSuyog Pawar for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1314*c83a76b0SSuyog Pawar {
1315*c83a76b0SSuyog Pawar /* layer memory allocated only for enocde layer */
1316*c83a76b0SSuyog Pawar for(j = 0; j < 1; j++)
1317*c83a76b0SSuyog Pawar {
1318*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
1319*c83a76b0SSuyog Pawar S32 is_explicit_store = 1;
1320*c83a76b0SSuyog Pawar S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1321*c83a76b0SSuyog Pawar S32 wd, ht;
1322*c83a76b0SSuyog Pawar U08 u1_enc = 1;
1323*c83a76b0SSuyog Pawar wd = a_wd[j];
1324*c83a76b0SSuyog Pawar ht = a_ht[j];
1325*c83a76b0SSuyog Pawar
1326*c83a76b0SSuyog Pawar /* Possibly implicit search for lower (finer) layers */
1327*c83a76b0SSuyog Pawar if(n_tot_layers - j > num_layers_explicit_search)
1328*c83a76b0SSuyog Pawar is_explicit_store = 0;
1329*c83a76b0SSuyog Pawar
1330*c83a76b0SSuyog Pawar /* Even if explicit search, we store only 2 results (L0 and L1) */
1331*c83a76b0SSuyog Pawar /* in finest layer */
1332*c83a76b0SSuyog Pawar if(j == 0)
1333*c83a76b0SSuyog Pawar {
1334*c83a76b0SSuyog Pawar is_explicit_store = 0;
1335*c83a76b0SSuyog Pawar }
1336*c83a76b0SSuyog Pawar
1337*c83a76b0SSuyog Pawar /* coarsest layer alwasy uses 4x4 blks to store results */
1338*c83a76b0SSuyog Pawar if(j == n_tot_layers - 1)
1339*c83a76b0SSuyog Pawar {
1340*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results_coarse;
1341*c83a76b0SSuyog Pawar }
1342*c83a76b0SSuyog Pawar else
1343*c83a76b0SSuyog Pawar {
1344*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results;
1345*c83a76b0SSuyog Pawar if(j == 0)
1346*c83a76b0SSuyog Pawar num_results = 1;
1347*c83a76b0SSuyog Pawar }
1348*c83a76b0SSuyog Pawar use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1349*c83a76b0SSuyog Pawar
1350*c83a76b0SSuyog Pawar count += hme_alloc_init_layer(
1351*c83a76b0SSuyog Pawar &ps_memtabs[count],
1352*c83a76b0SSuyog Pawar num_results,
1353*c83a76b0SSuyog Pawar ps_prms->max_num_ref,
1354*c83a76b0SSuyog Pawar use_4x4,
1355*c83a76b0SSuyog Pawar mem_avail,
1356*c83a76b0SSuyog Pawar u1_enc,
1357*c83a76b0SSuyog Pawar wd,
1358*c83a76b0SSuyog Pawar ht,
1359*c83a76b0SSuyog Pawar a_disp_wd[j],
1360*c83a76b0SSuyog Pawar a_disp_ht[j],
1361*c83a76b0SSuyog Pawar segment_this_layer,
1362*c83a76b0SSuyog Pawar is_explicit_store,
1363*c83a76b0SSuyog Pawar &ps_layer);
1364*c83a76b0SSuyog Pawar if(mem_avail)
1365*c83a76b0SSuyog Pawar {
1366*c83a76b0SSuyog Pawar /* same ps_layer memory pointer is stored in all the threads */
1367*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1368*c83a76b0SSuyog Pawar {
1369*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1370*c83a76b0SSuyog Pawar ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1371*c83a76b0SSuyog Pawar }
1372*c83a76b0SSuyog Pawar
1373*c83a76b0SSuyog Pawar /* store the MV bank pointers */
1374*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
1375*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
1376*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
1377*c83a76b0SSuyog Pawar }
1378*c83a76b0SSuyog Pawar }
1379*c83a76b0SSuyog Pawar }
1380*c83a76b0SSuyog Pawar
1381*c83a76b0SSuyog Pawar /* Memtabs : Buf Mgr for predictor bufs and working mem */
1382*c83a76b0SSuyog Pawar /* TODO : Parameterise this appropriately */
1383*c83a76b0SSuyog Pawar size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;
1384*c83a76b0SSuyog Pawar
1385*c83a76b0SSuyog Pawar if(mem_avail)
1386*c83a76b0SSuyog Pawar {
1387*c83a76b0SSuyog Pawar U08 *pu1_mem = ps_memtabs[count].pu1_mem;
1388*c83a76b0SSuyog Pawar
1389*c83a76b0SSuyog Pawar ASSERT(ps_memtabs[count].size == size);
1390*c83a76b0SSuyog Pawar
1391*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1392*c83a76b0SSuyog Pawar {
1393*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_frm_ctxt;
1394*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1395*c83a76b0SSuyog Pawar
1396*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1397*c83a76b0SSuyog Pawar {
1398*c83a76b0SSuyog Pawar ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1399*c83a76b0SSuyog Pawar
1400*c83a76b0SSuyog Pawar hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);
1401*c83a76b0SSuyog Pawar
1402*c83a76b0SSuyog Pawar if(i4_num_me_frm_pllel != 1)
1403*c83a76b0SSuyog Pawar {
1404*c83a76b0SSuyog Pawar /* update the memory buffer pointer */
1405*c83a76b0SSuyog Pawar pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1406*c83a76b0SSuyog Pawar }
1407*c83a76b0SSuyog Pawar }
1408*c83a76b0SSuyog Pawar if(i4_num_me_frm_pllel == 1)
1409*c83a76b0SSuyog Pawar {
1410*c83a76b0SSuyog Pawar pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1411*c83a76b0SSuyog Pawar }
1412*c83a76b0SSuyog Pawar }
1413*c83a76b0SSuyog Pawar }
1414*c83a76b0SSuyog Pawar else
1415*c83a76b0SSuyog Pawar {
1416*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1417*c83a76b0SSuyog Pawar ps_memtabs[count].align = 4;
1418*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1419*c83a76b0SSuyog Pawar }
1420*c83a76b0SSuyog Pawar count++;
1421*c83a76b0SSuyog Pawar
1422*c83a76b0SSuyog Pawar /*************************************************************************/
1423*c83a76b0SSuyog Pawar /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
1424*c83a76b0SSuyog Pawar /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred */
1425*c83a76b0SSuyog Pawar /*************************************************************************/
1426*c83a76b0SSuyog Pawar size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
1427*c83a76b0SSuyog Pawar i4_num_me_frm_pllel;
1428*c83a76b0SSuyog Pawar
1429*c83a76b0SSuyog Pawar if(mem_avail)
1430*c83a76b0SSuyog Pawar {
1431*c83a76b0SSuyog Pawar S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;
1432*c83a76b0SSuyog Pawar
1433*c83a76b0SSuyog Pawar ASSERT(ps_memtabs[count].size == size);
1434*c83a76b0SSuyog Pawar
1435*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1436*c83a76b0SSuyog Pawar {
1437*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_frm_ctxt;
1438*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1439*c83a76b0SSuyog Pawar
1440*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1441*c83a76b0SSuyog Pawar {
1442*c83a76b0SSuyog Pawar ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1443*c83a76b0SSuyog Pawar
1444*c83a76b0SSuyog Pawar ps_frm_ctxt->pi2_inp_bck = pi2_mem;
1445*c83a76b0SSuyog Pawar /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1446*c83a76b0SSuyog Pawar if(i4_num_me_frm_pllel != 1)
1447*c83a76b0SSuyog Pawar {
1448*c83a76b0SSuyog Pawar pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1449*c83a76b0SSuyog Pawar }
1450*c83a76b0SSuyog Pawar }
1451*c83a76b0SSuyog Pawar if(i4_num_me_frm_pllel == 1)
1452*c83a76b0SSuyog Pawar {
1453*c83a76b0SSuyog Pawar pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1454*c83a76b0SSuyog Pawar }
1455*c83a76b0SSuyog Pawar }
1456*c83a76b0SSuyog Pawar }
1457*c83a76b0SSuyog Pawar else
1458*c83a76b0SSuyog Pawar {
1459*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1460*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
1461*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1462*c83a76b0SSuyog Pawar }
1463*c83a76b0SSuyog Pawar
1464*c83a76b0SSuyog Pawar count++;
1465*c83a76b0SSuyog Pawar
1466*c83a76b0SSuyog Pawar /* Allocate a memtab for each histogram. As many as num ref and number of threads */
1467*c83a76b0SSuyog Pawar /* Loop across for each ME_FRM in PARALLEL */
1468*c83a76b0SSuyog Pawar for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1469*c83a76b0SSuyog Pawar {
1470*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref; i++)
1471*c83a76b0SSuyog Pawar {
1472*c83a76b0SSuyog Pawar size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
1473*c83a76b0SSuyog Pawar if(mem_avail)
1474*c83a76b0SSuyog Pawar {
1475*c83a76b0SSuyog Pawar mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
1476*c83a76b0SSuyog Pawar
1477*c83a76b0SSuyog Pawar ASSERT(size == ps_memtabs[count].size);
1478*c83a76b0SSuyog Pawar
1479*c83a76b0SSuyog Pawar /* divide the memory accross the threads */
1480*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1481*c83a76b0SSuyog Pawar {
1482*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1483*c83a76b0SSuyog Pawar
1484*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
1485*c83a76b0SSuyog Pawar ps_mv_hist++;
1486*c83a76b0SSuyog Pawar }
1487*c83a76b0SSuyog Pawar }
1488*c83a76b0SSuyog Pawar else
1489*c83a76b0SSuyog Pawar {
1490*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1491*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
1492*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1493*c83a76b0SSuyog Pawar }
1494*c83a76b0SSuyog Pawar count++;
1495*c83a76b0SSuyog Pawar }
1496*c83a76b0SSuyog Pawar if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1497*c83a76b0SSuyog Pawar {
1498*c83a76b0SSuyog Pawar /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1499*c83a76b0SSuyog Pawar /** bring the count back to earlier value if there are no me frames in parallel. don't decrement for last loop **/
1500*c83a76b0SSuyog Pawar count -= ps_prms->max_num_ref;
1501*c83a76b0SSuyog Pawar }
1502*c83a76b0SSuyog Pawar }
1503*c83a76b0SSuyog Pawar
1504*c83a76b0SSuyog Pawar /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
1505*c83a76b0SSuyog Pawar for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1506*c83a76b0SSuyog Pawar {
1507*c83a76b0SSuyog Pawar S32 count_cpy = count;
1508*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1509*c83a76b0SSuyog Pawar {
1510*c83a76b0SSuyog Pawar if(mem_avail)
1511*c83a76b0SSuyog Pawar {
1512*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1513*c83a76b0SSuyog Pawar }
1514*c83a76b0SSuyog Pawar
1515*c83a76b0SSuyog Pawar for(i = 0; i < 21; i++)
1516*c83a76b0SSuyog Pawar {
1517*c83a76b0SSuyog Pawar search_results_t *ps_search_results = NULL;
1518*c83a76b0SSuyog Pawar if(mem_avail)
1519*c83a76b0SSuyog Pawar {
1520*c83a76b0SSuyog Pawar if(i < 16)
1521*c83a76b0SSuyog Pawar {
1522*c83a76b0SSuyog Pawar ps_search_results =
1523*c83a76b0SSuyog Pawar &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
1524*c83a76b0SSuyog Pawar }
1525*c83a76b0SSuyog Pawar else if(i < 20)
1526*c83a76b0SSuyog Pawar {
1527*c83a76b0SSuyog Pawar ps_search_results =
1528*c83a76b0SSuyog Pawar &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
1529*c83a76b0SSuyog Pawar ps_search_results->ps_cu_results =
1530*c83a76b0SSuyog Pawar &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
1531*c83a76b0SSuyog Pawar }
1532*c83a76b0SSuyog Pawar else if(i == 20)
1533*c83a76b0SSuyog Pawar {
1534*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
1535*c83a76b0SSuyog Pawar ps_search_results->ps_cu_results =
1536*c83a76b0SSuyog Pawar &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
1537*c83a76b0SSuyog Pawar }
1538*c83a76b0SSuyog Pawar else
1539*c83a76b0SSuyog Pawar {
1540*c83a76b0SSuyog Pawar /* 8x8 search results are not required in LO ME */
1541*c83a76b0SSuyog Pawar ASSERT(0);
1542*c83a76b0SSuyog Pawar }
1543*c83a76b0SSuyog Pawar }
1544*c83a76b0SSuyog Pawar count += hme_alloc_init_search_nodes(
1545*c83a76b0SSuyog Pawar ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
1546*c83a76b0SSuyog Pawar }
1547*c83a76b0SSuyog Pawar }
1548*c83a76b0SSuyog Pawar
1549*c83a76b0SSuyog Pawar if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1550*c83a76b0SSuyog Pawar {
1551*c83a76b0SSuyog Pawar count = count_cpy;
1552*c83a76b0SSuyog Pawar }
1553*c83a76b0SSuyog Pawar }
1554*c83a76b0SSuyog Pawar
1555*c83a76b0SSuyog Pawar /* Weighted inputs, one for each ref + one non weighted */
1556*c83a76b0SSuyog Pawar for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1557*c83a76b0SSuyog Pawar {
1558*c83a76b0SSuyog Pawar size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
1559*c83a76b0SSuyog Pawar if(mem_avail)
1560*c83a76b0SSuyog Pawar {
1561*c83a76b0SSuyog Pawar U08 *pu1_mem;
1562*c83a76b0SSuyog Pawar ASSERT(ps_memtabs[count].size == size);
1563*c83a76b0SSuyog Pawar pu1_mem = ps_memtabs[count].pu1_mem;
1564*c83a76b0SSuyog Pawar
1565*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1566*c83a76b0SSuyog Pawar {
1567*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1568*c83a76b0SSuyog Pawar
1569*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref + 1; i++)
1570*c83a76b0SSuyog Pawar {
1571*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
1572*c83a76b0SSuyog Pawar pu1_mem += (ctb_wd * ctb_wd);
1573*c83a76b0SSuyog Pawar }
1574*c83a76b0SSuyog Pawar }
1575*c83a76b0SSuyog Pawar }
1576*c83a76b0SSuyog Pawar else
1577*c83a76b0SSuyog Pawar {
1578*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1579*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
1580*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1581*c83a76b0SSuyog Pawar }
1582*c83a76b0SSuyog Pawar if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
1583*c83a76b0SSuyog Pawar {
1584*c83a76b0SSuyog Pawar count++;
1585*c83a76b0SSuyog Pawar }
1586*c83a76b0SSuyog Pawar }
1587*c83a76b0SSuyog Pawar
1588*c83a76b0SSuyog Pawar /* if memory is allocated the intislaise the frm prms ptr to each thrd */
1589*c83a76b0SSuyog Pawar if(mem_avail)
1590*c83a76b0SSuyog Pawar {
1591*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1592*c83a76b0SSuyog Pawar {
1593*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_frm_ctxt;
1594*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1595*c83a76b0SSuyog Pawar
1596*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1597*c83a76b0SSuyog Pawar {
1598*c83a76b0SSuyog Pawar ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1599*c83a76b0SSuyog Pawar
1600*c83a76b0SSuyog Pawar ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
1601*c83a76b0SSuyog Pawar ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
1602*c83a76b0SSuyog Pawar }
1603*c83a76b0SSuyog Pawar }
1604*c83a76b0SSuyog Pawar }
1605*c83a76b0SSuyog Pawar
1606*c83a76b0SSuyog Pawar /* Memory allocation for use in Clustering */
1607*c83a76b0SSuyog Pawar if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
1608*c83a76b0SSuyog Pawar {
1609*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1610*c83a76b0SSuyog Pawar {
1611*c83a76b0SSuyog Pawar size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
1612*c83a76b0SSuyog Pawar sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
1613*c83a76b0SSuyog Pawar size *= ps_prms->i4_num_proc_thrds;
1614*c83a76b0SSuyog Pawar
1615*c83a76b0SSuyog Pawar if(mem_avail)
1616*c83a76b0SSuyog Pawar {
1617*c83a76b0SSuyog Pawar U08 *pu1_mem;
1618*c83a76b0SSuyog Pawar
1619*c83a76b0SSuyog Pawar ASSERT(ps_memtabs[count].size == size);
1620*c83a76b0SSuyog Pawar pu1_mem = ps_memtabs[count].pu1_mem;
1621*c83a76b0SSuyog Pawar
1622*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1623*c83a76b0SSuyog Pawar {
1624*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1625*c83a76b0SSuyog Pawar
1626*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
1627*c83a76b0SSuyog Pawar pu1_mem += (16 * sizeof(cluster_16x16_blk_t));
1628*c83a76b0SSuyog Pawar
1629*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
1630*c83a76b0SSuyog Pawar pu1_mem += (4 * sizeof(cluster_32x32_blk_t));
1631*c83a76b0SSuyog Pawar
1632*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
1633*c83a76b0SSuyog Pawar pu1_mem += (sizeof(cluster_64x64_blk_t));
1634*c83a76b0SSuyog Pawar
1635*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
1636*c83a76b0SSuyog Pawar (ctb_cluster_info_t *)pu1_mem;
1637*c83a76b0SSuyog Pawar pu1_mem += (sizeof(ctb_cluster_info_t));
1638*c83a76b0SSuyog Pawar }
1639*c83a76b0SSuyog Pawar }
1640*c83a76b0SSuyog Pawar else
1641*c83a76b0SSuyog Pawar {
1642*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1643*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
1644*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1645*c83a76b0SSuyog Pawar }
1646*c83a76b0SSuyog Pawar
1647*c83a76b0SSuyog Pawar if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1648*c83a76b0SSuyog Pawar {
1649*c83a76b0SSuyog Pawar count++;
1650*c83a76b0SSuyog Pawar }
1651*c83a76b0SSuyog Pawar }
1652*c83a76b0SSuyog Pawar }
1653*c83a76b0SSuyog Pawar else if(mem_avail)
1654*c83a76b0SSuyog Pawar {
1655*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1656*c83a76b0SSuyog Pawar {
1657*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1658*c83a76b0SSuyog Pawar {
1659*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1660*c83a76b0SSuyog Pawar
1661*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;
1662*c83a76b0SSuyog Pawar
1663*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;
1664*c83a76b0SSuyog Pawar
1665*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;
1666*c83a76b0SSuyog Pawar
1667*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
1668*c83a76b0SSuyog Pawar }
1669*c83a76b0SSuyog Pawar }
1670*c83a76b0SSuyog Pawar }
1671*c83a76b0SSuyog Pawar
1672*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1673*c83a76b0SSuyog Pawar {
1674*c83a76b0SSuyog Pawar size = sizeof(fullpel_refine_ctxt_t);
1675*c83a76b0SSuyog Pawar size *= ps_prms->i4_num_proc_thrds;
1676*c83a76b0SSuyog Pawar
1677*c83a76b0SSuyog Pawar if(mem_avail)
1678*c83a76b0SSuyog Pawar {
1679*c83a76b0SSuyog Pawar U08 *pu1_mem;
1680*c83a76b0SSuyog Pawar
1681*c83a76b0SSuyog Pawar ASSERT(ps_memtabs[count].size == size);
1682*c83a76b0SSuyog Pawar pu1_mem = ps_memtabs[count].pu1_mem;
1683*c83a76b0SSuyog Pawar
1684*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1685*c83a76b0SSuyog Pawar {
1686*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1687*c83a76b0SSuyog Pawar
1688*c83a76b0SSuyog Pawar ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
1689*c83a76b0SSuyog Pawar (fullpel_refine_ctxt_t *)pu1_mem;
1690*c83a76b0SSuyog Pawar pu1_mem += (sizeof(fullpel_refine_ctxt_t));
1691*c83a76b0SSuyog Pawar }
1692*c83a76b0SSuyog Pawar }
1693*c83a76b0SSuyog Pawar else
1694*c83a76b0SSuyog Pawar {
1695*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1696*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
1697*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1698*c83a76b0SSuyog Pawar }
1699*c83a76b0SSuyog Pawar
1700*c83a76b0SSuyog Pawar if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1701*c83a76b0SSuyog Pawar {
1702*c83a76b0SSuyog Pawar count++;
1703*c83a76b0SSuyog Pawar }
1704*c83a76b0SSuyog Pawar }
1705*c83a76b0SSuyog Pawar
1706*c83a76b0SSuyog Pawar /* Memory for ihevce_me_optimised_function_list_t struct */
1707*c83a76b0SSuyog Pawar if(mem_avail)
1708*c83a76b0SSuyog Pawar {
1709*c83a76b0SSuyog Pawar ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
1710*c83a76b0SSuyog Pawar }
1711*c83a76b0SSuyog Pawar else
1712*c83a76b0SSuyog Pawar {
1713*c83a76b0SSuyog Pawar ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
1714*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
1715*c83a76b0SSuyog Pawar ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1716*c83a76b0SSuyog Pawar }
1717*c83a76b0SSuyog Pawar
1718*c83a76b0SSuyog Pawar ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
1719*c83a76b0SSuyog Pawar return (count);
1720*c83a76b0SSuyog Pawar }
1721*c83a76b0SSuyog Pawar
1722*c83a76b0SSuyog Pawar /**
1723*c83a76b0SSuyog Pawar ********************************************************************************
1724*c83a76b0SSuyog Pawar * @fn hme_coarse_alloc_init_mem()
1725*c83a76b0SSuyog Pawar *
1726*c83a76b0SSuyog Pawar * @brief Requests/ assign memory based on mem avail
1727*c83a76b0SSuyog Pawar *
1728*c83a76b0SSuyog Pawar * @param[in] ps_memtabs : memtab array
1729*c83a76b0SSuyog Pawar *
1730*c83a76b0SSuyog Pawar * @param[in] ps_prms : init prms
1731*c83a76b0SSuyog Pawar *
1732*c83a76b0SSuyog Pawar * @param[in] pv_ctxt : ME ctxt
1733*c83a76b0SSuyog Pawar *
1734*c83a76b0SSuyog Pawar * @param[in] mem_avail : request/assign flag
1735*c83a76b0SSuyog Pawar *
1736*c83a76b0SSuyog Pawar * @return number of memtabs
1737*c83a76b0SSuyog Pawar ********************************************************************************
1738*c83a76b0SSuyog Pawar */
hme_coarse_alloc_init_mem(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,void * pv_ctxt,S32 mem_avail)1739*c83a76b0SSuyog Pawar S32 hme_coarse_alloc_init_mem(
1740*c83a76b0SSuyog Pawar hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
1741*c83a76b0SSuyog Pawar {
1742*c83a76b0SSuyog Pawar coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
1743*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt;
1744*c83a76b0SSuyog Pawar S32 count = 0, size, i, j, use_4x4, wd;
1745*c83a76b0SSuyog Pawar S32 n_tot_layers;
1746*c83a76b0SSuyog Pawar S32 num_layers_explicit_search;
1747*c83a76b0SSuyog Pawar S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1748*c83a76b0SSuyog Pawar S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1749*c83a76b0SSuyog Pawar S32 num_results;
1750*c83a76b0SSuyog Pawar S32 num_thrds;
1751*c83a76b0SSuyog Pawar //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1752*c83a76b0SSuyog Pawar S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
1753*c83a76b0SSuyog Pawar S32 layer1_blk_width = 8; // 8x8 search
1754*c83a76b0SSuyog Pawar S32 blk_shift;
1755*c83a76b0SSuyog Pawar
1756*c83a76b0SSuyog Pawar /* MV bank changes */
1757*c83a76b0SSuyog Pawar hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
1758*c83a76b0SSuyog Pawar S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
1759*c83a76b0SSuyog Pawar S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };
1760*c83a76b0SSuyog Pawar
1761*c83a76b0SSuyog Pawar /* Memtab 0: handle */
1762*c83a76b0SSuyog Pawar size = sizeof(coarse_me_master_ctxt_t);
1763*c83a76b0SSuyog Pawar if(mem_avail)
1764*c83a76b0SSuyog Pawar {
1765*c83a76b0SSuyog Pawar /* store the number of processing threads */
1766*c83a76b0SSuyog Pawar ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1767*c83a76b0SSuyog Pawar }
1768*c83a76b0SSuyog Pawar else
1769*c83a76b0SSuyog Pawar {
1770*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1771*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
1772*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1773*c83a76b0SSuyog Pawar }
1774*c83a76b0SSuyog Pawar
1775*c83a76b0SSuyog Pawar count++;
1776*c83a76b0SSuyog Pawar
1777*c83a76b0SSuyog Pawar /* Memtab 1: ME threads ctxt */
1778*c83a76b0SSuyog Pawar size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
1779*c83a76b0SSuyog Pawar if(mem_avail)
1780*c83a76b0SSuyog Pawar {
1781*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;
1782*c83a76b0SSuyog Pawar
1783*c83a76b0SSuyog Pawar /* store the indivisual thread ctxt pointers */
1784*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1785*c83a76b0SSuyog Pawar {
1786*c83a76b0SSuyog Pawar ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1787*c83a76b0SSuyog Pawar }
1788*c83a76b0SSuyog Pawar }
1789*c83a76b0SSuyog Pawar else
1790*c83a76b0SSuyog Pawar {
1791*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
1792*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
1793*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1794*c83a76b0SSuyog Pawar }
1795*c83a76b0SSuyog Pawar
1796*c83a76b0SSuyog Pawar count++;
1797*c83a76b0SSuyog Pawar
1798*c83a76b0SSuyog Pawar memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1799*c83a76b0SSuyog Pawar memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1800*c83a76b0SSuyog Pawar /*************************************************************************/
1801*c83a76b0SSuyog Pawar /* Derive the number of HME layers, including both encoded and non encode*/
1802*c83a76b0SSuyog Pawar /* This function also derives the width and ht of each layer. */
1803*c83a76b0SSuyog Pawar /*************************************************************************/
1804*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
1805*c83a76b0SSuyog Pawar
1806*c83a76b0SSuyog Pawar num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1807*c83a76b0SSuyog Pawar
1808*c83a76b0SSuyog Pawar if(num_layers_explicit_search <= 0)
1809*c83a76b0SSuyog Pawar num_layers_explicit_search = n_tot_layers - 1;
1810*c83a76b0SSuyog Pawar
1811*c83a76b0SSuyog Pawar num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1812*c83a76b0SSuyog Pawar
1813*c83a76b0SSuyog Pawar if(mem_avail)
1814*c83a76b0SSuyog Pawar {
1815*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1816*c83a76b0SSuyog Pawar {
1817*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1818*c83a76b0SSuyog Pawar memset(ps_ctxt->u1_encode, 0, n_tot_layers);
1819*c83a76b0SSuyog Pawar
1820*c83a76b0SSuyog Pawar /* encode layer should be excluded during processing */
1821*c83a76b0SSuyog Pawar ps_ctxt->num_layers = n_tot_layers;
1822*c83a76b0SSuyog Pawar
1823*c83a76b0SSuyog Pawar memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
1824*c83a76b0SSuyog Pawar memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
1825*c83a76b0SSuyog Pawar
1826*c83a76b0SSuyog Pawar ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1827*c83a76b0SSuyog Pawar ps_ctxt->max_num_results = ps_prms->max_num_results;
1828*c83a76b0SSuyog Pawar ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1829*c83a76b0SSuyog Pawar ps_ctxt->max_num_ref = ps_prms->max_num_ref;
1830*c83a76b0SSuyog Pawar }
1831*c83a76b0SSuyog Pawar }
1832*c83a76b0SSuyog Pawar
1833*c83a76b0SSuyog Pawar /* Memtabs : Layers MV bank for total layers - 2 */
1834*c83a76b0SSuyog Pawar /* for penultimate layer MV bank will be initialsed at every frame level */
1835*c83a76b0SSuyog Pawar for(j = 1; j < n_tot_layers; j++)
1836*c83a76b0SSuyog Pawar {
1837*c83a76b0SSuyog Pawar S32 is_explicit_store = 1;
1838*c83a76b0SSuyog Pawar S32 wd, ht;
1839*c83a76b0SSuyog Pawar U08 u1_enc = 0;
1840*c83a76b0SSuyog Pawar wd = a_wd[j];
1841*c83a76b0SSuyog Pawar ht = a_ht[j];
1842*c83a76b0SSuyog Pawar
1843*c83a76b0SSuyog Pawar /* Possibly implicit search for lower (finer) layers */
1844*c83a76b0SSuyog Pawar if(n_tot_layers - j > num_layers_explicit_search)
1845*c83a76b0SSuyog Pawar is_explicit_store = 0;
1846*c83a76b0SSuyog Pawar
1847*c83a76b0SSuyog Pawar /* Even if explicit search, we store only 2 results (L0 and L1) */
1848*c83a76b0SSuyog Pawar /* in finest layer */
1849*c83a76b0SSuyog Pawar if(j == 0)
1850*c83a76b0SSuyog Pawar {
1851*c83a76b0SSuyog Pawar is_explicit_store = 0;
1852*c83a76b0SSuyog Pawar }
1853*c83a76b0SSuyog Pawar
1854*c83a76b0SSuyog Pawar /* coarsest layer alwasy uses 4x4 blks to store results */
1855*c83a76b0SSuyog Pawar if(j == n_tot_layers - 1)
1856*c83a76b0SSuyog Pawar {
1857*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results_coarse;
1858*c83a76b0SSuyog Pawar }
1859*c83a76b0SSuyog Pawar else
1860*c83a76b0SSuyog Pawar {
1861*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results;
1862*c83a76b0SSuyog Pawar if(j == 0)
1863*c83a76b0SSuyog Pawar num_results = 1;
1864*c83a76b0SSuyog Pawar }
1865*c83a76b0SSuyog Pawar use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1866*c83a76b0SSuyog Pawar
1867*c83a76b0SSuyog Pawar /* for penultimate compute the parameters and store */
1868*c83a76b0SSuyog Pawar if(j == 1)
1869*c83a76b0SSuyog Pawar {
1870*c83a76b0SSuyog Pawar S32 num_blks, num_mvs_per_blk, num_ref;
1871*c83a76b0SSuyog Pawar S32 num_cols, num_rows, num_mvs_per_row;
1872*c83a76b0SSuyog Pawar
1873*c83a76b0SSuyog Pawar num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
1874*c83a76b0SSuyog Pawar num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
1875*c83a76b0SSuyog Pawar
1876*c83a76b0SSuyog Pawar if(is_explicit_store)
1877*c83a76b0SSuyog Pawar num_ref = ps_prms->max_num_ref;
1878*c83a76b0SSuyog Pawar else
1879*c83a76b0SSuyog Pawar num_ref = 2;
1880*c83a76b0SSuyog Pawar
1881*c83a76b0SSuyog Pawar num_blks = num_cols * num_rows;
1882*c83a76b0SSuyog Pawar num_mvs_per_blk = num_ref * num_results;
1883*c83a76b0SSuyog Pawar num_mvs_per_row = num_mvs_per_blk * num_cols;
1884*c83a76b0SSuyog Pawar
1885*c83a76b0SSuyog Pawar ai4_num_mvs_per_row[j] = num_mvs_per_row;
1886*c83a76b0SSuyog Pawar aps_mv_bank[j] = NULL;
1887*c83a76b0SSuyog Pawar api1_ref_idx[j] = NULL;
1888*c83a76b0SSuyog Pawar }
1889*c83a76b0SSuyog Pawar else
1890*c83a76b0SSuyog Pawar {
1891*c83a76b0SSuyog Pawar count += hme_alloc_init_layer_mv_bank(
1892*c83a76b0SSuyog Pawar &ps_memtabs[count],
1893*c83a76b0SSuyog Pawar num_results,
1894*c83a76b0SSuyog Pawar ps_prms->max_num_ref,
1895*c83a76b0SSuyog Pawar use_4x4,
1896*c83a76b0SSuyog Pawar mem_avail,
1897*c83a76b0SSuyog Pawar u1_enc,
1898*c83a76b0SSuyog Pawar wd,
1899*c83a76b0SSuyog Pawar ht,
1900*c83a76b0SSuyog Pawar is_explicit_store,
1901*c83a76b0SSuyog Pawar &aps_mv_bank[j],
1902*c83a76b0SSuyog Pawar &api1_ref_idx[j],
1903*c83a76b0SSuyog Pawar &ai4_num_mvs_per_row[j]);
1904*c83a76b0SSuyog Pawar }
1905*c83a76b0SSuyog Pawar }
1906*c83a76b0SSuyog Pawar
1907*c83a76b0SSuyog Pawar /* Memtabs : Layers * num-ref + 1 */
1908*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1909*c83a76b0SSuyog Pawar {
1910*c83a76b0SSuyog Pawar /* for all layer except encode layer */
1911*c83a76b0SSuyog Pawar for(j = 1; j < n_tot_layers; j++)
1912*c83a76b0SSuyog Pawar {
1913*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
1914*c83a76b0SSuyog Pawar S32 is_explicit_store = 1;
1915*c83a76b0SSuyog Pawar S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1916*c83a76b0SSuyog Pawar S32 wd, ht;
1917*c83a76b0SSuyog Pawar U08 u1_enc = 0;
1918*c83a76b0SSuyog Pawar wd = a_wd[j];
1919*c83a76b0SSuyog Pawar ht = a_ht[j];
1920*c83a76b0SSuyog Pawar
1921*c83a76b0SSuyog Pawar /* Possibly implicit search for lower (finer) layers */
1922*c83a76b0SSuyog Pawar if(n_tot_layers - j > num_layers_explicit_search)
1923*c83a76b0SSuyog Pawar is_explicit_store = 0;
1924*c83a76b0SSuyog Pawar
1925*c83a76b0SSuyog Pawar /* Even if explicit search, we store only 2 results (L0 and L1) */
1926*c83a76b0SSuyog Pawar /* in finest layer */
1927*c83a76b0SSuyog Pawar if(j == 0)
1928*c83a76b0SSuyog Pawar {
1929*c83a76b0SSuyog Pawar is_explicit_store = 0;
1930*c83a76b0SSuyog Pawar }
1931*c83a76b0SSuyog Pawar
1932*c83a76b0SSuyog Pawar /* coarsest layer alwasy uses 4x4 blks to store results */
1933*c83a76b0SSuyog Pawar if(j == n_tot_layers - 1)
1934*c83a76b0SSuyog Pawar {
1935*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results_coarse;
1936*c83a76b0SSuyog Pawar }
1937*c83a76b0SSuyog Pawar else
1938*c83a76b0SSuyog Pawar {
1939*c83a76b0SSuyog Pawar num_results = ps_prms->max_num_results;
1940*c83a76b0SSuyog Pawar if(j == 0)
1941*c83a76b0SSuyog Pawar num_results = 1;
1942*c83a76b0SSuyog Pawar }
1943*c83a76b0SSuyog Pawar use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1944*c83a76b0SSuyog Pawar
1945*c83a76b0SSuyog Pawar count += hme_alloc_init_layer(
1946*c83a76b0SSuyog Pawar &ps_memtabs[count],
1947*c83a76b0SSuyog Pawar num_results,
1948*c83a76b0SSuyog Pawar ps_prms->max_num_ref,
1949*c83a76b0SSuyog Pawar use_4x4,
1950*c83a76b0SSuyog Pawar mem_avail,
1951*c83a76b0SSuyog Pawar u1_enc,
1952*c83a76b0SSuyog Pawar wd,
1953*c83a76b0SSuyog Pawar ht,
1954*c83a76b0SSuyog Pawar a_disp_wd[j],
1955*c83a76b0SSuyog Pawar a_disp_ht[j],
1956*c83a76b0SSuyog Pawar segment_this_layer,
1957*c83a76b0SSuyog Pawar is_explicit_store,
1958*c83a76b0SSuyog Pawar &ps_layer);
1959*c83a76b0SSuyog Pawar if(mem_avail)
1960*c83a76b0SSuyog Pawar {
1961*c83a76b0SSuyog Pawar /* same ps_layer memory pointer is stored in all the threads */
1962*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1963*c83a76b0SSuyog Pawar {
1964*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1965*c83a76b0SSuyog Pawar ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1966*c83a76b0SSuyog Pawar }
1967*c83a76b0SSuyog Pawar
1968*c83a76b0SSuyog Pawar /* store the MV bank pointers */
1969*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
1970*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
1971*c83a76b0SSuyog Pawar ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
1972*c83a76b0SSuyog Pawar }
1973*c83a76b0SSuyog Pawar }
1974*c83a76b0SSuyog Pawar }
1975*c83a76b0SSuyog Pawar
1976*c83a76b0SSuyog Pawar /* Memtabs : Prev Row search node at coarsest layer */
1977*c83a76b0SSuyog Pawar wd = a_wd[n_tot_layers - 1];
1978*c83a76b0SSuyog Pawar
1979*c83a76b0SSuyog Pawar /* Allocate a memtab for storing 4x4 SADs for n rows. As many as num ref and number of threads */
1980*c83a76b0SSuyog Pawar num_rows = ps_prms->i4_num_proc_thrds + 1;
1981*c83a76b0SSuyog Pawar if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
1982*c83a76b0SSuyog Pawar search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
1983*c83a76b0SSuyog Pawar else
1984*c83a76b0SSuyog Pawar search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
1985*c83a76b0SSuyog Pawar
1986*c83a76b0SSuyog Pawar /*shift factor*/
1987*c83a76b0SSuyog Pawar blk_shift = 2; /*4x4*/
1988*c83a76b0SSuyog Pawar search_step >>= 1;
1989*c83a76b0SSuyog Pawar
1990*c83a76b0SSuyog Pawar sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
1991*c83a76b0SSuyog Pawar ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
1992*c83a76b0SSuyog Pawar sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;
1993*c83a76b0SSuyog Pawar
1994*c83a76b0SSuyog Pawar size = num_rows * sad_4x4_block_stride * sizeof(S16);
1995*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref; i++)
1996*c83a76b0SSuyog Pawar {
1997*c83a76b0SSuyog Pawar if(mem_avail)
1998*c83a76b0SSuyog Pawar {
1999*c83a76b0SSuyog Pawar ASSERT(size == ps_memtabs[count].size);
2000*c83a76b0SSuyog Pawar
2001*c83a76b0SSuyog Pawar /* same row memory pointer is stored in all the threads */
2002*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2003*c83a76b0SSuyog Pawar {
2004*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2005*c83a76b0SSuyog Pawar ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
2006*c83a76b0SSuyog Pawar }
2007*c83a76b0SSuyog Pawar }
2008*c83a76b0SSuyog Pawar else
2009*c83a76b0SSuyog Pawar {
2010*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
2011*c83a76b0SSuyog Pawar ps_memtabs[count].align = 4;
2012*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2013*c83a76b0SSuyog Pawar }
2014*c83a76b0SSuyog Pawar count++;
2015*c83a76b0SSuyog Pawar }
2016*c83a76b0SSuyog Pawar
2017*c83a76b0SSuyog Pawar /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2018*c83a76b0SSuyog Pawar size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2019*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref; i++)
2020*c83a76b0SSuyog Pawar {
2021*c83a76b0SSuyog Pawar if(mem_avail)
2022*c83a76b0SSuyog Pawar {
2023*c83a76b0SSuyog Pawar ASSERT(size == ps_memtabs[count].size);
2024*c83a76b0SSuyog Pawar
2025*c83a76b0SSuyog Pawar /* same row memory pointer is stored in all the threads */
2026*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2027*c83a76b0SSuyog Pawar {
2028*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2029*c83a76b0SSuyog Pawar ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
2030*c83a76b0SSuyog Pawar (search_node_t *)ps_memtabs[count].pu1_mem;
2031*c83a76b0SSuyog Pawar }
2032*c83a76b0SSuyog Pawar }
2033*c83a76b0SSuyog Pawar else
2034*c83a76b0SSuyog Pawar {
2035*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
2036*c83a76b0SSuyog Pawar ps_memtabs[count].align = 4;
2037*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2038*c83a76b0SSuyog Pawar }
2039*c83a76b0SSuyog Pawar count++;
2040*c83a76b0SSuyog Pawar }
2041*c83a76b0SSuyog Pawar /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2042*c83a76b0SSuyog Pawar size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2043*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref; i++)
2044*c83a76b0SSuyog Pawar {
2045*c83a76b0SSuyog Pawar if(mem_avail)
2046*c83a76b0SSuyog Pawar {
2047*c83a76b0SSuyog Pawar ASSERT(size == ps_memtabs[count].size);
2048*c83a76b0SSuyog Pawar
2049*c83a76b0SSuyog Pawar /* same row memory pointer is stored in all the threads */
2050*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2051*c83a76b0SSuyog Pawar {
2052*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2053*c83a76b0SSuyog Pawar ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
2054*c83a76b0SSuyog Pawar (search_node_t *)ps_memtabs[count].pu1_mem;
2055*c83a76b0SSuyog Pawar }
2056*c83a76b0SSuyog Pawar }
2057*c83a76b0SSuyog Pawar else
2058*c83a76b0SSuyog Pawar {
2059*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
2060*c83a76b0SSuyog Pawar ps_memtabs[count].align = 4;
2061*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2062*c83a76b0SSuyog Pawar }
2063*c83a76b0SSuyog Pawar count++;
2064*c83a76b0SSuyog Pawar }
2065*c83a76b0SSuyog Pawar
2066*c83a76b0SSuyog Pawar /* Allocate a memtab for each histogram. As many as num ref and number of threads */
2067*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref; i++)
2068*c83a76b0SSuyog Pawar {
2069*c83a76b0SSuyog Pawar size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
2070*c83a76b0SSuyog Pawar if(mem_avail)
2071*c83a76b0SSuyog Pawar {
2072*c83a76b0SSuyog Pawar mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
2073*c83a76b0SSuyog Pawar
2074*c83a76b0SSuyog Pawar ASSERT(size == ps_memtabs[count].size);
2075*c83a76b0SSuyog Pawar
2076*c83a76b0SSuyog Pawar /* divide the memory accross the threads */
2077*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2078*c83a76b0SSuyog Pawar {
2079*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2080*c83a76b0SSuyog Pawar ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
2081*c83a76b0SSuyog Pawar ps_mv_hist++;
2082*c83a76b0SSuyog Pawar }
2083*c83a76b0SSuyog Pawar }
2084*c83a76b0SSuyog Pawar else
2085*c83a76b0SSuyog Pawar {
2086*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
2087*c83a76b0SSuyog Pawar ps_memtabs[count].align = 8;
2088*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
2089*c83a76b0SSuyog Pawar }
2090*c83a76b0SSuyog Pawar count++;
2091*c83a76b0SSuyog Pawar }
2092*c83a76b0SSuyog Pawar
2093*c83a76b0SSuyog Pawar /* Memtabs : Search nodes for 8x8 blks */
2094*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2095*c83a76b0SSuyog Pawar {
2096*c83a76b0SSuyog Pawar search_results_t *ps_search_results = NULL;
2097*c83a76b0SSuyog Pawar
2098*c83a76b0SSuyog Pawar if(mem_avail)
2099*c83a76b0SSuyog Pawar {
2100*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2101*c83a76b0SSuyog Pawar }
2102*c83a76b0SSuyog Pawar
2103*c83a76b0SSuyog Pawar if(mem_avail)
2104*c83a76b0SSuyog Pawar {
2105*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->s_search_results_8x8;
2106*c83a76b0SSuyog Pawar }
2107*c83a76b0SSuyog Pawar count += hme_alloc_init_search_nodes(
2108*c83a76b0SSuyog Pawar ps_search_results,
2109*c83a76b0SSuyog Pawar &ps_memtabs[count],
2110*c83a76b0SSuyog Pawar mem_avail,
2111*c83a76b0SSuyog Pawar ps_prms->max_num_ref,
2112*c83a76b0SSuyog Pawar ps_prms->max_num_results);
2113*c83a76b0SSuyog Pawar }
2114*c83a76b0SSuyog Pawar
2115*c83a76b0SSuyog Pawar /* Weighted inputs, one for each ref */
2116*c83a76b0SSuyog Pawar size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
2117*c83a76b0SSuyog Pawar ps_prms->i4_num_proc_thrds;
2118*c83a76b0SSuyog Pawar if(mem_avail)
2119*c83a76b0SSuyog Pawar {
2120*c83a76b0SSuyog Pawar U08 *pu1_mem;
2121*c83a76b0SSuyog Pawar ASSERT(ps_memtabs[count].size == size);
2122*c83a76b0SSuyog Pawar pu1_mem = ps_memtabs[count].pu1_mem;
2123*c83a76b0SSuyog Pawar
2124*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2125*c83a76b0SSuyog Pawar {
2126*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2127*c83a76b0SSuyog Pawar
2128*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->max_num_ref + 1; i++)
2129*c83a76b0SSuyog Pawar {
2130*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
2131*c83a76b0SSuyog Pawar pu1_mem += (layer1_blk_width * layer1_blk_width);
2132*c83a76b0SSuyog Pawar }
2133*c83a76b0SSuyog Pawar }
2134*c83a76b0SSuyog Pawar }
2135*c83a76b0SSuyog Pawar else
2136*c83a76b0SSuyog Pawar {
2137*c83a76b0SSuyog Pawar ps_memtabs[count].size = size;
2138*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
2139*c83a76b0SSuyog Pawar ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2140*c83a76b0SSuyog Pawar }
2141*c83a76b0SSuyog Pawar count++;
2142*c83a76b0SSuyog Pawar
2143*c83a76b0SSuyog Pawar /* if memory is allocated the intislaise the frm prms ptr to each thrd */
2144*c83a76b0SSuyog Pawar if(mem_avail)
2145*c83a76b0SSuyog Pawar {
2146*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2147*c83a76b0SSuyog Pawar {
2148*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2149*c83a76b0SSuyog Pawar
2150*c83a76b0SSuyog Pawar ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
2151*c83a76b0SSuyog Pawar ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
2152*c83a76b0SSuyog Pawar }
2153*c83a76b0SSuyog Pawar }
2154*c83a76b0SSuyog Pawar
2155*c83a76b0SSuyog Pawar /* Memory for ihevce_me_optimised_function_list_t struct */
2156*c83a76b0SSuyog Pawar if(mem_avail)
2157*c83a76b0SSuyog Pawar {
2158*c83a76b0SSuyog Pawar ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
2159*c83a76b0SSuyog Pawar }
2160*c83a76b0SSuyog Pawar else
2161*c83a76b0SSuyog Pawar {
2162*c83a76b0SSuyog Pawar ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
2163*c83a76b0SSuyog Pawar ps_memtabs[count].align = 16;
2164*c83a76b0SSuyog Pawar ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2165*c83a76b0SSuyog Pawar }
2166*c83a76b0SSuyog Pawar
2167*c83a76b0SSuyog Pawar //ASSERT(count < hme_enc_num_alloc());
2168*c83a76b0SSuyog Pawar ASSERT(count < hme_coarse_num_alloc());
2169*c83a76b0SSuyog Pawar return (count);
2170*c83a76b0SSuyog Pawar }
2171*c83a76b0SSuyog Pawar
2172*c83a76b0SSuyog Pawar /*!
2173*c83a76b0SSuyog Pawar ******************************************************************************
2174*c83a76b0SSuyog Pawar * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
2175*c83a76b0SSuyog Pawar *
2176*c83a76b0SSuyog Pawar * \brief Returns to the caller key attributes relevant for dependency manager,
2177*c83a76b0SSuyog Pawar * ie, the number of vertical units in each layer
2178*c83a76b0SSuyog Pawar *
2179*c83a76b0SSuyog Pawar * \par Description:
2180*c83a76b0SSuyog Pawar * This function requires the precondition that the width and ht of encode
2181*c83a76b0SSuyog Pawar * layer is known.
2182*c83a76b0SSuyog Pawar * The number of layers, number of vertical units in each layer, and for
2183*c83a76b0SSuyog Pawar * each vertial unit in each layer, its dependency on previous layer's units
2184*c83a76b0SSuyog Pawar * From ME's perspective, a vertical unit is one which is smallest min size
2185*c83a76b0SSuyog Pawar * vertically (and spans the entire row horizontally). This is CTB for encode
2186*c83a76b0SSuyog Pawar * layer, and 8x8 / 4x4 for non encode layers.
2187*c83a76b0SSuyog Pawar *
2188*c83a76b0SSuyog Pawar * \param[in] num_layers : Number of ME Layers
2189*c83a76b0SSuyog Pawar * \param[in] pai4_ht : Array storing ht at each layer
2190*c83a76b0SSuyog Pawar * \param[in] pai4_wd : Array storing wd at each layer
2191*c83a76b0SSuyog Pawar * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
2192*c83a76b0SSuyog Pawar * entry has num vertical units in that particular layer
2193*c83a76b0SSuyog Pawar *
2194*c83a76b0SSuyog Pawar * \return
2195*c83a76b0SSuyog Pawar * None
2196*c83a76b0SSuyog Pawar *
2197*c83a76b0SSuyog Pawar * \author
2198*c83a76b0SSuyog Pawar * Ittiam
2199*c83a76b0SSuyog Pawar *
2200*c83a76b0SSuyog Pawar *****************************************************************************
2201*c83a76b0SSuyog Pawar */
ihevce_coarse_me_get_lyr_prms_dep_mngr(WORD32 num_layers,WORD32 * pai4_ht,WORD32 * pai4_wd,WORD32 * pai4_num_vert_units_in_lyr)2202*c83a76b0SSuyog Pawar void ihevce_coarse_me_get_lyr_prms_dep_mngr(
2203*c83a76b0SSuyog Pawar WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
2204*c83a76b0SSuyog Pawar {
2205*c83a76b0SSuyog Pawar /* Height of current and next layers */
2206*c83a76b0SSuyog Pawar WORD32 ht_c, ht_n;
2207*c83a76b0SSuyog Pawar /* Blk ht at a given layer and next layer*/
2208*c83a76b0SSuyog Pawar WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
2209*c83a76b0SSuyog Pawar /* Number of vertical units in current and next layer */
2210*c83a76b0SSuyog Pawar WORD32 num_vert_c, num_vert_n;
2211*c83a76b0SSuyog Pawar
2212*c83a76b0SSuyog Pawar WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i;
2213*c83a76b0SSuyog Pawar UWORD8 au1_encode[MAX_NUM_LAYERS];
2214*c83a76b0SSuyog Pawar
2215*c83a76b0SSuyog Pawar memset(au1_encode, 0, num_layers);
2216*c83a76b0SSuyog Pawar memset(au1_encode, 1, num_enc_layers);
2217*c83a76b0SSuyog Pawar
2218*c83a76b0SSuyog Pawar ht_n = pai4_ht[num_layers - 2];
2219*c83a76b0SSuyog Pawar ht_c = pai4_ht[num_layers - 1];
2220*c83a76b0SSuyog Pawar
2221*c83a76b0SSuyog Pawar /* compute blk ht and unit ht for c and n */
2222*c83a76b0SSuyog Pawar if(au1_encode[num_layers - 1])
2223*c83a76b0SSuyog Pawar {
2224*c83a76b0SSuyog Pawar blk_ht_c = 16;
2225*c83a76b0SSuyog Pawar unit_ht_c = ctb_size;
2226*c83a76b0SSuyog Pawar }
2227*c83a76b0SSuyog Pawar else
2228*c83a76b0SSuyog Pawar {
2229*c83a76b0SSuyog Pawar blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
2230*c83a76b0SSuyog Pawar unit_ht_c = blk_ht_c;
2231*c83a76b0SSuyog Pawar }
2232*c83a76b0SSuyog Pawar
2233*c83a76b0SSuyog Pawar num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
2234*c83a76b0SSuyog Pawar /* For new design in Coarsest HME layer we need */
2235*c83a76b0SSuyog Pawar /* one additional row extra at the end of frame */
2236*c83a76b0SSuyog Pawar /* hence num_vert_c is incremented by 1 */
2237*c83a76b0SSuyog Pawar num_vert_c++;
2238*c83a76b0SSuyog Pawar
2239*c83a76b0SSuyog Pawar /*************************************************************************/
2240*c83a76b0SSuyog Pawar /* Run through each layer, set the number of vertical units */
2241*c83a76b0SSuyog Pawar /*************************************************************************/
2242*c83a76b0SSuyog Pawar for(i = num_layers - 1; i > 0; i--)
2243*c83a76b0SSuyog Pawar {
2244*c83a76b0SSuyog Pawar pai4_num_vert_units_in_lyr[i] = num_vert_c;
2245*c83a76b0SSuyog Pawar
2246*c83a76b0SSuyog Pawar /* "n" is computed for first time */
2247*c83a76b0SSuyog Pawar ht_n = pai4_ht[i - 1];
2248*c83a76b0SSuyog Pawar blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0);
2249*c83a76b0SSuyog Pawar unit_ht_n = blk_ht_n;
2250*c83a76b0SSuyog Pawar if(au1_encode[i - 1])
2251*c83a76b0SSuyog Pawar unit_ht_n = ctb_size;
2252*c83a76b0SSuyog Pawar
2253*c83a76b0SSuyog Pawar num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
2254*c83a76b0SSuyog Pawar
2255*c83a76b0SSuyog Pawar /* Compute the blk size and vert unit size in each layer */
2256*c83a76b0SSuyog Pawar /* "c" denotes curr layer, and "n" denotes the layer to which result */
2257*c83a76b0SSuyog Pawar /* is projected to */
2258*c83a76b0SSuyog Pawar ht_c = ht_n;
2259*c83a76b0SSuyog Pawar blk_ht_c = blk_ht_n;
2260*c83a76b0SSuyog Pawar unit_ht_c = unit_ht_n;
2261*c83a76b0SSuyog Pawar num_vert_c = num_vert_n;
2262*c83a76b0SSuyog Pawar }
2263*c83a76b0SSuyog Pawar
2264*c83a76b0SSuyog Pawar /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
2265*c83a76b0SSuyog Pawar /* set the numebr of vertical units */
2266*c83a76b0SSuyog Pawar pai4_num_vert_units_in_lyr[0] = num_vert_c;
2267*c83a76b0SSuyog Pawar }
2268*c83a76b0SSuyog Pawar
2269*c83a76b0SSuyog Pawar /**
2270*c83a76b0SSuyog Pawar ********************************************************************************
2271*c83a76b0SSuyog Pawar * @fn hme_coarse_dep_mngr_alloc_mem()
2272*c83a76b0SSuyog Pawar *
2273*c83a76b0SSuyog Pawar * @brief Requests memory for HME Dep Mngr
2274*c83a76b0SSuyog Pawar *
2275*c83a76b0SSuyog Pawar * \param[in,out] ps_mem_tab : pointer to memory descriptors table
2276*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2277*c83a76b0SSuyog Pawar * \param[in] i4_mem_space : memspace in which memory request should be done
2278*c83a76b0SSuyog Pawar *
2279*c83a76b0SSuyog Pawar * @return number of memtabs
2280*c83a76b0SSuyog Pawar ********************************************************************************
2281*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_alloc_mem(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_mem_space,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2282*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_alloc_mem(
2283*c83a76b0SSuyog Pawar iv_mem_rec_t *ps_mem_tab,
2284*c83a76b0SSuyog Pawar ihevce_static_cfg_params_t *ps_init_prms,
2285*c83a76b0SSuyog Pawar WORD32 i4_mem_space,
2286*c83a76b0SSuyog Pawar WORD32 i4_num_proc_thrds,
2287*c83a76b0SSuyog Pawar WORD32 i4_resolution_id)
2288*c83a76b0SSuyog Pawar {
2289*c83a76b0SSuyog Pawar WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2290*c83a76b0SSuyog Pawar WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2291*c83a76b0SSuyog Pawar WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2292*c83a76b0SSuyog Pawar WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2293*c83a76b0SSuyog Pawar WORD32 min_cu_size;
2294*c83a76b0SSuyog Pawar
2295*c83a76b0SSuyog Pawar /* get the min cu size from config params */
2296*c83a76b0SSuyog Pawar min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2297*c83a76b0SSuyog Pawar
2298*c83a76b0SSuyog Pawar min_cu_size = 1 << min_cu_size;
2299*c83a76b0SSuyog Pawar
2300*c83a76b0SSuyog Pawar /* Get the width and heights of different decomp layers */
2301*c83a76b0SSuyog Pawar *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2302*c83a76b0SSuyog Pawar SET_CTB_ALIGN(
2303*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2304*c83a76b0SSuyog Pawar
2305*c83a76b0SSuyog Pawar *a_ht =
2306*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2307*c83a76b0SSuyog Pawar SET_CTB_ALIGN(
2308*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2309*c83a76b0SSuyog Pawar
2310*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2311*c83a76b0SSuyog Pawar ASSERT(n_tot_layers >= 3);
2312*c83a76b0SSuyog Pawar
2313*c83a76b0SSuyog Pawar /* --- Get the number of vartical units in each layer for dep. mngr -- */
2314*c83a76b0SSuyog Pawar ihevce_coarse_me_get_lyr_prms_dep_mngr(
2315*c83a76b0SSuyog Pawar n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2316*c83a76b0SSuyog Pawar
2317*c83a76b0SSuyog Pawar /* Fill memtabs for HME layers,except for L0 layer */
2318*c83a76b0SSuyog Pawar for(i = 1; i < n_tot_layers; i++)
2319*c83a76b0SSuyog Pawar {
2320*c83a76b0SSuyog Pawar n_dep_tabs += ihevce_dmgr_get_mem_recs(
2321*c83a76b0SSuyog Pawar &ps_mem_tab[n_dep_tabs],
2322*c83a76b0SSuyog Pawar DEP_MNGR_ROW_ROW_SYNC,
2323*c83a76b0SSuyog Pawar ai4_num_vert_units_in_lyr[i],
2324*c83a76b0SSuyog Pawar 1, /* Number of Col Tiles : Not supported in PreEnc */
2325*c83a76b0SSuyog Pawar i4_num_proc_thrds,
2326*c83a76b0SSuyog Pawar i4_mem_space);
2327*c83a76b0SSuyog Pawar }
2328*c83a76b0SSuyog Pawar
2329*c83a76b0SSuyog Pawar ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
2330*c83a76b0SSuyog Pawar
2331*c83a76b0SSuyog Pawar return (n_dep_tabs);
2332*c83a76b0SSuyog Pawar }
2333*c83a76b0SSuyog Pawar
2334*c83a76b0SSuyog Pawar /**
2335*c83a76b0SSuyog Pawar ********************************************************************************
2336*c83a76b0SSuyog Pawar * @fn hme_coarse_dep_mngr_init()
2337*c83a76b0SSuyog Pawar *
2338*c83a76b0SSuyog Pawar * @brief Assign memory for HME Dep Mngr
2339*c83a76b0SSuyog Pawar *
2340*c83a76b0SSuyog Pawar * \param[in,out] ps_mem_tab : pointer to memory descriptors table
2341*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2342*c83a76b0SSuyog Pawar * @param[in] pv_ctxt : ME ctxt
2343*c83a76b0SSuyog Pawar * \param[in] pv_osal_handle : Osal handle
2344*c83a76b0SSuyog Pawar *
2345*c83a76b0SSuyog Pawar * @return number of memtabs
2346*c83a76b0SSuyog Pawar ********************************************************************************
2347*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,void * pv_ctxt,void * pv_osal_handle,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2348*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_init(
2349*c83a76b0SSuyog Pawar iv_mem_rec_t *ps_mem_tab,
2350*c83a76b0SSuyog Pawar ihevce_static_cfg_params_t *ps_init_prms,
2351*c83a76b0SSuyog Pawar void *pv_ctxt,
2352*c83a76b0SSuyog Pawar void *pv_osal_handle,
2353*c83a76b0SSuyog Pawar WORD32 i4_num_proc_thrds,
2354*c83a76b0SSuyog Pawar WORD32 i4_resolution_id)
2355*c83a76b0SSuyog Pawar {
2356*c83a76b0SSuyog Pawar WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2357*c83a76b0SSuyog Pawar WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2358*c83a76b0SSuyog Pawar WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2359*c83a76b0SSuyog Pawar WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2360*c83a76b0SSuyog Pawar WORD32 min_cu_size;
2361*c83a76b0SSuyog Pawar
2362*c83a76b0SSuyog Pawar coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2363*c83a76b0SSuyog Pawar
2364*c83a76b0SSuyog Pawar /* get the min cu size from config params */
2365*c83a76b0SSuyog Pawar min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2366*c83a76b0SSuyog Pawar
2367*c83a76b0SSuyog Pawar min_cu_size = 1 << min_cu_size;
2368*c83a76b0SSuyog Pawar
2369*c83a76b0SSuyog Pawar /* Get the width and heights of different decomp layers */
2370*c83a76b0SSuyog Pawar *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2371*c83a76b0SSuyog Pawar SET_CTB_ALIGN(
2372*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2373*c83a76b0SSuyog Pawar *a_ht =
2374*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2375*c83a76b0SSuyog Pawar SET_CTB_ALIGN(
2376*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2377*c83a76b0SSuyog Pawar
2378*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2379*c83a76b0SSuyog Pawar ASSERT(n_tot_layers >= 3);
2380*c83a76b0SSuyog Pawar
2381*c83a76b0SSuyog Pawar /* --- Get the number of vartical units in each layer for dep. mngr -- */
2382*c83a76b0SSuyog Pawar ihevce_coarse_me_get_lyr_prms_dep_mngr(
2383*c83a76b0SSuyog Pawar n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2384*c83a76b0SSuyog Pawar
2385*c83a76b0SSuyog Pawar /* --- HME sync Dep Mngr Mem init -- */
2386*c83a76b0SSuyog Pawar for(i = 1; i < n_tot_layers; i++)
2387*c83a76b0SSuyog Pawar {
2388*c83a76b0SSuyog Pawar WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift;
2389*c83a76b0SSuyog Pawar
2390*c83a76b0SSuyog Pawar if(i == (n_tot_layers - 1)) /* coarsest layer */
2391*c83a76b0SSuyog Pawar blk_size_shift = 2;
2392*c83a76b0SSuyog Pawar else
2393*c83a76b0SSuyog Pawar blk_size_shift = 3; /* refine layers */
2394*c83a76b0SSuyog Pawar
2395*c83a76b0SSuyog Pawar GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic);
2396*c83a76b0SSuyog Pawar
2397*c83a76b0SSuyog Pawar /* Coarsest layer : 1 block extra, since the last block */
2398*c83a76b0SSuyog Pawar if(i == (n_tot_layers - 1)) /* in a row needs East block */
2399*c83a76b0SSuyog Pawar num_blks_in_row += 1;
2400*c83a76b0SSuyog Pawar
2401*c83a76b0SSuyog Pawar /* Note : i-1, only for HME layers, L0 is separate */
2402*c83a76b0SSuyog Pawar ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init(
2403*c83a76b0SSuyog Pawar &ps_mem_tab[n_dep_tabs],
2404*c83a76b0SSuyog Pawar pv_osal_handle,
2405*c83a76b0SSuyog Pawar DEP_MNGR_ROW_ROW_SYNC,
2406*c83a76b0SSuyog Pawar ai4_num_vert_units_in_lyr[i],
2407*c83a76b0SSuyog Pawar num_blks_in_row,
2408*c83a76b0SSuyog Pawar 1, /* Number of Col Tiles : Not supported in PreEnc */
2409*c83a76b0SSuyog Pawar i4_num_proc_thrds,
2410*c83a76b0SSuyog Pawar 1 /*Sem disabled*/
2411*c83a76b0SSuyog Pawar );
2412*c83a76b0SSuyog Pawar
2413*c83a76b0SSuyog Pawar n_dep_tabs += ihevce_dmgr_get_num_mem_recs();
2414*c83a76b0SSuyog Pawar }
2415*c83a76b0SSuyog Pawar
2416*c83a76b0SSuyog Pawar return n_dep_tabs;
2417*c83a76b0SSuyog Pawar }
2418*c83a76b0SSuyog Pawar
2419*c83a76b0SSuyog Pawar /**
2420*c83a76b0SSuyog Pawar ********************************************************************************
2421*c83a76b0SSuyog Pawar * @fn hme_coarse_dep_mngr_reg_sem()
2422*c83a76b0SSuyog Pawar *
2423*c83a76b0SSuyog Pawar * @brief Assign semaphores for HME Dep Mngr
2424*c83a76b0SSuyog Pawar *
2425*c83a76b0SSuyog Pawar * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2426*c83a76b0SSuyog Pawar * \param[in] ppv_sem_hdls : Arry of semaphore handles
2427*c83a76b0SSuyog Pawar * \param[in] i4_num_proc_thrds : Number of processing threads
2428*c83a76b0SSuyog Pawar *
2429*c83a76b0SSuyog Pawar * @return number of memtabs
2430*c83a76b0SSuyog Pawar ********************************************************************************
2431*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_reg_sem(void * pv_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)2432*c83a76b0SSuyog Pawar void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
2433*c83a76b0SSuyog Pawar {
2434*c83a76b0SSuyog Pawar WORD32 i;
2435*c83a76b0SSuyog Pawar coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2436*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt = ps_me_ctxt->aps_me_ctxt[0];
2437*c83a76b0SSuyog Pawar
2438*c83a76b0SSuyog Pawar /* --- HME sync Dep Mngr semaphore init -- */
2439*c83a76b0SSuyog Pawar for(i = 1; i < ps_ctxt->num_layers; i++)
2440*c83a76b0SSuyog Pawar {
2441*c83a76b0SSuyog Pawar ihevce_dmgr_reg_sem_hdls(
2442*c83a76b0SSuyog Pawar ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1], ppv_sem_hdls, i4_num_proc_thrds);
2443*c83a76b0SSuyog Pawar }
2444*c83a76b0SSuyog Pawar
2445*c83a76b0SSuyog Pawar return;
2446*c83a76b0SSuyog Pawar }
2447*c83a76b0SSuyog Pawar
2448*c83a76b0SSuyog Pawar /**
2449*c83a76b0SSuyog Pawar ********************************************************************************
2450*c83a76b0SSuyog Pawar * @fn hme_coarse_dep_mngr_delete()
2451*c83a76b0SSuyog Pawar *
2452*c83a76b0SSuyog Pawar * Destroy Coarse ME Dep Mngr module
2453*c83a76b0SSuyog Pawar * Note : Only Destroys the resources allocated in the module like
2454*c83a76b0SSuyog Pawar * semaphore,etc. Memory free is done Separately using memtabs
2455*c83a76b0SSuyog Pawar *
2456*c83a76b0SSuyog Pawar * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2457*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2458*c83a76b0SSuyog Pawar *
2459*c83a76b0SSuyog Pawar * @return none
2460*c83a76b0SSuyog Pawar ********************************************************************************
2461*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_delete(void * pv_me_ctxt,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_resolution_id)2462*c83a76b0SSuyog Pawar void hme_coarse_dep_mngr_delete(
2463*c83a76b0SSuyog Pawar void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
2464*c83a76b0SSuyog Pawar {
2465*c83a76b0SSuyog Pawar WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2466*c83a76b0SSuyog Pawar WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2467*c83a76b0SSuyog Pawar WORD32 n_enc_layers = 1, n_tot_layers, i;
2468*c83a76b0SSuyog Pawar WORD32 min_cu_size;
2469*c83a76b0SSuyog Pawar
2470*c83a76b0SSuyog Pawar coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
2471*c83a76b0SSuyog Pawar
2472*c83a76b0SSuyog Pawar /* get the min cu size from config params */
2473*c83a76b0SSuyog Pawar min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2474*c83a76b0SSuyog Pawar
2475*c83a76b0SSuyog Pawar min_cu_size = 1 << min_cu_size;
2476*c83a76b0SSuyog Pawar
2477*c83a76b0SSuyog Pawar /* Get the width and heights of different decomp layers */
2478*c83a76b0SSuyog Pawar *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2479*c83a76b0SSuyog Pawar SET_CTB_ALIGN(
2480*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2481*c83a76b0SSuyog Pawar *a_ht =
2482*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2483*c83a76b0SSuyog Pawar SET_CTB_ALIGN(
2484*c83a76b0SSuyog Pawar ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2485*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2486*c83a76b0SSuyog Pawar ASSERT(n_tot_layers >= 3);
2487*c83a76b0SSuyog Pawar
2488*c83a76b0SSuyog Pawar /* --- HME sync Dep Mngr Delete -- */
2489*c83a76b0SSuyog Pawar for(i = 1; i < n_tot_layers; i++)
2490*c83a76b0SSuyog Pawar {
2491*c83a76b0SSuyog Pawar /* Note : i-1, only for HME layers, L0 is separate */
2492*c83a76b0SSuyog Pawar ihevce_dmgr_del(ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1]);
2493*c83a76b0SSuyog Pawar }
2494*c83a76b0SSuyog Pawar }
2495*c83a76b0SSuyog Pawar
2496*c83a76b0SSuyog Pawar /**
2497*c83a76b0SSuyog Pawar *******************************************************************************
2498*c83a76b0SSuyog Pawar * @fn S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2499*c83a76b0SSuyog Pawar *
2500*c83a76b0SSuyog Pawar * @brief Fills up memtabs with memory information details required by HME
2501*c83a76b0SSuyog Pawar *
2502*c83a76b0SSuyog Pawar * @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2503*c83a76b0SSuyog Pawar * up its requirements of memory
2504*c83a76b0SSuyog Pawar *
2505*c83a76b0SSuyog Pawar * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2506*c83a76b0SSuyog Pawar * amt of memory
2507*c83a76b0SSuyog Pawar *
2508*c83a76b0SSuyog Pawar * @return Number of memtabs required
2509*c83a76b0SSuyog Pawar *******************************************************************************
2510*c83a76b0SSuyog Pawar */
hme_enc_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,WORD32 i4_num_me_frm_pllel)2511*c83a76b0SSuyog Pawar S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
2512*c83a76b0SSuyog Pawar {
2513*c83a76b0SSuyog Pawar S32 num, tot, i;
2514*c83a76b0SSuyog Pawar
2515*c83a76b0SSuyog Pawar /* Validation of init params */
2516*c83a76b0SSuyog Pawar if(-1 == hme_validate_init_prms(ps_prms))
2517*c83a76b0SSuyog Pawar return (-1);
2518*c83a76b0SSuyog Pawar
2519*c83a76b0SSuyog Pawar num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
2520*c83a76b0SSuyog Pawar tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2521*c83a76b0SSuyog Pawar for(i = num; i < tot; i++)
2522*c83a76b0SSuyog Pawar {
2523*c83a76b0SSuyog Pawar ps_memtabs[i].size = 4;
2524*c83a76b0SSuyog Pawar ps_memtabs[i].align = 4;
2525*c83a76b0SSuyog Pawar ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2526*c83a76b0SSuyog Pawar }
2527*c83a76b0SSuyog Pawar return (tot);
2528*c83a76b0SSuyog Pawar }
2529*c83a76b0SSuyog Pawar
2530*c83a76b0SSuyog Pawar /**
2531*c83a76b0SSuyog Pawar *******************************************************************************
2532*c83a76b0SSuyog Pawar * @fn S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2533*c83a76b0SSuyog Pawar *
2534*c83a76b0SSuyog Pawar * @brief Fills up memtabs with memory information details required by Coarse HME
2535*c83a76b0SSuyog Pawar *
2536*c83a76b0SSuyog Pawar * @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2537*c83a76b0SSuyog Pawar * up its requirements of memory
2538*c83a76b0SSuyog Pawar *
2539*c83a76b0SSuyog Pawar * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2540*c83a76b0SSuyog Pawar * amt of memory
2541*c83a76b0SSuyog Pawar *
2542*c83a76b0SSuyog Pawar * @return Number of memtabs required
2543*c83a76b0SSuyog Pawar *******************************************************************************
2544*c83a76b0SSuyog Pawar */
hme_coarse_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2545*c83a76b0SSuyog Pawar S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2546*c83a76b0SSuyog Pawar {
2547*c83a76b0SSuyog Pawar S32 num, tot, i;
2548*c83a76b0SSuyog Pawar
2549*c83a76b0SSuyog Pawar /* Validation of init params */
2550*c83a76b0SSuyog Pawar if(-1 == hme_validate_init_prms(ps_prms))
2551*c83a76b0SSuyog Pawar return (-1);
2552*c83a76b0SSuyog Pawar
2553*c83a76b0SSuyog Pawar num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
2554*c83a76b0SSuyog Pawar tot = hme_coarse_num_alloc();
2555*c83a76b0SSuyog Pawar for(i = num; i < tot; i++)
2556*c83a76b0SSuyog Pawar {
2557*c83a76b0SSuyog Pawar ps_memtabs[i].size = 4;
2558*c83a76b0SSuyog Pawar ps_memtabs[i].align = 4;
2559*c83a76b0SSuyog Pawar ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2560*c83a76b0SSuyog Pawar }
2561*c83a76b0SSuyog Pawar return (tot);
2562*c83a76b0SSuyog Pawar }
2563*c83a76b0SSuyog Pawar
2564*c83a76b0SSuyog Pawar /**
2565*c83a76b0SSuyog Pawar *******************************************************************************
2566*c83a76b0SSuyog Pawar * @fn hme_coarse_dep_mngr_alloc
2567*c83a76b0SSuyog Pawar *
2568*c83a76b0SSuyog Pawar * @brief Fills up memtabs with memory information details required by Coarse HME
2569*c83a76b0SSuyog Pawar *
2570*c83a76b0SSuyog Pawar * \param[in,out] ps_mem_tab : pointer to memory descriptors table
2571*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
2572*c83a76b0SSuyog Pawar * \param[in] i4_mem_space : memspace in whihc memory request should be done
2573*c83a76b0SSuyog Pawar *
2574*c83a76b0SSuyog Pawar * @return Number of memtabs required
2575*c83a76b0SSuyog Pawar *******************************************************************************
2576*c83a76b0SSuyog Pawar */
hme_coarse_dep_mngr_alloc(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_mem_space,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2577*c83a76b0SSuyog Pawar WORD32 hme_coarse_dep_mngr_alloc(
2578*c83a76b0SSuyog Pawar iv_mem_rec_t *ps_mem_tab,
2579*c83a76b0SSuyog Pawar ihevce_static_cfg_params_t *ps_init_prms,
2580*c83a76b0SSuyog Pawar WORD32 i4_mem_space,
2581*c83a76b0SSuyog Pawar WORD32 i4_num_proc_thrds,
2582*c83a76b0SSuyog Pawar WORD32 i4_resolution_id)
2583*c83a76b0SSuyog Pawar {
2584*c83a76b0SSuyog Pawar S32 num, tot, i;
2585*c83a76b0SSuyog Pawar
2586*c83a76b0SSuyog Pawar num = hme_coarse_dep_mngr_alloc_mem(
2587*c83a76b0SSuyog Pawar ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
2588*c83a76b0SSuyog Pawar tot = hme_coarse_dep_mngr_num_alloc();
2589*c83a76b0SSuyog Pawar for(i = num; i < tot; i++)
2590*c83a76b0SSuyog Pawar {
2591*c83a76b0SSuyog Pawar ps_mem_tab[i].i4_mem_size = 4;
2592*c83a76b0SSuyog Pawar ps_mem_tab[i].i4_mem_alignment = 4;
2593*c83a76b0SSuyog Pawar ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
2594*c83a76b0SSuyog Pawar }
2595*c83a76b0SSuyog Pawar return (tot);
2596*c83a76b0SSuyog Pawar }
2597*c83a76b0SSuyog Pawar
2598*c83a76b0SSuyog Pawar /**
2599*c83a76b0SSuyog Pawar ********************************************************************************
2600*c83a76b0SSuyog Pawar * @fn hme_coarse_init_ctxt()
2601*c83a76b0SSuyog Pawar *
2602*c83a76b0SSuyog Pawar * @brief initialise context memory
2603*c83a76b0SSuyog Pawar *
2604*c83a76b0SSuyog Pawar * @param[in] ps_prms : init prms
2605*c83a76b0SSuyog Pawar *
2606*c83a76b0SSuyog Pawar * @param[in] pv_ctxt : ME ctxt
2607*c83a76b0SSuyog Pawar *
2608*c83a76b0SSuyog Pawar * @return number of memtabs
2609*c83a76b0SSuyog Pawar ********************************************************************************
2610*c83a76b0SSuyog Pawar */
hme_coarse_init_ctxt(coarse_me_master_ctxt_t * ps_master_ctxt,hme_init_prms_t * ps_prms)2611*c83a76b0SSuyog Pawar void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
2612*c83a76b0SSuyog Pawar {
2613*c83a76b0SSuyog Pawar S32 i, j, num_thrds;
2614*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt;
2615*c83a76b0SSuyog Pawar S32 num_rows_coarse;
2616*c83a76b0SSuyog Pawar
2617*c83a76b0SSuyog Pawar /* initialise the parameters inot context of all threads */
2618*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2619*c83a76b0SSuyog Pawar {
2620*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2621*c83a76b0SSuyog Pawar
2622*c83a76b0SSuyog Pawar /* Copy the init prms to context */
2623*c83a76b0SSuyog Pawar ps_ctxt->s_init_prms = *ps_prms;
2624*c83a76b0SSuyog Pawar
2625*c83a76b0SSuyog Pawar /* Initialize some other variables in ctxt */
2626*c83a76b0SSuyog Pawar ps_ctxt->i4_prev_poc = -1;
2627*c83a76b0SSuyog Pawar
2628*c83a76b0SSuyog Pawar ps_ctxt->num_b_frms = ps_prms->num_b_frms;
2629*c83a76b0SSuyog Pawar
2630*c83a76b0SSuyog Pawar ps_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_ctxt->au1_ref_bits_tlu_lc[0][0];
2631*c83a76b0SSuyog Pawar ps_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_ctxt->au1_ref_bits_tlu_lc[1][0];
2632*c83a76b0SSuyog Pawar
2633*c83a76b0SSuyog Pawar /* Initialize num rows lookuptable */
2634*c83a76b0SSuyog Pawar ps_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
2635*c83a76b0SSuyog Pawar num_rows_coarse = ps_ctxt->i4_num_row_bufs;
2636*c83a76b0SSuyog Pawar for(i = 0; i < ((HEVCE_MAX_HEIGHT >> 1) >> 2); i++)
2637*c83a76b0SSuyog Pawar {
2638*c83a76b0SSuyog Pawar ps_ctxt->ai4_row_index[i] = (i % num_rows_coarse);
2639*c83a76b0SSuyog Pawar }
2640*c83a76b0SSuyog Pawar }
2641*c83a76b0SSuyog Pawar
2642*c83a76b0SSuyog Pawar /* since same layer desc pointer is stored in all the threads ctxt */
2643*c83a76b0SSuyog Pawar /* layer init is done only using 0th thread ctxt */
2644*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2645*c83a76b0SSuyog Pawar
2646*c83a76b0SSuyog Pawar /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2647*c83a76b0SSuyog Pawar for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2648*c83a76b0SSuyog Pawar {
2649*c83a76b0SSuyog Pawar for(j = 1; j < ps_ctxt->num_layers; j++)
2650*c83a76b0SSuyog Pawar {
2651*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
2652*c83a76b0SSuyog Pawar ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2653*c83a76b0SSuyog Pawar ps_layer->i4_poc = -1;
2654*c83a76b0SSuyog Pawar ps_layer->ppu1_list_inp = &ps_ctxt->apu1_list_inp[j][0];
2655*c83a76b0SSuyog Pawar memset(
2656*c83a76b0SSuyog Pawar ps_layer->s_global_mv, 0, sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
2657*c83a76b0SSuyog Pawar }
2658*c83a76b0SSuyog Pawar }
2659*c83a76b0SSuyog Pawar }
2660*c83a76b0SSuyog Pawar
2661*c83a76b0SSuyog Pawar /**
2662*c83a76b0SSuyog Pawar ********************************************************************************
2663*c83a76b0SSuyog Pawar * @fn hme_enc_init_ctxt()
2664*c83a76b0SSuyog Pawar *
2665*c83a76b0SSuyog Pawar * @brief initialise context memory
2666*c83a76b0SSuyog Pawar *
2667*c83a76b0SSuyog Pawar * @param[in] ps_prms : init prms
2668*c83a76b0SSuyog Pawar *
2669*c83a76b0SSuyog Pawar * @param[in] pv_ctxt : ME ctxt
2670*c83a76b0SSuyog Pawar *
2671*c83a76b0SSuyog Pawar * @return number of memtabs
2672*c83a76b0SSuyog Pawar ********************************************************************************
2673*c83a76b0SSuyog Pawar */
hme_enc_init_ctxt(me_master_ctxt_t * ps_master_ctxt,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt)2674*c83a76b0SSuyog Pawar void hme_enc_init_ctxt(
2675*c83a76b0SSuyog Pawar me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
2676*c83a76b0SSuyog Pawar {
2677*c83a76b0SSuyog Pawar S32 i, j, num_thrds;
2678*c83a76b0SSuyog Pawar me_ctxt_t *ps_ctxt;
2679*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_frm_ctxt;
2680*c83a76b0SSuyog Pawar
2681*c83a76b0SSuyog Pawar /* initialise the parameters in context of all threads */
2682*c83a76b0SSuyog Pawar for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2683*c83a76b0SSuyog Pawar {
2684*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2685*c83a76b0SSuyog Pawar /* Store Tile params base into ME context */
2686*c83a76b0SSuyog Pawar ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;
2687*c83a76b0SSuyog Pawar
2688*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
2689*c83a76b0SSuyog Pawar {
2690*c83a76b0SSuyog Pawar ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
2691*c83a76b0SSuyog Pawar
2692*c83a76b0SSuyog Pawar /* Copy the init prms to context */
2693*c83a76b0SSuyog Pawar ps_ctxt->s_init_prms = *ps_prms;
2694*c83a76b0SSuyog Pawar
2695*c83a76b0SSuyog Pawar /* Initialize some other variables in ctxt */
2696*c83a76b0SSuyog Pawar ps_frm_ctxt->i4_prev_poc = INVALID_POC;
2697*c83a76b0SSuyog Pawar
2698*c83a76b0SSuyog Pawar ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;
2699*c83a76b0SSuyog Pawar
2700*c83a76b0SSuyog Pawar ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;
2701*c83a76b0SSuyog Pawar
2702*c83a76b0SSuyog Pawar ps_frm_ctxt->i4_is_prev_frame_reference = 0;
2703*c83a76b0SSuyog Pawar
2704*c83a76b0SSuyog Pawar ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
2705*c83a76b0SSuyog Pawar
2706*c83a76b0SSuyog Pawar /* Initialize mv grids for L0 and L1 used in final refinement layer */
2707*c83a76b0SSuyog Pawar {
2708*c83a76b0SSuyog Pawar hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
2709*c83a76b0SSuyog Pawar hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
2710*c83a76b0SSuyog Pawar hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
2711*c83a76b0SSuyog Pawar hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
2712*c83a76b0SSuyog Pawar hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
2713*c83a76b0SSuyog Pawar hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
2714*c83a76b0SSuyog Pawar }
2715*c83a76b0SSuyog Pawar
2716*c83a76b0SSuyog Pawar ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
2717*c83a76b0SSuyog Pawar ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
2718*c83a76b0SSuyog Pawar }
2719*c83a76b0SSuyog Pawar }
2720*c83a76b0SSuyog Pawar
2721*c83a76b0SSuyog Pawar /* since same layer desc pointer is stored in all the threads ctxt */
2722*c83a76b0SSuyog Pawar /* layer init is done only using 0th thread ctxt */
2723*c83a76b0SSuyog Pawar ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2724*c83a76b0SSuyog Pawar
2725*c83a76b0SSuyog Pawar ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];
2726*c83a76b0SSuyog Pawar
2727*c83a76b0SSuyog Pawar /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2728*c83a76b0SSuyog Pawar for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
2729*c83a76b0SSuyog Pawar {
2730*c83a76b0SSuyog Pawar /* only enocde layer is processed */
2731*c83a76b0SSuyog Pawar for(j = 0; j < 1; j++)
2732*c83a76b0SSuyog Pawar {
2733*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
2734*c83a76b0SSuyog Pawar ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2735*c83a76b0SSuyog Pawar ps_layer->i4_poc = INVALID_POC;
2736*c83a76b0SSuyog Pawar ps_layer->i4_is_free = 1;
2737*c83a76b0SSuyog Pawar ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
2738*c83a76b0SSuyog Pawar ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
2739*c83a76b0SSuyog Pawar ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
2740*c83a76b0SSuyog Pawar ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
2741*c83a76b0SSuyog Pawar ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
2742*c83a76b0SSuyog Pawar ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];
2743*c83a76b0SSuyog Pawar
2744*c83a76b0SSuyog Pawar memset(
2745*c83a76b0SSuyog Pawar ps_layer->s_global_mv,
2746*c83a76b0SSuyog Pawar 0,
2747*c83a76b0SSuyog Pawar sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
2748*c83a76b0SSuyog Pawar }
2749*c83a76b0SSuyog Pawar }
2750*c83a76b0SSuyog Pawar }
2751*c83a76b0SSuyog Pawar
2752*c83a76b0SSuyog Pawar /**
2753*c83a76b0SSuyog Pawar *******************************************************************************
2754*c83a76b0SSuyog Pawar * @fn S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt)
2755*c83a76b0SSuyog Pawar *
2756*c83a76b0SSuyog Pawar * @brief Initialises the Encode Layer HME ctxt
2757*c83a76b0SSuyog Pawar *
2758*c83a76b0SSuyog Pawar * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2759*c83a76b0SSuyog Pawar * up its requirements of memory
2760*c83a76b0SSuyog Pawar *
2761*c83a76b0SSuyog Pawar * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2762*c83a76b0SSuyog Pawar * amt of memory
2763*c83a76b0SSuyog Pawar *
2764*c83a76b0SSuyog Pawar * @return Number of memtabs required
2765*c83a76b0SSuyog Pawar *******************************************************************************
2766*c83a76b0SSuyog Pawar */
hme_enc_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt,WORD32 i4_num_me_frm_pllel)2767*c83a76b0SSuyog Pawar S32 hme_enc_init(
2768*c83a76b0SSuyog Pawar void *pv_ctxt,
2769*c83a76b0SSuyog Pawar hme_memtab_t *ps_memtabs,
2770*c83a76b0SSuyog Pawar hme_init_prms_t *ps_prms,
2771*c83a76b0SSuyog Pawar rc_quant_t *ps_rc_quant_ctxt,
2772*c83a76b0SSuyog Pawar WORD32 i4_num_me_frm_pllel)
2773*c83a76b0SSuyog Pawar {
2774*c83a76b0SSuyog Pawar S32 num, tot;
2775*c83a76b0SSuyog Pawar me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt;
2776*c83a76b0SSuyog Pawar
2777*c83a76b0SSuyog Pawar tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2778*c83a76b0SSuyog Pawar /* Validation of init params */
2779*c83a76b0SSuyog Pawar if(-1 == hme_validate_init_prms(ps_prms))
2780*c83a76b0SSuyog Pawar return (-1);
2781*c83a76b0SSuyog Pawar
2782*c83a76b0SSuyog Pawar num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
2783*c83a76b0SSuyog Pawar if(num > tot)
2784*c83a76b0SSuyog Pawar return (-1);
2785*c83a76b0SSuyog Pawar
2786*c83a76b0SSuyog Pawar /* Initialize all enumerations based globals */
2787*c83a76b0SSuyog Pawar //hme_init_globals(); /* done as part of coarse me */
2788*c83a76b0SSuyog Pawar
2789*c83a76b0SSuyog Pawar /* Copy the memtabs into the context for returning during free */
2790*c83a76b0SSuyog Pawar memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2791*c83a76b0SSuyog Pawar
2792*c83a76b0SSuyog Pawar /* initialize the context and related buffers */
2793*c83a76b0SSuyog Pawar hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt);
2794*c83a76b0SSuyog Pawar return (0);
2795*c83a76b0SSuyog Pawar }
2796*c83a76b0SSuyog Pawar
2797*c83a76b0SSuyog Pawar /**
2798*c83a76b0SSuyog Pawar *******************************************************************************
2799*c83a76b0SSuyog Pawar * @fn S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2800*c83a76b0SSuyog Pawar *
2801*c83a76b0SSuyog Pawar * @brief Initialises the Coarse HME ctxt
2802*c83a76b0SSuyog Pawar *
2803*c83a76b0SSuyog Pawar * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2804*c83a76b0SSuyog Pawar * up its requirements of memory
2805*c83a76b0SSuyog Pawar *
2806*c83a76b0SSuyog Pawar * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2807*c83a76b0SSuyog Pawar * amt of memory
2808*c83a76b0SSuyog Pawar *
2809*c83a76b0SSuyog Pawar * @return Number of memtabs required
2810*c83a76b0SSuyog Pawar *******************************************************************************
2811*c83a76b0SSuyog Pawar */
hme_coarse_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2812*c83a76b0SSuyog Pawar S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2813*c83a76b0SSuyog Pawar {
2814*c83a76b0SSuyog Pawar S32 num, tot;
2815*c83a76b0SSuyog Pawar coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2816*c83a76b0SSuyog Pawar
2817*c83a76b0SSuyog Pawar tot = hme_coarse_num_alloc();
2818*c83a76b0SSuyog Pawar /* Validation of init params */
2819*c83a76b0SSuyog Pawar if(-1 == hme_validate_init_prms(ps_prms))
2820*c83a76b0SSuyog Pawar return (-1);
2821*c83a76b0SSuyog Pawar
2822*c83a76b0SSuyog Pawar num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
2823*c83a76b0SSuyog Pawar if(num > tot)
2824*c83a76b0SSuyog Pawar return (-1);
2825*c83a76b0SSuyog Pawar
2826*c83a76b0SSuyog Pawar /* Initialize all enumerations based globals */
2827*c83a76b0SSuyog Pawar hme_init_globals();
2828*c83a76b0SSuyog Pawar
2829*c83a76b0SSuyog Pawar /* Copy the memtabs into the context for returning during free */
2830*c83a76b0SSuyog Pawar memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2831*c83a76b0SSuyog Pawar
2832*c83a76b0SSuyog Pawar /* initialize the context and related buffers */
2833*c83a76b0SSuyog Pawar hme_coarse_init_ctxt(ps_ctxt, ps_prms);
2834*c83a76b0SSuyog Pawar
2835*c83a76b0SSuyog Pawar return (0);
2836*c83a76b0SSuyog Pawar }
2837*c83a76b0SSuyog Pawar
2838*c83a76b0SSuyog Pawar /**
2839*c83a76b0SSuyog Pawar *******************************************************************************
2840*c83a76b0SSuyog Pawar * @fn S32 hme_set_resolution(void *pv_me_ctxt,
2841*c83a76b0SSuyog Pawar * S32 n_enc_layers,
2842*c83a76b0SSuyog Pawar * S32 *p_wd,
2843*c83a76b0SSuyog Pawar * S32 *p_ht
2844*c83a76b0SSuyog Pawar *
2845*c83a76b0SSuyog Pawar * @brief Sets up the layers based on resolution information.
2846*c83a76b0SSuyog Pawar *
2847*c83a76b0SSuyog Pawar * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2848*c83a76b0SSuyog Pawar *
2849*c83a76b0SSuyog Pawar * @param[in] n_enc_layers : Number of layers encoded
2850*c83a76b0SSuyog Pawar *
2851*c83a76b0SSuyog Pawar * @param[in] p_wd : Pointer to an array having widths for each encode layer
2852*c83a76b0SSuyog Pawar *
2853*c83a76b0SSuyog Pawar * @param[in] p_ht : Pointer to an array having heights for each encode layer
2854*c83a76b0SSuyog Pawar *
2855*c83a76b0SSuyog Pawar * @return void
2856*c83a76b0SSuyog Pawar *******************************************************************************
2857*c83a76b0SSuyog Pawar */
2858*c83a76b0SSuyog Pawar
hme_set_resolution(void * pv_me_ctxt,S32 n_enc_layers,S32 * p_wd,S32 * p_ht,S32 me_frm_id)2859*c83a76b0SSuyog Pawar void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
2860*c83a76b0SSuyog Pawar {
2861*c83a76b0SSuyog Pawar S32 n_tot_layers, num_layers_explicit_search, i, j;
2862*c83a76b0SSuyog Pawar me_ctxt_t *ps_thrd_ctxt;
2863*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt;
2864*c83a76b0SSuyog Pawar
2865*c83a76b0SSuyog Pawar S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2866*c83a76b0SSuyog Pawar S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2867*c83a76b0SSuyog Pawar memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2868*c83a76b0SSuyog Pawar memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2869*c83a76b0SSuyog Pawar
2870*c83a76b0SSuyog Pawar ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
2871*c83a76b0SSuyog Pawar
2872*c83a76b0SSuyog Pawar ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
2873*c83a76b0SSuyog Pawar
2874*c83a76b0SSuyog Pawar /*************************************************************************/
2875*c83a76b0SSuyog Pawar /* Derive the number of HME layers, including both encoded and non encode*/
2876*c83a76b0SSuyog Pawar /* This function also derives the width and ht of each layer. */
2877*c83a76b0SSuyog Pawar /*************************************************************************/
2878*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2879*c83a76b0SSuyog Pawar num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
2880*c83a76b0SSuyog Pawar if(num_layers_explicit_search <= 0)
2881*c83a76b0SSuyog Pawar num_layers_explicit_search = n_tot_layers - 1;
2882*c83a76b0SSuyog Pawar
2883*c83a76b0SSuyog Pawar num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2884*c83a76b0SSuyog Pawar ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2885*c83a76b0SSuyog Pawar memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2886*c83a76b0SSuyog Pawar memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2887*c83a76b0SSuyog Pawar
2888*c83a76b0SSuyog Pawar /* only encode layer should be processed */
2889*c83a76b0SSuyog Pawar ps_ctxt->num_layers = n_tot_layers;
2890*c83a76b0SSuyog Pawar
2891*c83a76b0SSuyog Pawar ps_ctxt->i4_wd = a_wd[0];
2892*c83a76b0SSuyog Pawar ps_ctxt->i4_ht = a_ht[0];
2893*c83a76b0SSuyog Pawar
2894*c83a76b0SSuyog Pawar /* Memtabs : Layers * num-ref + 1 */
2895*c83a76b0SSuyog Pawar for(i = 0; i < ps_ctxt->max_num_ref + 1; i++)
2896*c83a76b0SSuyog Pawar {
2897*c83a76b0SSuyog Pawar for(j = 0; j < 1; j++)
2898*c83a76b0SSuyog Pawar {
2899*c83a76b0SSuyog Pawar S32 wd, ht;
2900*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
2901*c83a76b0SSuyog Pawar U08 u1_enc = ps_ctxt->u1_encode[j];
2902*c83a76b0SSuyog Pawar wd = a_wd[j];
2903*c83a76b0SSuyog Pawar ht = a_ht[j];
2904*c83a76b0SSuyog Pawar ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j];
2905*c83a76b0SSuyog Pawar hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2906*c83a76b0SSuyog Pawar }
2907*c83a76b0SSuyog Pawar }
2908*c83a76b0SSuyog Pawar }
2909*c83a76b0SSuyog Pawar
2910*c83a76b0SSuyog Pawar /**
2911*c83a76b0SSuyog Pawar *******************************************************************************
2912*c83a76b0SSuyog Pawar * @fn S32 hme_coarse_set_resolution(void *pv_me_ctxt,
2913*c83a76b0SSuyog Pawar * S32 n_enc_layers,
2914*c83a76b0SSuyog Pawar * S32 *p_wd,
2915*c83a76b0SSuyog Pawar * S32 *p_ht
2916*c83a76b0SSuyog Pawar *
2917*c83a76b0SSuyog Pawar * @brief Sets up the layers based on resolution information.
2918*c83a76b0SSuyog Pawar *
2919*c83a76b0SSuyog Pawar * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2920*c83a76b0SSuyog Pawar *
2921*c83a76b0SSuyog Pawar * @param[in] n_enc_layers : Number of layers encoded
2922*c83a76b0SSuyog Pawar *
2923*c83a76b0SSuyog Pawar * @param[in] p_wd : Pointer to an array having widths for each encode layer
2924*c83a76b0SSuyog Pawar *
2925*c83a76b0SSuyog Pawar * @param[in] p_ht : Pointer to an array having heights for each encode layer
2926*c83a76b0SSuyog Pawar *
2927*c83a76b0SSuyog Pawar * @return void
2928*c83a76b0SSuyog Pawar *******************************************************************************
2929*c83a76b0SSuyog Pawar */
2930*c83a76b0SSuyog Pawar
hme_coarse_set_resolution(void * pv_me_ctxt,S32 n_enc_layers,S32 * p_wd,S32 * p_ht)2931*c83a76b0SSuyog Pawar void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
2932*c83a76b0SSuyog Pawar {
2933*c83a76b0SSuyog Pawar S32 n_tot_layers, num_layers_explicit_search, i, j;
2934*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt;
2935*c83a76b0SSuyog Pawar S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2936*c83a76b0SSuyog Pawar S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2937*c83a76b0SSuyog Pawar memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2938*c83a76b0SSuyog Pawar memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2939*c83a76b0SSuyog Pawar
2940*c83a76b0SSuyog Pawar ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
2941*c83a76b0SSuyog Pawar /*************************************************************************/
2942*c83a76b0SSuyog Pawar /* Derive the number of HME layers, including both encoded and non encode*/
2943*c83a76b0SSuyog Pawar /* This function also derives the width and ht of each layer. */
2944*c83a76b0SSuyog Pawar /*************************************************************************/
2945*c83a76b0SSuyog Pawar n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2946*c83a76b0SSuyog Pawar num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search;
2947*c83a76b0SSuyog Pawar if(num_layers_explicit_search <= 0)
2948*c83a76b0SSuyog Pawar num_layers_explicit_search = n_tot_layers - 1;
2949*c83a76b0SSuyog Pawar
2950*c83a76b0SSuyog Pawar num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2951*c83a76b0SSuyog Pawar ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2952*c83a76b0SSuyog Pawar memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2953*c83a76b0SSuyog Pawar memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2954*c83a76b0SSuyog Pawar
2955*c83a76b0SSuyog Pawar /* encode layer should be excluded */
2956*c83a76b0SSuyog Pawar ps_ctxt->num_layers = n_tot_layers;
2957*c83a76b0SSuyog Pawar
2958*c83a76b0SSuyog Pawar memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
2959*c83a76b0SSuyog Pawar memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
2960*c83a76b0SSuyog Pawar
2961*c83a76b0SSuyog Pawar /* Memtabs : Layers * num-ref + 1 */
2962*c83a76b0SSuyog Pawar for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2963*c83a76b0SSuyog Pawar {
2964*c83a76b0SSuyog Pawar for(j = 1; j < n_tot_layers; j++)
2965*c83a76b0SSuyog Pawar {
2966*c83a76b0SSuyog Pawar S32 wd, ht;
2967*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
2968*c83a76b0SSuyog Pawar U08 u1_enc = ps_ctxt->u1_encode[j];
2969*c83a76b0SSuyog Pawar wd = a_wd[j];
2970*c83a76b0SSuyog Pawar ht = a_ht[j];
2971*c83a76b0SSuyog Pawar ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2972*c83a76b0SSuyog Pawar hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2973*c83a76b0SSuyog Pawar }
2974*c83a76b0SSuyog Pawar }
2975*c83a76b0SSuyog Pawar }
2976*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt,
*                                 S32 i4_poc,
*                                 S32 i4_idr_gop_num,
*                                 S32 i4_num_me_frm_pllel)
*
*  @brief  Looks up the layer descriptor whose layer 0 carries the given poc
*          and idr gop number.
*
*  @param[in] ps_ctxt : ME ctxt holding the as_ref_descr[] array
*
*  @param[in] i4_poc : poc to search for
*
*  @param[in] i4_idr_gop_num : idr gop number to search for
*
*  @param[in] i4_num_me_frm_pllel : multiplied with max_num_ref of frame
*                  context 0 (plus 1) to get the number of descriptors scanned
*
*  @return Index of the matching descriptor. A miss is not expected: ASSERT(0)
*          is raised and -1 is returned.
*******************************************************************************
*/
S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    S32 i4_slot = 0;

    while(i4_slot < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
    {
        layer_ctxt_t *ps_lyr0 = ps_ctxt->as_ref_descr[i4_slot].aps_layers[0];

        /* Both the poc and the idr gop number have to match */
        if((ps_lyr0->i4_poc == i4_poc) && (ps_lyr0->i4_idr_gop_num == i4_idr_gop_num))
        {
            return i4_slot;
        }
        i4_slot++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
2991*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
*
*  @brief  Looks up the coarse layer descriptor whose layer 1 carries the
*          given poc.
*
*  @param[in] ps_ctxt : Coarse ME ctxt holding the as_ref_descr[] array
*
*  @param[in] i4_poc : poc to search for
*
*  @return Index of the matching descriptor. A miss is not expected: ASSERT(0)
*          is raised and -1 is returned.
*******************************************************************************
*/
S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
{
    S32 i4_slot = 0;

    while(i4_slot < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME)
    {
        layer_ctxt_t *ps_lyr1 = ps_ctxt->as_ref_descr[i4_slot].aps_layers[1];

        if(i4_poc == ps_lyr1->i4_poc)
        {
            return i4_slot;
        }
        i4_slot++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3005*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt,
*                                      S32 i4_num_me_frm_pllel)
*
*  @brief  Claims the first layer descriptor whose layer 0 is marked free.
*          The descriptor is marked as in use (i4_is_free = 0) before its
*          index is returned.
*
*  @param[in, out] ps_ctxt : ME ctxt holding the as_ref_descr[] array
*
*  @param[in] i4_num_me_frm_pllel : multiplied with max_num_ref of frame
*                  context 0 (plus 1) to get the number of descriptors scanned
*
*  @return Index of the claimed descriptor. Finding none is not expected:
*          ASSERT(0) is raised and -1 is returned.
*******************************************************************************
*/
S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
{
    S32 i4_slot = 0;

    while(i4_slot < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
    {
        layer_ctxt_t *ps_lyr0 = ps_ctxt->as_ref_descr[i4_slot].aps_layers[0];

        if(1 == ps_lyr0->i4_is_free)
        {
            /* Claim the descriptor so that it is not handed out again */
            ps_lyr0->i4_is_free = 0;
            return i4_slot;
        }
        i4_slot++;
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3022*c83a76b0SSuyog Pawar
hme_coarse_find_free_descr_idx(void * pv_ctxt)3023*c83a76b0SSuyog Pawar S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
3024*c83a76b0SSuyog Pawar {
3025*c83a76b0SSuyog Pawar S32 i;
3026*c83a76b0SSuyog Pawar
3027*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
3028*c83a76b0SSuyog Pawar
3029*c83a76b0SSuyog Pawar for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
3030*c83a76b0SSuyog Pawar {
3031*c83a76b0SSuyog Pawar if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1)
3032*c83a76b0SSuyog Pawar return i;
3033*c83a76b0SSuyog Pawar }
3034*c83a76b0SSuyog Pawar /* Should not come here */
3035*c83a76b0SSuyog Pawar ASSERT(0);
3036*c83a76b0SSuyog Pawar return (-1);
3037*c83a76b0SSuyog Pawar }
3038*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     void hme_discard_frm(void *pv_me_ctxt,
*                               S32 *p_pocs_to_remove,
*                               S32 i4_idr_gop_num,
*                               S32 i4_num_me_frm_pllel)
*
*  @brief  Marks the layer descriptors of the listed pictures as free so that
*          they can be used for a fresh picture.
*
*  @param[in, out] pv_me_ctxt : ME ctxt (me_ctxt_t *)
*
*  @param[in] p_pocs_to_remove : List of pocs, terminated by INVALID_POC.
*                  The ASSERT inside the loop expects at most one entry.
*
*  @param[in] i4_idr_gop_num : idr gop number used with the poc for the lookup
*
*  @param[in] i4_num_me_frm_pllel : forwarded to hme_find_descr_idx()
*
*  @return void
*******************************************************************************
*/
void hme_discard_frm(
    void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
    S32 count = 0, idx, i;
    layers_descr_t *ps_descr;

    /* Search for the id of the layer descriptor that has this poc */
    while(p_pocs_to_remove[count] != INVALID_POC)
    {
        ASSERT(count == 0);
        idx = hme_find_descr_idx(
            ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);

        /*********************************************************************/
        /* hme_find_descr_idx returns -1 when nothing matches. If its ASSERT */
        /* does not stop execution, using that value would index             */
        /* as_ref_descr[-1]; skip the entry instead.                         */
        /*********************************************************************/
        if(idx < 0)
        {
            count++;
            continue;
        }

        ps_descr = &ps_ctxt->as_ref_descr[idx];
        /*********************************************************************/
        /* Setting i4_is_free = 1 invalidates this layer ctxt. Only layer 0  */
        /* is touched. Now this can be used for a fresh picture.             */
        /*********************************************************************/
        for(i = 0; i < 1; i++)
        {
            ps_descr->aps_layers[i]->i4_is_free = 1;
        }
        count++;
    }
}
3064*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
*
*  @brief  Invalidates the coarse layer descriptors of the listed pictures so
*          that they can be used for a fresh picture.
*
*  @param[in, out] pv_me_ctxt : Coarse ME ctxt (coarse_me_ctxt_t *)
*
*  @param[in] p_pocs_to_remove : List of pocs, terminated by -1
*
*  @return void
*******************************************************************************
*/
void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 count = 0, idx, i;
    layers_descr_t *ps_descr;

    /* Search for the id of the layer descriptor that has this poc */
    while(p_pocs_to_remove[count] != -1)
    {
        idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]);

        /*********************************************************************/
        /* hme_coarse_find_descr_idx returns -1 when nothing matches. If its */
        /* ASSERT does not stop execution, using that value would index      */
        /* as_ref_descr[-1]; skip the entry instead.                         */
        /*********************************************************************/
        if(idx < 0)
        {
            count++;
            continue;
        }

        ps_descr = &ps_ctxt->as_ref_descr[idx];
        /*********************************************************************/
        /* Setting poc = -1 in layers 1 .. num_layers-1 invalidates this     */
        /* layer ctxt. Now this can be used for a fresh picture.             */
        /*********************************************************************/
        for(i = 1; i < ps_ctxt->num_layers; i++)
        {
            ps_descr->aps_layers[i]->i4_poc = -1;
        }
        count++;
    }
}
3087*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     void hme_update_layer_desc(layers_descr_t *ps_layers_desc,
*                                     hme_ref_desc_t *ps_ref_desc,
*                                     S32 start_lyr_id,
*                                     S32 num_layers,
*                                     layers_descr_t *ps_curr_desc)
*
*  @brief  Copies poc, gop number, recon plane pointers and recon geometry of
*          a reference picture into a layer descriptor, for layers
*          start_lyr_id .. num_layers-1. Recon stride and padding are also
*          copied into the current picture's descriptor.
*
*  @param[in, out] ps_layers_desc : Layer descriptor of the reference picture
*
*  @param[in] ps_ref_desc : Reference picture description supplied by caller
*
*  @param[in] start_lyr_id : First layer id to update
*
*  @param[in] num_layers : One past the last layer id to update
*
*  @param[in, out] ps_curr_desc : Layer descriptor of the current picture
*
*  @return void
*******************************************************************************
*/
void hme_update_layer_desc(
    layers_descr_t *ps_layers_desc,
    hme_ref_desc_t *ps_ref_desc,
    S32 start_lyr_id,
    S32 num_layers,
    layers_descr_t *ps_curr_desc)
{
    S32 i4_lyr = start_lyr_id;

    while(i4_lyr < num_layers)
    {
        layer_ctxt_t *ps_ref_lyr = ps_layers_desc->aps_layers[i4_lyr];
        layer_ctxt_t *ps_cur_lyr = ps_curr_desc->aps_layers[i4_lyr];

        /* Identity of the reference picture */
        ps_ref_lyr->i4_poc = ps_ref_desc->i4_poc;
        ps_ref_lyr->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;

        /* Copy the recon planes for the given reference pic at given layer */
        ps_ref_lyr->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxfy;
        ps_ref_lyr->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxfy;
        ps_ref_lyr->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxhy;
        ps_ref_lyr->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxhy;

        /*********************************************************************/
        /* Recon stride, offset and padding of this reference pic. These are */
        /* assumed to be the same across pics, hence stride and padding are  */
        /* also stored for the current pic, although the current pic does    */
        /* not have valid recon pointers yet.                                */
        /*********************************************************************/
        ps_ref_lyr->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_ref_lyr->i4_rec_offset = ps_ref_desc->as_ref_info[i4_lyr].luma_offset;
        ps_ref_lyr->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_ref_lyr->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;

        ps_cur_lyr->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_cur_lyr->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_cur_lyr->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;

        i4_lyr++;
    }
}
3127*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     void hme_add_inp(void *pv_me_ctxt,
*                           hme_inp_desc_t *ps_inp_desc,
*                           S32 me_frm_id,
*                           S32 i4_thrd_id)
*
*  @brief  Registers a new input picture with ME: claims a free layer
*          descriptor, points the frame context of every thread at it,
*          updates prev/curr poc and fills the encode layer (layer 0)
*          attributes from the input description.
*
*  @param[in, out] pv_me_ctxt : ME master ctxt (me_master_ctxt_t *)
*
*  @param[in] ps_inp_desc : Input picture description (poc, idr gop number,
*                  reference flag and per layer buffer attributes)
*
*  @param[in] me_frm_id : Index of the frame context to update. The previous
*                  poc is taken from the frame context before it, wrapping
*                  to MAX_NUM_ME_PARALLEL - 1
*
*  @param[in] i4_thrd_id : Thread ctxt used to look up the free descriptor
*
*  @return void
*******************************************************************************
*/
void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_ctxt;

    hme_inp_buf_attr_t *ps_attr;
    S32 idx, i, i4_prev_poc;
    S32 num_thrds, prev_me_frm_id;

    /*************************************************************************/
    /* Picture level attributes do not depend on the thread, so they are     */
    /* read once here. This also keeps them defined for the layer setup      */
    /* below when the per thread loop does not execute at all.               */
    /*************************************************************************/
    S32 i4_poc = ps_inp_desc->i4_poc;
    S32 i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
    S32 i4_is_reference = ps_inp_desc->i4_is_reference;

    /* The free descriptor idx is looked up through the ctxt of i4_thrd_id */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];

    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /* Deriving the previous poc from previous frames context */
    if(me_frm_id == 0)
        prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
    else
        prev_me_frm_id = me_frm_id - 1;

    i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;

    /* Obtain an empty layer descriptor */
    /* NOTE(review): idx is -1 when no descriptor is free and the ASSERT in  */
    /* hme_find_free_descr_idx does not abort; that case is not handled here */
    idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
    ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        me_frm_ctxt_t *ps_tmp_frm_ctxt;

        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

        ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
        ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
    }

    /* only encode layer */
    for(i = 0; i < 1; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
        ps_layer_ctxt->i4_is_reference = i4_is_reference;
        ps_layer_ctxt->i4_non_ref_free = 0;

        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.*/
            S32 wd, dst_stride;

            /* Layer 0 is expected to be an encode layer */
            ASSERT(i != 0);

            wd = ps_ctxt->i4_wd;

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }

    return;
}
3219*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     void hme_coarse_add_inp(void *pv_me_ctxt,
*                                  hme_inp_desc_t *ps_inp_desc,
*                                  WORD32 i4_curr_idx)
*
*  @brief  Registers a new input picture with coarse ME: points every thread
*          ctxt at the given layer descriptor, updates prev/curr poc and
*          fills the attributes of layers 1 .. num_layers-1.
*
*  @param[in, out] pv_me_ctxt : Coarse ME master ctxt
*                  (coarse_me_master_ctxt_t *)
*
*  @param[in] ps_inp_desc : Input picture description (poc and per layer
*                  buffer attributes)
*
*  @param[in] i4_curr_idx : Index of the layer descriptor to use for this
*                  picture
*
*  @return void
*******************************************************************************
*/
void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    hme_inp_buf_attr_t *ps_attr;
    S32 i;
    S32 num_thrds;

    /*************************************************************************/
    /* The poc does not depend on the thread, so it is read once here. This  */
    /* also keeps it defined for the layer setup below when the per thread   */
    /* loop does not execute at all.                                         */
    /*************************************************************************/
    S32 i4_poc = ps_inp_desc->i4_poc;

    /* since same layer desc pointer is stored in all thread ctxt */
    /* the descriptor is obtained using 0th thread ctxt pointer   */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc;
        ps_ctxt->i4_curr_poc = i4_poc;
    }

    /* since same layer desc pointer is stored in all thread ctxt */
    /* following processing is done using 0th thread ctxt pointer */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* only non encode layer */
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.             */
            /* decomp of lower layers happens on a separate pass    */
            /* Coarse Me should export the pointers to the caller   */
            S32 wd, dst_stride;

            ASSERT(i != 0);

            /* Stride is derived from half the width of the layer above */
            wd = ps_ctxt->a_wd[i - 1];

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }
}
3284*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     U08 hme_determine_num_results_per_part(U08 u1_layer_id,
*                                                 U08 u1_num_layers,
*                                                 ME_QUALITY_PRESETS_T e_quality_preset)
*
*  @brief  Picks the number of results kept per partition for a layer.
*          The result is 1 when the RESTRICT_NUM_PARTITION_LEVEL_LxME_
*          RESULTS_TO_1 switch of the layer (x = 0, 1, 2) is enabled and the
*          preset is one of those listed for that layer; otherwise it is
*          MAX_RESULTS_PER_PART. Layer 2 is restricted only when there are
*          more than 3 layers.
*
*  @param[in] u1_layer_id : Id of the layer
*
*  @param[in] u1_num_layers : Total number of layers
*
*  @param[in] e_quality_preset : ME quality preset
*
*  @return Number of results per partition
*******************************************************************************
*/
static __inline U08 hme_determine_num_results_per_part(
    U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
{
    U08 u1_result = MAX_RESULTS_PER_PART;
    S32 i4_single_result = 0;

    switch(u1_layer_id)
    {
    case 0:
    {
        if(!!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
        {
            i4_single_result =
                (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
                (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset) ||
                (ME_HIGH_QUALITY == e_quality_preset) ||
                (ME_PRISTINE_QUALITY == e_quality_preset);
        }
        break;
    }
    case 1:
    {
        if(!!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
        {
            i4_single_result = (ME_XTREME_SPEED_25 == e_quality_preset) ||
                               (ME_HIGH_QUALITY == e_quality_preset) ||
                               (ME_PRISTINE_QUALITY == e_quality_preset);
        }
        break;
    }
    case 2:
    {
        if((u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
        {
            i4_single_result =
                (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
                (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset);
        }
        break;
    }
    default:
    {
        /* Layers other than 0, 1 and 2 are never restricted */
        break;
    }
    }

    if(i4_single_result)
    {
        u1_result = 1;
    }

    return u1_result;
}
3357*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     void hme_max_search_cands_per_search_cand_loc_populator(
*                                  hme_frm_prms_t *ps_frm_prms,
*                                  U08 *pu1_num_fpel_search_cands,
*                                  U08 u1_layer_id,
*                                  ME_QUALITY_PRESETS_T e_quality_preset)
*
*  @brief  Fills, for layer 0 only, the max number of search candidates for
*          each of the NUM_SEARCH_CAND_LOCATIONS locations. For other layers
*          the output array is left untouched.
*          With RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC enabled, the
*          XTREME_SPEED_25 / XTREME_SPEED / HIGH_SPEED / MEDIUM_SPEED presets
*          get 1 per location. Otherwise the value is
*          MAX(2, max(active ref l0, active ref l1) * k), where k is 2 for
*          the COLOCATED location and 1 for every other location.
*
*  @param[in] ps_frm_prms : Frame params providing the active ref counts
*
*  @param[out] pu1_num_fpel_search_cands : Array of NUM_SEARCH_CAND_LOCATIONS
*                  entries that is filled
*
*  @param[in] u1_layer_id : Id of the layer
*
*  @param[in] e_quality_preset : ME quality preset
*
*  @return void
*******************************************************************************
*/
static __inline void hme_max_search_cands_per_search_cand_loc_populator(
    hme_frm_prms_t *ps_frm_prms,
    U08 *pu1_num_fpel_search_cands,
    U08 u1_layer_id,
    ME_QUALITY_PRESETS_T e_quality_preset)
{
    S32 i4_loc;
    S32 i4_single_cand = 0;

    if(0 != u1_layer_id)
    {
        return;
    }

    /* The preset is the same for every location, so classify it only once */
#if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
    switch(e_quality_preset)
    {
    case ME_XTREME_SPEED_25:
    case ME_XTREME_SPEED:
    case ME_HIGH_SPEED:
    case ME_MEDIUM_SPEED:
    {
        i4_single_cand = 1;

        break;
    }
    default:
    {
        break;
    }
    }
#endif

    for(i4_loc = 0; i4_loc < NUM_SEARCH_CAND_LOCATIONS; i4_loc++)
    {
        if(i4_single_cand)
        {
            pu1_num_fpel_search_cands[i4_loc] = 1;
        }
        else
        {
            /* The COLOCATED location gets twice the active ref count */
            pu1_num_fpel_search_cands[i4_loc] =
                MAX(2,
                    MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) *
                        ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i4_loc) + 1));
        }
    }
}
3396*c83a76b0SSuyog Pawar
3397*c83a76b0SSuyog Pawar static __inline U08
hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3398*c83a76b0SSuyog Pawar hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3399*c83a76b0SSuyog Pawar {
3400*c83a76b0SSuyog Pawar U08 u1_num_cands = 2;
3401*c83a76b0SSuyog Pawar
3402*c83a76b0SSuyog Pawar if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
3403*c83a76b0SSuyog Pawar {
3404*c83a76b0SSuyog Pawar switch(e_quality_preset)
3405*c83a76b0SSuyog Pawar {
3406*c83a76b0SSuyog Pawar case ME_XTREME_SPEED_25:
3407*c83a76b0SSuyog Pawar case ME_XTREME_SPEED:
3408*c83a76b0SSuyog Pawar case ME_HIGH_SPEED:
3409*c83a76b0SSuyog Pawar case ME_MEDIUM_SPEED:
3410*c83a76b0SSuyog Pawar {
3411*c83a76b0SSuyog Pawar u1_num_cands = 1;
3412*c83a76b0SSuyog Pawar
3413*c83a76b0SSuyog Pawar break;
3414*c83a76b0SSuyog Pawar }
3415*c83a76b0SSuyog Pawar default:
3416*c83a76b0SSuyog Pawar {
3417*c83a76b0SSuyog Pawar u1_num_cands = 2;
3418*c83a76b0SSuyog Pawar
3419*c83a76b0SSuyog Pawar break;
3420*c83a76b0SSuyog Pawar }
3421*c83a76b0SSuyog Pawar }
3422*c83a76b0SSuyog Pawar }
3423*c83a76b0SSuyog Pawar
3424*c83a76b0SSuyog Pawar return u1_num_cands;
3425*c83a76b0SSuyog Pawar }
3426*c83a76b0SSuyog Pawar
3427*c83a76b0SSuyog Pawar static __inline U08
hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3428*c83a76b0SSuyog Pawar hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3429*c83a76b0SSuyog Pawar {
3430*c83a76b0SSuyog Pawar U08 i;
3431*c83a76b0SSuyog Pawar
3432*c83a76b0SSuyog Pawar U08 u1_num_centers = 0;
3433*c83a76b0SSuyog Pawar
3434*c83a76b0SSuyog Pawar if(0 == u1_layer_id)
3435*c83a76b0SSuyog Pawar {
3436*c83a76b0SSuyog Pawar switch(e_quality_preset)
3437*c83a76b0SSuyog Pawar {
3438*c83a76b0SSuyog Pawar case ME_XTREME_SPEED_25:
3439*c83a76b0SSuyog Pawar {
3440*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
3441*c83a76b0SSuyog Pawar {
3442*c83a76b0SSuyog Pawar u1_num_centers += gau1_num_best_results_XS25[i];
3443*c83a76b0SSuyog Pawar }
3444*c83a76b0SSuyog Pawar
3445*c83a76b0SSuyog Pawar break;
3446*c83a76b0SSuyog Pawar }
3447*c83a76b0SSuyog Pawar case ME_XTREME_SPEED:
3448*c83a76b0SSuyog Pawar {
3449*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
3450*c83a76b0SSuyog Pawar {
3451*c83a76b0SSuyog Pawar u1_num_centers += gau1_num_best_results_XS[i];
3452*c83a76b0SSuyog Pawar }
3453*c83a76b0SSuyog Pawar
3454*c83a76b0SSuyog Pawar break;
3455*c83a76b0SSuyog Pawar }
3456*c83a76b0SSuyog Pawar case ME_HIGH_SPEED:
3457*c83a76b0SSuyog Pawar {
3458*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
3459*c83a76b0SSuyog Pawar {
3460*c83a76b0SSuyog Pawar u1_num_centers += gau1_num_best_results_HS[i];
3461*c83a76b0SSuyog Pawar }
3462*c83a76b0SSuyog Pawar
3463*c83a76b0SSuyog Pawar break;
3464*c83a76b0SSuyog Pawar }
3465*c83a76b0SSuyog Pawar case ME_MEDIUM_SPEED:
3466*c83a76b0SSuyog Pawar {
3467*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
3468*c83a76b0SSuyog Pawar {
3469*c83a76b0SSuyog Pawar u1_num_centers += gau1_num_best_results_MS[i];
3470*c83a76b0SSuyog Pawar }
3471*c83a76b0SSuyog Pawar
3472*c83a76b0SSuyog Pawar break;
3473*c83a76b0SSuyog Pawar }
3474*c83a76b0SSuyog Pawar case ME_HIGH_QUALITY:
3475*c83a76b0SSuyog Pawar {
3476*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
3477*c83a76b0SSuyog Pawar {
3478*c83a76b0SSuyog Pawar u1_num_centers += gau1_num_best_results_HQ[i];
3479*c83a76b0SSuyog Pawar }
3480*c83a76b0SSuyog Pawar
3481*c83a76b0SSuyog Pawar break;
3482*c83a76b0SSuyog Pawar }
3483*c83a76b0SSuyog Pawar case ME_PRISTINE_QUALITY:
3484*c83a76b0SSuyog Pawar {
3485*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
3486*c83a76b0SSuyog Pawar {
3487*c83a76b0SSuyog Pawar u1_num_centers += gau1_num_best_results_PQ[i];
3488*c83a76b0SSuyog Pawar }
3489*c83a76b0SSuyog Pawar
3490*c83a76b0SSuyog Pawar break;
3491*c83a76b0SSuyog Pawar }
3492*c83a76b0SSuyog Pawar }
3493*c83a76b0SSuyog Pawar }
3494*c83a76b0SSuyog Pawar
3495*c83a76b0SSuyog Pawar return u1_num_centers;
3496*c83a76b0SSuyog Pawar }
3497*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_determine_max_num_subpel_refine_centers
*
*  @brief  Returns the maximum number of sub-pel refinement centers for a
*          layer: the 2Nx2N candidate count plus four times the NxN
*          candidate count on layer 0, and 0 on every other layer.
*
*  @param[in] u1_layer_id : id of the layer being configured
*
*  @param[in] u1_max_2Nx2N_subpel_cands : max sub-pel candidates for 2Nx2N
*
*  @param[in] u1_max_NxN_subpel_cands : max sub-pel candidates for NxN
*
*  @return Number of centers, truncated to U08
********************************************************************************
*/
static __inline U08 hme_determine_max_num_subpel_refine_centers(
    U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
{
    if(0 != u1_layer_id)
    {
        return 0;
    }

    return (U08)(u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands);
}
3510*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_set_refine_prms
*
*  @brief  Populates the refinement parameters of one HME layer from the
*          frame params, the coding-tool settings and the quality preset.
*
*  @param[out] pv_refine_prms : refine_prms_t structure to be filled
*
*  @param[in]  u1_encode : non-zero when the layer is an encode layer
*
*  @param[in]  num_ref : number of references
*
*  @param[in]  layer_id : id of the layer being configured
*
*  @param[in]  num_layers : total number of layers
*
*  @param[in]  num_layers_explicit_search : number of layers using explicit
*                                           search
*
*  @param[in]  use_4x4 : forwarded to hme_get_mv_blk_size
*
*  @param[in]  ps_frm_prms : frame params (lambdas, bidir flag, active refs)
*
*  @param[in]  ppd_intra_costs : per-layer intra cost pointers; may be NULL
*
*  @param[in]  ps_me_coding_tools : coding tools / quality preset settings
*
*  @return None
********************************************************************************
*/
void hme_set_refine_prms(
    void *pv_refine_prms,
    U08 u1_encode,
    S32 num_ref,
    S32 layer_id,
    S32 num_layers,
    S32 num_layers_explicit_search,
    S32 use_4x4,
    hme_frm_prms_t *ps_frm_prms,
    double **ppd_intra_costs,
    me_coding_params_t *ps_me_coding_tools)
{
    refine_prms_t *ps_prms = (refine_prms_t *)pv_refine_prms;
    ME_QUALITY_PRESETS_T e_preset = ps_me_coding_tools->e_me_quality_presets;
    S32 i4_is_implicit;

    ps_prms->i4_encode = u1_encode;
    ps_prms->bidir_enabled = ps_frm_prms->bidir_enabled;
    ps_prms->i4_layer_id = layer_id;

    /*************************************************************************/
    /* Refinement layers have two lambdas, one for closed loop, another for  */
    /* open loop. Non encode layers use only open loop lambda. Both lambdas  */
    /* are scaled down to (100 - ME_LAMBDA_DISCOUNT) percent.                */
    /*************************************************************************/
    ps_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;

    ps_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
    ps_prms->lambda_inp =
        ((float)ps_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;

    ps_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
    ps_prms->lambda_recon =
        ((float)ps_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;

    /* Intra costs are attached only on encode layers, and only if supplied */
    if((u1_encode) && (NULL != ppd_intra_costs))
    {
        ps_prms->pd_intra_costs = ppd_intra_costs[layer_id];
    }

    /* Explicit or implicit depends on number of layers having explicit search */
    i4_is_implicit = (0 == layer_id) || ((num_layers - layer_id) > num_layers_explicit_search);
    ps_prms->explicit_ref = i4_is_implicit ? 0 : 1;
    ps_prms->i4_num_ref_fpel = i4_is_implicit ? MIN(2, num_ref) : num_ref;

    /* Defaults; the preset specific code below may override them */
    ps_prms->e_search_complexity = SEARCH_CX_HIGH;
    ps_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
    ps_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;

    if(u1_encode)
    {
        ps_prms->i4_num_mvbank_results = 1;
        ps_prms->i4_use_rec_in_fpel = 1;
        ps_prms->i4_num_steps_fpel_refine = 1;

        switch(e_preset)
        {
        case ME_PRISTINE_QUALITY:
        {
            ps_prms->i4_num_fpel_results = 4;
            ps_prms->i4_num_32x32_merge_results = 4;
            ps_prms->i4_num_64x64_merge_results = 4;
            ps_prms->i4_num_steps_post_refine_fpel = 3;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->u1_max_subpel_candts_2Nx2N = 2;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 1;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_PQ;
            ps_prms->limit_active_partitions = 0;

            break;
        }
        case ME_HIGH_QUALITY:
        {
            ps_prms->i4_num_fpel_results = 4;
            ps_prms->i4_num_32x32_merge_results = 4;
            ps_prms->i4_num_64x64_merge_results = 4;
            ps_prms->i4_num_steps_post_refine_fpel = 3;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->u1_max_subpel_candts_2Nx2N = 2;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 2;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HQ;
            ps_prms->limit_active_partitions = 0;

            break;
        }
        case ME_MEDIUM_SPEED:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->u1_max_subpel_candts_2Nx2N = 2;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 3;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_MS;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        case ME_HIGH_SPEED:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->u1_max_subpel_candts_2Nx2N = 1;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 0;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HS;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        case ME_XTREME_SPEED:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->u1_max_subpel_candts_2Nx2N = 1;
            ps_prms->u1_max_subpel_candts_NxN = 0;
            ps_prms->u1_subpel_candt_threshold = 0;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        case ME_XTREME_SPEED_25:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->u1_max_subpel_candts_2Nx2N = 1;
            ps_prms->u1_max_subpel_candts_NxN = 0;
            ps_prms->u1_subpel_candt_threshold = 0;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS25;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        default:
        {
            /* Any other preset leaves the preset specific fields untouched */
            break;
        }
        }
    }
    else
    {
        /* Non encode layers: no sub-pel refinement at all */
        ps_prms->i4_num_fpel_results = 2;
        ps_prms->i4_use_rec_in_fpel = 0;
        ps_prms->i4_num_steps_fpel_refine = 1;
        ps_prms->i4_num_steps_hpel_refine = 0;
        ps_prms->i4_num_steps_qpel_refine = 0;

        switch(e_preset)
        {
        case ME_HIGH_SPEED:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HS;

            break;
        }
        case ME_XTREME_SPEED:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS;

            break;
        }
        case ME_XTREME_SPEED_25:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS25;

            break;
        }
        case ME_PRISTINE_QUALITY:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 2;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_PQ;

            break;
        }
        case ME_HIGH_QUALITY:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 2;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HQ;

            break;
        }
        case ME_MEDIUM_SPEED:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_MS;

            break;
        }
        default:
        {
            /* Any other preset leaves the preset specific fields untouched */
            break;
        }
        }

        /* Following fields unused in the non-encode layers */
        /* But setting the same to default values           */
        ps_prms->i4_num_32x32_merge_results = 4;
        ps_prms->i4_num_64x64_merge_results = 4;

        /* Active partitions are limited only when bidir is enabled */
        ps_prms->limit_active_partitions = ps_frm_prms->bidir_enabled ? 1 : 0;
    }

    ps_prms->i4_enable_4x4_part = hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);

    if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
    {
        /* Everything is derived from the layer and the quality preset */
        ps_prms->i4_num_results_per_part =
            hme_determine_num_results_per_part(layer_id, num_layers, e_preset);

        hme_max_search_cands_per_search_cand_loc_populator(
            ps_frm_prms, ps_prms->au1_num_fpel_search_cands, layer_id, e_preset);

        ps_prms->u1_max_2nx2n_tu_recur_cands =
            hme_determine_max_2nx2n_tu_recur_cands(layer_id, e_preset);

        ps_prms->u1_max_num_fpel_refine_centers =
            hme_determine_max_num_fpel_refine_centers(layer_id, e_preset);

        ps_prms->u1_max_num_subpel_refine_centers = hme_determine_max_num_subpel_refine_centers(
            layer_id, ps_prms->u1_max_subpel_candts_2Nx2N, ps_prms->u1_max_subpel_candts_NxN);
    }
    else
    {
        /* Values supplied through ps_me_coding_tools take precedence */
        switch(layer_id)
        {
        case 0:
        {
            ps_prms->i4_num_results_per_part =
                ps_me_coding_tools->u1_num_results_per_part_in_l0me;

            break;
        }
        case 1:
        {
            ps_prms->i4_num_results_per_part =
                ps_me_coding_tools->u1_num_results_per_part_in_l1me;

            break;
        }
        default:
        {
            if((2 == layer_id) && (num_layers > 3))
            {
                ps_prms->i4_num_results_per_part =
                    ps_me_coding_tools->u1_num_results_per_part_in_l2me;
            }
            else
            {
                ps_prms->i4_num_results_per_part =
                    hme_determine_num_results_per_part(layer_id, num_layers, e_preset);
            }

            break;
        }
        }

        /* Every search candidate location gets the same limit */
        memset(
            ps_prms->au1_num_fpel_search_cands,
            ps_me_coding_tools->u1_max_num_coloc_cands,
            sizeof(ps_prms->au1_num_fpel_search_cands));

        ps_prms->u1_max_2nx2n_tu_recur_cands = ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;

        ps_prms->u1_max_num_fpel_refine_centers =
            ps_me_coding_tools->u1_max_num_fpel_refine_centers;

        ps_prms->u1_max_num_subpel_refine_centers =
            ps_me_coding_tools->u1_max_num_subpel_refine_centers;
    }

    /* Layers other than 0 store as many mv bank results as results per part */
    if(0 != layer_id)
    {
        ps_prms->i4_num_mvbank_results = ps_prms->i4_num_results_per_part;
    }

    /* 4 * lambda */
    /* NOTE(review): the rounding offset is 1 << (shift - 1) while the value  */
    /* is shifted by (shift - 2); confirm this is intended. The expression    */
    /* also needs lambda_q_shift >= 2 for the shift counts to be valid.       */
    ps_prms->sdi_threshold =
        (ps_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
        (ps_frm_prms->lambda_q_shift - 2);

    ps_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
        MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
}
3796*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_set_ctb_boundary_attrs
*
*  @brief  Fills the attributes of a CTB that holds num_8x8_horz x
*          num_8x8_vert 8x8 blocks: the list of valid 16x16 blks in encode
*          order with their 8x8 masks, the 16x16 valid bitmap and the
*          32x32 / 64x64 merge flags.
*
*  @param[out] ps_attrs : attributes structure to be filled
*
*  @param[in]  num_8x8_horz : number of 8x8 blocks along the CTB width
*
*  @param[in]  num_8x8_vert : number of 8x8 blocks along the CTB height
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
{
    /* A partially covered 16x16 blk still counts, hence the round up */
    const S32 i4_cols_16x16 = (num_8x8_horz + 1) >> 1;
    const S32 i4_rows_16x16 = (num_8x8_vert + 1) >> 1;
    S32 i4_valid_16x16_map = 0;
    S32 i4_merge_x, i4_merge_y;
    S32 i4_enc_idx;
    blk_ctb_attrs_t *ps_cur_blk = &ps_attrs->as_blk_attrs[0];

    ps_attrs->u1_num_blks_in_ctb = (U08)(i4_cols_16x16 * i4_rows_16x16);

    /*************************************************************************/
    /* Run through each blk assuming all 16x16 CUs valid. The order would be */
    /* 0   1   4   5                                                         */
    /* 2   3   6   7                                                         */
    /* 8   9   12  13                                                        */
    /* 10  11  14  15                                                        */
    /* Out of these some may not be valid. For example, if num_16x16_horz is */
    /* 2 and num_16x16_vert is 4, then right 2 columns not valid. In this    */
    /* case, blks 8-11 get encoding number of 4-7. Further, the variable     */
    /* cu_16x16_valid_flag will be 1111 0000 1111 0000. Also, the variable   */
    /* u1_merge_to_32x32_flag will be 1010, and u1_merge_to_64x64_flag 0     */
    /* (bit patterns above are written with bit 0 first).                    */
    /*************************************************************************/
    for(i4_enc_idx = 0; i4_enc_idx < 16; i4_enc_idx++)
    {
        const S32 i4_x = gau1_encode_to_raster_x[i4_enc_idx];
        const S32 i4_y = gau1_encode_to_raster_y[i4_enc_idx];

        /* Only blks that lie inside the picture get an entry */
        if((i4_x < i4_cols_16x16) && (i4_y < i4_rows_16x16))
        {
            U08 u1_mask_8x8 = 0xF;

            /* Disable blks 1 and 3 if the 16x16 blk overshoots on rt border */
            if(((i4_x << 1) + 2) > num_8x8_horz)
            {
                u1_mask_8x8 &= 0x5;
            }

            /* Disable blks 2 and 3 if the 16x16 blk overshoots on bot border */
            if(((i4_y << 1) + 2) > num_8x8_vert)
            {
                u1_mask_8x8 &= 0x3;
            }

            /* The CU at encode location i4_enc_idx is valid */
            i4_valid_16x16_map |= (1 << i4_enc_idx);

            ps_cur_blk->u1_blk_id_in_full_ctb = i4_enc_idx;
            ps_cur_blk->u1_blk_x = i4_x;
            ps_cur_blk->u1_blk_y = i4_y;
            ps_cur_blk->u1_blk_8x8_mask = u1_mask_8x8;

            ps_cur_blk++;
        }
    }

    ps_attrs->cu_16x16_valid_flag = i4_valid_16x16_map;

    /* 32x32 merge is logical combination of what merge is possible */
    /* horizontally as well as vertically.                          */
    i4_merge_x = (num_8x8_horz < 4) ? 0x0 : ((num_8x8_horz < 8) ? 0x5 : 0xF);
    i4_merge_y = (num_8x8_vert < 4) ? 0x0 : ((num_8x8_vert < 8) ? 0x3 : 0xF);

    ps_attrs->u1_merge_to_32x32_flag = (U08)(i4_merge_x & i4_merge_y);

    /* Do not attempt 64x64 merge if any blk invalid */
    ps_attrs->u1_merge_to_64x64_flag = (0xF == ps_attrs->u1_merge_to_32x32_flag) ? 1 : 0;
}
3872*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_set_ctb_attrs
*
*  @brief  Sets up the boundary attributes for every CTB type that occurs
*          in a picture: right boundary, bottom boundary, bottom right
*          corner (each only when the respective dimension is not a
*          multiple of 64) and the always present centre CTB.
*
*  @param[out] ps_attrs : array of attributes indexed by CTB_CENTRE,
*                         CTB_RT_PIC_BOUNDARY, CTB_BOT_PIC_BOUNDARY and
*                         CTB_BOT_RT_PIC_BOUNDARY
*
*  @param[in]  wd : width (presumably the picture width in pixels; confirm
*                   against caller)
*
*  @param[in]  ht : height (presumably the picture height in pixels; confirm
*                   against caller)
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
{
    /* Part of the width / height left over after the last full 64 unit */
    const S32 i4_rem_wd = wd & 63;
    const S32 i4_rem_ht = ht & 63;

    /* Same leftover, in units of 8x8 blocks */
    const S32 i4_num_8x8_rt = i4_rem_wd >> 3;
    const S32 i4_num_8x8_bot = i4_rem_ht >> 3;

    if(0 != i4_rem_wd)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], i4_num_8x8_rt, 8);
    }

    if(0 != i4_rem_ht)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, i4_num_8x8_bot);
    }

    if((0 != i4_rem_wd) && (0 != i4_rem_ht))
    {
        hme_set_ctb_boundary_attrs(
            &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], i4_num_8x8_rt, i4_num_8x8_bot);
    }

    /* A complete 64x64 CTB holds 8x8 blocks of size 8x8 */
    hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
}
3895*c83a76b0SSuyog Pawar
3896*c83a76b0SSuyog Pawar /**
3897*c83a76b0SSuyog Pawar ********************************************************************************
3898*c83a76b0SSuyog Pawar * @fn hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3899*c83a76b0SSuyog Pawar *
3900*c83a76b0SSuyog Pawar * @brief When we have an mv with ref id "poc_to" for which predictor to be
3901*c83a76b0SSuyog Pawar * computed, and predictor is ref id "poc_from", this funciton returns
3902*c83a76b0SSuyog Pawar * scale factor in Q8 for such a purpose
3903*c83a76b0SSuyog Pawar *
3904*c83a76b0SSuyog Pawar * @param[in] curr_poc : input picture poc
3905*c83a76b0SSuyog Pawar *
3906*c83a76b0SSuyog Pawar * @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
3907*c83a76b0SSuyog Pawar *
3908*c83a76b0SSuyog Pawar * @param[in] poc_to : POC of hte pic, pointed to by ref id to be scaled to
3909*c83a76b0SSuyog Pawar *
3910*c83a76b0SSuyog Pawar * @return Scale factor in Q8 format
3911*c83a76b0SSuyog Pawar ********************************************************************************
3912*c83a76b0SSuyog Pawar */
hme_scale_for_ref_idx(S32 curr_poc,S32 poc_from,S32 poc_to)3913*c83a76b0SSuyog Pawar S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3914*c83a76b0SSuyog Pawar {
3915*c83a76b0SSuyog Pawar S32 td, tx, tb;
3916*c83a76b0SSuyog Pawar S16 i2_scf;
3917*c83a76b0SSuyog Pawar /*************************************************************************/
3918*c83a76b0SSuyog Pawar /* Approximate scale factor: 256 * num / denom */
3919*c83a76b0SSuyog Pawar /* num = curr_poc - poc_to, denom = curr_poc - poc_from */
3920*c83a76b0SSuyog Pawar /* Exact implementation as per standard. */
3921*c83a76b0SSuyog Pawar /*************************************************************************/
3922*c83a76b0SSuyog Pawar
3923*c83a76b0SSuyog Pawar tb = HME_CLIP((curr_poc - poc_to), -128, 127);
3924*c83a76b0SSuyog Pawar td = HME_CLIP((curr_poc - poc_from), -128, 127);
3925*c83a76b0SSuyog Pawar
3926*c83a76b0SSuyog Pawar tx = (16384 + (ABS(td) >> 1)) / td;
3927*c83a76b0SSuyog Pawar //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127);
3928*c83a76b0SSuyog Pawar i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
3929*c83a76b0SSuyog Pawar
3930*c83a76b0SSuyog Pawar return (i2_scf);
3931*c83a76b0SSuyog Pawar }
3932*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_process_frm_init
*
*  @brief  HME frame level initialisation processing function
*
*  @param[in] pv_me_ctxt : ME ctxt pointer
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ps_frm_prms : Pointer to frame params
*
*  @param[in] i4_me_frm_id : Index of the ME frame ctxt to be initialised
*
*  called only for encode layer
*
*  @return None
********************************************************************************
*/
hme_process_frm_init(void * pv_me_ctxt,hme_ref_map_t * ps_ref_map,hme_frm_prms_t * ps_frm_prms,WORD32 i4_me_frm_id,WORD32 i4_num_me_frm_pllel)3950*c83a76b0SSuyog Pawar void hme_process_frm_init(
3951*c83a76b0SSuyog Pawar void *pv_me_ctxt,
3952*c83a76b0SSuyog Pawar hme_ref_map_t *ps_ref_map,
3953*c83a76b0SSuyog Pawar hme_frm_prms_t *ps_frm_prms,
3954*c83a76b0SSuyog Pawar WORD32 i4_me_frm_id,
3955*c83a76b0SSuyog Pawar WORD32 i4_num_me_frm_pllel)
3956*c83a76b0SSuyog Pawar {
3957*c83a76b0SSuyog Pawar me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
3958*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
3959*c83a76b0SSuyog Pawar
3960*c83a76b0SSuyog Pawar S32 i, j, desc_idx;
3961*c83a76b0SSuyog Pawar S16 i2_max_x = 0, i2_max_y = 0;
3962*c83a76b0SSuyog Pawar
3963*c83a76b0SSuyog Pawar /* Set the Qp of current frm passed by caller. Required for intra cost */
3964*c83a76b0SSuyog Pawar ps_ctxt->frm_qstep = ps_frm_prms->qstep;
3965*c83a76b0SSuyog Pawar ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;
3966*c83a76b0SSuyog Pawar
3967*c83a76b0SSuyog Pawar /* Bidir enabled or not */
3968*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms = *ps_frm_prms;
3969*c83a76b0SSuyog Pawar
3970*c83a76b0SSuyog Pawar /*************************************************************************/
3971*c83a76b0SSuyog Pawar /* Set up the ref pic parameters across all layers. For this, we do the */
3972*c83a76b0SSuyog Pawar /* following: the application has given us a ref pic list, we go index */
3973*c83a76b0SSuyog Pawar /* by index and pick up the picture. A picture can be uniquely be mapped */
3974*c83a76b0SSuyog Pawar /* to a POC. So we search all layer descriptor array to find the POC */
3975*c83a76b0SSuyog Pawar /* Once found, we update all attributes in this descriptor. */
3976*c83a76b0SSuyog Pawar /* During this updation process we also create an index of descriptor id */
3977*c83a76b0SSuyog Pawar /* to ref id mapping. It is important to find the same POC in the layers */
3978*c83a76b0SSuyog Pawar /* descr strcture since it holds the pyramid inputs for non encode layers*/
3979*c83a76b0SSuyog Pawar /* Apart from this, e also update array containing the index of the descr*/
3980*c83a76b0SSuyog Pawar /* During processing for ease of access, each layer has a pointer to aray*/
3981*c83a76b0SSuyog Pawar /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
3982*c83a76b0SSuyog Pawar /* we update this too. */
3983*c83a76b0SSuyog Pawar /*************************************************************************/
3984*c83a76b0SSuyog Pawar ps_ctxt->num_ref_past = 0;
3985*c83a76b0SSuyog Pawar ps_ctxt->num_ref_future = 0;
3986*c83a76b0SSuyog Pawar for(i = 0; i < ps_ref_map->i4_num_ref; i++)
3987*c83a76b0SSuyog Pawar {
3988*c83a76b0SSuyog Pawar S32 ref_id_lc, idx;
3989*c83a76b0SSuyog Pawar hme_ref_desc_t *ps_ref_desc;
3990*c83a76b0SSuyog Pawar
3991*c83a76b0SSuyog Pawar ps_ref_desc = &ps_ref_map->as_ref_desc[i];
3992*c83a76b0SSuyog Pawar ref_id_lc = ps_ref_desc->i1_ref_id_lc;
3993*c83a76b0SSuyog Pawar /* Obtain the id of descriptor that contains this POC */
3994*c83a76b0SSuyog Pawar idx = hme_find_descr_idx(
3995*c83a76b0SSuyog Pawar ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);
3996*c83a76b0SSuyog Pawar
3997*c83a76b0SSuyog Pawar /* Update all layers in this descr with the reference attributes */
3998*c83a76b0SSuyog Pawar hme_update_layer_desc(
3999*c83a76b0SSuyog Pawar &ps_thrd_ctxt->as_ref_descr[idx],
4000*c83a76b0SSuyog Pawar ps_ref_desc,
4001*c83a76b0SSuyog Pawar 0,
4002*c83a76b0SSuyog Pawar 1, //ps_ctxt->num_layers,
4003*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr);
4004*c83a76b0SSuyog Pawar
4005*c83a76b0SSuyog Pawar /* Update the pointer holder for the recon planes */
4006*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
4007*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
4008*c83a76b0SSuyog Pawar &ps_ctxt->apu1_list_rec_fxfy[0][0];
4009*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
4010*c83a76b0SSuyog Pawar &ps_ctxt->apu1_list_rec_hxfy[0][0];
4011*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
4012*c83a76b0SSuyog Pawar &ps_ctxt->apu1_list_rec_fxhy[0][0];
4013*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
4014*c83a76b0SSuyog Pawar &ps_ctxt->apu1_list_rec_hxhy[0][0];
4015*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
4016*c83a76b0SSuyog Pawar &ps_ctxt->apv_list_dep_mngr[0][0];
4017*c83a76b0SSuyog Pawar
4018*c83a76b0SSuyog Pawar /* Update the array having ref id lc to descr id mapping */
4019*c83a76b0SSuyog Pawar ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
4020*c83a76b0SSuyog Pawar
4021*c83a76b0SSuyog Pawar /* From ref id lc we need to work out the POC, So update this array */
4022*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
4023*c83a76b0SSuyog Pawar
4024*c83a76b0SSuyog Pawar /* When computing costs in L0 and L1 directions, we need the */
4025*c83a76b0SSuyog Pawar /* respective ref id L0 and L1, so update this mapping */
4026*c83a76b0SSuyog Pawar ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
4027*c83a76b0SSuyog Pawar ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
4028*c83a76b0SSuyog Pawar if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
4029*c83a76b0SSuyog Pawar {
4030*c83a76b0SSuyog Pawar ps_ctxt->au1_is_past[ref_id_lc] = 1;
4031*c83a76b0SSuyog Pawar ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
4032*c83a76b0SSuyog Pawar ps_ctxt->num_ref_past++;
4033*c83a76b0SSuyog Pawar }
4034*c83a76b0SSuyog Pawar else
4035*c83a76b0SSuyog Pawar {
4036*c83a76b0SSuyog Pawar ps_ctxt->au1_is_past[ref_id_lc] = 0;
4037*c83a76b0SSuyog Pawar ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
4038*c83a76b0SSuyog Pawar ps_ctxt->num_ref_future++;
4039*c83a76b0SSuyog Pawar }
4040*c83a76b0SSuyog Pawar
4041*c83a76b0SSuyog Pawar if(1 == ps_ctxt->i4_wt_pred_enable_flag)
4042*c83a76b0SSuyog Pawar {
4043*c83a76b0SSuyog Pawar /* copy the weight and offsets from current ref desc */
4044*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
4045*c83a76b0SSuyog Pawar
4046*c83a76b0SSuyog Pawar /* inv weight is stored in Q15 format */
4047*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4048*c83a76b0SSuyog Pawar ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
4049*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
4050*c83a76b0SSuyog Pawar }
4051*c83a76b0SSuyog Pawar else
4052*c83a76b0SSuyog Pawar {
4053*c83a76b0SSuyog Pawar /* store default wt and offset*/
4054*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
4055*c83a76b0SSuyog Pawar
4056*c83a76b0SSuyog Pawar /* inv weight is stored in Q15 format */
4057*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4058*c83a76b0SSuyog Pawar ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
4059*c83a76b0SSuyog Pawar
4060*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
4061*c83a76b0SSuyog Pawar }
4062*c83a76b0SSuyog Pawar }
4063*c83a76b0SSuyog Pawar
4064*c83a76b0SSuyog Pawar ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
4065*c83a76b0SSuyog Pawar ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
4066*c83a76b0SSuyog Pawar
4067*c83a76b0SSuyog Pawar /*************************************************************************/
4068*c83a76b0SSuyog Pawar /* Preparation of the TLU for bits for reference indices. */
4069*c83a76b0SSuyog Pawar /* Special case is that of numref = 2. (TEV) */
4070*c83a76b0SSuyog Pawar /* Other cases uses UEV */
4071*c83a76b0SSuyog Pawar /*************************************************************************/
4072*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_REF; i++)
4073*c83a76b0SSuyog Pawar {
4074*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
4075*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
4076*c83a76b0SSuyog Pawar }
4077*c83a76b0SSuyog Pawar
4078*c83a76b0SSuyog Pawar if(ps_ref_map->i4_num_ref == 2)
4079*c83a76b0SSuyog Pawar {
4080*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
4081*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
4082*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
4083*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
4084*c83a76b0SSuyog Pawar }
4085*c83a76b0SSuyog Pawar else if(ps_ref_map->i4_num_ref > 2)
4086*c83a76b0SSuyog Pawar {
4087*c83a76b0SSuyog Pawar for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4088*c83a76b0SSuyog Pawar {
4089*c83a76b0SSuyog Pawar S32 l0, l1;
4090*c83a76b0SSuyog Pawar l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
4091*c83a76b0SSuyog Pawar l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
4092*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
4093*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
4094*c83a76b0SSuyog Pawar }
4095*c83a76b0SSuyog Pawar }
4096*c83a76b0SSuyog Pawar
4097*c83a76b0SSuyog Pawar /*************************************************************************/
4098*c83a76b0SSuyog Pawar /* Preparation of the scaling factors for reference indices. The scale */
4099*c83a76b0SSuyog Pawar /* factor depends on distance of the two ref indices from current input */
4100*c83a76b0SSuyog Pawar /* in terms of poc delta. */
4101*c83a76b0SSuyog Pawar /*************************************************************************/
4102*c83a76b0SSuyog Pawar for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4103*c83a76b0SSuyog Pawar {
4104*c83a76b0SSuyog Pawar for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4105*c83a76b0SSuyog Pawar {
4106*c83a76b0SSuyog Pawar S16 i2_scf_q8;
4107*c83a76b0SSuyog Pawar S32 poc_from, poc_to;
4108*c83a76b0SSuyog Pawar
4109*c83a76b0SSuyog Pawar poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
4110*c83a76b0SSuyog Pawar poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
4111*c83a76b0SSuyog Pawar
4112*c83a76b0SSuyog Pawar i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
4113*c83a76b0SSuyog Pawar ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
4114*c83a76b0SSuyog Pawar }
4115*c83a76b0SSuyog Pawar }
4116*c83a76b0SSuyog Pawar
4117*c83a76b0SSuyog Pawar /*************************************************************************/
4118*c83a76b0SSuyog Pawar /* We store simplified look ups for 4 hpel planes and inp y plane for */
4119*c83a76b0SSuyog Pawar /* every layer and for every ref id in the layer. So update these lookups*/
4120*c83a76b0SSuyog Pawar /*************************************************************************/
4121*c83a76b0SSuyog Pawar for(i = 0; i < 1; i++)
4122*c83a76b0SSuyog Pawar {
4123*c83a76b0SSuyog Pawar U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
4124*c83a76b0SSuyog Pawar U08 **ppu1_inp;
4125*c83a76b0SSuyog Pawar void **ppvlist_dep_mngr;
4126*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4127*c83a76b0SSuyog Pawar
4128*c83a76b0SSuyog Pawar ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
4129*c83a76b0SSuyog Pawar ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
4130*c83a76b0SSuyog Pawar ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
4131*c83a76b0SSuyog Pawar ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
4132*c83a76b0SSuyog Pawar ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
4133*c83a76b0SSuyog Pawar ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
4134*c83a76b0SSuyog Pawar for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4135*c83a76b0SSuyog Pawar {
4136*c83a76b0SSuyog Pawar hme_ref_desc_t *ps_ref_desc;
4137*c83a76b0SSuyog Pawar hme_ref_buf_info_t *ps_buf_info;
4138*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
4139*c83a76b0SSuyog Pawar S32 ref_id_lc;
4140*c83a76b0SSuyog Pawar
4141*c83a76b0SSuyog Pawar ps_ref_desc = &ps_ref_map->as_ref_desc[j];
4142*c83a76b0SSuyog Pawar ps_buf_info = &ps_ref_desc->as_ref_info[i];
4143*c83a76b0SSuyog Pawar ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4144*c83a76b0SSuyog Pawar
4145*c83a76b0SSuyog Pawar desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
4146*c83a76b0SSuyog Pawar ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i];
4147*c83a76b0SSuyog Pawar
4148*c83a76b0SSuyog Pawar ppu1_inp[j] = ps_buf_info->pu1_ref_src;
4149*c83a76b0SSuyog Pawar ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
4150*c83a76b0SSuyog Pawar ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
4151*c83a76b0SSuyog Pawar ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
4152*c83a76b0SSuyog Pawar ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
4153*c83a76b0SSuyog Pawar ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;
4154*c83a76b0SSuyog Pawar
4155*c83a76b0SSuyog Pawar /* Update the curr descriptors reference pointers here */
4156*c83a76b0SSuyog Pawar ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
4157*c83a76b0SSuyog Pawar ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
4158*c83a76b0SSuyog Pawar ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
4159*c83a76b0SSuyog Pawar ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
4160*c83a76b0SSuyog Pawar ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
4161*c83a76b0SSuyog Pawar }
4162*c83a76b0SSuyog Pawar }
4163*c83a76b0SSuyog Pawar /*************************************************************************/
4164*c83a76b0SSuyog Pawar /* The mv range for each layer is computed. For dyadic layers it will */
4165*c83a76b0SSuyog Pawar /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
4166*c83a76b0SSuyog Pawar /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
4167*c83a76b0SSuyog Pawar /*************************************************************************/
4168*c83a76b0SSuyog Pawar for(i = 0; i < 1; i++)
4169*c83a76b0SSuyog Pawar {
4170*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer_ctxt;
4171*c83a76b0SSuyog Pawar if(i == 0)
4172*c83a76b0SSuyog Pawar {
4173*c83a76b0SSuyog Pawar i2_max_x = ps_frm_prms->i2_mv_range_x;
4174*c83a76b0SSuyog Pawar i2_max_y = ps_frm_prms->i2_mv_range_y;
4175*c83a76b0SSuyog Pawar }
4176*c83a76b0SSuyog Pawar else
4177*c83a76b0SSuyog Pawar {
4178*c83a76b0SSuyog Pawar i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
4179*c83a76b0SSuyog Pawar i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
4180*c83a76b0SSuyog Pawar }
4181*c83a76b0SSuyog Pawar ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4182*c83a76b0SSuyog Pawar ps_layer_ctxt->i2_max_mv_x = i2_max_x;
4183*c83a76b0SSuyog Pawar ps_layer_ctxt->i2_max_mv_y = i2_max_y;
4184*c83a76b0SSuyog Pawar
4185*c83a76b0SSuyog Pawar /*********************************************************************/
4186*c83a76b0SSuyog Pawar /* Every layer maintains a reference id lc to POC mapping. This is */
4187*c83a76b0SSuyog Pawar /* because the mapping is unique for every frm. Also, in next frm, */
4188*c83a76b0SSuyog Pawar /* we require colocated mvs which means scaling according to temporal*/
4189*c83a76b0SSuyog Pawar /*distance. Hence this mapping needs to be maintained in every */
4190*c83a76b0SSuyog Pawar /* layer ctxt */
4191*c83a76b0SSuyog Pawar /*********************************************************************/
4192*c83a76b0SSuyog Pawar memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
4193*c83a76b0SSuyog Pawar if(ps_ref_map->i4_num_ref)
4194*c83a76b0SSuyog Pawar {
4195*c83a76b0SSuyog Pawar memcpy(
4196*c83a76b0SSuyog Pawar ps_layer_ctxt->ai4_ref_id_to_poc_lc,
4197*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_poc_lc,
4198*c83a76b0SSuyog Pawar ps_ref_map->i4_num_ref * sizeof(S32));
4199*c83a76b0SSuyog Pawar }
4200*c83a76b0SSuyog Pawar }
4201*c83a76b0SSuyog Pawar
4202*c83a76b0SSuyog Pawar return;
4203*c83a76b0SSuyog Pawar }
4204*c83a76b0SSuyog Pawar
4205*c83a76b0SSuyog Pawar /**
4206*c83a76b0SSuyog Pawar ********************************************************************************
4207*c83a76b0SSuyog Pawar * @fn hme_coarse_process_frm_init
4208*c83a76b0SSuyog Pawar *
4209*c83a76b0SSuyog Pawar * @brief HME frame level initialsation processing function
4210*c83a76b0SSuyog Pawar *
4211*c83a76b0SSuyog Pawar * @param[in] pv_me_ctxt : ME ctxt pointer
4212*c83a76b0SSuyog Pawar *
4213*c83a76b0SSuyog Pawar * @param[in] ps_ref_map : Reference map prms pointer
4214*c83a76b0SSuyog Pawar *
4215*c83a76b0SSuyog Pawar * @param[in] ps_frm_prms :Pointer to frame params
4216*c83a76b0SSuyog Pawar *
4217*c83a76b0SSuyog Pawar * @return Scale factor in Q8 format
4218*c83a76b0SSuyog Pawar ********************************************************************************
4219*c83a76b0SSuyog Pawar */
hme_coarse_process_frm_init(void * pv_me_ctxt,hme_ref_map_t * ps_ref_map,hme_frm_prms_t * ps_frm_prms)4220*c83a76b0SSuyog Pawar void hme_coarse_process_frm_init(
4221*c83a76b0SSuyog Pawar void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
4222*c83a76b0SSuyog Pawar {
4223*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4224*c83a76b0SSuyog Pawar S32 i, j, desc_idx;
4225*c83a76b0SSuyog Pawar S16 i2_max_x = 0, i2_max_y = 0;
4226*c83a76b0SSuyog Pawar
4227*c83a76b0SSuyog Pawar /* Set the Qp of current frm passed by caller. Required for intra cost */
4228*c83a76b0SSuyog Pawar ps_ctxt->frm_qstep = ps_frm_prms->qstep;
4229*c83a76b0SSuyog Pawar
4230*c83a76b0SSuyog Pawar /* Bidir enabled or not */
4231*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms = *ps_frm_prms;
4232*c83a76b0SSuyog Pawar
4233*c83a76b0SSuyog Pawar /*************************************************************************/
4234*c83a76b0SSuyog Pawar /* Set up the ref pic parameters across all layers. For this, we do the */
4235*c83a76b0SSuyog Pawar /* following: the application has given us a ref pic list, we go index */
4236*c83a76b0SSuyog Pawar /* by index and pick up the picture. A picture can be uniquely be mapped */
4237*c83a76b0SSuyog Pawar /* to a POC. So we search all layer descriptor array to find the POC */
4238*c83a76b0SSuyog Pawar /* Once found, we update all attributes in this descriptor. */
4239*c83a76b0SSuyog Pawar /* During this updation process we also create an index of descriptor id */
4240*c83a76b0SSuyog Pawar /* to ref id mapping. It is important to find the same POC in the layers */
4241*c83a76b0SSuyog Pawar /* descr strcture since it holds the pyramid inputs for non encode layers*/
4242*c83a76b0SSuyog Pawar /* Apart from this, e also update array containing the index of the descr*/
4243*c83a76b0SSuyog Pawar /* During processing for ease of access, each layer has a pointer to aray*/
4244*c83a76b0SSuyog Pawar /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
4245*c83a76b0SSuyog Pawar /* we update this too. */
4246*c83a76b0SSuyog Pawar /*************************************************************************/
4247*c83a76b0SSuyog Pawar ps_ctxt->num_ref_past = 0;
4248*c83a76b0SSuyog Pawar ps_ctxt->num_ref_future = 0;
4249*c83a76b0SSuyog Pawar for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4250*c83a76b0SSuyog Pawar {
4251*c83a76b0SSuyog Pawar S32 ref_id_lc, idx;
4252*c83a76b0SSuyog Pawar hme_ref_desc_t *ps_ref_desc;
4253*c83a76b0SSuyog Pawar
4254*c83a76b0SSuyog Pawar ps_ref_desc = &ps_ref_map->as_ref_desc[i];
4255*c83a76b0SSuyog Pawar ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4256*c83a76b0SSuyog Pawar /* Obtain the id of descriptor that contains this POC */
4257*c83a76b0SSuyog Pawar idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);
4258*c83a76b0SSuyog Pawar
4259*c83a76b0SSuyog Pawar /* Update all layers in this descr with the reference attributes */
4260*c83a76b0SSuyog Pawar hme_update_layer_desc(
4261*c83a76b0SSuyog Pawar &ps_ctxt->as_ref_descr[idx],
4262*c83a76b0SSuyog Pawar ps_ref_desc,
4263*c83a76b0SSuyog Pawar 1,
4264*c83a76b0SSuyog Pawar ps_ctxt->num_layers - 1,
4265*c83a76b0SSuyog Pawar ps_ctxt->ps_curr_descr);
4266*c83a76b0SSuyog Pawar
4267*c83a76b0SSuyog Pawar /* Update the array having ref id lc to descr id mapping */
4268*c83a76b0SSuyog Pawar ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
4269*c83a76b0SSuyog Pawar
4270*c83a76b0SSuyog Pawar /* From ref id lc we need to work out the POC, So update this array */
4271*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
4272*c83a76b0SSuyog Pawar
4273*c83a76b0SSuyog Pawar /* From ref id lc we need to work out the display num, So update this array */
4274*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;
4275*c83a76b0SSuyog Pawar
4276*c83a76b0SSuyog Pawar /* When computing costs in L0 and L1 directions, we need the */
4277*c83a76b0SSuyog Pawar /* respective ref id L0 and L1, so update this mapping */
4278*c83a76b0SSuyog Pawar ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
4279*c83a76b0SSuyog Pawar ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
4280*c83a76b0SSuyog Pawar if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
4281*c83a76b0SSuyog Pawar {
4282*c83a76b0SSuyog Pawar ps_ctxt->au1_is_past[ref_id_lc] = 1;
4283*c83a76b0SSuyog Pawar ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
4284*c83a76b0SSuyog Pawar ps_ctxt->num_ref_past++;
4285*c83a76b0SSuyog Pawar }
4286*c83a76b0SSuyog Pawar else
4287*c83a76b0SSuyog Pawar {
4288*c83a76b0SSuyog Pawar ps_ctxt->au1_is_past[ref_id_lc] = 0;
4289*c83a76b0SSuyog Pawar ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
4290*c83a76b0SSuyog Pawar ps_ctxt->num_ref_future++;
4291*c83a76b0SSuyog Pawar }
4292*c83a76b0SSuyog Pawar if(1 == ps_ctxt->i4_wt_pred_enable_flag)
4293*c83a76b0SSuyog Pawar {
4294*c83a76b0SSuyog Pawar /* copy the weight and offsets from current ref desc */
4295*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
4296*c83a76b0SSuyog Pawar
4297*c83a76b0SSuyog Pawar /* inv weight is stored in Q15 format */
4298*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4299*c83a76b0SSuyog Pawar ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
4300*c83a76b0SSuyog Pawar
4301*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
4302*c83a76b0SSuyog Pawar }
4303*c83a76b0SSuyog Pawar else
4304*c83a76b0SSuyog Pawar {
4305*c83a76b0SSuyog Pawar /* store default wt and offset*/
4306*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
4307*c83a76b0SSuyog Pawar
4308*c83a76b0SSuyog Pawar /* inv weight is stored in Q15 format */
4309*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
4310*c83a76b0SSuyog Pawar ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
4311*c83a76b0SSuyog Pawar
4312*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
4313*c83a76b0SSuyog Pawar }
4314*c83a76b0SSuyog Pawar }
4315*c83a76b0SSuyog Pawar
4316*c83a76b0SSuyog Pawar ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
4317*c83a76b0SSuyog Pawar ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
4318*c83a76b0SSuyog Pawar
4319*c83a76b0SSuyog Pawar /*************************************************************************/
4320*c83a76b0SSuyog Pawar /* Preparation of the TLU for bits for reference indices. */
4321*c83a76b0SSuyog Pawar /* Special case is that of numref = 2. (TEV) */
4322*c83a76b0SSuyog Pawar /* Other cases uses UEV */
4323*c83a76b0SSuyog Pawar /*************************************************************************/
4324*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_REF; i++)
4325*c83a76b0SSuyog Pawar {
4326*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
4327*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
4328*c83a76b0SSuyog Pawar }
4329*c83a76b0SSuyog Pawar
4330*c83a76b0SSuyog Pawar if(ps_ref_map->i4_num_ref == 2)
4331*c83a76b0SSuyog Pawar {
4332*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
4333*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
4334*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
4335*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
4336*c83a76b0SSuyog Pawar }
4337*c83a76b0SSuyog Pawar else if(ps_ref_map->i4_num_ref > 2)
4338*c83a76b0SSuyog Pawar {
4339*c83a76b0SSuyog Pawar for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4340*c83a76b0SSuyog Pawar {
4341*c83a76b0SSuyog Pawar S32 l0, l1;
4342*c83a76b0SSuyog Pawar l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
4343*c83a76b0SSuyog Pawar l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
4344*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
4345*c83a76b0SSuyog Pawar ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
4346*c83a76b0SSuyog Pawar }
4347*c83a76b0SSuyog Pawar }
4348*c83a76b0SSuyog Pawar
4349*c83a76b0SSuyog Pawar /*************************************************************************/
4350*c83a76b0SSuyog Pawar /* Preparation of the scaling factors for reference indices. The scale */
4351*c83a76b0SSuyog Pawar /* factor depends on distance of the two ref indices from current input */
4352*c83a76b0SSuyog Pawar /* in terms of poc delta. */
4353*c83a76b0SSuyog Pawar /*************************************************************************/
4354*c83a76b0SSuyog Pawar for(i = 0; i < ps_ref_map->i4_num_ref; i++)
4355*c83a76b0SSuyog Pawar {
4356*c83a76b0SSuyog Pawar for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4357*c83a76b0SSuyog Pawar {
4358*c83a76b0SSuyog Pawar S16 i2_scf_q8;
4359*c83a76b0SSuyog Pawar S32 poc_from, poc_to;
4360*c83a76b0SSuyog Pawar
4361*c83a76b0SSuyog Pawar poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
4362*c83a76b0SSuyog Pawar poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
4363*c83a76b0SSuyog Pawar
4364*c83a76b0SSuyog Pawar i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
4365*c83a76b0SSuyog Pawar ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
4366*c83a76b0SSuyog Pawar }
4367*c83a76b0SSuyog Pawar }
4368*c83a76b0SSuyog Pawar
4369*c83a76b0SSuyog Pawar /*************************************************************************/
4370*c83a76b0SSuyog Pawar /* We store simplified look ups for inp y plane for */
4371*c83a76b0SSuyog Pawar /* every layer and for every ref id in the layer. */
4372*c83a76b0SSuyog Pawar /*************************************************************************/
4373*c83a76b0SSuyog Pawar for(i = 1; i < ps_ctxt->num_layers; i++)
4374*c83a76b0SSuyog Pawar {
4375*c83a76b0SSuyog Pawar U08 **ppu1_inp;
4376*c83a76b0SSuyog Pawar
4377*c83a76b0SSuyog Pawar ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
4378*c83a76b0SSuyog Pawar for(j = 0; j < ps_ref_map->i4_num_ref; j++)
4379*c83a76b0SSuyog Pawar {
4380*c83a76b0SSuyog Pawar hme_ref_desc_t *ps_ref_desc;
4381*c83a76b0SSuyog Pawar hme_ref_buf_info_t *ps_buf_info;
4382*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer;
4383*c83a76b0SSuyog Pawar S32 ref_id_lc;
4384*c83a76b0SSuyog Pawar
4385*c83a76b0SSuyog Pawar ps_ref_desc = &ps_ref_map->as_ref_desc[j];
4386*c83a76b0SSuyog Pawar ps_buf_info = &ps_ref_desc->as_ref_info[i];
4387*c83a76b0SSuyog Pawar ref_id_lc = ps_ref_desc->i1_ref_id_lc;
4388*c83a76b0SSuyog Pawar
4389*c83a76b0SSuyog Pawar desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
4390*c83a76b0SSuyog Pawar ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];
4391*c83a76b0SSuyog Pawar
4392*c83a76b0SSuyog Pawar ppu1_inp[j] = ps_layer->pu1_inp;
4393*c83a76b0SSuyog Pawar }
4394*c83a76b0SSuyog Pawar }
4395*c83a76b0SSuyog Pawar /*************************************************************************/
4396*c83a76b0SSuyog Pawar /* The mv range for each layer is computed. For dyadic layers it will */
4397*c83a76b0SSuyog Pawar /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
4398*c83a76b0SSuyog Pawar /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
4399*c83a76b0SSuyog Pawar /*************************************************************************/
4400*c83a76b0SSuyog Pawar
4401*c83a76b0SSuyog Pawar /* set to layer 0 search range params */
4402*c83a76b0SSuyog Pawar i2_max_x = ps_frm_prms->i2_mv_range_x;
4403*c83a76b0SSuyog Pawar i2_max_y = ps_frm_prms->i2_mv_range_y;
4404*c83a76b0SSuyog Pawar
4405*c83a76b0SSuyog Pawar for(i = 1; i < ps_ctxt->num_layers; i++)
4406*c83a76b0SSuyog Pawar {
4407*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer_ctxt;
4408*c83a76b0SSuyog Pawar
4409*c83a76b0SSuyog Pawar {
4410*c83a76b0SSuyog Pawar i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
4411*c83a76b0SSuyog Pawar i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));
4412*c83a76b0SSuyog Pawar }
4413*c83a76b0SSuyog Pawar ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
4414*c83a76b0SSuyog Pawar ps_layer_ctxt->i2_max_mv_x = i2_max_x;
4415*c83a76b0SSuyog Pawar ps_layer_ctxt->i2_max_mv_y = i2_max_y;
4416*c83a76b0SSuyog Pawar
4417*c83a76b0SSuyog Pawar /*********************************************************************/
4418*c83a76b0SSuyog Pawar /* Every layer maintains a reference id lc to POC mapping. This is */
4419*c83a76b0SSuyog Pawar /* because the mapping is unique for every frm. Also, in next frm, */
4420*c83a76b0SSuyog Pawar /* we require colocated mvs which means scaling according to temporal*/
4421*c83a76b0SSuyog Pawar /*distance. Hence this mapping needs to be maintained in every */
4422*c83a76b0SSuyog Pawar /* layer ctxt */
4423*c83a76b0SSuyog Pawar /*********************************************************************/
4424*c83a76b0SSuyog Pawar memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
4425*c83a76b0SSuyog Pawar if(ps_ref_map->i4_num_ref)
4426*c83a76b0SSuyog Pawar {
4427*c83a76b0SSuyog Pawar memcpy(
4428*c83a76b0SSuyog Pawar ps_layer_ctxt->ai4_ref_id_to_poc_lc,
4429*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_poc_lc,
4430*c83a76b0SSuyog Pawar ps_ref_map->i4_num_ref * sizeof(S32));
4431*c83a76b0SSuyog Pawar memcpy(
4432*c83a76b0SSuyog Pawar ps_layer_ctxt->ai4_ref_id_to_disp_num,
4433*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_disp_num,
4434*c83a76b0SSuyog Pawar ps_ref_map->i4_num_ref * sizeof(S32));
4435*c83a76b0SSuyog Pawar }
4436*c83a76b0SSuyog Pawar }
4437*c83a76b0SSuyog Pawar
4438*c83a76b0SSuyog Pawar return;
4439*c83a76b0SSuyog Pawar }
4440*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_process_frm
*
*  @brief  HME frame level processing function
*
*  @param[in] pv_me_ctxt : ME ctxt pointer
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ppd_intra_costs : pointer to array of intra cost buffers for each layer
*
*  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
*
*  @param[in] pf_ext_update_fxn : function pointer to update CTB results
*
*  @param[in] pf_get_intra_cu_and_cost : function pointer to get intra cu size and cost
*
*  @param[in] pv_multi_thrd_ctxt : pointer to the multi thread ctxt
*
*  @return None
********************************************************************************
*/
4464*c83a76b0SSuyog Pawar
hme_process_frm(void * pv_me_ctxt,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input,hme_ref_map_t * ps_ref_map,double ** ppd_intra_costs,hme_frm_prms_t * ps_frm_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,void * pv_coarse_layer,void * pv_multi_thrd_ctxt,S32 i4_frame_parallelism_level,S32 thrd_id,S32 i4_me_frm_id)4465*c83a76b0SSuyog Pawar void hme_process_frm(
4466*c83a76b0SSuyog Pawar void *pv_me_ctxt,
4467*c83a76b0SSuyog Pawar pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
4468*c83a76b0SSuyog Pawar hme_ref_map_t *ps_ref_map,
4469*c83a76b0SSuyog Pawar double **ppd_intra_costs,
4470*c83a76b0SSuyog Pawar hme_frm_prms_t *ps_frm_prms,
4471*c83a76b0SSuyog Pawar PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
4472*c83a76b0SSuyog Pawar void *pv_coarse_layer,
4473*c83a76b0SSuyog Pawar void *pv_multi_thrd_ctxt,
4474*c83a76b0SSuyog Pawar S32 i4_frame_parallelism_level,
4475*c83a76b0SSuyog Pawar S32 thrd_id,
4476*c83a76b0SSuyog Pawar S32 i4_me_frm_id)
4477*c83a76b0SSuyog Pawar {
4478*c83a76b0SSuyog Pawar refine_prms_t s_refine_prms;
4479*c83a76b0SSuyog Pawar me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
4480*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
4481*c83a76b0SSuyog Pawar
4482*c83a76b0SSuyog Pawar S32 lyr_job_type;
4483*c83a76b0SSuyog Pawar multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4484*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
4485*c83a76b0SSuyog Pawar
4486*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4487*c83a76b0SSuyog Pawar
4488*c83a76b0SSuyog Pawar lyr_job_type = ME_JOB_ENC_LYR;
4489*c83a76b0SSuyog Pawar /*************************************************************************/
4490*c83a76b0SSuyog Pawar /* Final L0 layer ME call */
4491*c83a76b0SSuyog Pawar /*************************************************************************/
4492*c83a76b0SSuyog Pawar {
4493*c83a76b0SSuyog Pawar /* Set the CTB attributes dependin on corner/rt edge/bot edge/center*/
4494*c83a76b0SSuyog Pawar hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht);
4495*c83a76b0SSuyog Pawar
4496*c83a76b0SSuyog Pawar hme_set_refine_prms(
4497*c83a76b0SSuyog Pawar &s_refine_prms,
4498*c83a76b0SSuyog Pawar ps_ctxt->u1_encode[0],
4499*c83a76b0SSuyog Pawar ps_ref_map->i4_num_ref,
4500*c83a76b0SSuyog Pawar 0,
4501*c83a76b0SSuyog Pawar ps_ctxt->num_layers,
4502*c83a76b0SSuyog Pawar ps_ctxt->num_layers_explicit_search,
4503*c83a76b0SSuyog Pawar ps_thrd_ctxt->s_init_prms.use_4x4,
4504*c83a76b0SSuyog Pawar ps_frm_prms,
4505*c83a76b0SSuyog Pawar ppd_intra_costs,
4506*c83a76b0SSuyog Pawar &ps_thrd_ctxt->s_init_prms.s_me_coding_tools);
4507*c83a76b0SSuyog Pawar
4508*c83a76b0SSuyog Pawar hme_refine(
4509*c83a76b0SSuyog Pawar ps_thrd_ctxt,
4510*c83a76b0SSuyog Pawar &s_refine_prms,
4511*c83a76b0SSuyog Pawar pf_ext_update_fxn,
4512*c83a76b0SSuyog Pawar ps_coarse_layer,
4513*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt,
4514*c83a76b0SSuyog Pawar lyr_job_type,
4515*c83a76b0SSuyog Pawar thrd_id,
4516*c83a76b0SSuyog Pawar i4_me_frm_id,
4517*c83a76b0SSuyog Pawar ps_l0_ipe_input);
4518*c83a76b0SSuyog Pawar
4519*c83a76b0SSuyog Pawar /* Set current ref pic status which will used as perv frame ref pic */
4520*c83a76b0SSuyog Pawar if(i4_frame_parallelism_level)
4521*c83a76b0SSuyog Pawar {
4522*c83a76b0SSuyog Pawar ps_ctxt->i4_is_prev_frame_reference = 0;
4523*c83a76b0SSuyog Pawar }
4524*c83a76b0SSuyog Pawar else
4525*c83a76b0SSuyog Pawar {
4526*c83a76b0SSuyog Pawar ps_ctxt->i4_is_prev_frame_reference =
4527*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
4528*c83a76b0SSuyog Pawar ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
4529*c83a76b0SSuyog Pawar }
4530*c83a76b0SSuyog Pawar }
4531*c83a76b0SSuyog Pawar
4532*c83a76b0SSuyog Pawar return;
4533*c83a76b0SSuyog Pawar }
4534*c83a76b0SSuyog Pawar
4535*c83a76b0SSuyog Pawar /**
4536*c83a76b0SSuyog Pawar ********************************************************************************
4537*c83a76b0SSuyog Pawar * @fn hme_coarse_process_frm
4538*c83a76b0SSuyog Pawar *
4539*c83a76b0SSuyog Pawar * @brief HME frame level processing function (coarse + refine)
4540*c83a76b0SSuyog Pawar *
4541*c83a76b0SSuyog Pawar * @param[in] pv_me_ctxt : ME ctxt pointer
4542*c83a76b0SSuyog Pawar *
4543*c83a76b0SSuyog Pawar * @param[in] ps_ref_map : Reference map prms pointer
4544*c83a76b0SSuyog Pawar *
4545*c83a76b0SSuyog Pawar * @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4546*c83a76b0SSuyog Pawar *
4547*c83a76b0SSuyog Pawar * @param[in] ps_multi_thrd_ctxt :Multi thread related ctxt
4548*c83a76b0SSuyog Pawar *
4549*c83a76b0SSuyog Pawar * @return Scale factor in Q8 format
4550*c83a76b0SSuyog Pawar ********************************************************************************
4551*c83a76b0SSuyog Pawar */
4552*c83a76b0SSuyog Pawar
hme_coarse_process_frm(void * pv_me_ctxt,hme_ref_map_t * ps_ref_map,hme_frm_prms_t * ps_frm_prms,void * pv_multi_thrd_ctxt,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)4553*c83a76b0SSuyog Pawar void hme_coarse_process_frm(
4554*c83a76b0SSuyog Pawar void *pv_me_ctxt,
4555*c83a76b0SSuyog Pawar hme_ref_map_t *ps_ref_map,
4556*c83a76b0SSuyog Pawar hme_frm_prms_t *ps_frm_prms,
4557*c83a76b0SSuyog Pawar void *pv_multi_thrd_ctxt,
4558*c83a76b0SSuyog Pawar WORD32 i4_ping_pong,
4559*c83a76b0SSuyog Pawar void **ppv_dep_mngr_hme_sync)
4560*c83a76b0SSuyog Pawar {
4561*c83a76b0SSuyog Pawar S16 i2_max;
4562*c83a76b0SSuyog Pawar S32 layer_id;
4563*c83a76b0SSuyog Pawar coarse_prms_t s_coarse_prms;
4564*c83a76b0SSuyog Pawar refine_prms_t s_refine_prms;
4565*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4566*c83a76b0SSuyog Pawar S32 lyr_job_type;
4567*c83a76b0SSuyog Pawar multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4568*c83a76b0SSuyog Pawar
4569*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4570*c83a76b0SSuyog Pawar /*************************************************************************/
4571*c83a76b0SSuyog Pawar /* Fire processing of all layers, starting with coarsest layer. */
4572*c83a76b0SSuyog Pawar /*************************************************************************/
4573*c83a76b0SSuyog Pawar layer_id = ps_ctxt->num_layers - 1;
4574*c83a76b0SSuyog Pawar i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
4575*c83a76b0SSuyog Pawar i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
4576*c83a76b0SSuyog Pawar s_coarse_prms.i4_layer_id = layer_id;
4577*c83a76b0SSuyog Pawar {
4578*c83a76b0SSuyog Pawar S32 log_start_step;
4579*c83a76b0SSuyog Pawar /* Based on Preset, set the starting step size for Refinement */
4580*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
4581*c83a76b0SSuyog Pawar {
4582*c83a76b0SSuyog Pawar log_start_step = 0;
4583*c83a76b0SSuyog Pawar }
4584*c83a76b0SSuyog Pawar else
4585*c83a76b0SSuyog Pawar {
4586*c83a76b0SSuyog Pawar log_start_step = 1;
4587*c83a76b0SSuyog Pawar }
4588*c83a76b0SSuyog Pawar
4589*c83a76b0SSuyog Pawar s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
4590*c83a76b0SSuyog Pawar s_coarse_prms.i4_start_step = 1 << log_start_step;
4591*c83a76b0SSuyog Pawar }
4592*c83a76b0SSuyog Pawar s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
4593*c83a76b0SSuyog Pawar s_coarse_prms.do_full_search = 1;
4594*c83a76b0SSuyog Pawar if(s_coarse_prms.do_full_search)
4595*c83a76b0SSuyog Pawar {
4596*c83a76b0SSuyog Pawar /* Set to 2 or 4 */
4597*c83a76b0SSuyog Pawar if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
4598*c83a76b0SSuyog Pawar s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
4599*c83a76b0SSuyog Pawar else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED)
4600*c83a76b0SSuyog Pawar s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
4601*c83a76b0SSuyog Pawar }
4602*c83a76b0SSuyog Pawar s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;
4603*c83a76b0SSuyog Pawar
4604*c83a76b0SSuyog Pawar /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME */
4605*c83a76b0SSuyog Pawar s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
4606*c83a76b0SSuyog Pawar s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
4607*c83a76b0SSuyog Pawar s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);
4608*c83a76b0SSuyog Pawar
4609*c83a76b0SSuyog Pawar hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);
4610*c83a76b0SSuyog Pawar
4611*c83a76b0SSuyog Pawar /* all refinement layer processed in the loop below */
4612*c83a76b0SSuyog Pawar layer_id--;
4613*c83a76b0SSuyog Pawar lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1;
4614*c83a76b0SSuyog Pawar
4615*c83a76b0SSuyog Pawar /*************************************************************************/
4616*c83a76b0SSuyog Pawar /* This loop will run for all refine layers (non- encode layers) */
4617*c83a76b0SSuyog Pawar /*************************************************************************/
4618*c83a76b0SSuyog Pawar while(layer_id > 0)
4619*c83a76b0SSuyog Pawar {
4620*c83a76b0SSuyog Pawar hme_set_refine_prms(
4621*c83a76b0SSuyog Pawar &s_refine_prms,
4622*c83a76b0SSuyog Pawar ps_ctxt->u1_encode[layer_id],
4623*c83a76b0SSuyog Pawar ps_ref_map->i4_num_ref,
4624*c83a76b0SSuyog Pawar layer_id,
4625*c83a76b0SSuyog Pawar ps_ctxt->num_layers,
4626*c83a76b0SSuyog Pawar ps_ctxt->num_layers_explicit_search,
4627*c83a76b0SSuyog Pawar ps_ctxt->s_init_prms.use_4x4,
4628*c83a76b0SSuyog Pawar ps_frm_prms,
4629*c83a76b0SSuyog Pawar NULL,
4630*c83a76b0SSuyog Pawar &ps_ctxt->s_init_prms.s_me_coding_tools);
4631*c83a76b0SSuyog Pawar
4632*c83a76b0SSuyog Pawar hme_refine_no_encode(
4633*c83a76b0SSuyog Pawar ps_ctxt,
4634*c83a76b0SSuyog Pawar &s_refine_prms,
4635*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt,
4636*c83a76b0SSuyog Pawar lyr_job_type,
4637*c83a76b0SSuyog Pawar i4_ping_pong,
4638*c83a76b0SSuyog Pawar ppv_dep_mngr_hme_sync);
4639*c83a76b0SSuyog Pawar
4640*c83a76b0SSuyog Pawar layer_id--;
4641*c83a76b0SSuyog Pawar lyr_job_type++;
4642*c83a76b0SSuyog Pawar }
4643*c83a76b0SSuyog Pawar }
4644*c83a76b0SSuyog Pawar /**
4645*c83a76b0SSuyog Pawar ********************************************************************************
4646*c83a76b0SSuyog Pawar * @fn hme_fill_neighbour_mvs
4647*c83a76b0SSuyog Pawar *
4648*c83a76b0SSuyog Pawar * @brief HME neighbour MV population function
4649*c83a76b0SSuyog Pawar *
4650*c83a76b0SSuyog Pawar * @param[in] pps_mv_grid : MV grid array pointer
4651*c83a76b0SSuyog Pawar *
4652*c83a76b0SSuyog Pawar * @param[in] i4_ctb_x : CTB pos X
4653*c83a76b0SSuyog Pawar
4654*c83a76b0SSuyog Pawar * @param[in] i4_ctb_y : CTB pos Y
4655*c83a76b0SSuyog Pawar *
4656*c83a76b0SSuyog Pawar * @remarks : Needs to be populated for proper implementation of cost fxn
4657*c83a76b0SSuyog Pawar *
4658*c83a76b0SSuyog Pawar * @return Scale factor in Q8 format
4659*c83a76b0SSuyog Pawar ********************************************************************************
4660*c83a76b0SSuyog Pawar */
hme_fill_neighbour_mvs(mv_grid_t ** pps_mv_grid,S32 i4_ctb_x,S32 i4_ctb_y,S32 i4_num_ref,void * pv_ctxt)4661*c83a76b0SSuyog Pawar void hme_fill_neighbour_mvs(
4662*c83a76b0SSuyog Pawar mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
4663*c83a76b0SSuyog Pawar {
4664*c83a76b0SSuyog Pawar /* TODO : Needs to be populated for proper implementation of cost fxn */
4665*c83a76b0SSuyog Pawar ARG_NOT_USED(pps_mv_grid);
4666*c83a76b0SSuyog Pawar ARG_NOT_USED(i4_ctb_x);
4667*c83a76b0SSuyog Pawar ARG_NOT_USED(i4_ctb_y);
4668*c83a76b0SSuyog Pawar ARG_NOT_USED(i4_num_ref);
4669*c83a76b0SSuyog Pawar ARG_NOT_USED(pv_ctxt);
4670*c83a76b0SSuyog Pawar }
4671*c83a76b0SSuyog Pawar
4672*c83a76b0SSuyog Pawar /**
4673*c83a76b0SSuyog Pawar *******************************************************************************
4674*c83a76b0SSuyog Pawar * @fn void hme_get_active_pocs_list(void *pv_me_ctxt,
4675*c83a76b0SSuyog Pawar * S32 *p_pocs_buffered_in_me)
4676*c83a76b0SSuyog Pawar *
4677*c83a76b0SSuyog Pawar * @brief Returns the list of active POCs in ME ctxt
4678*c83a76b0SSuyog Pawar *
4679*c83a76b0SSuyog Pawar * @param[in] pv_me_ctxt : handle to ME context
4680*c83a76b0SSuyog Pawar *
4681*c83a76b0SSuyog Pawar * @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4682*c83a76b0SSuyog Pawar * populates with pocs active
4683*c83a76b0SSuyog Pawar *
4684*c83a76b0SSuyog Pawar * @return void
4685*c83a76b0SSuyog Pawar *******************************************************************************
4686*c83a76b0SSuyog Pawar */
hme_get_active_pocs_list(void * pv_me_ctxt,S32 i4_num_me_frm_pllel)4687*c83a76b0SSuyog Pawar WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
4688*c83a76b0SSuyog Pawar {
4689*c83a76b0SSuyog Pawar me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
4690*c83a76b0SSuyog Pawar S32 i, count = 0;
4691*c83a76b0SSuyog Pawar
4692*c83a76b0SSuyog Pawar for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
4693*c83a76b0SSuyog Pawar {
4694*c83a76b0SSuyog Pawar S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
4695*c83a76b0SSuyog Pawar S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
4696*c83a76b0SSuyog Pawar
4697*c83a76b0SSuyog Pawar if((i4_is_free == 0) && (poc != INVALID_POC))
4698*c83a76b0SSuyog Pawar {
4699*c83a76b0SSuyog Pawar count++;
4700*c83a76b0SSuyog Pawar }
4701*c83a76b0SSuyog Pawar }
4702*c83a76b0SSuyog Pawar if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
4703*c83a76b0SSuyog Pawar {
4704*c83a76b0SSuyog Pawar return 1;
4705*c83a76b0SSuyog Pawar }
4706*c83a76b0SSuyog Pawar else
4707*c83a76b0SSuyog Pawar {
4708*c83a76b0SSuyog Pawar return 0;
4709*c83a76b0SSuyog Pawar }
4710*c83a76b0SSuyog Pawar }
4711*c83a76b0SSuyog Pawar
4712*c83a76b0SSuyog Pawar /**
4713*c83a76b0SSuyog Pawar *******************************************************************************
4714*c83a76b0SSuyog Pawar * @fn void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
4715*c83a76b0SSuyog Pawar * S32 *p_pocs_buffered_in_me)
4716*c83a76b0SSuyog Pawar *
4717*c83a76b0SSuyog Pawar * @brief Returns the list of active POCs in ME ctxt
4718*c83a76b0SSuyog Pawar *
4719*c83a76b0SSuyog Pawar * @param[in] pv_me_ctxt : handle to ME context
4720*c83a76b0SSuyog Pawar *
4721*c83a76b0SSuyog Pawar * @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4722*c83a76b0SSuyog Pawar * populates with pocs active
4723*c83a76b0SSuyog Pawar *
4724*c83a76b0SSuyog Pawar * @return void
4725*c83a76b0SSuyog Pawar *******************************************************************************
4726*c83a76b0SSuyog Pawar */
hme_coarse_get_active_pocs_list(void * pv_me_ctxt,S32 * p_pocs_buffered_in_me)4727*c83a76b0SSuyog Pawar void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
4728*c83a76b0SSuyog Pawar {
4729*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4730*c83a76b0SSuyog Pawar S32 i, count = 0;
4731*c83a76b0SSuyog Pawar
4732*c83a76b0SSuyog Pawar for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
4733*c83a76b0SSuyog Pawar {
4734*c83a76b0SSuyog Pawar S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc;
4735*c83a76b0SSuyog Pawar
4736*c83a76b0SSuyog Pawar if(poc != -1)
4737*c83a76b0SSuyog Pawar {
4738*c83a76b0SSuyog Pawar p_pocs_buffered_in_me[count] = poc;
4739*c83a76b0SSuyog Pawar count++;
4740*c83a76b0SSuyog Pawar }
4741*c83a76b0SSuyog Pawar }
4742*c83a76b0SSuyog Pawar p_pocs_buffered_in_me[count] = -1;
4743*c83a76b0SSuyog Pawar }
4744*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*  @fn     S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers,
*                               S32 encode)
*
*  @brief  Returns the block size used for a given HME layer
*
*  @param[in] use_4x4 : not consulted by this function
*
*  @param[in] layer_id : id of the layer being queried
*
*  @param[in] n_layers : total number of layers; layer (n_layers - 1) is the
*                  coarsest one
*
*  @param[in] encode : non-zero if the layer is an encode layer
*
*  @return 4 for the coarsest layer (this check takes precedence), 16 for
*          layer 0 or any encode layer, 8 for the remaining intermediate
*          non-encode layers
*******************************************************************************
*/
S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
{
    const S32 is_coarsest_layer = (layer_id == (n_layers - 1));
    const S32 is_lowest_or_encode_layer = ((0 == layer_id) || (0 != encode));

    /* Coarsest layer uses 4x4 blks, even if it is also layer 0 / encode */
    if(is_coarsest_layer)
    {
        return 4;
    }

    /* Lowermost layer/encode layer uses 16x16, other layers use 8x8 */
    return is_lowest_or_encode_layer ? 16 : 8;
}
4756