xref: /aosp_15_r20/external/libhevc/encoder/hme_refine.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_refine.c
23 *
24 * @brief
25 *    Contains the implementation of the refinement layer searches and related
26 *    functionality like CU merge.
27 *
28 * @author
29 *    Ittiam
30 *
31 *
32 * List of Functions
33 *
34 *
35 ******************************************************************************
36 */
37 
38 /*****************************************************************************/
39 /* File Includes                                                             */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49 
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54 
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58 
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75 
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_fullpel.h"
103 #include "hme_subpel.h"
104 #include "hme_refine.h"
105 #include "hme_err_compute.h"
106 #include "hme_common_utils.h"
107 #include "hme_search_algo.h"
108 #include "ihevce_stasino_helpers.h"
109 #include "ihevce_common_utils.h"
110 
111 /*****************************************************************************/
112 /* Globals                                                                   */
113 /*****************************************************************************/
114 
115 /* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb. Indexed as [row][column] of a 4x4 grid; each entry is 4 times the z-scan position of that cell (0, 4, 8, ... 60) */
116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117     { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118 };
119 
120 /*****************************************************************************/
121 /* Extern Function declaration                                               */
122 /*****************************************************************************/
/* Declared locally with extern linkage; the definition is not part of this  */
/* section of the file. Takes a CTB start position, the picture dimensions    */
/* and the ME frame context, and returns a pointer to CTB boundary attributes */
123 extern ctb_boundary_attrs_t *
124     get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125 
/* Function-pointer type: receives a search node, the current and coarse      */
/* layer contexts, a position (x, y), a reference id and a result id.         */
/* NOTE(review): presumably fills ps_search_node with a colocated candidate   */
/* projected from the coarse layer - confirm against the implementations.     */
126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127     search_node_t *ps_search_node,
128     layer_ctxt_t *ps_curr_layer,
129     layer_ctxt_t *ps_coarse_layer,
130     S32 i4_pos_x,
131     S32 i4_pos_y,
132     S08 i1_ref_id,
133     S32 i4_result_id);
134 
/* Function-pointer type with the same leading arguments as                   */
/* PF_HME_PROJECT_COLOC_CANDT_FXN, but instead of a single reference id it    */
/* takes the number of active L0 references, a prediction direction and a     */
/* default reference id.                                                      */
/* NOTE(review): presumably the L0 ME variant of the projection - confirm.    */
135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136     search_node_t *ps_search_node,
137     layer_ctxt_t *ps_curr_layer,
138     layer_ctxt_t *ps_coarse_layer,
139     S32 i4_pos_x,
140     S32 i4_pos_y,
141     S32 i4_num_act_ref_l0,
142     U08 u1_pred_dir,
143     U08 u1_default_ref_id,
144     S32 i4_result_id);
145 
146 /*****************************************************************************/
147 /* Function Definitions                                                      */
148 /*****************************************************************************/
149 
/**
********************************************************************************
*  @fn     void ihevce_no_wt_copy(coarse_me_ctxt_t *ps_ctxt,
*                                 layer_ctxt_t *ps_curr_layer,
*                                 pu_t *ps_pu,
*                                 UWORD8 *pu1_temp_pred,
*                                 WORD32 temp_stride,
*                                 WORD32 blk_x,
*                                 WORD32 blk_y)
*
*  @brief  Copies, without any weighting, a temp_stride x temp_stride block of
*          samples from the input plane of the reference selected by the PU
*          into pu1_temp_pred. The source position is (blk_x * 8, blk_y * 8)
*          displaced by the PU's L0 or L1 MV, which is applied directly as a
*          sample offset.
*
*  @param[in] ps_ctxt : not referenced by this function
*
*  @param[in] ps_curr_layer : supplies the input plane list (ppu1_list_inp)
*              and its stride (i4_inp_stride)
*
*  @param[in] ps_pu : PU whose pred mode (must be PRED_L0 or PRED_L1), MV and
*              reference index select the source block
*
*  @param[out] pu1_temp_pred : destination buffer
*
*  @param[in] temp_stride : destination stride; also used as the width and
*              the height of the copied block
*
*  @param[in] blk_x : horizontal block position, in units of 8 samples
*
*  @param[in] blk_y : vertical block position, in units of 8 samples
*
*  @return None
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_src;
    UWORD8 *pu1_dst = pu1_temp_pred;
    WORD32 i4_src_stride, i4_src_offset;
    WORD32 i4_mvx, i4_mvy;
    WORD32 x, y, i4_sample;
    WORD8 i1_ref_idx;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Only the MV and the reference index differ between the two directions */
    if(PRED_L0 == ps_pu->b2_pred_mode)
    {
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
        i4_mvx = ps_pu->mv.s_l0_mv.i2_mvx;
        i4_mvy = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
        i4_mvx = ps_pu->mv.s_l1_mv.i2_mvx;
        i4_mvy = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
    i4_src_stride = ps_curr_layer->i4_inp_stride;

    /* Block origin in samples (8 samples per block unit) displaced by the MV */
    i4_src_offset = ((blk_y << 3) + i4_mvy) * i4_src_stride;
    i4_src_offset += (blk_x << 3) + i4_mvx;

    pu1_src += i4_src_offset;

    y = 0;
    while(y < temp_stride)
    {
        x = 0;
        while(x < temp_stride)
        {
            i4_sample = pu1_src[x];
            pu1_dst[x] = CLIP_U8(i4_sample);
            x++;
        }

        pu1_src += i4_src_stride;
        pu1_dst += temp_stride;
        y++;
    }
}
218 
/**
********************************************************************************
*  @fn     WORD32 hme_add_clustered_mvs_as_merge_cands(
*              cluster_data_t *ps_cluster_base,
*              search_node_t *ps_merge_cand,
*              range_prms_t **pps_range_prms,
*              U08 *pu1_refid_to_pred_dir_list,
*              WORD32 i4_num_clusters,
*              U08 u1_pred_dir)
*
*  @brief  Walks the given clusters and, for every cluster whose reference
*          belongs to the requested prediction direction, appends each of its
*          MVs to the merge candidate list. Every MV is clipped to the MV
*          range of its reference and is kept only when no identical
*          (mvx, mvy, ref id) entry is already in the list.
*
*  @param[in] ps_cluster_base : first of i4_num_clusters cluster entries
*
*  @param[out] ps_merge_cand : candidate list, filled from index 0. At most
*              MAX_MERGE_CANDTS entries are written; the buffer is assumed to
*              be at least that large (it is in hme_pick_eval_merge_candts)
*
*  @param[in] pps_range_prms : MV range per reference id, used for clipping
*
*  @param[in] pu1_refid_to_pred_dir_list : per reference id flag. A cluster is
*              used when u1_pred_dir equals the logical NOT of the flag of its
*              reference (the visible caller passes the pu1_is_past array)
*
*  @param[in] i4_num_clusters : number of entries in ps_cluster_base
*
*  @param[in] u1_pred_dir : prediction direction for which candidates are
*              collected
*
*  @return Number of candidates written to ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        /* Skip clusters whose reference is in the other prediction direction */
        if(u1_pred_dir != !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            continue;
        }

        i4_num_mvs_in_cluster = ps_data->num_mvs;

        for(j = 0; j < i4_num_mvs_in_cluster; j++)
        {
            search_node_t *ps_new_cand;

            /* The slot at i4_num_cands_added is used as scratch below, so */
            /* stop before it would lie outside the candidate buffer       */
            if(i4_num_cands_added >= MAX_MERGE_CANDTS)
            {
                return i4_num_cands_added;
            }

            ps_new_cand = &ps_merge_cand[i4_num_cands_added];

            ps_new_cand->s_mv.i2_mvx = ps_data->as_mv[j].mvx;
            ps_new_cand->s_mv.i2_mvy = ps_data->as_mv[j].mvy;
            ps_new_cand->i1_ref_idx = ps_data->ref_id;

            CLIP_MV_WITHIN_RANGE(
                ps_new_cand->s_mv.i2_mvx,
                ps_new_cand->s_mv.i2_mvy,
                pps_range_prms[ps_data->ref_id],
                0,
                0,
                0);

            /* Stored candidates hold clipped MVs, hence the duplicate check */
            /* has to use the clipped MV of the new candidate as well        */
            for(k = 0; k < i4_num_cands_added; k++)
            {
                if((ps_merge_cand[k].s_mv.i2_mvx == ps_new_cand->s_mv.i2_mvx) &&
                   (ps_merge_cand[k].s_mv.i2_mvy == ps_new_cand->s_mv.i2_mvy) &&
                   (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
                {
                    break;
                }
            }

            /* Commit the scratch slot only when no duplicate was found */
            if(k == i4_num_cands_added)
            {
                i4_num_cands_added++;
            }
        }
    }

    return i4_num_cands_added;
}
273 
hme_add_me_best_as_merge_cands(search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,search_node_t * ps_merge_cand,range_prms_t ** pps_range_prms,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,BLK_SIZE_T e_blk_size,ME_QUALITY_PRESETS_T e_quality_preset,S32 i4_num_cands_added,U08 u1_pred_dir)274 static WORD32 hme_add_me_best_as_merge_cands(
275     search_results_t **pps_child_data_array,
276     inter_cu_results_t *ps_8x8cu_results,
277     search_node_t *ps_merge_cand,
278     range_prms_t **pps_range_prms,
279     U08 *pu1_refid_to_pred_dir_list,
280     S08 *pi1_past_list,
281     S08 *pi1_future_list,
282     BLK_SIZE_T e_blk_size,
283     ME_QUALITY_PRESETS_T e_quality_preset,
284     S32 i4_num_cands_added,
285     U08 u1_pred_dir)
286 {
287     WORD32 i, j, k;
288     WORD32 i4_max_cands_to_add;
289 
290     WORD32 i4_result_id = 0;
291 
292     ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293     ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294     ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295     ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296 
297     switch(e_quality_preset)
298     {
299     case ME_PRISTINE_QUALITY:
300     {
301         i4_max_cands_to_add = MAX_MERGE_CANDTS;
302 
303         break;
304     }
305     case ME_HIGH_QUALITY:
306     {
307         /* All 4 children are split and each grandchild contributes an MV */
308         /* and 2 best results per grandchild */
309         i4_max_cands_to_add = 4 * 4 * 2;
310 
311         break;
312     }
313     case ME_MEDIUM_SPEED:
314     {
315         i4_max_cands_to_add = 4 * 2 * 2;
316 
317         break;
318     }
319     case ME_HIGH_SPEED:
320     case ME_XTREME_SPEED:
321     case ME_XTREME_SPEED_25:
322     {
323         i4_max_cands_to_add = 4 * 2 * 1;
324 
325         break;
326     }
327     }
328 
329     while(i4_result_id < 4)
330     {
331         for(i = 0; i < 4; i++)
332         {
333             inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334             inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335 
336             if(!pps_child_data_array[i]->u1_split_flag)
337             {
338                 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339 
340                 if(ps_child_data->u1_num_best_results <= i4_result_id)
341                 {
342                     continue;
343                 }
344 
345                 if(ps_data->as_pu_results->pu.b1_intra_flag)
346                 {
347                     continue;
348                 }
349 
350                 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351                 {
352                     mv_t *ps_mv;
353 
354                     S08 i1_ref_idx;
355 
356                     pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357 
358                     if(u1_pred_dir !=
359                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360                     {
361                         continue;
362                     }
363 
364                     if(u1_pred_dir)
365                     {
366                         ps_mv = &ps_pu->mv.s_l1_mv;
367                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368                     }
369                     else
370                     {
371                         ps_mv = &ps_pu->mv.s_l0_mv;
372                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373                     }
374 
375                     if(-1 == i1_ref_idx)
376                     {
377                         continue;
378                     }
379 
380                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383 
384                     CLIP_MV_WITHIN_RANGE(
385                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387                         pps_range_prms[i1_ref_idx],
388                         0,
389                         0,
390                         0);
391 
392                     for(k = 0; k < i4_num_cands_added; k++)
393                     {
394                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397                         {
398                             break;
399                         }
400                     }
401 
402                     if(k == i4_num_cands_added)
403                     {
404                         i4_num_cands_added++;
405 
406                         if(i4_max_cands_to_add <= i4_num_cands_added)
407                         {
408                             return i4_num_cands_added;
409                         }
410                     }
411                 }
412             }
413             else
414             {
415                 for(j = 0; j < 4; j++)
416                 {
417                     mv_t *ps_mv;
418 
419                     S08 i1_ref_idx;
420 
421                     part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422                     pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423 
424                     ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425 
426                     if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427                     {
428                         continue;
429                     }
430 
431                     if(ps_data->as_pu_results->pu.b1_intra_flag)
432                     {
433                         continue;
434                     }
435 
436                     if(u1_pred_dir !=
437                        ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438                     {
439                         continue;
440                     }
441 
442                     if(u1_pred_dir)
443                     {
444                         ps_mv = &ps_pu->mv.s_l1_mv;
445                         i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446                     }
447                     else
448                     {
449                         ps_mv = &ps_pu->mv.s_l0_mv;
450                         i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451                     }
452 
453                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454                     ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455                     ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456 
457                     CLIP_MV_WITHIN_RANGE(
458                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459                         ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460                         pps_range_prms[i1_ref_idx],
461                         0,
462                         0,
463                         0);
464 
465                     for(k = 0; k < i4_num_cands_added; k++)
466                     {
467                         if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468                            (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469                            (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470                         {
471                             break;
472                         }
473                     }
474 
475                     if(k == i4_num_cands_added)
476                     {
477                         i4_num_cands_added++;
478 
479                         if(i4_max_cands_to_add <= i4_num_cands_added)
480                         {
481                             return i4_num_cands_added;
482                         }
483                     }
484                 }
485             }
486         }
487 
488         i4_result_id++;
489     }
490 
491     return i4_num_cands_added;
492 }
493 
hme_add_cands_for_merge_eval(ctb_cluster_info_t * ps_cluster_info,search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,range_prms_t ** pps_range_prms,search_node_t * ps_merge_cand,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,ME_QUALITY_PRESETS_T e_quality_preset,BLK_SIZE_T e_blk_size,U08 u1_pred_dir,U08 u1_blk_id)494 WORD32 hme_add_cands_for_merge_eval(
495     ctb_cluster_info_t *ps_cluster_info,
496     search_results_t **pps_child_data_array,
497     inter_cu_results_t *ps_8x8cu_results,
498     range_prms_t **pps_range_prms,
499     search_node_t *ps_merge_cand,
500     U08 *pu1_refid_to_pred_dir_list,
501     S08 *pi1_past_list,
502     S08 *pi1_future_list,
503     ME_QUALITY_PRESETS_T e_quality_preset,
504     BLK_SIZE_T e_blk_size,
505     U08 u1_pred_dir,
506     U08 u1_blk_id)
507 {
508     WORD32 i4_num_cands_added = 0;
509 
510     if(ME_PRISTINE_QUALITY == e_quality_preset)
511     {
512         cluster_data_t *ps_cluster_primo;
513 
514         WORD32 i4_num_clusters;
515 
516         if(BLK_32x32 == e_blk_size)
517         {
518             ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519             i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520         }
521         else
522         {
523             ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524             i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525         }
526 
527         i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528             ps_cluster_primo,
529             ps_merge_cand,
530             pps_range_prms,
531             pu1_refid_to_pred_dir_list,
532             i4_num_clusters,
533             u1_pred_dir);
534     }
535 
536     i4_num_cands_added = hme_add_me_best_as_merge_cands(
537         pps_child_data_array,
538         ps_8x8cu_results,
539         ps_merge_cand,
540         pps_range_prms,
541         pu1_refid_to_pred_dir_list,
542         pi1_past_list,
543         pi1_future_list,
544         e_blk_size,
545         e_quality_preset,
546         i4_num_cands_added,
547         u1_pred_dir);
548 
549     return i4_num_cands_added;
550 }
551 
552 /**
553 ********************************************************************************
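553 *  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
554 *              hme_subpel_prms_t *ps_subpel_prms, S32 i4_search_idx,
555 *              S32 i4_best_part_type, S32 i4_is_vert, wgt_pred_ctxt_t *ps_wt_inp_prms,
556 *              S32 i4_frm_qstep, ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
*              ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)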
558 *
559 *  @brief  Given a target partition orientation in the merged CU, and the
560 *          partition type of most likely partition this fxn picks up
561 *          candidates from the 4 constituent CUs and does refinement search
562 *          to identify best results for the merge CU across active partitions
563 *
564 *  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565 *                  these params, the search result structure is also derived and
566 *                 updated during the search
567 *
567 *  @param[in] i4_search_idx : ID of the buffer within the search results to update.
568 *               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570 *
571 *  @param[in] i4_best_part_type : partition type of potential partition in the
572 *              merged CU, -1 if the merge process has not yet been able to
573 *              determine this.
574 *
575 *  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576 *             orientation or horizontal orientation.
577 *
578 *  @return Number of merge candidates
579 ********************************************************************************
580 */
hme_pick_eval_merge_candts(hme_merge_prms_t * ps_merge_prms,hme_subpel_prms_t * ps_subpel_prms,S32 i4_search_idx,S32 i4_best_part_type,S32 i4_is_vert,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 i4_frm_qstep,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)581 WORD32 hme_pick_eval_merge_candts(
582     hme_merge_prms_t *ps_merge_prms,
583     hme_subpel_prms_t *ps_subpel_prms,
584     S32 i4_search_idx,
585     S32 i4_best_part_type,
586     S32 i4_is_vert,
587     wgt_pred_ctxt_t *ps_wt_inp_prms,
588     S32 i4_frm_qstep,
589     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591 {
592     S32 x_off, y_off;
593     search_node_t *ps_search_node;
594     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595     S32 i4_num_valid_parts;
596     pred_ctxt_t *ps_pred_ctxt;
597 
598     search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599     S32 num_unique_nodes_cu_merge = 0;
600 
601     search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602     CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603     S32 i4_part_mask = ps_search_results->i4_part_mask;
604 
605     search_results_t *aps_child_results[4];
606     layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607 
608     S32 i4_ref_stride, i, j;
609     result_upd_prms_t s_result_prms;
610 
611     BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612     S32 i4_offset;
613 
614     /*************************************************************************/
615     /* Function pointer for SAD/SATD, array and prms structure to pass to    */
616     /* This function                                                         */
617     /*************************************************************************/
618     PF_SAD_FXN_T pf_err_compute;
619     S32 ai4_sad_grid[9][17];
620     err_prms_t s_err_prms;
621 
622     /*************************************************************************/
623     /* Allowed MV RANGE                                                      */
624     /*************************************************************************/
625     range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626     PF_INTERP_FXN_T pf_qpel_interp;
627     PF_MV_COST_FXN pf_mv_cost_compute;
628     WORD32 pred_lx;
629     U08 *apu1_hpel_ref[4];
630 
631     interp_prms_t s_interp_prms;
632     S32 i4_interp_buf_id;
633 
634     S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635     S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636 
637     /* Sanity checks */
638     ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639 
640     s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641 
642     /* Initialize all the ptrs to child CUs for merge decision */
643     aps_child_results[0] = ps_merge_prms->ps_results_tl;
644     aps_child_results[1] = ps_merge_prms->ps_results_tr;
645     aps_child_results[2] = ps_merge_prms->ps_results_bl;
646     aps_child_results[3] = ps_merge_prms->ps_results_br;
647 
648     num_unique_nodes_cu_merge = 0;
649 
650     pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651 
652     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653     {
654         num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655             ps_merge_prms->ps_cluster_info,
656             aps_child_results,
657             ps_merge_prms->ps_8x8_cu_results,
658             pps_range_prms,
659             as_merge_unique_node,
660             ps_search_results->pu1_is_past,
661             ps_merge_prms->pi1_past_list,
662             ps_merge_prms->pi1_future_list,
663             ps_merge_prms->e_quality_preset,
664             e_blk_size,
665             i4_search_idx,
666             (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667                 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668     }
669     else
670     {
671         /*************************************************************************/
672         /* Populate the list of unique search nodes in the child CUs for merge   */
673         /* evaluation                                                            */
674         /*************************************************************************/
675         for(i = 0; i < 4; i++)
676         {
677             search_node_t s_search_node;
678 
679             PART_TYPE_T e_part_type;
680             PART_ID_T e_part_id;
681 
682             WORD32 part_num;
683 
684             search_results_t *ps_child = aps_child_results[i];
685 
686             if(ps_child->ps_cu_results->u1_num_best_results)
687             {
688                 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689                      (1 == ps_child->ps_cu_results->u1_num_best_results)))
690                 {
691                     e_part_type =
692                         (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693 
694                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695 
696                     /* Insert mvs of NxN partitions. */
697                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698                         part_num++)
699                     {
700                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701 
702                         if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703                         {
704                             s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706                             {
707                                 CLIP_MV_WITHIN_RANGE(
708                                     s_search_node.s_mv.i2_mvx,
709                                     s_search_node.s_mv.i2_mvy,
710                                     pps_range_prms[s_search_node.i1_ref_idx],
711                                     0,
712                                     0,
713                                     0);
714 
715                                 INSERT_NEW_NODE_NOMAP(
716                                     as_merge_unique_node,
717                                     num_unique_nodes_cu_merge,
718                                     s_search_node,
719                                     1);
720                             }
721                         }
722                     }
723                 }
724             }
725             else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726                            .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727                       (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728                                 .ps_cu_results->u1_num_best_results)))
729             {
730                 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731 
732                 for(j = 0; j < 4; j++)
733                 {
734                     e_part_type = (PART_TYPE_T)ps_results_root[j]
735                                       .ps_cu_results->ps_best_results[0]
736                                       .u1_part_type;
737 
738                     ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739 
740                     /* Insert mvs of NxN partitions. */
741                     for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742                         part_num++)
743                     {
744                         e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745 
746                         if((ps_results_root[j]
747                                 .aps_part_results[i4_search_idx][e_part_id]
748                                 ->i1_ref_idx != -1) &&
749                            (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750                                  .b1_intra_flag))
751                         {
752                             s_search_node =
753                                 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754                             if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755                             {
756                                 CLIP_MV_WITHIN_RANGE(
757                                     s_search_node.s_mv.i2_mvx,
758                                     s_search_node.s_mv.i2_mvy,
759                                     pps_range_prms[s_search_node.i1_ref_idx],
760                                     0,
761                                     0,
762                                     0);
763 
764                                 INSERT_NEW_NODE_NOMAP(
765                                     as_merge_unique_node,
766                                     num_unique_nodes_cu_merge,
767                                     s_search_node,
768                                     1);
769                             }
770                         }
771                     }
772                 }
773             }
774         }
775     }
776 
777     if(0 == num_unique_nodes_cu_merge)
778     {
779         return 0;
780     }
781 
782     /*************************************************************************/
783     /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784     /* fixed through this subpel refinement for this partition.              */
785     /* Note, we do not enable grid sads since one pt is evaluated per node   */
786     /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
787     /*************************************************************************/
788     i4_part_mask = ps_search_results->i4_part_mask;
789 
790     /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791     if(ps_subpel_prms->i4_use_satd)
792     {
793         if(BLK_32x32 == e_blk_size)
794         {
795             pf_err_compute = hme_evalsatd_pt_pu_32x32;
796         }
797         else
798         {
799             pf_err_compute = hme_evalsatd_pt_pu_64x64;
800         }
801     }
802     else
803     {
804         pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805     }
806 
807     i4_ref_stride = ps_curr_layer->i4_rec_stride;
808 
809     x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810     y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811     i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812 
813     /*************************************************************************/
814     /* This array stores the ids of the partitions whose                     */
815     /* SADs are updated. Since the partitions whose SADs are updated may not */
816     /* be in contiguous order, we supply another level of indirection.       */
817     /*************************************************************************/
818     i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819 
820     /* Initialize result params used for partition update */
821     s_result_prms.pf_mv_cost_compute = NULL;
822     s_result_prms.ps_search_results = ps_search_results;
823     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824     s_result_prms.i1_ref_idx = i4_search_idx;
825     s_result_prms.i4_part_mask = i4_part_mask;
826     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827     s_result_prms.i4_grid_mask = 1;
828 
829     /* One time Initialization of error params used for SAD/SATD compute */
830     s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831     s_err_prms.i4_ref_stride = i4_ref_stride;
832     s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833     s_err_prms.i4_grid_mask = 1;
834     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835     s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836     s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837     s_err_prms.i4_step = 1;
838 
839     /*************************************************************************/
840     /* One time preparation of non changing interpolation params.            */
841     /*************************************************************************/
842     s_interp_prms.i4_ref_stride = i4_ref_stride;
843     s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844     s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845     s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846     s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847     i4_interp_buf_id = 0;
848 
849     pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850 
851     /***************************************************************************/
852     /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853     /* results                                                                 */
854     /***************************************************************************/
855     for(i = 0; i < num_unique_nodes_cu_merge; i++)
856     {
857         WORD8 i1_ref_idx;
858         ps_search_node = &as_merge_unique_node[i];
859 
860         /*********************************************************************/
861         /* Compute the base pointer for input, interpolated buffers          */
862         /* The base pointers point as follows:                               */
863         /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
864         /* To these, we need to add the offset of the current node           */
865         /*********************************************************************/
866         i1_ref_idx = ps_search_node->i1_ref_idx;
867         apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868         apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869         apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870         apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871 
872         s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873 
874         pf_qpel_interp(
875             &s_interp_prms,
876             ps_search_node->s_mv.i2_mvx,
877             ps_search_node->s_mv.i2_mvy,
878             i4_interp_buf_id);
879 
880         pred_lx = i4_search_idx;
881         ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882 
883         s_result_prms.u1_pred_lx = pred_lx;
884         s_result_prms.ps_search_node_base = ps_search_node;
885         s_err_prms.pu1_inp =
886             ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887         s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888         s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889 
890         /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891         Here the tu recursion logic is restricted with the size of the PU*/
892         pf_err_compute(&s_err_prms);
893 
894         if(ps_subpel_prms->u1_is_cu_noisy &&
895            ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896         {
897             ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898                 s_err_prms.pu1_ref,
899                 s_err_prms.i4_ref_stride,
900                 ai4_valid_part_ids,
901                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902                 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903                 s_err_prms.pi4_sad_grid,
904                 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905                 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906                 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907                 i4_num_valid_parts,
908                 ps_wt_inp_prms->wpred_log_wdc,
909                 (BLK_32x32 == e_blk_size) ? 32 : 64);
910         }
911 
912         /* Update the mv's */
913         s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914         s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915 
916         /* Update best results */
917         hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918     }
919 
920     /************************************************************************/
921     /* Update mv cost and total cost for each valid partition in the CU     */
922     /************************************************************************/
923     for(i = 0; i < TOT_NUM_PARTS; i++)
924     {
925         if(i4_part_mask & (1 << i))
926         {
927             WORD32 j;
928             WORD32 i4_mv_cost;
929 
930             ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931 
932             for(j = 0;
933                 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934                 j++)
935             {
936                 if(ps_search_node->i1_ref_idx != -1)
937                 {
938                     pred_lx = i4_search_idx;
939                     ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940 
941                     /* Prediction context should now deal with qpel units */
942                     HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943 
944                     ps_search_node->u1_subpel_done = 1;
945                     ps_search_node->u1_is_avail = 1;
946 
947                     i4_mv_cost =
948                         pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949 
950                     ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951                     ps_search_node->i4_mv_cost = i4_mv_cost;
952 
953                     ps_search_node++;
954                 }
955             }
956         }
957     }
958 
959     return num_unique_nodes_cu_merge;
960 }
961 
962 #define CU_MERGE_MAX_INTRA_PARTS 4
963 
964 /**
965 ********************************************************************************
966 *  @fn     hme_try_merge_high_speed
967 *
968 *  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969 entity or with partititons for high speed preset
970 *
971 *  @param[in,out]  hme_merge_prms_t: Params for CU merge
972 *
973 *  @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974 ********************************************************************************
975 */
hme_try_merge_high_speed(me_ctxt_t * ps_thrd_ctxt,me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,hme_subpel_prms_t * ps_subpel_prms,hme_merge_prms_t * ps_merge_prms,inter_pu_results_t * ps_pu_results,pu_result_t * ps_pu_result)976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
977     me_ctxt_t *ps_thrd_ctxt,
978     me_frm_ctxt_t *ps_ctxt,
979     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980     hme_subpel_prms_t *ps_subpel_prms,
981     hme_merge_prms_t *ps_merge_prms,
982     inter_pu_results_t *ps_pu_results,
983     pu_result_t *ps_pu_result)
984 {
985     search_results_t *ps_results_tl, *ps_results_tr;
986     search_results_t *ps_results_bl, *ps_results_br;
987 
988     S32 i;
989     S32 i4_search_idx;
990     S32 i4_cost_parent;
991     S32 intra_cu_size;
992     ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993 
994     search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995     wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996 
997     S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998     S32 is_vert = 0, i4_best_part_type = -1;
999     S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000     S32 i4_cost_children = 0;
1001     S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002     S32 i4_num_merge_cands_evaluated = 0;
1003     U08 u1_x_off = ps_results_merge->u1_x_off;
1004     U08 u1_y_off = ps_results_merge->u1_y_off;
1005     S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006 
1007     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011     ps_results_tl = ps_merge_prms->ps_results_tl;
1012     ps_results_tr = ps_merge_prms->ps_results_tr;
1013     ps_results_bl = ps_merge_prms->ps_results_bl;
1014     ps_results_br = ps_merge_prms->ps_results_br;
1015 
1016     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017     {
1018         i4_part_mask &= ~ENABLE_AMP;
1019     }
1020 
1021     if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022     {
1023         i4_part_mask &= ~ENABLE_AMP;
1024 
1025         i4_part_mask &= ~ENABLE_SMP;
1026     }
1027 
1028     ps_merge_prms->i4_num_pred_dir_actual = 0;
1029 
1030     /*************************************************************************/
1031     /* The logic for High speed CU merge goes as follows:                    */
1032     /*                                                                       */
1033     /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034     /*    exceed 7                                                           */
1035     /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036     /*    are identical                                                      */
1037     /* 3. Find the all unique mvs of best partitions of children CUs and     */
1038     /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
1039     /*    best parent cost is lower than sum of the best children costs      */
1040     /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041     /*                                                                       */
1042     /*************************************************************************/
1043 
1044     /* Count the number of best partitions in child CUs, early exit if > 7 */
1045     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046        (CU_32x32 == ps_results_merge->e_cu_size))
1047     {
1048         S32 num_parts_in_32x32 = 0;
1049         WORD32 i4_part_type;
1050 
1051         if(ps_results_tl->u1_split_flag)
1052         {
1053             num_parts_in_32x32 += 4;
1054 
1055 #define COST_INTERCHANGE 0
1056             i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057                                ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058                                ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059                                ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060         }
1061         else
1062         {
1063             i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065             i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066         }
1067 
1068         if(ps_results_tr->u1_split_flag)
1069         {
1070             num_parts_in_32x32 += 4;
1071 
1072             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073                                 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074                                 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075                                 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076         }
1077         else
1078         {
1079             i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081             i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082         }
1083 
1084         if(ps_results_bl->u1_split_flag)
1085         {
1086             num_parts_in_32x32 += 4;
1087 
1088             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089                                 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090                                 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091                                 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092         }
1093         else
1094         {
1095             i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097             i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098         }
1099 
1100         if(ps_results_br->u1_split_flag)
1101         {
1102             num_parts_in_32x32 += 4;
1103 
1104             i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105                                 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106                                 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107                                 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108         }
1109         else
1110         {
1111             i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112             num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113             i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114         }
1115 
1116         if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117         {
1118             return CU_SPLIT;
1119         }
1120 
1121         if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122            (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123         {
1124             return CU_SPLIT;
1125         }
1126     }
1127 
1128     /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
1129     /* Note : Each intra part represent a NxN unit of the children CUs          */
1130     /* This is essentially 1/16th of the CUsize under consideration for merge   */
1131     if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132     {
1133         if(CU_64x64 == ps_results_merge->e_cu_size)
1134         {
1135             i4_intra_parts =
1136                 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137                     ? 16
1138                     : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139         }
1140         else
1141         {
1142             switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143             {
1144             case 0:
1145             {
1146                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147                                        ->u1_inter_eval_enable)
1148                                      ? 16
1149                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150                                             ->ps_child_node_tl->u1_intra_eval_enable);
1151 
1152                 break;
1153             }
1154             case 1:
1155             {
1156                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157                                        ->u1_inter_eval_enable)
1158                                      ? 16
1159                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160                                             ->ps_child_node_tr->u1_intra_eval_enable);
1161 
1162                 break;
1163             }
1164             case 2:
1165             {
1166                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167                                        ->u1_inter_eval_enable)
1168                                      ? 16
1169                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170                                             ->ps_child_node_bl->u1_intra_eval_enable);
1171 
1172                 break;
1173             }
1174             case 3:
1175             {
1176                 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177                                        ->u1_inter_eval_enable)
1178                                      ? 16
1179                                      : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180                                             ->ps_child_node_br->u1_intra_eval_enable);
1181 
1182                 break;
1183             }
1184             }
1185         }
1186     }
1187     else
1188     {
1189         for(i = 0; i < 4; i++)
1190         {
1191             search_results_t *ps_results =
1192                 (i == 0) ? ps_results_tl
1193                          : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194 
1195             part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196 
1197             if(ps_results->u1_split_flag)
1198             {
1199                 U08 u1_x_off = ps_results->u1_x_off;
1200                 U08 u1_y_off = ps_results->u1_y_off;
1201                 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202                                       2;
1203 
1204                 /* Special case to handle 8x8 CUs when 16x16 is split */
1205                 ASSERT(ps_results->e_cu_size == CU_16x16);
1206 
1207                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208 
1209                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210                     i4_intra_parts += 1;
1211 
1212                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213 
1214                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215                     i4_intra_parts += 1;
1216 
1217                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218 
1219                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220                     i4_intra_parts += 1;
1221 
1222                 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223 
1224                 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225                     i4_intra_parts += 1;
1226             }
1227             else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228             {
1229                 i4_intra_parts += 4;
1230             }
1231         }
1232     }
1233 
1234     /* Determine the max intra CU size indicated by IPE */
1235     intra_cu_size = CU_64x64;
1236     if(ps_cur_ipe_ctb->u1_split_flag)
1237     {
1238         intra_cu_size = CU_32x32;
1239         if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240         {
1241             intra_cu_size = CU_16x16;
1242         }
1243     }
1244 
1245     if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246         (intra_cu_size < ps_results_merge->e_cu_size) &&
1247         (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248        (i4_intra_parts == 16))
1249     {
1250         S32 i4_merge_outcome;
1251 
1252         i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253                                ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254                                   ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255                                : (!ps_cur_ipe_ctb->u1_split_flag);
1256 
1257         i4_merge_outcome = i4_merge_outcome ||
1258                            (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259 
1260         i4_merge_outcome = i4_merge_outcome &&
1261                            !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262 
1263         if(i4_merge_outcome)
1264         {
1265             inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266             part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267             pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268 
1269             ps_cu_results->u1_num_best_results = 1;
1270             ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271             ps_cu_results->u1_x_off = u1_x_off;
1272             ps_cu_results->u1_y_off = u1_y_off;
1273 
1274             ps_best_result->u1_part_type = PRT_2Nx2N;
1275             ps_best_result->ai4_tu_split_flag[0] = 0;
1276             ps_best_result->ai4_tu_split_flag[1] = 0;
1277             ps_best_result->ai4_tu_split_flag[2] = 0;
1278             ps_best_result->ai4_tu_split_flag[3] = 0;
1279             ps_best_result->i4_tot_cost =
1280                 (CU_64x64 == ps_results_merge->e_cu_size)
1281                     ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282                     : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283 
1284             ps_pu->b1_intra_flag = 1;
1285             ps_pu->b4_pos_x = u1_x_off >> 2;
1286             ps_pu->b4_pos_y = u1_y_off >> 2;
1287             ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288             ps_pu->b4_ht = ps_pu->b4_wd;
1289             ps_pu->mv.i1_l0_ref_idx = -1;
1290             ps_pu->mv.i1_l1_ref_idx = -1;
1291             ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292             ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293             ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294             ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295 
1296             return CU_MERGED;
1297         }
1298         else
1299         {
1300             return CU_SPLIT;
1301         }
1302     }
1303 
1304     if(i4_intra_parts)
1305     {
1306         i4_part_mask = ENABLE_2Nx2N;
1307     }
1308 
1309     ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310 
1311     hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312 
1313     ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314     ps_merge_prms->i4_num_pred_dir_actual = 0;
1315 
1316     if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317     {
1318         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319         S32 i4_num_valid_parts;
1320         S32 i4_sigma_array_offset;
1321 
1322         i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323 
1324         /*********************************************************************************************************************************************/
1325         /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
1326         /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327         /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
1328         /*********************************************************************************************************************************************/
1329         i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330                                 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331 
1332         for(i = 0; i < i4_num_valid_parts; i++)
1333         {
1334             S32 i4_part_id = ai4_valid_part_ids[i];
1335 
1336             hme_compute_final_sigma_of_pu_from_base_blocks(
1337                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339                 au8_final_src_sigmaX,
1340                 au8_final_src_sigmaXSquared,
1341                 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342                 4,
1343                 i4_part_id,
1344                 16);
1345         }
1346 
1347         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348         ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349     }
1350 
1351     /*************************************************************************/
1352     /* Loop through all ref idx and pick the merge candts and refine based   */
1353     /* on the active partitions. At this stage num ref will be 1 or 2        */
1354     /*************************************************************************/
1355     for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356     {
1357         S32 i4_cands;
1358         U08 u1_pred_dir = 0;
1359 
1360         if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361         {
1362             u1_pred_dir = i4_search_idx;
1363         }
1364         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365         {
1366             u1_pred_dir = 1;
1367         }
1368         else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369         {
1370             u1_pred_dir = 0;
1371         }
1372         else
1373         {
1374             ASSERT(0);
1375         }
1376 
1377         /* call the function to pick and evaluate the merge candts, given */
1378         /* a ref id and a part mask.                                      */
1379         i4_cands = hme_pick_eval_merge_candts(
1380             ps_merge_prms,
1381             ps_subpel_prms,
1382             u1_pred_dir,
1383             i4_best_part_type,
1384             is_vert,
1385             ps_wt_inp_prms,
1386             i4_frm_qstep,
1387             ps_cmn_utils_optimised_function_list,
1388             ps_me_optimised_function_list);
1389 
1390         if(i4_cands)
1391         {
1392             ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393                 u1_pred_dir;
1394             ps_merge_prms->i4_num_pred_dir_actual++;
1395         }
1396 
1397         i4_num_merge_cands_evaluated += i4_cands;
1398     }
1399 
1400     /* Call the decide_part_types function here */
1401     /* Populate the new PU struct with the results post subpel refinement*/
1402     if(i4_num_merge_cands_evaluated)
1403     {
1404         inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405 
1406         hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407 
1408         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409         ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410 
1411         hme_populate_pus(
1412             ps_thrd_ctxt,
1413             ps_ctxt,
1414             ps_subpel_prms,
1415             ps_results_merge,
1416             ps_cu_results,
1417             ps_pu_results,
1418             ps_pu_result,
1419             ps_merge_prms->ps_inter_ctb_prms,
1420             &ps_ctxt->s_wt_pred,
1421             ps_merge_prms->ps_layer_ctxt,
1422             ps_merge_prms->au1_pred_dir_searched,
1423             ps_merge_prms->i4_num_pred_dir_actual);
1424 
1425         ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426 
1427         hme_decide_part_types(
1428             ps_cu_results,
1429             ps_pu_results,
1430             ps_merge_prms->ps_inter_ctb_prms,
1431             ps_ctxt,
1432             ps_cmn_utils_optimised_function_list,
1433             ps_me_optimised_function_list
1434 
1435         );
1436 
1437         /*****************************************************************/
1438         /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
1439         /*****************************************************************/
1440 #if DISABLE_INTRA_IN_BPICS
1441         if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442                  (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443 #endif
1444         {
1445             if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446             {
1447                 hme_insert_intra_nodes_post_bipred(
1448                     ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449             }
1450         }
1451     }
1452     else
1453     {
1454         return CU_SPLIT;
1455     }
1456 
1457     /* We check the best result of ref idx 0 and compare for parent vs child */
1458     if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459        (CU_32x32 == ps_results_merge->e_cu_size))
1460     {
1461         i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462         /*********************************************************************/
1463         /* Add the cost of signaling the CU tree bits.                       */
1464         /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465         /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466         /* So, 4*lambda is extra for children cost. :Lokesh                  */
1467         /*********************************************************************/
1468         {
1469             pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470 
1471             i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472         }
1473 
1474         if(i4_cost_parent < i4_cost_children)
1475         {
1476             return CU_MERGED;
1477         }
1478 
1479         return CU_SPLIT;
1480     }
1481     else
1482     {
1483         return CU_MERGED;
1484     }
1485 }
1486 
/**
 * Copies one search result into an mv bank slot: the mv components of
 * ps_search_node, each right shifted by 'shift', are written to *ps_mv and
 * the node's reference index is written to *pi1_ref_idx.
 *
 * Every argument except 'pi1_ref_idx' is expanded more than once, so the
 * arguments must be free of side effects. The body is wrapped in
 * do { } while(0) so that the expansion together with the caller's trailing
 * ';' forms exactly one statement (safe as the body of an if/else).
 */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
1493 
1494 /**
1495 ********************************************************************************
1496 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497 *                               layer_mv_t *ps_layer_mv,
1498 *                               S32 i4_search_blk_x,
1499 *                               S32 i4_search_blk_y,
1500 *                               mvbank_update_prms_t *ps_prms)
1501 *
1502 *  @brief  Updates the mv bank in case there is no further encodign to be done
1503 *
1504 *  @param[in]  ps_search_results: contains results for the block just searched
1505 *
1506 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1507 *
1508 *  @param[in] i4_search_blk_x  : col num of blk being searched
1509 *
1510 *  @param[in] i4_search_blk_y : row num of blk being searched
1511 *
1512 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
1513 *
1514 *  @return None
1515 ********************************************************************************
1516 */
1517 
hme_update_mv_bank_noencode(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1518 void hme_update_mv_bank_noencode(
1519     search_results_t *ps_search_results,
1520     layer_mv_t *ps_layer_mv,
1521     S32 i4_search_blk_x,
1522     S32 i4_search_blk_y,
1523     mvbank_update_prms_t *ps_prms)
1524 {
1525     hme_mv_t *ps_mv;
1526     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528     S32 i4_blk_x, i4_blk_y, i4_offset;
1529     S32 i4_j, i4_ref_id;
1530     search_node_t *ps_search_node;
1531     search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532     search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533     search_node_t *ps_search_node_4x4_4;
1534 
1535     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538 
1539     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540 
1541     /* Identify the correct offset in the mvbank and the reference id buf */
1542     ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544 
1545     /*************************************************************************/
1546     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1547     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1548     /* do a straightforward single update of results. This will have a 1-1   */
1549     /* correspondence.                                                       */
1550     /*************************************************************************/
1551     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552     {
1553         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554         {
1555             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557             {
1558                 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559                 ps_mv++;
1560                 pi1_ref_idx++;
1561                 ps_search_node++;
1562             }
1563         }
1564         return;
1565     }
1566 
1567     /*************************************************************************/
1568     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569     /* case, we need to have NxN partitions enabled in search.               */
1570     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572     /*************************************************************************/
1573     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576 
1577     /*************************************************************************/
1578     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579     /* hence the below check.                                                */
1580     /*************************************************************************/
1581     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582 
1583     ps_mv1 = ps_mv;
1584     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587     pi1_ref_idx1 = pi1_ref_idx;
1588     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591 
1592     for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593     {
1594         ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595 
1596         ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597 
1598         ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599 
1600         ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601 
1602         ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603 
1604         COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605         ps_mv1++;
1606         pi1_ref_idx1++;
1607         ps_search_node_4x4_1++;
1608         COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609         ps_mv2++;
1610         pi1_ref_idx2++;
1611         ps_search_node_4x4_2++;
1612         COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613         ps_mv3++;
1614         pi1_ref_idx3++;
1615         ps_search_node_4x4_3++;
1616         COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617         ps_mv4++;
1618         pi1_ref_idx4++;
1619         ps_search_node_4x4_4++;
1620 
1621         if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622         {
1623             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624             ps_mv1++;
1625             pi1_ref_idx1++;
1626             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627             ps_mv2++;
1628             pi1_ref_idx2++;
1629             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630             ps_mv3++;
1631             pi1_ref_idx3++;
1632             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633             ps_mv4++;
1634             pi1_ref_idx4++;
1635         }
1636 
1637         for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638         {
1639             COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640             ps_mv1++;
1641             pi1_ref_idx1++;
1642             ps_search_node_4x4_1++;
1643             COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644             ps_mv2++;
1645             pi1_ref_idx2++;
1646             ps_search_node_4x4_2++;
1647             COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648             ps_mv3++;
1649             pi1_ref_idx3++;
1650             ps_search_node_4x4_3++;
1651             COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652             ps_mv4++;
1653             pi1_ref_idx4++;
1654             ps_search_node_4x4_4++;
1655         }
1656     }
1657 }
1658 
/**
********************************************************************************
*  @fn     hme_update_mv_bank_encode(search_results_t *ps_search_results,
*                               layer_mv_t *ps_layer_mv,
*                               S32 i4_search_blk_x,
*                               S32 i4_search_blk_y,
*                               mvbank_update_prms_t *ps_prms,
*                               U08 *pu1_pred_dir_searched,
*                               S32 i4_num_act_ref_l0)
*
*  @brief  Stores the results of a 16x16 search blk into the four 8x8 blks of
*          the mv bank (both sizes are asserted). The partition type of the
*          best CU result decides which partition result feeds which 8x8 blk;
*          a split 16x16 CU with NxN enabled is treated as PRT_NxN.
*
*  @param[in]  ps_search_results: contains results for the block just searched
*
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
*
*  @param[in] i4_search_blk_x  : col num of blk being searched
*
*  @param[in] i4_search_blk_y : row num of blk being searched
*
*  @param[in] ps_prms : supplies the blk-to-bank shift, the number of refs and
*                       the number of results to store
*
*  @param[in] pu1_pred_dir_searched : for the i-th stored ref, the first index
*                       into aps_part_results to read the results from
*
*  @param[in] i4_num_act_ref_l0 : not used by this function
*
*  @return None
********************************************************************************
*/
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    /* Write cursors for the four 8x8 bank blocks, in TL, TR, BL, BR order */
    hme_mv_t *aps_bank_mv[4];
    S08 *api1_bank_ref_idx[4];
    /* Partition id offset (relative to the first partition of the chosen   */
    /* partition type) that supplies each of the four bank blocks            */
    S32 ai4_part_offset[4];
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 i, j, k, i4_res, i4_num_res_per_pass, num_parts;
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
    WORD32 i4_part_id;

    (void)i4_num_act_ref_l0;

    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;

    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);

    /* The bank must not hold more mvs per ref than u1_num_best_results */
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    /* TR is one bank blk to the right of TL; BL/BR are one bank row below */
    aps_bank_mv[0] = ps_layer_mv->ps_mv + i4_offset;
    aps_bank_mv[1] = aps_bank_mv[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_bank_mv[2] = aps_bank_mv[0] + (ps_layer_mv->i4_num_mvs_per_row);
    aps_bank_mv[3] = aps_bank_mv[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    api1_bank_ref_idx[0] = ps_layer_mv->pi1_ref_idx + i4_offset;
    api1_bank_ref_idx[1] = api1_bank_ref_idx[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_bank_ref_idx[2] = api1_bank_ref_idx[0] + (ps_layer_mv->i4_num_mvs_per_row);
    api1_bank_ref_idx[3] = api1_bank_ref_idx[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    /* The partition layout depends only on the partition type, so it is */
    /* resolved once, ahead of the store loops.                          */
    i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
    num_parts = gau1_num_parts_in_part_type[i4_part_type];

    if(num_parts == 1)
    {
        /* One partition covers all four 8x8 blks */
        ai4_part_offset[0] = 0;
        ai4_part_offset[1] = 0;
        ai4_part_offset[2] = 0;
        ai4_part_offset[3] = 0;
    }
    else if(num_parts == 2)
    {
        /* For vertically oriented partitions, tl, bl pt to same result */
        /* For horizontally oriented partition, tl, tr pt to same result */
        /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
        /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
        /* and right 2 8x8 have 12x16R partition */
        if(gau1_is_vert_part[i4_part_type])
        {
            ai4_part_offset[0] = 0;
            ai4_part_offset[1] = 1;
            ai4_part_offset[2] = 0;
            ai4_part_offset[3] = 1;
        }
        else
        {
            ai4_part_offset[0] = 0;
            ai4_part_offset[1] = 0;
            ai4_part_offset[2] = 1;
            ai4_part_offset[3] = 1;
        }
    }
    else
    {
        /* 4 unique results */
        ai4_part_offset[0] = 0;
        ai4_part_offset[1] = 1;
        ai4_part_offset[2] = 2;
        ai4_part_offset[3] = 3;
    }

    /* Result 0 is always stored; result 1 only when more than one result */
    /* is requested.                                                      */
    i4_num_res_per_pass = (ps_prms->i4_num_results_to_store > 1) ? 2 : 1;

    for(i = 0; i < ps_prms->i4_num_ref; i++)
    {
        /* NOTE(review): every pass of the j loop stores the same result    */
        /* entries, and stores two of them per pass when                    */
        /* i4_num_results_to_store > 1. Behaviour is kept as found; confirm */
        /* it against the number of bank entries available per ref.         */
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
        {
            S32 i4_dir = pu1_pred_dir_searched[i];

            for(i4_res = 0; i4_res < i4_num_res_per_pass; i4_res++)
            {
                for(k = 0; k < 4; k++)
                {
                    search_node_t *ps_node =
                        &ps_search_results
                             ->aps_part_results[i4_dir][i4_part_id + ai4_part_offset[k]][i4_res];

                    /* An entry marked INTRA_MV is skipped in favour of the */
                    /* entry that follows it in the same result list        */
                    if(ps_node->s_mv.i2_mvx == INTRA_MV)
                    {
                        ps_node++;
                    }

                    COPY_SEARCH_RESULT(aps_bank_mv[k], api1_bank_ref_idx[k], ps_node, 0);
                    aps_bank_mv[k]++;
                    api1_bank_ref_idx[k]++;
                }
            }
        }
    }
}
1866 
1867 /**
1868 ********************************************************************************
1869 *  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1870 *                               layer_mv_t *ps_layer_mv,
1871 *                               S32 i4_search_blk_x,
1872 *                               S32 i4_search_blk_y,
1873 *                               mvbank_update_prms_t *ps_prms)
1874 *
1875 *  @brief  Updates the mv bank in case there is no further encodign to be done
1876 *
1877 *  @param[in]  ps_search_results: contains results for the block just searched
1878 *
1879 *  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1880 *
1881 *  @param[in] i4_search_blk_x  : col num of blk being searched
1882 *
1883 *  @param[in] i4_search_blk_y : row num of blk being searched
1884 *
1885 *  @param[in] ps_prms : contains certain parameters which govern how updatedone
1886 *
1887 *  @return None
1888 ********************************************************************************
1889 */
1890 
hme_update_mv_bank_in_l1_me(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1891 void hme_update_mv_bank_in_l1_me(
1892     search_results_t *ps_search_results,
1893     layer_mv_t *ps_layer_mv,
1894     S32 i4_search_blk_x,
1895     S32 i4_search_blk_y,
1896     mvbank_update_prms_t *ps_prms)
1897 {
1898     hme_mv_t *ps_mv;
1899     hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900     S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901     S32 i4_blk_x, i4_blk_y, i4_offset;
1902     S32 i4_j, i4_ref_id;
1903     search_node_t *ps_search_node;
1904     search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905 
1906     i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907     i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908     i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909 
1910     i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911 
1912     /* Identify the correct offset in the mvbank and the reference id buf */
1913     ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914     pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915 
1916     /*************************************************************************/
1917     /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1918     /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1919     /* do a straightforward single update of results. This will have a 1-1   */
1920     /* correspondence.                                                       */
1921     /*************************************************************************/
1922     if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923     {
1924         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925 
1926         hme_mv_t *ps_mv_l0_root = ps_mv;
1927         hme_mv_t *ps_mv_l1_root =
1928             ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929 
1930         U32 u4_num_l0_results_updated = 0;
1931         U32 u4_num_l1_results_updated = 0;
1932 
1933         S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934         S08 *pi1_ref_idx_l1_root =
1935             pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936 
1937         for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938         {
1939             U32 *pu4_num_results_updated;
1940             search_node_t **pps_result_nodes;
1941 
1942             U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943 
1944             if(u1_pred_dir_of_cur_ref)
1945             {
1946                 pu4_num_results_updated = &u4_num_l1_results_updated;
1947                 pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948             }
1949             else
1950             {
1951                 pu4_num_results_updated = &u4_num_l0_results_updated;
1952                 pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953             }
1954 
1955             ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956 
1957             for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958             {
1959                 hme_add_new_node_to_a_sorted_array(
1960                     &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961 
1962                 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963                 (*pu4_num_results_updated)++;
1964             }
1965         }
1966 
1967         for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968         {
1969             COPY_SEARCH_RESULT(
1970                 &ps_mv_l0_root[i4_j],
1971                 &pi1_ref_idx_l0_root[i4_j],
1972                 aps_result_nodes_sorted[0][i4_j],
1973                 0);
1974         }
1975 
1976         for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977         {
1978             COPY_SEARCH_RESULT(
1979                 &ps_mv_l1_root[i4_j],
1980                 &pi1_ref_idx_l1_root[i4_j],
1981                 aps_result_nodes_sorted[1][i4_j],
1982                 0);
1983         }
1984 
1985         return;
1986     }
1987 
1988     /*************************************************************************/
1989     /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990     /* case, we need to have NxN partitions enabled in search.               */
1991     /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992     /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993     /*************************************************************************/
1994     ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995     ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996     ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997 
1998     /*************************************************************************/
1999     /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000     /* hence the below check.                                                */
2001     /*************************************************************************/
2002     ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003 
2004     ps_mv1 = ps_mv;
2005     ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006     ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007     ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008     pi1_ref_idx1 = pi1_ref_idx;
2009     pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010     pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011     pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012 
2013     {
2014         /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015         search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016         U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017 
2018         S32 i;
2019 
2020         hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021         hme_mv_t *ps_mv1_l1_root =
2022             ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023         hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024         hme_mv_t *ps_mv2_l1_root =
2025             ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026         hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027         hme_mv_t *ps_mv3_l1_root =
2028             ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029         hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030         hme_mv_t *ps_mv4_l1_root =
2031             ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032 
2033         U32 u4_num_l0_results_updated = 0;
2034         U32 u4_num_l1_results_updated = 0;
2035 
2036         S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037         S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038                                                             ps_layer_mv->i4_num_mvs_per_ref);
2039         S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040         S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041                                                             ps_layer_mv->i4_num_mvs_per_ref);
2042         S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043         S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044                                                             ps_layer_mv->i4_num_mvs_per_ref);
2045         S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046         S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047                                                             ps_layer_mv->i4_num_mvs_per_ref);
2048 
2049         for(i = 0; i < 4; i++)
2050         {
2051             hme_mv_t *ps_mv_l0_root;
2052             hme_mv_t *ps_mv_l1_root;
2053 
2054             S08 *pi1_ref_idx_l0_root;
2055             S08 *pi1_ref_idx_l1_root;
2056 
2057             for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058             {
2059                 U32 *pu4_num_results_updated;
2060                 search_node_t **pps_result_nodes;
2061                 U08 *pu1_cost_shifts_for_sorted_node;
2062 
2063                 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064 
2065                 if(u1_pred_dir_of_cur_ref)
2066                 {
2067                     pu4_num_results_updated = &u4_num_l1_results_updated;
2068                     pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070                 }
2071                 else
2072                 {
2073                     pu4_num_results_updated = &u4_num_l0_results_updated;
2074                     pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2076                 }
2077 
2078                 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079 
2080                 ps_search_node_4x4 =
2081                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082 
2083                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084                 {
2085                     hme_add_new_node_to_a_sorted_array(
2086                         &ps_search_node_4x4[i4_j],
2087                         pps_result_nodes,
2088                         pu1_cost_shifts_for_sorted_node,
2089                         *pu4_num_results_updated,
2090                         0);
2091 
2092                     (*pu4_num_results_updated)++;
2093 
2094                     hme_add_new_node_to_a_sorted_array(
2095                         &ps_search_node_8x8[i4_j],
2096                         pps_result_nodes,
2097                         pu1_cost_shifts_for_sorted_node,
2098                         *pu4_num_results_updated,
2099                         2);
2100 
2101                     (*pu4_num_results_updated)++;
2102                 }
2103             }
2104 
2105             switch(i)
2106             {
2107             case 0:
2108             {
2109                 ps_mv_l0_root = ps_mv1_l0_root;
2110                 ps_mv_l1_root = ps_mv1_l1_root;
2111 
2112                 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113                 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114 
2115                 break;
2116             }
2117             case 1:
2118             {
2119                 ps_mv_l0_root = ps_mv2_l0_root;
2120                 ps_mv_l1_root = ps_mv2_l1_root;
2121 
2122                 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123                 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124 
2125                 break;
2126             }
2127             case 2:
2128             {
2129                 ps_mv_l0_root = ps_mv3_l0_root;
2130                 ps_mv_l1_root = ps_mv3_l1_root;
2131 
2132                 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133                 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134 
2135                 break;
2136             }
2137             case 3:
2138             {
2139                 ps_mv_l0_root = ps_mv4_l0_root;
2140                 ps_mv_l1_root = ps_mv4_l1_root;
2141 
2142                 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143                 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144 
2145                 break;
2146             }
2147             }
2148 
2149             u4_num_l0_results_updated =
2150                 MIN((S32)u4_num_l0_results_updated,
2151                     ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152 
2153             u4_num_l1_results_updated =
2154                 MIN((S32)u4_num_l1_results_updated,
2155                     ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156 
2157             for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158             {
2159                 COPY_SEARCH_RESULT(
2160                     &ps_mv_l0_root[i4_j],
2161                     &pi1_ref_idx_l0_root[i4_j],
2162                     aps_result_nodes_sorted[0][i4_j],
2163                     0);
2164             }
2165 
2166             for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167             {
2168                 COPY_SEARCH_RESULT(
2169                     &ps_mv_l1_root[i4_j],
2170                     &pi1_ref_idx_l1_root[i4_j],
2171                     aps_result_nodes_sorted[1][i4_j],
2172                     0);
2173             }
2174         }
2175     }
2176 }
2177 
2178 /**
2179 ******************************************************************************
2180 *  @brief Scales a motion vector component projected from a different layer of the
2181 *         same picture (so no ref id related delta POC scaling is required)
2182 ******************************************************************************
2183 */
2184 
#define SCALE_MV_COMP_RES(mv_prev, size_cur, size_prev)                                            \
    ((((mv_prev) * (size_cur)) + ((SIGN((mv_prev)) * (size_prev)) >> 1)) / (size_prev))
2187 /**
2188 ********************************************************************************
2189 *  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
2190 *                                   layer_ctxt_t *ps_curr_layer,
2191 *                                   layer_ctxt_t *ps_coarse_layer,
2192 *                                   S32 i4_pos_x,
2193 *                                   S32 i4_pos_y,
2194 *                                   S08 i1_ref_id,
2195 *                                   S08 i1_result_id)
2196 *
2197 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
2198 *          position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
2199 *
2200 *  @param[out]  ps_search_node : contains the projected result
2201 *
2202 *  @param[in]   ps_curr_layer : current layer context
2203 *
2204 *  @param[in]   ps_coarse_layer  : coarser layer context
2205 *
2206 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2207 *
2208 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2209 *
2210 *  @param[in]   i1_ref_id : reference id for which the candidate required
2211 *
2212 *  @param[in]   i4_result_id : result id for which the candidate required
2213 *                              (0 : best result, 1 : next best)
2214 *
2215 *  @return None
2216 ********************************************************************************
2217 */
2218 
hme_project_coloc_candt(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2219 void hme_project_coloc_candt(
2220     search_node_t *ps_search_node,
2221     layer_ctxt_t *ps_curr_layer,
2222     layer_ctxt_t *ps_coarse_layer,
2223     S32 i4_pos_x,
2224     S32 i4_pos_y,
2225     S08 i1_ref_id,
2226     S32 i4_result_id)
2227 {
2228     S32 wd_c, ht_c, wd_p, ht_p;
2229     S32 blksize_p, blk_x, blk_y, i4_offset;
2230     layer_mv_t *ps_layer_mvbank;
2231     hme_mv_t *ps_mv;
2232     S08 *pi1_ref_idx;
2233 
2234     /* Width and ht of current and prev layers */
2235     wd_c = ps_curr_layer->i4_wd;
2236     ht_c = ps_curr_layer->i4_ht;
2237     wd_p = ps_coarse_layer->i4_wd;
2238     ht_p = ps_coarse_layer->i4_ht;
2239 
2240     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241     blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242 
2243     /* Safety check to avoid uninitialized access across temporal layers */
2244     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246 
2247     /* Project the positions to prev layer */
2248     /* TODO: convert these to scale factors at pic level */
2249     blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250     blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2251 
2252     /* Pick up the mvs from the location */
2253     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255 
2256     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258 
2259     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261 
2262     ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263     ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265     ps_search_node->u1_subpel_done = 0;
2266     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267     {
2268         ps_search_node->i1_ref_idx = i1_ref_id;
2269         ps_search_node->s_mv.i2_mvx = 0;
2270         ps_search_node->s_mv.i2_mvy = 0;
2271     }
2272 }
2273 
2274 /**
2275 ********************************************************************************
2276 *  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277 *                                   layer_ctxt_t *ps_curr_layer,
2278 *                                   layer_ctxt_t *ps_coarse_layer,
2279 *                                   S32 i4_pos_x,
2280 *                                   S32 i4_pos_y,
2281 *                                   S08 i1_ref_id,
2282 *                                   S08 i1_result_id)
2283 *
2284 *  @brief  From a coarser layer, projects a candidated situated at "colocated"
2285 *          position in the picture when the ratios are dyadic
2286 *
2287 *  @param[out]  ps_search_node : contains the projected result
2288 *
2289 *  @param[in]   ps_curr_layer : current layer context
2290 *
2291 *  @param[in]   ps_coarse_layer  : coarser layer context
2292 *
2293 *  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2294 *
2295 *  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2296 *
2297 *  @param[in]   i1_ref_id : reference id for which the candidate required
2298 *
2299 *  @param[in]   i4_result_id : result id for which the candidate required
2300 *                              (0 : best result, 1 : next best)
2301 *
2302 *  @return None
2303 ********************************************************************************
2304 */
2305 
hme_project_coloc_candt_dyadic(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2306 void hme_project_coloc_candt_dyadic(
2307     search_node_t *ps_search_node,
2308     layer_ctxt_t *ps_curr_layer,
2309     layer_ctxt_t *ps_coarse_layer,
2310     S32 i4_pos_x,
2311     S32 i4_pos_y,
2312     S08 i1_ref_id,
2313     S32 i4_result_id)
2314 {
2315     S32 wd_c, ht_c, wd_p, ht_p;
2316     S32 blksize_p, blk_x, blk_y, i4_offset;
2317     layer_mv_t *ps_layer_mvbank;
2318     hme_mv_t *ps_mv;
2319     S08 *pi1_ref_idx;
2320 
2321     /* Width and ht of current and prev layers */
2322     wd_c = ps_curr_layer->i4_wd;
2323     ht_c = ps_curr_layer->i4_ht;
2324     wd_p = ps_coarse_layer->i4_wd;
2325     ht_p = ps_coarse_layer->i4_ht;
2326 
2327     ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328     /* blksize_p = log2(wd) + 1 */
2329     blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330 
2331     /* ASSERT for valid sizes */
2332     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333 
2334     /* Safety check to avoid uninitialized access across temporal layers */
2335     i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336     i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337 
2338     /* Project the positions to prev layer */
2339     /* TODO: convert these to scale factors at pic level */
2340     blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2341     blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2342 
2343     /* Pick up the mvs from the location */
2344     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346 
2347     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349 
2350     ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351     pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352 
2353     ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354     ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355     ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356     if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357     {
2358         ps_search_node->i1_ref_idx = i1_ref_id;
2359         ps_search_node->s_mv.i2_mvx = 0;
2360         ps_search_node->s_mv.i2_mvy = 0;
2361     }
2362 }
2363 
/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic_implicit(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S32 i4_num_act_ref_l0,
*                                   U08 u1_pred_dir,
*                                   U08 u1_default_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  Dyadic projection of the colocated candidate of a coarser layer
*          where the mv bank entry is selected by prediction direction instead
*          of by an explicit reference id. For u1_pred_dir == 1 the entries of
*          i4_num_act_ref_l0 references are skipped before the result is read.
*          If the stored ref idx is negative, or the doubled x component equals
*          INTRA_MV, a zero mv on u1_default_ref_id is returned.
*
*  @param[out]  ps_search_node : contains the projected result
*
*  @param[in]   ps_curr_layer : current layer context
*
*  @param[in]   ps_coarse_layer  : coarser layer context
*
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_num_act_ref_l0 : number of references whose results are
*                                   skipped when u1_pred_dir is 1
*
*  @param[in]   u1_pred_dir : 1 selects the entries located after the
*                             i4_num_act_ref_l0 references; any other value
*                             selects the first entries
*
*  @param[in]   u1_default_ref_id : ref id reported with the zero mv fallback
*
*  @param[in]   i4_result_id : result id for which the candidate is required
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of the current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* Despite its name this is a shift amount, not a size in pels */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the upper bound subtracts the shift amount, not a block */
    /* size in pels; kept as is to preserve behaviour - confirm the intent   */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p;
    blk_y = i4_pos_y >> blksize_p;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* For pred dir 1, step over the results of the first i4_num_act_ref_l0 refs */
    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Dyadic scaling. Multiplication is used instead of "<< 1" because left */
    /* shifting a negative value is undefined behaviour in C                 */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* No usable inter candidate stored: fall back to zero mv */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
2424 
/* Copies the four range bounds of prm2 (a struct lvalue) into prm1, each    */
/* scaled by 2^shift. Arguments are parenthesised so that any expression can */
/* be passed, and the scaling is a multiplication because left shifting a    */
/* negative bound is undefined behaviour in C.                               */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }
2432 
/* Pointer flavour of SCALE_RANGE_PRMS: prm1 and prm2 are pointers to range  */
/* structs. Arguments are parenthesised so that expressions such as &range   */
/* can be passed, and the scaling is a multiplication because left shifting  */
/* a negative bound is undefined behaviour in C.                             */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }
2440 
2441 /**
2442 ********************************************************************************
2443 *  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2444 *                       refine_layer_prms_t *ps_refine_prms)
2445 *
2446 *  @brief  Frame init of refinemnet layers in ME
2447 *
2448 *  @param[in,out]  ps_ctxt: ME Handle
2449 *
2450 *  @param[in]  ps_refine_prms : refinement layer prms
2451 *
2452 *  @return None
2453 ********************************************************************************
2454 */
hme_refine_frm_init(layer_ctxt_t * ps_curr_layer,refine_prms_t * ps_refine_prms,layer_ctxt_t * ps_coarse_layer)2455 void hme_refine_frm_init(
2456     layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457 {
2458     /* local variables */
2459     BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460     S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461 
2462     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463 
2464     if(ps_refine_prms->explicit_ref)
2465     {
2466         i4_num_ref_fpel = i4_num_ref_prev_layer;
2467     }
2468     else
2469     {
2470         i4_num_ref_fpel = 2;
2471     }
2472 
2473     if(ps_refine_prms->i4_enable_4x4_part)
2474     {
2475         e_result_blk_size = BLK_4x4;
2476     }
2477 
2478     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479 
2480     hme_init_mv_bank(
2481         ps_curr_layer,
2482         e_result_blk_size,
2483         i4_num_ref_fpel,
2484         ps_refine_prms->i4_num_mvbank_results,
2485         ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486 }
2487 
2488 #if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489 /**
2490 ********************************************************************************
2491 *  @fn   void hme_init_clusters_16x16
2492 *               (
2493 *                   cluster_16x16_blk_t *ps_cluster_blk_16x16
2494 *               )
2495 *
2496 *  @brief  Intialisations for the structs used in clustering algorithm
2497 *
2498 *  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
2499 *                                        of 16x16 block
2500 *
2501 *  @return None
2502 ********************************************************************************
2503 */
2504 static __inline void
hme_init_clusters_16x16(cluster_16x16_blk_t * ps_cluster_blk_16x16,S32 bidir_enabled)2505     hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506 {
2507     S32 i;
2508 
2509     ps_cluster_blk_16x16->num_clusters = 0;
2510     ps_cluster_blk_16x16->intra_mv_area = 0;
2511     ps_cluster_blk_16x16->best_inter_cost = 0;
2512 
2513     for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514     {
2515         ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517 
2518         ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519 
2520         ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521         ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522     }
2523     for(i = 0; i < MAX_NUM_REF; i++)
2524     {
2525         ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526     }
2527 }
2528 
2529 /**
2530 ********************************************************************************
2531 *  @fn   void hme_init_clusters_32x32
2532 *               (
2533 *                   cluster_32x32_blk_t *ps_cluster_blk_32x32
2534 *               )
2535 *
2536 *  @brief  Intialisations for the structs used in clustering algorithm
2537 *
2538 *  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
2539 *                                        of 32x32 block
2540 *
2541 *  @return None
2542 ********************************************************************************
2543 */
2544 static __inline void
hme_init_clusters_32x32(cluster_32x32_blk_t * ps_cluster_blk_32x32,S32 bidir_enabled)2545     hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546 {
2547     S32 i;
2548 
2549     ps_cluster_blk_32x32->num_clusters = 0;
2550     ps_cluster_blk_32x32->intra_mv_area = 0;
2551     ps_cluster_blk_32x32->best_alt_ref = -1;
2552     ps_cluster_blk_32x32->best_uni_ref = -1;
2553     ps_cluster_blk_32x32->best_inter_cost = 0;
2554     ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555 
2556     for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557     {
2558         ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560         ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561 
2562         ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563         ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564     }
2565     for(i = 0; i < MAX_NUM_REF; i++)
2566     {
2567         ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568     }
2569 }
2570 
2571 /**
2572 ********************************************************************************
2573 *  @fn   void hme_init_clusters_64x64
2574 *               (
2575 *                   cluster_64x64_blk_t *ps_cluster_blk_64x64
2576 *               )
2577 *
2578 *  @brief  Intialisations for the structs used in clustering algorithm
2579 *
2580 *  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
2581 *                                        of 64x64 block
2582 *
2583 *  @return None
2584 ********************************************************************************
2585 */
2586 static __inline void
hme_init_clusters_64x64(cluster_64x64_blk_t * ps_cluster_blk_64x64,S32 bidir_enabled)2587     hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588 {
2589     S32 i;
2590 
2591     ps_cluster_blk_64x64->num_clusters = 0;
2592     ps_cluster_blk_64x64->intra_mv_area = 0;
2593     ps_cluster_blk_64x64->best_alt_ref = -1;
2594     ps_cluster_blk_64x64->best_uni_ref = -1;
2595     ps_cluster_blk_64x64->best_inter_cost = 0;
2596 
2597     for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598     {
2599         ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600             bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601         ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602 
2603         ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604         ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605     }
2606     for(i = 0; i < MAX_NUM_REF; i++)
2607     {
2608         ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609     }
2610 }
2611 
2612 /**
2613 ********************************************************************************
2614 *  @fn   void hme_sort_and_assign_top_ref_ids_areawise
2615 *               (
2616 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2617 *               )
2618 *
2619 *  @brief  Finds best_uni_ref and best_alt_ref
2620 *
2621 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2622 *
2623 *  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
2624 *                             enabled
2625 *
2626 *  @param[in]  block_width: width of the block in pels
2627 *
2628 *  @param[in]  e_cu_pos: position of the block within the CTB
2629 *
2630 *  @return None
2631 ********************************************************************************
2632 */
hme_sort_and_assign_top_ref_ids_areawise(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width,CU_POS_T e_cu_pos)2633 void hme_sort_and_assign_top_ref_ids_areawise(
2634     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635 {
2636     cluster_32x32_blk_t *ps_32x32 = NULL;
2637     cluster_64x64_blk_t *ps_64x64 = NULL;
2638     cluster_data_t *ps_data;
2639 
2640     S32 j, k;
2641 
2642     S32 ai4_uni_area[MAX_NUM_REF];
2643     S32 ai4_bi_area[MAX_NUM_REF];
2644     S32 ai4_ref_id_found[MAX_NUM_REF];
2645     S32 ai4_ref_id[MAX_NUM_REF];
2646 
2647     S32 best_uni_ref = -1, best_alt_ref = -1;
2648     S32 num_clusters;
2649     S32 num_ref = 0;
2650     S32 num_clusters_evaluated = 0;
2651     S32 is_cur_blk_valid;
2652 
2653     if(32 == block_width)
2654     {
2655         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656         ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657         num_clusters = ps_32x32->num_clusters;
2658         ps_data = &ps_32x32->as_cluster_data[0];
2659     }
2660     else
2661     {
2662         is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663         ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664         num_clusters = ps_64x64->num_clusters;
2665         ps_data = &ps_64x64->as_cluster_data[0];
2666     }
2667 
2668 #if !ENABLE_4CTB_EVALUATION
2669     if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670     {
2671         return;
2672     }
2673 #endif
2674     if(num_clusters == 0)
2675     {
2676         return;
2677     }
2678     else if(!is_cur_blk_valid)
2679     {
2680         return;
2681     }
2682 
2683     memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684     memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685     memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686     memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687 
2688     for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689     {
2690         S32 ref_id;
2691 
2692         if(!ps_data->is_valid_cluster)
2693         {
2694             continue;
2695         }
2696 
2697         ref_id = ps_data->ref_id;
2698 
2699         num_clusters_evaluated++;
2700 
2701         ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702         ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703 
2704         if(!ai4_ref_id_found[ref_id])
2705         {
2706             ai4_ref_id[ref_id] = ref_id;
2707             ai4_ref_id_found[ref_id] = 1;
2708             num_ref++;
2709         }
2710     }
2711 
2712     {
2713         S32 ai4_ref_id_temp[MAX_NUM_REF];
2714 
2715         memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716 
2717         for(k = 1; k < MAX_NUM_REF; k++)
2718         {
2719             if(ai4_uni_area[k] > ai4_uni_area[0])
2720             {
2721                 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722                 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723             }
2724         }
2725 
2726         best_uni_ref = ai4_ref_id_temp[0];
2727     }
2728 
2729     if(bidir_enabled)
2730     {
2731         for(k = 1; k < MAX_NUM_REF; k++)
2732         {
2733             if(ai4_bi_area[k] > ai4_bi_area[0])
2734             {
2735                 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736                 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737             }
2738         }
2739 
2740         if(!ai4_bi_area[0])
2741         {
2742             best_alt_ref = -1;
2743 
2744             if(32 == block_width)
2745             {
2746                 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747             }
2748             else
2749             {
2750                 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751             }
2752 
2753             return;
2754         }
2755 
2756         if(best_uni_ref == ai4_ref_id[0])
2757         {
2758             for(k = 2; k < MAX_NUM_REF; k++)
2759             {
2760                 if(ai4_bi_area[k] > ai4_bi_area[1])
2761                 {
2762                     SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763                     SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764                 }
2765             }
2766 
2767             best_alt_ref = ai4_ref_id[1];
2768         }
2769         else
2770         {
2771             best_alt_ref = ai4_ref_id[0];
2772         }
2773     }
2774 
2775     if(32 == block_width)
2776     {
2777         SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778     }
2779     else
2780     {
2781         SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782     }
2783 }
2784 
2785 /**
2786 ********************************************************************************
2787 *  @fn   void hme_find_top_ref_ids
2788 *               (
2789 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2790 *               )
2791 *
2792 *  @brief  Finds best_uni_ref and best_alt_ref
2793 *
2794 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2795 *
2796 *  @return None
2797 ********************************************************************************
2798 */
hme_find_top_ref_ids(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width)2799 void hme_find_top_ref_ids(
2800     ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801 {
2802     S32 i;
2803 
2804     if(32 == block_width)
2805     {
2806         for(i = 0; i < 4; i++)
2807         {
2808             hme_sort_and_assign_top_ref_ids_areawise(
2809                 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810         }
2811     }
2812     else if(64 == block_width)
2813     {
2814         hme_sort_and_assign_top_ref_ids_areawise(
2815             ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816     }
2817 }
2818 
2819 /**
2820 ********************************************************************************
2821 *  @fn   void hme_boot_out_outlier
2822 *               (
2823 *                   ctb_cluster_info_t *ps_ctb_cluster_info
2824 *               )
2825 *
2826 *  @brief  Removes outlier clusters before CU tree population
2827 *
2828 *  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2829 *
2830 *  @return None
2831 ********************************************************************************
2832 */
hme_boot_out_outlier(ctb_cluster_info_t * ps_ctb_cluster_info,S32 blk_width)2833 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834 {
2835     cluster_32x32_blk_t *ps_32x32;
2836 
2837     S32 i;
2838 
2839     cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840 
2841     S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842 
2843     if(32 == blk_width)
2844     {
2845         /* 32x32 clusters */
2846         for(i = 0; i < 4; i++)
2847         {
2848             ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849 
2850             if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851             {
2852                 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853             }
2854         }
2855     }
2856     else if(64 == blk_width)
2857     {
2858         /* 64x64 clusters */
2859         if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860         {
2861             BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862         }
2863     }
2864 }
2865 
2866 /**
2867 ********************************************************************************
2868 *  @fn   void hme_update_cluster_attributes
2869 *               (
2870 *                   cluster_data_t *ps_cluster_data,
2871 *                   S32 mvx,
2872 *                   S32 mvy,
2873 *                   PART_ID_T e_part_id
2874 *               )
2875 *
2876 *  @brief  Implementation fo the clustering algorithm
2877 *
2878 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2879 *
2880 *  @param[in]  mvx : x co-ordinate of the motion vector
2881 *
2882 *  @param[in]  mvy : y co-ordinate of the motion vector
2883 *
2884 *  @param[in]  ref_idx : ref_id of the motion vector
2885 *
2886 *  @param[in]  e_part_id : partition id of the motion vector
2887 *
2888 *  @return None
2889 ********************************************************************************
2890 */
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    /* Centroid before the new mv is folded in (Q8 fixed point) */
    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    /* ref_id is not used by this routine */
    (void)ref_id;

    /* Stretch the cluster's bounding box along x. The sign of mvdx selects */
    /* which bound is a candidate: positive => min_x, negative => max_x     */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    /* Same along y */
    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append the mv to the cluster's mv list */
    {
        S32 num_mvs = ps_cluster_data->num_mvs;

        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;

        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
    }

    /* Updation of centroid : running mean of all mvs in the cluster.       */
    /* The mv is scaled to Q8 with a multiply instead of '<< 8' because     */
    /* left shifting a negative signed value is undefined behaviour in C    */
    {
        i8_mvx_sum_q8 =
            (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + ((LWORD64)mvx * 256);
        i8_mvy_sum_q8 =
            (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + ((LWORD64)mvy * 256);

        ps_cluster_data->num_mvs++;

        ps_cluster_data->s_centroid.i4_pos_x_q8 =
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
    }

    /* Area bookkeeping : total, and split by uni / bi prediction */
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
    }
}
2963 
2964 /**
2965 ********************************************************************************
2966 *  @fn   void hme_try_cluster_merge
2967 *               (
2968 *                   cluster_data_t *ps_cluster_data,
2969 *                   S32 *pi4_num_clusters,
2970 *                   S32 idx_of_updated_cluster
2971 *               )
2972 *
2973 *  @brief  Implementation fo the clustering algorithm
2974 *
2975 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2976 *
2977 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
2978 *
2979 *  @param[in]  idx_of_updated_cluster : index of the cluster most recently
2980 *                                       updated
2981 *
2982 *  @return Nothing
2983 ********************************************************************************
2984 */
hme_try_cluster_merge(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S32 idx_of_updated_cluster)2985 void hme_try_cluster_merge(
2986     cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987 {
2988     centroid_t *ps_centroid;
2989 
2990     S32 cur_pos_x_q8;
2991     S32 cur_pos_y_q8;
2992     S32 i;
2993     S32 max_dist_from_centroid;
2994     S32 mvd;
2995     S32 mvdx_q8;
2996     S32 mvdx;
2997     S32 mvdy_q8;
2998     S32 mvdy;
2999     S32 num_clusters, num_clusters_evaluated;
3000     S32 other_pos_x_q8;
3001     S32 other_pos_y_q8;
3002 
3003     cluster_data_t *ps_root = ps_cluster_data;
3004     cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005     centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006 
3007     /* Merge is superfluous if num_clusters is 1 */
3008     if(*pu1_num_clusters == 1)
3009     {
3010         return;
3011     }
3012 
3013     cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014     cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015 
3016     max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017 
3018     num_clusters = *pu1_num_clusters;
3019     num_clusters_evaluated = 0;
3020 
3021     for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022     {
3023         if(!ps_cluster_data->is_valid_cluster)
3024         {
3025             continue;
3026         }
3027         if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028         {
3029             num_clusters_evaluated++;
3030             continue;
3031         }
3032 
3033         ps_centroid = &ps_cluster_data->s_centroid;
3034 
3035         other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036         other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037 
3038         mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039         mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042 
3043         mvd = ABS(mvdx) + ABS(mvdy);
3044 
3045         if(mvd <= (max_dist_from_centroid >> 1))
3046         {
3047             /* 0 => no updates */
3048             /* 1 => min updated */
3049             /* 2 => max updated */
3050             S32 minmax_x_update_id;
3051             S32 minmax_y_update_id;
3052 
3053             LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054             LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055             LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056             LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057 
3058             (*pu1_num_clusters)--;
3059 
3060             ps_cluster_data->is_valid_cluster = 0;
3061 
3062             memcpy(
3063                 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064                 ps_cluster_data->as_mv,
3065                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066 
3067             ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068             ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069             ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070             ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071             i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072             i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073 
3074             ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075             ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076 
3077             minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078                                      ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079                                      : 1;
3080             minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081                                      ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082                                      : 1;
3083 
3084             /* Updation of centroid spread */
3085             switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086             {
3087             case 1:
3088             {
3089                 S32 mvd, mvd_q8;
3090 
3091                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092 
3093                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3095 
3096                 if(mvd > (max_dist_from_centroid))
3097                 {
3098                     ps_cluster_data->max_dist_from_centroid = mvd;
3099                 }
3100                 break;
3101             }
3102             case 2:
3103             {
3104                 S32 mvd, mvd_q8;
3105 
3106                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107 
3108                 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3110 
3111                 if(mvd > (max_dist_from_centroid))
3112                 {
3113                     ps_cluster_data->max_dist_from_centroid = mvd;
3114                 }
3115                 break;
3116             }
3117             case 4:
3118             {
3119                 S32 mvd, mvd_q8;
3120 
3121                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122 
3123                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3125 
3126                 if(mvd > (max_dist_from_centroid))
3127                 {
3128                     ps_cluster_data->max_dist_from_centroid = mvd;
3129                 }
3130                 break;
3131             }
3132             case 5:
3133             {
3134                 S32 mvd;
3135                 S32 mvdx, mvdx_q8;
3136                 S32 mvdy, mvdy_q8;
3137 
3138                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140 
3141                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143 
3144                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145 
3146                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148 
3149                 if(mvd > max_dist_from_centroid)
3150                 {
3151                     ps_cluster_data->max_dist_from_centroid = mvd;
3152                 }
3153                 break;
3154             }
3155             case 6:
3156             {
3157                 S32 mvd;
3158                 S32 mvdx, mvdx_q8;
3159                 S32 mvdy, mvdy_q8;
3160 
3161                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163 
3164                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166 
3167                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168 
3169                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170                 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171 
3172                 if(mvd > max_dist_from_centroid)
3173                 {
3174                     ps_cluster_data->max_dist_from_centroid = mvd;
3175                 }
3176                 break;
3177             }
3178             case 8:
3179             {
3180                 S32 mvd, mvd_q8;
3181 
3182                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183 
3184                 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3186 
3187                 if(mvd > (max_dist_from_centroid))
3188                 {
3189                     ps_cluster_data->max_dist_from_centroid = mvd;
3190                 }
3191                 break;
3192             }
3193             case 9:
3194             {
3195                 S32 mvd;
3196                 S32 mvdx, mvdx_q8;
3197                 S32 mvdy, mvdy_q8;
3198 
3199                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201 
3202                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204 
3205                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206 
3207                 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209 
3210                 if(mvd > max_dist_from_centroid)
3211                 {
3212                     ps_cluster_data->max_dist_from_centroid = mvd;
3213                 }
3214                 break;
3215             }
3216             case 10:
3217             {
3218                 S32 mvd;
3219                 S32 mvdx, mvdx_q8;
3220                 S32 mvdy, mvdy_q8;
3221 
3222                 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224 
3225                 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227 
3228                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229 
3230                 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231                 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232 
3233                 if(mvd > ps_cluster_data->max_dist_from_centroid)
3234                 {
3235                     ps_cluster_data->max_dist_from_centroid = mvd;
3236                 }
3237                 break;
3238             }
3239             default:
3240             {
3241                 break;
3242             }
3243             }
3244 
3245             hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246 
3247             return;
3248         }
3249 
3250         num_clusters_evaluated++;
3251     }
3252 }
3253 
3254 /**
3255 ********************************************************************************
3256 *  @fn   void hme_find_and_update_clusters
3257 *               (
3258 *                   cluster_data_t *ps_cluster_data,
3259 *                   S32 *pi4_num_clusters,
3260 *                   S32 mvx,
3261 *                   S32 mvy,
3262 *                   S32 ref_idx,
3263 *                   PART_ID_T e_part_id
3264 *               )
3265 *
3266 *  @brief  Implementation fo the clustering algorithm
3267 *
3268 *  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
3269 *
3270 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
3271 *
3272 *  @param[in]  mvx : x co-ordinate of the motion vector
3273 *
3274 *  @param[in]  mvy : y co-ordinate of the motion vector
3275 *
3276 *  @param[in]  ref_idx : ref_id of the motion vector
3277 *
3278 *  @param[in]  e_part_id : partition id of the motion vector
3279 *
3280 *  @return None
3281 ********************************************************************************
3282 */
hme_find_and_update_clusters(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S16 i2_mv_x,S16 i2_mv_y,U08 i1_ref_idx,S32 i4_sdi,PART_ID_T e_part_id,U08 is_part_of_bi)3283 void hme_find_and_update_clusters(
3284     cluster_data_t *ps_cluster_data,
3285     U08 *pu1_num_clusters,
3286     S16 i2_mv_x,
3287     S16 i2_mv_y,
3288     U08 i1_ref_idx,
3289     S32 i4_sdi,
3290     PART_ID_T e_part_id,
3291     U08 is_part_of_bi)
3292 {
3293     S32 i;
3294     S32 min_mvd_cluster_id = -1;
3295     S32 mvd, mvd_limit, mvdx, mvdy;
3296     S32 min_mvdx, min_mvdy;
3297 
3298     S32 min_mvd = MAX_32BIT_VAL;
3299     S32 num_clusters = *pu1_num_clusters;
3300 
3301     S32 mvx = i2_mv_x;
3302     S32 mvy = i2_mv_y;
3303     S32 ref_idx = i1_ref_idx;
3304     S32 sdi = i4_sdi;
3305     S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306 
3307     if(num_clusters == 0)
3308     {
3309         cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310 
3311         ps_data->num_mvs = 1;
3312         ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313         ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314         ps_data->ref_id = ref_idx;
3315         ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316         ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317         ps_data->as_mv[0].mvx = mvx;
3318         ps_data->as_mv[0].mvy = mvy;
3319 
3320         /***************************/
3321         ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322         ps_data->as_mv[0].sdi = sdi;
3323         if(is_part_of_bi)
3324         {
3325             ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326         }
3327         else
3328         {
3329             ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330         }
3331         /**************************/
3332         ps_data->max_x = mvx;
3333         ps_data->min_x = mvx;
3334         ps_data->max_y = mvy;
3335         ps_data->min_y = mvy;
3336 
3337         ps_data->is_valid_cluster = 1;
3338 
3339         *pu1_num_clusters = 1;
3340     }
3341     else
3342     {
3343         S32 num_clusters_evaluated = 0;
3344 
3345         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346         {
3347             cluster_data_t *ps_data = &ps_cluster_data[i];
3348 
3349             centroid_t *ps_centroid;
3350 
3351             S32 mvx_q8;
3352             S32 mvy_q8;
3353             S32 posx_q8;
3354             S32 posy_q8;
3355             S32 mvdx_q8;
3356             S32 mvdy_q8;
3357 
3358             /* In anticipation of a possible merging of clusters */
3359             if(ps_data->is_valid_cluster == 0)
3360             {
3361                 new_cluster_idx = i;
3362                 continue;
3363             }
3364 
3365             if(ref_idx != ps_data->ref_id)
3366             {
3367                 num_clusters_evaluated++;
3368                 continue;
3369             }
3370 
3371             ps_centroid = &ps_data->s_centroid;
3372             posx_q8 = ps_centroid->i4_pos_x_q8;
3373             posy_q8 = ps_centroid->i4_pos_y_q8;
3374 
3375             mvx_q8 = mvx << 8;
3376             mvy_q8 = mvy << 8;
3377 
3378             mvdx_q8 = posx_q8 - mvx_q8;
3379             mvdy_q8 = posy_q8 - mvy_q8;
3380 
3381             mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382             mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383 
3384             mvd = ABS(mvdx) + ABS(mvdy);
3385 
3386             if(mvd < min_mvd)
3387             {
3388                 min_mvd = mvd;
3389                 min_mvdx = mvdx;
3390                 min_mvdy = mvdy;
3391                 min_mvd_cluster_id = i;
3392             }
3393 
3394             num_clusters_evaluated++;
3395         }
3396 
3397         mvd_limit = (min_mvd_cluster_id == -1)
3398                         ? ps_cluster_data[0].max_dist_from_centroid
3399                         : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400 
3401         /* This condition implies that min_mvd has been updated */
3402         if(min_mvd <= mvd_limit)
3403         {
3404             hme_update_cluster_attributes(
3405                 &ps_cluster_data[min_mvd_cluster_id],
3406                 mvx,
3407                 mvy,
3408                 min_mvdx,
3409                 min_mvdy,
3410                 ref_idx,
3411                 sdi,
3412                 is_part_of_bi,
3413                 e_part_id);
3414 
3415             if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416             {
3417                 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418             }
3419         }
3420         else
3421         {
3422             cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423                                           ? &ps_cluster_data[num_clusters]
3424                                           : &ps_cluster_data[new_cluster_idx];
3425 
3426             ps_data->num_mvs = 1;
3427             ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428             ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429             ps_data->ref_id = ref_idx;
3430             ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431             ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432             ps_data->as_mv[0].mvx = mvx;
3433             ps_data->as_mv[0].mvy = mvy;
3434 
3435             /***************************/
3436             ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437             ps_data->as_mv[0].sdi = sdi;
3438             if(is_part_of_bi)
3439             {
3440                 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441             }
3442             else
3443             {
3444                 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445             }
3446             /**************************/
3447             ps_data->max_x = mvx;
3448             ps_data->min_x = mvx;
3449             ps_data->max_y = mvy;
3450             ps_data->min_y = mvy;
3451 
3452             ps_data->is_valid_cluster = 1;
3453 
3454             num_clusters++;
3455             *pu1_num_clusters = num_clusters;
3456         }
3457     }
3458 }
3459 
3460 /**
3461 ********************************************************************************
3462 *  @fn   void hme_update_32x32_cluster_attributes
3463 *               (
3464 *                   cluster_32x32_blk_t *ps_blk_32x32,
3465 *                   cluster_data_t *ps_cluster_data
3466 *               )
3467 *
3468 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
3469 *          the constituent 16x16 clusters
3470 *
3471 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
3472 *
3473 *  @param[in]  ps_cluster_data : structure containing 16x16 block results
3474 *
3475 *  @return None
3476 ********************************************************************************
3477 */
hme_update_32x32_cluster_attributes(cluster_32x32_blk_t * ps_blk_32x32,cluster_data_t * ps_cluster_data)3478 void hme_update_32x32_cluster_attributes(
3479     cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480 {
3481     cluster_data_t *ps_cur_cluster_32;
3482 
3483     S32 i;
3484     S32 mvd_limit;
3485 
3486     S32 num_clusters = ps_blk_32x32->num_clusters;
3487 
3488     if(0 == num_clusters)
3489     {
3490         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491 
3492         ps_blk_32x32->num_clusters++;
3493         ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494 
3495         ps_cur_cluster_32->is_valid_cluster = 1;
3496 
3497         ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498         ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499         ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500 
3501         memcpy(
3502             ps_cur_cluster_32->as_mv,
3503             ps_cluster_data->as_mv,
3504             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505 
3506         ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507 
3508         ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509 
3510         ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511         ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512         ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513         ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514 
3515         ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516     }
3517     else
3518     {
3519         centroid_t *ps_centroid;
3520 
3521         S32 cur_posx_q8, cur_posy_q8;
3522         S32 min_mvd_cluster_id = -1;
3523         S32 mvd;
3524         S32 mvdx;
3525         S32 mvdy;
3526         S32 mvdx_min;
3527         S32 mvdy_min;
3528         S32 mvdx_q8;
3529         S32 mvdy_q8;
3530 
3531         S32 num_clusters_evaluated = 0;
3532 
3533         S32 mvd_min = MAX_32BIT_VAL;
3534 
3535         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537 
3538         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539         {
3540             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541 
3542             if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543             {
3544                 num_clusters_evaluated++;
3545                 continue;
3546             }
3547             if(!ps_cluster_data->is_valid_cluster)
3548             {
3549                 continue;
3550             }
3551 
3552             num_clusters_evaluated++;
3553 
3554             ps_centroid = &ps_cur_cluster_32->s_centroid;
3555 
3556             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558 
3559             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561 
3562             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564 
3565             mvd = ABS(mvdx) + ABS(mvdy);
3566 
3567             if(mvd < mvd_min)
3568             {
3569                 mvd_min = mvd;
3570                 mvdx_min = mvdx;
3571                 mvdy_min = mvdy;
3572                 min_mvd_cluster_id = i;
3573             }
3574         }
3575 
3576         ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577 
3578         mvd_limit = (min_mvd_cluster_id == -1)
3579                         ? ps_cur_cluster_32[0].max_dist_from_centroid
3580                         : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581 
3582         if(mvd_min <= mvd_limit)
3583         {
3584             LWORD64 i8_updated_posx;
3585             LWORD64 i8_updated_posy;
3586             WORD32 minmax_updated_x = 0;
3587             WORD32 minmax_updated_y = 0;
3588 
3589             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590 
3591             ps_centroid = &ps_cur_cluster_32->s_centroid;
3592 
3593             ps_cur_cluster_32->is_valid_cluster = 1;
3594 
3595             ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598 
3599             memcpy(
3600                 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601                 ps_cluster_data->as_mv,
3602                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603 
3604             if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605             {
3606                 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607                 minmax_updated_x = 1;
3608             }
3609             else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610             {
3611                 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612                 minmax_updated_x = 2;
3613             }
3614 
3615             if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616             {
3617                 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618                 minmax_updated_y = 1;
3619             }
3620             else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621             {
3622                 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623                 minmax_updated_y = 2;
3624             }
3625 
3626             switch((minmax_updated_y << 2) + minmax_updated_x)
3627             {
3628             case 1:
3629             {
3630                 S32 mvd, mvd_q8;
3631 
3632                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3634 
3635                 if(mvd > (mvd_limit))
3636                 {
3637                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638                 }
3639                 break;
3640             }
3641             case 2:
3642             {
3643                 S32 mvd, mvd_q8;
3644 
3645                 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3647 
3648                 if(mvd > (mvd_limit))
3649                 {
3650                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651                 }
3652                 break;
3653             }
3654             case 4:
3655             {
3656                 S32 mvd, mvd_q8;
3657 
3658                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3660 
3661                 if(mvd > (mvd_limit))
3662                 {
3663                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664                 }
3665                 break;
3666             }
3667             case 5:
3668             {
3669                 S32 mvd;
3670                 S32 mvdx, mvdx_q8;
3671                 S32 mvdy, mvdy_q8;
3672 
3673                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675 
3676                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678 
3679                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680 
3681                 if(mvd > mvd_limit)
3682                 {
3683                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684                 }
3685                 break;
3686             }
3687             case 6:
3688             {
3689                 S32 mvd;
3690                 S32 mvdx, mvdx_q8;
3691                 S32 mvdy, mvdy_q8;
3692 
3693                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695 
3696                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698 
3699                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700 
3701                 if(mvd > mvd_limit)
3702                 {
3703                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704                 }
3705                 break;
3706             }
3707             case 8:
3708             {
3709                 S32 mvd, mvd_q8;
3710 
3711                 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3713 
3714                 if(mvd > (mvd_limit))
3715                 {
3716                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717                 }
3718                 break;
3719             }
3720             case 9:
3721             {
3722                 S32 mvd;
3723                 S32 mvdx, mvdx_q8;
3724                 S32 mvdy, mvdy_q8;
3725 
3726                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728 
3729                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731 
3732                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733 
3734                 if(mvd > mvd_limit)
3735                 {
3736                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737                 }
3738                 break;
3739             }
3740             case 10:
3741             {
3742                 S32 mvd;
3743                 S32 mvdx, mvdx_q8;
3744                 S32 mvdy, mvdy_q8;
3745 
3746                 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748 
3749                 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751 
3752                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753 
3754                 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755                 {
3756                     ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757                 }
3758                 break;
3759             }
3760             default:
3761             {
3762                 break;
3763             }
3764             }
3765 
3766             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770 
3771             ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772 
3773             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775         }
3776         else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777         {
3778             ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779 
3780             ps_blk_32x32->num_clusters++;
3781             ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782 
3783             ps_cur_cluster_32->is_valid_cluster = 1;
3784 
3785             ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786             ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787             ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788 
3789             memcpy(
3790                 ps_cur_cluster_32->as_mv,
3791                 ps_cluster_data->as_mv,
3792                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793 
3794             ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795 
3796             ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797 
3798             ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799             ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800             ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801             ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802 
3803             ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804         }
3805     }
3806 }
3807 
3808 /**
3809 ********************************************************************************
3810 *  @fn   void hme_update_64x64_cluster_attributes
3811 *               (
3812 *                   cluster_64x64_blk_t *ps_blk_32x32,
3813 *                   cluster_data_t *ps_cluster_data
3814 *               )
3815 *
3816 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
3817 *          the constituent 16x16 clusters
3818 *
3819 *  @param[out]  ps_blk_64x64: structure containing 64x64 block results
3820 *
3821 *  @param[in]  ps_cluster_data : structure containing 32x32 block results
3822 *
3823 *  @return None
3824 ********************************************************************************
3825 */
hme_update_64x64_cluster_attributes(cluster_64x64_blk_t * ps_blk_64x64,cluster_data_t * ps_cluster_data)3826 void hme_update_64x64_cluster_attributes(
3827     cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828 {
3829     cluster_data_t *ps_cur_cluster_64;
3830 
3831     S32 i;
3832     S32 mvd_limit;
3833 
3834     S32 num_clusters = ps_blk_64x64->num_clusters;
3835 
3836     if(0 == num_clusters)
3837     {
3838         ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839 
3840         ps_blk_64x64->num_clusters++;
3841         ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842 
3843         ps_cur_cluster_64->is_valid_cluster = 1;
3844 
3845         ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846         ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847         ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848 
3849         memcpy(
3850             ps_cur_cluster_64->as_mv,
3851             ps_cluster_data->as_mv,
3852             sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853 
3854         ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855 
3856         ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857 
3858         ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859         ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860         ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861         ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862 
3863         ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864     }
3865     else
3866     {
3867         centroid_t *ps_centroid;
3868 
3869         S32 cur_posx_q8, cur_posy_q8;
3870         S32 min_mvd_cluster_id = -1;
3871         S32 mvd;
3872         S32 mvdx;
3873         S32 mvdy;
3874         S32 mvdx_min;
3875         S32 mvdy_min;
3876         S32 mvdx_q8;
3877         S32 mvdy_q8;
3878 
3879         S32 num_clusters_evaluated = 0;
3880 
3881         S32 mvd_min = MAX_32BIT_VAL;
3882 
3883         S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884         S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885 
3886         for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887         {
3888             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889 
3890             if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891             {
3892                 num_clusters_evaluated++;
3893                 continue;
3894             }
3895 
3896             if(!ps_cur_cluster_64->is_valid_cluster)
3897             {
3898                 continue;
3899             }
3900 
3901             num_clusters_evaluated++;
3902 
3903             ps_centroid = &ps_cur_cluster_64->s_centroid;
3904 
3905             cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906             cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907 
3908             mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909             mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910 
3911             mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912             mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913 
3914             mvd = ABS(mvdx) + ABS(mvdy);
3915 
3916             if(mvd < mvd_min)
3917             {
3918                 mvd_min = mvd;
3919                 mvdx_min = mvdx;
3920                 mvdy_min = mvdy;
3921                 min_mvd_cluster_id = i;
3922             }
3923         }
3924 
3925         ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926 
3927         mvd_limit = (min_mvd_cluster_id == -1)
3928                         ? ps_cur_cluster_64[0].max_dist_from_centroid
3929                         : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930 
3931         if(mvd_min <= mvd_limit)
3932         {
3933             LWORD64 i8_updated_posx;
3934             LWORD64 i8_updated_posy;
3935             WORD32 minmax_updated_x = 0;
3936             WORD32 minmax_updated_y = 0;
3937 
3938             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939 
3940             ps_centroid = &ps_cur_cluster_64->s_centroid;
3941 
3942             ps_cur_cluster_64->is_valid_cluster = 1;
3943 
3944             ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947 
3948             memcpy(
3949                 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950                 ps_cluster_data->as_mv,
3951                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952 
3953             if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954             {
3955                 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956                 minmax_updated_x = 1;
3957             }
3958             else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959             {
3960                 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961                 minmax_updated_x = 2;
3962             }
3963 
3964             if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965             {
3966                 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967                 minmax_updated_y = 1;
3968             }
3969             else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970             {
3971                 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972                 minmax_updated_y = 2;
3973             }
3974 
3975             switch((minmax_updated_y << 2) + minmax_updated_x)
3976             {
3977             case 1:
3978             {
3979                 S32 mvd, mvd_q8;
3980 
3981                 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3983 
3984                 if(mvd > (mvd_limit))
3985                 {
3986                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987                 }
3988                 break;
3989             }
3990             case 2:
3991             {
3992                 S32 mvd, mvd_q8;
3993 
3994                 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995                 mvd = (mvd_q8 + (1 << 7)) >> 8;
3996 
3997                 if(mvd > (mvd_limit))
3998                 {
3999                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000                 }
4001                 break;
4002             }
4003             case 4:
4004             {
4005                 S32 mvd, mvd_q8;
4006 
4007                 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4009 
4010                 if(mvd > (mvd_limit))
4011                 {
4012                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013                 }
4014                 break;
4015             }
4016             case 5:
4017             {
4018                 S32 mvd;
4019                 S32 mvdx, mvdx_q8;
4020                 S32 mvdy, mvdy_q8;
4021 
4022                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024 
4025                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027 
4028                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029 
4030                 if(mvd > mvd_limit)
4031                 {
4032                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033                 }
4034                 break;
4035             }
4036             case 6:
4037             {
4038                 S32 mvd;
4039                 S32 mvdx, mvdx_q8;
4040                 S32 mvdy, mvdy_q8;
4041 
4042                 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044 
4045                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047 
4048                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049 
4050                 if(mvd > mvd_limit)
4051                 {
4052                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053                 }
4054                 break;
4055             }
4056             case 8:
4057             {
4058                 S32 mvd, mvd_q8;
4059 
4060                 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4062 
4063                 if(mvd > (mvd_limit))
4064                 {
4065                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066                 }
4067                 break;
4068             }
4069             case 9:
4070             {
4071                 S32 mvd;
4072                 S32 mvdx, mvdx_q8;
4073                 S32 mvdy, mvdy_q8;
4074 
4075                 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077 
4078                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080 
4081                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082 
4083                 if(mvd > mvd_limit)
4084                 {
4085                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086                 }
4087                 break;
4088             }
4089             case 10:
4090             {
4091                 S32 mvd;
4092                 S32 mvdx, mvdx_q8;
4093                 S32 mvdy, mvdy_q8;
4094 
4095                 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097 
4098                 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100 
4101                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102 
4103                 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104                 {
4105                     ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106                 }
4107                 break;
4108             }
4109             default:
4110             {
4111                 break;
4112             }
4113             }
4114 
4115             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116                               ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118                               ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119 
4120             ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121 
4122             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124         }
4125         else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126         {
4127             ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128 
4129             ps_blk_64x64->num_clusters++;
4130             ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131 
4132             ps_cur_cluster_64->is_valid_cluster = 1;
4133 
4134             ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135             ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136             ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137 
4138             memcpy(
4139                 &ps_cur_cluster_64->as_mv[0],
4140                 ps_cluster_data->as_mv,
4141                 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142 
4143             ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144 
4145             ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146 
4147             ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148             ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149             ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150             ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151 
4152             ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153         }
4154     }
4155 }
4156 
4157 /**
4158 ********************************************************************************
4159 *  @fn   void hme_update_32x32_clusters
4160 *               (
4161 *                   cluster_32x32_blk_t *ps_blk_32x32,
4162 *                   cluster_16x16_blk_t *ps_blk_16x16
4163 *               )
4164 *
4165 *  @brief  Updates attributes for 32x32 clusters based on the attributes of
4166 *          the constituent 16x16 clusters
4167 *
4168 *  @param[out]  ps_blk_32x32: structure containing 32x32 block results
4169 *
4170 *  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
4171 *
4172 *  @return None
4173 ********************************************************************************
4174 */
4175 static __inline void
hme_update_32x32_clusters(cluster_32x32_blk_t * ps_blk_32x32,cluster_16x16_blk_t * ps_blk_16x16)4176     hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177 {
4178     cluster_16x16_blk_t *ps_blk_16x16_cur;
4179     cluster_data_t *ps_cur_cluster;
4180 
4181     S32 i, j;
4182     S32 num_clusters_cur_16x16_blk;
4183 
4184     for(i = 0; i < 4; i++)
4185     {
4186         S32 num_clusters_evaluated = 0;
4187 
4188         ps_blk_16x16_cur = &ps_blk_16x16[i];
4189 
4190         num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191 
4192         ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193 
4194         ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195 
4196         for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197         {
4198             ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199 
4200             if(!ps_cur_cluster->is_valid_cluster)
4201             {
4202                 continue;
4203             }
4204 
4205             hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206 
4207             num_clusters_evaluated++;
4208         }
4209     }
4210 }
4211 
4212 /**
4213 ********************************************************************************
4214 *  @fn   void hme_update_64x64_clusters
4215 *               (
4216 *                   cluster_64x64_blk_t *ps_blk_64x64,
4217 *                   cluster_32x32_blk_t *ps_blk_32x32
4218 *               )
4219 *
4220 *  @brief  Updates attributes for 64x64 clusters based on the attributes of
4221 *          the constituent 16x16 clusters
4222 *
4223 *  @param[out]  ps_blk_64x64: structure containing 32x32 block results
4224 *
4225 *  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
4226 *
4227 *  @return None
4228 ********************************************************************************
4229 */
4230 static __inline void
hme_update_64x64_clusters(cluster_64x64_blk_t * ps_blk_64x64,cluster_32x32_blk_t * ps_blk_32x32)4231     hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232 {
4233     cluster_32x32_blk_t *ps_blk_32x32_cur;
4234     cluster_data_t *ps_cur_cluster;
4235 
4236     S32 i, j;
4237     S32 num_clusters_cur_32x32_blk;
4238 
4239     for(i = 0; i < 4; i++)
4240     {
4241         S32 num_clusters_evaluated = 0;
4242 
4243         ps_blk_32x32_cur = &ps_blk_32x32[i];
4244 
4245         num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246 
4247         ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248         ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249 
4250         for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251         {
4252             ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253 
4254             if(!ps_cur_cluster->is_valid_cluster)
4255             {
4256                 continue;
4257             }
4258 
4259             hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260 
4261             num_clusters_evaluated++;
4262         }
4263     }
4264 }
4265 
4266 /**
4267 ********************************************************************************
4268 *  @fn   void hme_try_merge_clusters_blksize_gt_16
4269 *               (
4270 *                   cluster_data_t *ps_cluster_data,
4271 *                   S32 num_clusters
4272 *               )
4273 *
4274 *  @brief  Merging clusters from blocks of size 32x32 and greater
4275 *
4276 *  @param[in/out]  ps_cluster_data: structure containing cluster data
4277 *
4278 *  @param[in/out]  pi4_num_clusters : pointer to number of clusters
4279 *
4280 *  @return Success or failure
4281 ********************************************************************************
4282 */
hme_try_merge_clusters_blksize_gt_16(cluster_data_t * ps_cluster_data,S32 num_clusters)4283 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284 {
4285     centroid_t *ps_cur_centroid;
4286     cluster_data_t *ps_cur_cluster;
4287 
4288     S32 i, mvd;
4289     S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290 
4291     centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292 
4293     S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294     S32 ref_id = ps_cluster_data->ref_id;
4295 
4296     S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297     S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298     S32 num_clusters_evaluated = 1;
4299     S32 ret_value = 0;
4300 
4301     if(1 >= num_clusters)
4302     {
4303         return ret_value;
4304     }
4305 
4306     for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307     {
4308         S32 cur_posx_q8;
4309         S32 cur_posy_q8;
4310 
4311         ps_cur_cluster = &ps_cluster_data[i];
4312 
4313         if((ref_id != ps_cur_cluster->ref_id))
4314         {
4315             num_clusters_evaluated++;
4316             continue;
4317         }
4318 
4319         if((!ps_cur_cluster->is_valid_cluster))
4320         {
4321             continue;
4322         }
4323 
4324         num_clusters_evaluated++;
4325 
4326         ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327 
4328         cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329         cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330 
4331         mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332         mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333 
4334         mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335         mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336 
4337         mvd = ABS(mvdx) + ABS(mvdy);
4338 
4339         if(mvd <= (mvd_limit >> 1))
4340         {
4341             LWORD64 i8_updated_posx;
4342             LWORD64 i8_updated_posy;
4343             WORD32 minmax_updated_x = 0;
4344             WORD32 minmax_updated_y = 0;
4345 
4346             ps_cur_cluster->is_valid_cluster = 0;
4347 
4348             ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349             ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350             ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351 
4352             memcpy(
4353                 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354                 ps_cur_cluster->as_mv,
4355                 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356 
4357             if(mvdx > 0)
4358             {
4359                 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360                 minmax_updated_x = 1;
4361             }
4362             else
4363             {
4364                 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365                 minmax_updated_x = 2;
4366             }
4367 
4368             if(mvdy > 0)
4369             {
4370                 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371                 minmax_updated_y = 1;
4372             }
4373             else
4374             {
4375                 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376                 minmax_updated_y = 2;
4377             }
4378 
4379             switch((minmax_updated_y << 2) + minmax_updated_x)
4380             {
4381             case 1:
4382             {
4383                 S32 mvd, mvd_q8;
4384 
4385                 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4387 
4388                 if(mvd > (mvd_limit))
4389                 {
4390                     ps_cluster_data->max_dist_from_centroid = mvd;
4391                 }
4392                 break;
4393             }
4394             case 2:
4395             {
4396                 S32 mvd, mvd_q8;
4397 
4398                 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4400 
4401                 if(mvd > (mvd_limit))
4402                 {
4403                     ps_cluster_data->max_dist_from_centroid = mvd;
4404                 }
4405                 break;
4406             }
4407             case 4:
4408             {
4409                 S32 mvd, mvd_q8;
4410 
4411                 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4413 
4414                 if(mvd > (mvd_limit))
4415                 {
4416                     ps_cluster_data->max_dist_from_centroid = mvd;
4417                 }
4418                 break;
4419             }
4420             case 5:
4421             {
4422                 S32 mvd;
4423                 S32 mvdx, mvdx_q8;
4424                 S32 mvdy, mvdy_q8;
4425 
4426                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428 
4429                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431 
4432                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433 
4434                 if(mvd > mvd_limit)
4435                 {
4436                     ps_cluster_data->max_dist_from_centroid = mvd;
4437                 }
4438                 break;
4439             }
4440             case 6:
4441             {
4442                 S32 mvd;
4443                 S32 mvdx, mvdx_q8;
4444                 S32 mvdy, mvdy_q8;
4445 
4446                 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448 
4449                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451 
4452                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453 
4454                 if(mvd > mvd_limit)
4455                 {
4456                     ps_cluster_data->max_dist_from_centroid = mvd;
4457                 }
4458                 break;
4459             }
4460             case 8:
4461             {
4462                 S32 mvd, mvd_q8;
4463 
4464                 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465                 mvd = (mvd_q8 + (1 << 7)) >> 8;
4466 
4467                 if(mvd > (mvd_limit))
4468                 {
4469                     ps_cluster_data->max_dist_from_centroid = mvd;
4470                 }
4471                 break;
4472             }
4473             case 9:
4474             {
4475                 S32 mvd;
4476                 S32 mvdx, mvdx_q8;
4477                 S32 mvdy, mvdy_q8;
4478 
4479                 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481 
4482                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484 
4485                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486 
4487                 if(mvd > mvd_limit)
4488                 {
4489                     ps_cluster_data->max_dist_from_centroid = mvd;
4490                 }
4491                 break;
4492             }
4493             case 10:
4494             {
4495                 S32 mvd;
4496                 S32 mvdx, mvdx_q8;
4497                 S32 mvdy, mvdy_q8;
4498 
4499                 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500                 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501 
4502                 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503                 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504 
4505                 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506 
4507                 if(mvd > ps_cluster_data->max_dist_from_centroid)
4508                 {
4509                     ps_cluster_data->max_dist_from_centroid = mvd;
4510                 }
4511                 break;
4512             }
4513             default:
4514             {
4515                 break;
4516             }
4517             }
4518 
4519             i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520                               ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521             i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522                               ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523 
4524             ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525 
4526             ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527             ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528 
4529             if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530             {
4531                 num_clusters--;
4532                 num_clusters_evaluated = 1;
4533                 i = 0;
4534                 ret_value++;
4535             }
4536             else
4537             {
4538                 ret_value++;
4539 
4540                 return ret_value;
4541             }
4542         }
4543     }
4544 
4545     if(ret_value)
4546     {
4547         for(i = 1; i < (num_clusters + ret_value); i++)
4548         {
4549             if(ps_cluster_data[i].is_valid_cluster)
4550             {
4551                 break;
4552             }
4553         }
4554         if(i == (num_clusters + ret_value))
4555         {
4556             return ret_value;
4557         }
4558     }
4559     else
4560     {
4561         i = 1;
4562     }
4563 
4564     return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565            ret_value;
4566 }
4567 
4568 /**
4569 ********************************************************************************
4570 *  @fn   S32 hme_determine_validity_32x32
4571 *               (
4572 *                   ctb_cluster_info_t *ps_ctb_cluster_info
4573 *               )
4574 *
4575 *  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
4576 *           while recursing through the CU tree or not
4577 *
4578 *  @param[in]  ps_cluster_data: structure containing cluster data
4579 *
4580 *  @return Success or failure
4581 ********************************************************************************
4582 */
hme_determine_validity_32x32(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4583 __inline S32 hme_determine_validity_32x32(
4584     ctb_cluster_info_t *ps_ctb_cluster_info,
4585     S32 *pi4_children_nodes_required,
4586     S32 blk_validity_wrt_pic_bndry,
4587     S32 parent_blk_validity_wrt_pic_bndry)
4588 {
4589     cluster_data_t *ps_data;
4590 
4591     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593 
4594     S32 num_clusters = ps_32x32_blk->num_clusters;
4595     S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596 
4597     if(!blk_validity_wrt_pic_bndry)
4598     {
4599         *pi4_children_nodes_required = 1;
4600         return 0;
4601     }
4602 
4603     if(!parent_blk_validity_wrt_pic_bndry)
4604     {
4605         *pi4_children_nodes_required = 1;
4606         return 1;
4607     }
4608 
4609     if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610     {
4611         *pi4_children_nodes_required = 1;
4612         return 0;
4613     }
4614 
4615     if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616     {
4617         *pi4_children_nodes_required = 1;
4618 
4619         return 1;
4620     }
4621     else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622     {
4623         *pi4_children_nodes_required = 0;
4624 
4625         return 1;
4626     }
4627     else
4628     {
4629         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630         {
4631             *pi4_children_nodes_required = 0;
4632             return 1;
4633         }
4634         else
4635         {
4636             S32 i;
4637 
4638             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639             S32 min_area = MAX_32BIT_VAL;
4640             S32 num_clusters_evaluated = 0;
4641 
4642             for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643             {
4644                 ps_data = &ps_32x32_blk->as_cluster_data[i];
4645 
4646                 if(!ps_data->is_valid_cluster)
4647                 {
4648                     continue;
4649                 }
4650 
4651                 num_clusters_evaluated++;
4652 
4653                 if(ps_data->area_in_pixels < min_area)
4654                 {
4655                     min_area = ps_data->area_in_pixels;
4656                 }
4657             }
4658 
4659             if((min_area << 4) < area_of_parent)
4660             {
4661                 *pi4_children_nodes_required = 1;
4662                 return 0;
4663             }
4664             else
4665             {
4666                 *pi4_children_nodes_required = 0;
4667                 return 1;
4668             }
4669         }
4670     }
4671 }
4672 
4673 /**
4674 ********************************************************************************
4675 *  @fn   S32 hme_determine_validity_16x16
4676 *               (
4677 *                   ctb_cluster_info_t *ps_ctb_cluster_info
4678 *               )
4679 *
4680 *  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
4681 *           while recursing through the CU tree or not
4682 *
4683 *  @param[in]  ps_cluster_data: structure containing cluster data
4684 *
4685 *  @return Success or failure
4686 ********************************************************************************
4687 */
hme_determine_validity_16x16(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4688 __inline S32 hme_determine_validity_16x16(
4689     ctb_cluster_info_t *ps_ctb_cluster_info,
4690     S32 *pi4_children_nodes_required,
4691     S32 blk_validity_wrt_pic_bndry,
4692     S32 parent_blk_validity_wrt_pic_bndry)
4693 {
4694     cluster_data_t *ps_data;
4695 
4696     cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697     cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698     cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699 
4700     S32 num_clusters = ps_16x16_blk->num_clusters;
4701     S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702     S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703 
4704     if(!blk_validity_wrt_pic_bndry)
4705     {
4706         *pi4_children_nodes_required = 1;
4707         return 0;
4708     }
4709 
4710     if(!parent_blk_validity_wrt_pic_bndry)
4711     {
4712         *pi4_children_nodes_required = 1;
4713         return 1;
4714     }
4715 
4716     if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717        (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718     {
4719         *pi4_children_nodes_required = 1;
4720         return 1;
4721     }
4722 
4723     /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724     /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725     if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726     {
4727         if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728         {
4729             *pi4_children_nodes_required = 0;
4730 
4731             return 1;
4732         }
4733         else
4734         {
4735             *pi4_children_nodes_required = 1;
4736 
4737             return 0;
4738         }
4739     }
4740     /* Implies nc_64 >= 3 */
4741     else
4742     {
4743         if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744         {
4745             *pi4_children_nodes_required = 0;
4746             return 1;
4747         }
4748         else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749         {
4750             *pi4_children_nodes_required = 1;
4751             return 0;
4752         }
4753         else
4754         {
4755             S32 i;
4756 
4757             S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758             S32 min_area = MAX_32BIT_VAL;
4759             S32 num_clusters_evaluated = 0;
4760 
4761             for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762             {
4763                 ps_data = &ps_16x16_blk->as_cluster_data[i];
4764 
4765                 if(!ps_data->is_valid_cluster)
4766                 {
4767                     continue;
4768                 }
4769 
4770                 num_clusters_evaluated++;
4771 
4772                 if(ps_data->area_in_pixels < min_area)
4773                 {
4774                     min_area = ps_data->area_in_pixels;
4775                 }
4776             }
4777 
4778             if((min_area << 4) < area_of_parent)
4779             {
4780                 *pi4_children_nodes_required = 1;
4781                 return 0;
4782             }
4783             else
4784             {
4785                 *pi4_children_nodes_required = 0;
4786                 return 1;
4787             }
4788         }
4789     }
4790 }
4791 
4792 /**
4793 ********************************************************************************
4794 *  @fn   void hme_build_cu_tree
4795 *               (
4796 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
4797 *                   cur_ctb_cu_tree_t *ps_cu_tree,
4798 *                   S32 tree_depth,
4799 *                   CU_POS_T e_grand_parent_blk_pos,
4800 *                   CU_POS_T e_parent_blk_pos,
4801 *                   CU_POS_T e_cur_blk_pos
4802 *               )
4803 *
4804 *  @brief  Recursive function for CU tree initialisation
4805 *
4806 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4807 *                                   corresponding to all block sizes from 64x64
4808 *                                   to 16x16
4809 *
4810 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4811 *                                applicable
4812 *
4813 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
4814 *
4815 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4816 *
4817 *  @param[in]  tree_depth : specifies depth of the CU tree
4818 *
4819 *  @return Nothing
4820 ********************************************************************************
4821 */
hme_build_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4822 void hme_build_cu_tree(
4823     ctb_cluster_info_t *ps_ctb_cluster_info,
4824     cur_ctb_cu_tree_t *ps_cu_tree,
4825     S32 tree_depth,
4826     CU_POS_T e_grandparent_blk_pos,
4827     CU_POS_T e_parent_blk_pos,
4828     CU_POS_T e_cur_blk_pos)
4829 {
4830     ihevce_cu_tree_init(
4831         ps_cu_tree,
4832         ps_ctb_cluster_info->ps_cu_tree_root,
4833         &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834         tree_depth,
4835         e_grandparent_blk_pos,
4836         e_parent_blk_pos,
4837         e_cur_blk_pos);
4838 }
4839 
4840 /**
4841 ********************************************************************************
4842 *  @fn   S32 hme_sdi_based_cluster_spread_eligibility
4843 *               (
4844 *                   cluster_32x32_blk_t *ps_blk_32x32
4845 *               )
4846 *
4847 *  @brief  Determines whether the spread of high SDI MV's around each cluster
4848 *          center is below a pre-determined threshold
4849 *
4850 *  @param[in]  ps_blk_32x32: structure containing pointers to clusters
4851 *                                   corresponding to all block sizes from 64x64
4852 *                                   to 16x16
4853 *
4854 *  @return 1 if the spread is constrained, else 0
4855 ********************************************************************************
4856 */
4857 __inline S32
hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t * ps_blk_32x32,S32 sdi_threshold)4858     hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859 {
4860     S32 cumulative_mv_distance;
4861     S32 i, j;
4862     S32 num_high_sdi_mvs;
4863 
4864     S32 num_clusters = ps_blk_32x32->num_clusters;
4865 
4866     for(i = 0; i < num_clusters; i++)
4867     {
4868         cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869 
4870         num_high_sdi_mvs = 0;
4871         cumulative_mv_distance = 0;
4872 
4873         for(j = 0; j < ps_data->num_mvs; j++)
4874         {
4875             mv_data_t *ps_mv = &ps_data->as_mv[j];
4876 
4877             if(ps_mv->sdi >= sdi_threshold)
4878             {
4879                 num_high_sdi_mvs++;
4880 
4881                 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882             }
4883         }
4884 
4885         if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886         {
4887             return 0;
4888         }
4889     }
4890 
4891     return 1;
4892 }
4893 
4894 /**
4895 ********************************************************************************
4896 *  @fn   S32 hme_populate_cu_tree
4897 *               (
4898 *                   ctb_cluster_info_t *ps_ctb_cluster_info,
4899 *                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4900 *                   cur_ctb_cu_tree_t *ps_cu_tree,
4901 *                   S32 tree_depth,
4902 *                   CU_POS_T e_parent_blk_pos,
4903 *                   CU_POS_T e_cur_blk_pos
4904 *               )
4905 *
4906 *  @brief  Recursive function for CU tree population based on output of
4907 *          clustering algorithm
4908 *
4909 *  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4910 *                                   corresponding to all block sizes from 64x64
4911 *                                   to 16x16
4912 *
4913 *  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4914 applicable
4915 *
4916 *  @param[in]  e_cur_blk_pos: position of current block wrt parent
4917 *
4918 *  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
4919 *
4920 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4921 *
4922 *  @param[in]  tree_depth : specifies depth of the CU tree
4923 *
4924 *  @param[in]  ipe_decision_precedence : specifies whether precedence should
4925 *               be given to decisions made either by IPE(1) or clustering algos.
4926 *
4927 *  @return 1 if re-evaluation of parent node's validity is not required,
4928 else 0
4929 ********************************************************************************
4930 */
hme_populate_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,ME_QUALITY_PRESETS_T e_quality_preset,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4931 void hme_populate_cu_tree(
4932     ctb_cluster_info_t *ps_ctb_cluster_info,
4933     cur_ctb_cu_tree_t *ps_cu_tree,
4934     S32 tree_depth,
4935     ME_QUALITY_PRESETS_T e_quality_preset,
4936     CU_POS_T e_grandparent_blk_pos,
4937     CU_POS_T e_parent_blk_pos,
4938     CU_POS_T e_cur_blk_pos)
4939 {
4940     S32 area_of_cur_blk;
4941     S32 area_limit_for_me_decision_precedence;
4942     S32 children_nodes_required;
4943     S32 intra_mv_area;
4944     S32 intra_eval_enable;
4945     S32 inter_eval_enable;
4946     S32 ipe_decision_precedence;
4947     S32 node_validity;
4948     S32 num_clusters;
4949 
4950     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951 
4952     if(NULL == ps_cu_tree)
4953     {
4954         return;
4955     }
4956 
4957     switch(tree_depth)
4958     {
4959     case 0:
4960     {
4961         /* 64x64 block */
4962         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963 
4964         cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965 
4966         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968         children_nodes_required = 0;
4969         intra_mv_area = ps_blk_64x64->intra_mv_area;
4970 
4971         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972 
4973         intra_eval_enable = ipe_decision_precedence;
4974         inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975 
4976 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977         if(e_quality_preset >= ME_HIGH_QUALITY)
4978         {
4979             inter_eval_enable = 1;
4980             node_validity = (blk_32x32_mask == 0xf);
4981 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983 #endif
4984             break;
4985         }
4986 #endif
4987 
4988 #if ENABLE_4CTB_EVALUATION
4989         node_validity = (blk_32x32_mask == 0xf);
4990 
4991         break;
4992 #else
4993         {
4994             S32 i;
4995 
4996             num_clusters = ps_blk_64x64->num_clusters;
4997 
4998             node_validity = (ipe_decision_precedence)
4999                                 ? (!ps_cur_ipe_ctb->u1_split_flag)
5000                                 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001 
5002             for(i = 0; i < MAX_NUM_REF; i++)
5003             {
5004                 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005                                                   MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006             }
5007 
5008             node_validity = node_validity && (blk_32x32_mask == 0xf);
5009         }
5010         break;
5011 #endif
5012     }
5013     case 1:
5014     {
5015         /* 32x32 block */
5016         S32 is_percent_intra_area_gt_threshold;
5017 
5018         cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019 
5020         S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021 
5022 #if !ENABLE_4CTB_EVALUATION
5023         S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024         S32 best_intra_cost =
5025             ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026               ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027                   4) < 0)
5028                 ? MAX_32BIT_VAL
5029                 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030                    ps_ctb_cluster_info->i4_frame_qstep *
5031                        ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032         S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033         S32 cost_differential = (best_inter_cost - best_cost);
5034 #endif
5035 
5036         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038         intra_mv_area = ps_blk_32x32->intra_mv_area;
5039         is_percent_intra_area_gt_threshold =
5040             (intra_mv_area > area_limit_for_me_decision_precedence);
5041         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042 
5043         intra_eval_enable = ipe_decision_precedence;
5044         inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045         children_nodes_required = 1;
5046 
5047 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048         if(e_quality_preset >= ME_HIGH_QUALITY)
5049         {
5050             inter_eval_enable = 1;
5051             node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053             ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054 #endif
5055             break;
5056         }
5057 #endif
5058 
5059 #if ENABLE_4CTB_EVALUATION
5060         node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061 
5062         break;
5063 #else
5064         {
5065             S32 i;
5066             num_clusters = ps_blk_32x32->num_clusters;
5067 
5068             if(ipe_decision_precedence)
5069             {
5070                 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071                 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072             }
5073             else
5074             {
5075                 node_validity =
5076                     ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077                     (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078                     (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079 
5080                 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081                 {
5082                     node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083                                                       MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084                 }
5085 
5086                 if(node_validity)
5087                 {
5088                     node_validity = node_validity &&
5089                                     hme_sdi_based_cluster_spread_eligibility(
5090                                         ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091                 }
5092             }
5093         }
5094 
5095         break;
5096 #endif
5097     }
5098     case 2:
5099     {
5100         cluster_16x16_blk_t *ps_blk_16x16 =
5101             &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102 
5103         S32 blk_8x8_mask =
5104             ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105 
5106         area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107         area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108         children_nodes_required = 1;
5109         intra_mv_area = ps_blk_16x16->intra_mv_area;
5110         ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111         num_clusters = ps_blk_16x16->num_clusters;
5112 
5113         intra_eval_enable = ipe_decision_precedence;
5114         inter_eval_enable = 1;
5115 
5116 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117         if(e_quality_preset >= ME_HIGH_QUALITY)
5118         {
5119             node_validity =
5120                 !ps_ctb_cluster_info
5121                      ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122             children_nodes_required = !node_validity;
5123             break;
5124         }
5125 #endif
5126 
5127 #if ENABLE_4CTB_EVALUATION
5128         node_validity = (blk_8x8_mask == 0xf);
5129 
5130 #if ENABLE_CU_TREE_CULLING
5131         {
5132             cur_ctb_cu_tree_t *ps_32x32_root = NULL;
5133 
5134             switch(e_parent_blk_pos)
5135             {
5136             case POS_TL:
5137             {
5138                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139 
5140                 break;
5141             }
5142             case POS_TR:
5143             {
5144                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145 
5146                 break;
5147             }
5148             case POS_BL:
5149             {
5150                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151 
5152                 break;
5153             }
5154             case POS_BR:
5155             {
5156                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157 
5158                 break;
5159             }
5160             default:
5161             {
5162                 DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5163                 break;
5164             }
5165             }
5166 
5167             if(ps_32x32_root->is_node_valid)
5168             {
5169                 node_validity =
5170                     node_validity &&
5171                     !ps_ctb_cluster_info
5172                          ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5173                 children_nodes_required = !node_validity;
5174             }
5175         }
5176 #endif
5177 
5178         break;
5179 #else
5180 
5181         if(ipe_decision_precedence)
5182         {
5183             S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5184                                      .as_intra16_analyse[e_cur_blk_pos]
5185                                      .b1_merge_flag);
5186             S32 valid_flag = (blk_8x8_mask == 0xf);
5187 
5188             node_validity = merge_flag_16 && valid_flag;
5189         }
5190         else
5191         {
5192             node_validity = (blk_8x8_mask == 0xf);
5193         }
5194 
5195         break;
5196 #endif
5197     }
5198     case 3:
5199     {
5200         S32 blk_8x8_mask =
5201             ps_ctb_cluster_info
5202                 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5203         S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5204                                  .as_intra16_analyse[e_parent_blk_pos]
5205                                  .b1_merge_flag);
5206         S32 merge_flag_32 =
5207             (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5208 
5209         intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5210         inter_eval_enable = 1;
5211         children_nodes_required = 0;
5212 
5213 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5214         if(e_quality_preset >= ME_HIGH_QUALITY)
5215         {
5216             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217             break;
5218         }
5219 #endif
5220 
5221 #if ENABLE_4CTB_EVALUATION
5222         node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5223 
5224         break;
5225 #else
5226         {
5227             cur_ctb_cu_tree_t *ps_32x32_root;
5228             cur_ctb_cu_tree_t *ps_16x16_root;
5229             cluster_32x32_blk_t *ps_32x32_blk;
5230 
5231             switch(e_grandparent_blk_pos)
5232             {
5233             case POS_TL:
5234             {
5235                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5236 
5237                 break;
5238             }
5239             case POS_TR:
5240             {
5241                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5242 
5243                 break;
5244             }
5245             case POS_BL:
5246             {
5247                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5248 
5249                 break;
5250             }
5251             case POS_BR:
5252             {
5253                 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5254 
5255                 break;
5256             }
5257             default:
5258             {
5259                 DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos);
5260                 break;
5261             }
5262             }
5263 
5264             switch(e_parent_blk_pos)
5265             {
5266             case POS_TL:
5267             {
5268                 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5269 
5270                 break;
5271             }
5272             case POS_TR:
5273             {
5274                 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5275 
5276                 break;
5277             }
5278             case POS_BL:
5279             {
5280                 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5281 
5282                 break;
5283             }
5284             case POS_BR:
5285             {
5286                 ps_16x16_root = ps_32x32_root->ps_child_node_br;
5287 
5288                 break;
5289             }
5290             default:
5291             {
5292                 DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5293                 break;
5294             }
5295             }
5296 
5297             ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5298 
5299             node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5300                             ((!ps_32x32_root->is_node_valid) ||
5301                              (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5302                              (!ps_16x16_root->is_node_valid));
5303 
5304             break;
5305         }
5306 #endif
5307     }
5308     }
5309 
5310     /* Fill the current cu_tree node */
5311     ps_cu_tree->is_node_valid = node_validity;
5312     ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5313     ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5314 
5315     if(children_nodes_required)
5316     {
5317         tree_depth++;
5318 
5319         hme_populate_cu_tree(
5320             ps_ctb_cluster_info,
5321             ps_cu_tree->ps_child_node_tl,
5322             tree_depth,
5323             e_quality_preset,
5324             e_parent_blk_pos,
5325             e_cur_blk_pos,
5326             POS_TL);
5327 
5328         hme_populate_cu_tree(
5329             ps_ctb_cluster_info,
5330             ps_cu_tree->ps_child_node_tr,
5331             tree_depth,
5332             e_quality_preset,
5333             e_parent_blk_pos,
5334             e_cur_blk_pos,
5335             POS_TR);
5336 
5337         hme_populate_cu_tree(
5338             ps_ctb_cluster_info,
5339             ps_cu_tree->ps_child_node_bl,
5340             tree_depth,
5341             e_quality_preset,
5342             e_parent_blk_pos,
5343             e_cur_blk_pos,
5344             POS_BL);
5345 
5346         hme_populate_cu_tree(
5347             ps_ctb_cluster_info,
5348             ps_cu_tree->ps_child_node_br,
5349             tree_depth,
5350             e_quality_preset,
5351             e_parent_blk_pos,
5352             e_cur_blk_pos,
5353             POS_BR);
5354     }
5355 }
5356 
5357 /**
5358 ********************************************************************************
5359 *  @fn   void hme_analyse_mv_clustering
5360 *               (
5361 *                   search_results_t *ps_search_results,
5362 *                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5363 *                   cur_ctb_cu_tree_t *ps_cu_tree
5364 *               )
5365 *
5366 *  @brief  Implementation for the clustering algorithm
5367 *
5368 *  @param[in]  ps_search_results: structure containing 16x16 block results
5369 *
5370 *  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
5371 *
5372 *  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
5373 *
5374 *  @return None
5375 ********************************************************************************
5376 */
hme_analyse_mv_clustering(search_results_t * ps_search_results,inter_cu_results_t * ps_16x16_cu_results,inter_cu_results_t * ps_8x8_cu_results,ctb_cluster_info_t * ps_ctb_cluster_info,S08 * pi1_future_list,S08 * pi1_past_list,S32 bidir_enabled,ME_QUALITY_PRESETS_T e_quality_preset)5377 void hme_analyse_mv_clustering(
5378     search_results_t *ps_search_results,
5379     inter_cu_results_t *ps_16x16_cu_results,
5380     inter_cu_results_t *ps_8x8_cu_results,
5381     ctb_cluster_info_t *ps_ctb_cluster_info,
5382     S08 *pi1_future_list,
5383     S08 *pi1_past_list,
5384     S32 bidir_enabled,
5385     ME_QUALITY_PRESETS_T e_quality_preset)
5386 {
5387     cluster_16x16_blk_t *ps_blk_16x16;
5388     cluster_32x32_blk_t *ps_blk_32x32;
5389     cluster_64x64_blk_t *ps_blk_64x64;
5390 
5391     part_type_results_t *ps_best_result;
5392     pu_result_t *aps_part_result[MAX_NUM_PARTS];
5393     pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5394 
5395     PART_ID_T e_part_id;
5396     PART_TYPE_T e_part_type;
5397 
5398     S32 enable_64x64_merge;
5399     S32 i, j, k;
5400     S32 mvx, mvy;
5401     S32 num_parts;
5402     S32 ref_idx;
5403     S32 ai4_pred_mode[MAX_NUM_PARTS];
5404 
5405     S32 num_32x32_merges = 0;
5406 
5407     /*****************************************/
5408     /*****************************************/
5409     /********* Enter ye who is HQ ************/
5410     /*****************************************/
5411     /*****************************************/
5412 
5413     ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5414 
5415     /* Initialise data in each of the clusters */
5416     for(i = 0; i < 16; i++)
5417     {
5418         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5419 
5420 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5421         if(e_quality_preset < ME_HIGH_QUALITY)
5422         {
5423             hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5424         }
5425         else
5426         {
5427             ps_blk_16x16->best_inter_cost = 0;
5428             ps_blk_16x16->intra_mv_area = 0;
5429         }
5430 #else
5431         hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5432 #endif
5433     }
5434 
5435     for(i = 0; i < 4; i++)
5436     {
5437         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5438 
5439 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440         if(e_quality_preset < ME_HIGH_QUALITY)
5441         {
5442             hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5443         }
5444         else
5445         {
5446             ps_blk_32x32->best_inter_cost = 0;
5447             ps_blk_32x32->intra_mv_area = 0;
5448         }
5449 #else
5450         hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5451 #endif
5452     }
5453 
5454 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5455     if(e_quality_preset < ME_HIGH_QUALITY)
5456     {
5457         hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5458     }
5459     else
5460     {
5461         ps_blk_64x64->best_inter_cost = 0;
5462         ps_blk_64x64->intra_mv_area = 0;
5463     }
5464 #else
5465     hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5466 #endif
5467 
5468     /* Initialise data for all nodes in the CU tree */
5469     hme_build_cu_tree(
5470         ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5471 
5472     if(e_quality_preset >= ME_HIGH_QUALITY)
5473     {
5474         memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5475     }
5476 
5477 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5478     return;
5479 #endif
5480 
5481     for(i = 0; i < 16; i++)
5482     {
5483         S32 blk_8x8_mask;
5484         S32 is_16x16_blk_valid;
5485         S32 num_clusters_updated;
5486         S32 num_clusters;
5487 
5488         blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5489 
5490         ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5491 
5492         is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5493 
5494         if(is_16x16_blk_valid)
5495         {
5496             /* Use 8x8 data when 16x16 CU is split */
5497             if(ps_search_results[i].u1_split_flag)
5498             {
5499                 S32 blk_8x8_idx = i << 2;
5500 
5501                 num_parts = 4;
5502                 e_part_type = PRT_NxN;
5503 
5504                 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5505                 {
5506                     /* Only 2Nx2N partition supported for 8x8 block */
5507                     ASSERT(
5508                         ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5509                         ((PART_TYPE_T)PRT_2Nx2N));
5510 
5511                     aps_part_result[j] =
5512                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5513                     aps_inferior_parts[j] =
5514                         &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5515                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5516                 }
5517             }
5518             else
5519             {
5520                 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5521 
5522                 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5523                 num_parts = gau1_num_parts_in_part_type[e_part_type];
5524 
5525                 for(j = 0; j < num_parts; j++)
5526                 {
5527                     aps_part_result[j] = &ps_best_result->as_pu_results[j];
5528                     aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5529                     ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5530                 }
5531 
5532                 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5533             }
5534 
5535             for(j = 0; j < num_parts; j++)
5536             {
5537                 pu_result_t *ps_part_result = aps_part_result[j];
5538 
5539                 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5540 
5541                 e_part_id = ge_part_type_to_part_id[e_part_type][j];
5542 
5543                 /* Skip clustering if best mode is intra */
5544                 if((ps_part_result->pu.b1_intra_flag))
5545                 {
5546                     ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5547                     ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5548                     continue;
5549                 }
5550                 else
5551                 {
5552                     ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5553                 }
5554 
5555 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5556                 if(e_quality_preset >= ME_HIGH_QUALITY)
5557                 {
5558                     continue;
5559                 }
5560 #endif
5561 
5562                 for(k = 0; k < num_mvs; k++)
5563                 {
5564                     mv_t *ps_mv;
5565 
5566                     pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5567 
5568                     S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5569 
5570                     ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5571 
5572                     mvx = ps_mv->i2_mvx;
5573                     mvy = ps_mv->i2_mvy;
5574 
5575                     ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5576                                          : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5577 
5578                     num_clusters = ps_blk_16x16->num_clusters;
5579 
5580                     hme_find_and_update_clusters(
5581                         ps_blk_16x16->as_cluster_data,
5582                         &(ps_blk_16x16->num_clusters),
5583                         mvx,
5584                         mvy,
5585                         ref_idx,
5586                         ps_part_result->i4_sdi,
5587                         e_part_id,
5588                         (ai4_pred_mode[j] == 2));
5589 
5590                     num_clusters_updated = (ps_blk_16x16->num_clusters);
5591 
5592                     ps_blk_16x16->au1_num_clusters[ref_idx] +=
5593                         (num_clusters_updated - num_clusters);
5594                 }
5595             }
5596         }
5597     }
5598 
5599     /* Search for 32x32 clusters */
5600     for(i = 0; i < 4; i++)
5601     {
5602         S32 num_clusters_merged;
5603 
5604         S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5605 
5606         if(is_32x32_blk_valid)
5607         {
5608             ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5609             ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5610 
5611 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5612             if(e_quality_preset >= ME_HIGH_QUALITY)
5613             {
5614                 for(j = 0; j < 4; j++, ps_blk_16x16++)
5615                 {
5616                     ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5617 
5618                     ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5619                 }
5620                 continue;
5621             }
5622 #endif
5623 
5624             hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5625 
5626             if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5627             {
5628                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5629                     ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5630 
5631                 if(num_clusters_merged)
5632                 {
5633                     ps_blk_32x32->num_clusters -= num_clusters_merged;
5634 
5635                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5636                 }
5637             }
5638         }
5639     }
5640 
5641 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5642     /* Eliminate outlier 32x32 clusters */
5643     if(e_quality_preset < ME_HIGH_QUALITY)
5644 #endif
5645     {
5646         hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5647 
5648         /* Find best_uni_ref and best_alt_ref */
5649         hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5650     }
5651 
5652     /* Populate the CU tree for depths 1 and higher */
5653     {
5654         cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5655         cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5656         cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5657         cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5658         cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5659 
5660         hme_populate_cu_tree(
5661             ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5662 
5663         num_32x32_merges += (ps_tl->is_node_valid == 1);
5664 
5665         hme_populate_cu_tree(
5666             ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5667 
5668         num_32x32_merges += (ps_tr->is_node_valid == 1);
5669 
5670         hme_populate_cu_tree(
5671             ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5672 
5673         num_32x32_merges += (ps_bl->is_node_valid == 1);
5674 
5675         hme_populate_cu_tree(
5676             ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5677 
5678         num_32x32_merges += (ps_br->is_node_valid == 1);
5679     }
5680 
5681 #if !ENABLE_4CTB_EVALUATION
5682     if(e_quality_preset < ME_HIGH_QUALITY)
5683     {
5684         enable_64x64_merge = (num_32x32_merges >= 3);
5685     }
5686 #else
5687     if(e_quality_preset < ME_HIGH_QUALITY)
5688     {
5689         enable_64x64_merge = 1;
5690     }
5691 #endif
5692 
5693 #if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5694     if(e_quality_preset >= ME_HIGH_QUALITY)
5695     {
5696         enable_64x64_merge = 1;
5697     }
5698 #else
5699     if(e_quality_preset >= ME_HIGH_QUALITY)
5700     {
5701         enable_64x64_merge = (num_32x32_merges >= 3);
5702     }
5703 #endif
5704 
5705     if(enable_64x64_merge)
5706     {
5707         S32 num_clusters_merged;
5708 
5709         ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5710 
5711 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5712         if(e_quality_preset >= ME_HIGH_QUALITY)
5713         {
5714             for(j = 0; j < 4; j++, ps_blk_32x32++)
5715             {
5716                 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5717 
5718                 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5719             }
5720         }
5721         else
5722 #endif
5723         {
5724             hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5725 
5726             if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5727             {
5728                 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5729                     ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5730 
5731                 if(num_clusters_merged)
5732                 {
5733                     ps_blk_64x64->num_clusters -= num_clusters_merged;
5734 
5735                     UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5736                 }
5737             }
5738         }
5739 
5740 #if !ENABLE_4CTB_EVALUATION
5741         if(e_quality_preset < ME_HIGH_QUALITY)
5742         {
5743             S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5744             S32 best_intra_cost =
5745                 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5746                   ps_ctb_cluster_info->i4_frame_qstep *
5747                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5748                     ? MAX_32BIT_VAL
5749                     : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5750                        ps_ctb_cluster_info->i4_frame_qstep *
5751                            ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5752             S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5753             S32 cost_differential = (best_inter_cost - best_cost);
5754 
5755             enable_64x64_merge =
5756                 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5757         }
5758 #endif
5759     }
5760 
5761     if(enable_64x64_merge)
5762     {
5763 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5764         if(e_quality_preset < ME_HIGH_QUALITY)
5765 #endif
5766         {
5767             hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5768 
5769             hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5770         }
5771 
5772         hme_populate_cu_tree(
5773             ps_ctb_cluster_info,
5774             ps_ctb_cluster_info->ps_cu_tree_root,
5775             0,
5776             e_quality_preset,
5777             POS_NA,
5778             POS_NA,
5779             POS_NA);
5780     }
5781 }
5782 #endif
5783 
/**
********************************************************************************
*  @fn   static __inline void hme_merge_prms_init(hme_merge_prms_t *ps_prms, ...)
*
*  @brief  Populates one merge-parameter structure. For BLK_32x32 the four
*          children are consecutive 16x16 search results of the quadrant
*          selected by i4_32x32_id; for any other block size the four children
*          are the 32x32 search results and the merge target is the 64x64
*          search result.
*
*  @param[out] ps_prms : merge params being filled
*
*  @param[in]  ps_curr_layer : layer ctxt stored as-is in the params
*
*  @param[in]  ps_refine_prms : supplies i4_use_rec_in_fpel and the number of
*              input results (fpel results for 32x32, 32x32 merge results
*              otherwise)
*
*  @param[in]  ps_me_ctxt : frame level ME ctxt owning the search result
*              arrays, 8x8 CU results, ref lists and cluster info
*
*  @param[in]  ps_range_prms_rec : per-ref mv ranges, used when
*              i4_use_rec_in_fpel is non-zero
*
*  @param[in]  ps_range_prms_inp : per-ref mv ranges, used otherwise
*
*  @param[in]  pps_mv_grid : mv grid pointer array stored as-is
*
*  @param[in]  ps_inter_ctb_prms : inter ctb prms stored as-is
*
*  @param[in]  i4_num_pred_dir : stored as number of refs in which to merge
*
*  @param[in]  i4_32x32_id : index of the 32x32 unit; read only for BLK_32x32
*
*  @param[in]  e_blk_size : size of the merged block
*
*  @param[in]  e_me_quality_presets : quality preset stored as-is
*
*  @return None
********************************************************************************
*/
static __inline void hme_merge_prms_init(
    hme_merge_prms_t *ps_prms,
    layer_ctxt_t *ps_curr_layer,
    refine_prms_t *ps_refine_prms,
    me_frm_ctxt_t *ps_me_ctxt,
    range_prms_t *ps_range_prms_rec,
    range_prms_t *ps_range_prms_inp,
    mv_grid_t **pps_mv_grid,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    S32 i4_num_pred_dir,
    S32 i4_32x32_id,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_me_quality_presets)
{
    const S32 rec_in_fpel = ps_refine_prms->i4_use_rec_in_fpel;

    /* Either the recon or the input range set feeds every reference slot */
    range_prms_t *ps_range_src = rec_in_fpel ? ps_range_prms_rec : ps_range_prms_inp;

    WORD32 ref_id;

    /* Segmentation info from previous layers is currently not enabled */
    ps_prms->i4_seg_info_avail = 0;
    ps_prms->i4_part_mask = 0;

    /* Number of reference pics in which to do merge */
    ps_prms->i4_num_ref = i4_num_pred_dir;

    /* Handles that are simply forwarded into the params */
    ps_prms->ps_layer_ctxt = ps_curr_layer;
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
    ps_prms->pps_mv_grid = pps_mv_grid;
    ps_prms->e_quality_preset = e_me_quality_presets;

    if(BLK_32x32 != e_blk_size)
    {
        /* 32x32 -> 64x64 merge: children are the four 32x32 units */
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];

        /* Destination of the merged result */
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;

        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
    }
    else
    {
        /* 16x16 -> 32x32 merge: four consecutive 16x16 units per 32x32 unit */
        const S32 first_16x16 = i4_32x32_id << 2;

        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[first_16x16];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 3];

        /* Destination of the merged result */
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];

        /* Could be fewer than the number of 16x16 results generated; */
        /* kept identical for now                                     */
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
        ps_prms->ps_results_grandchild = NULL;
    }

    for(ref_id = 0; ref_id < MAX_NUM_REF; ref_id++)
    {
        ps_prms->aps_mv_range[ref_id] = ps_range_src + ref_id;
    }
    ps_prms->i4_use_rec = rec_in_fpel;

    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;

    /* Remaining fields are copied straight from the frame level ME ctxt */
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
}
5876 
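/**
********************************************************************************
*  @fn   void hme_refine(me_ctxt_t *ps_thrd_ctxt,
*                       refine_prms_t *ps_refine_prms,
*                       PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
*                       layer_ctxt_t *ps_coarse_layer,
*                       multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                       S32 lyr_job_type,
*                       S32 thrd_id,
*                       S32 me_frm_id,
*                       pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
*
*  @brief  Top level entry point for refinement ME
*
*  @param[in,out]  ps_thrd_ctxt : ME Handle
*
*  @param[in]  ps_refine_prms : refinement layer prms
*
*  @param[in]  pf_ext_update_fxn : external update function pointer
*
*  @param[in]  ps_coarse_layer : coarser layer ctxt; its mv bank supplies the
*              block size, number of refs and number of mvs per ref
*
*  @param[in]  ps_multi_thrd_ctxt : multi thread ctxt
*
*  @param[in]  lyr_job_type : layer job type
*
*  @param[in]  thrd_id : thread id
*
*  @param[in]  me_frm_id : ME frame id; taken modulo MAX_NUM_ME_PARALLEL to
*              select the frame level ME ctxt
*
*  @param[in]  ps_l0_ipe_input : L0 IPE input ctxt
*
*  @return None
********************************************************************************
*/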
hme_refine(me_ctxt_t * ps_thrd_ctxt,refine_prms_t * ps_refine_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,layer_ctxt_t * ps_coarse_layer,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,S32 lyr_job_type,S32 thrd_id,S32 me_frm_id,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input)5891 void hme_refine(
5892     me_ctxt_t *ps_thrd_ctxt,
5893     refine_prms_t *ps_refine_prms,
5894     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5895     layer_ctxt_t *ps_coarse_layer,
5896     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5897     S32 lyr_job_type,
5898     S32 thrd_id,
5899     S32 me_frm_id,
5900     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5901 {
5902     inter_ctb_prms_t s_common_frm_prms;
5903 
5904     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5905     WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5906     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5907     ME_QUALITY_PRESETS_T e_me_quality_presets =
5908         ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5909 
5910     WORD32 num_rows_proc = 0;
5911     WORD32 num_act_ref_pics;
5912     WORD16 i2_prev_enc_frm_max_mv_y;
5913     WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5914 
5915     /*************************************************************************/
5916     /* Complexity of search: Low to High                                     */
5917     /*************************************************************************/
5918     SEARCH_COMPLEXITY_T e_search_complexity;
5919 
5920     /*************************************************************************/
5921     /* to store the PU results which are passed to the decide_part_types     */
5922     /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5923     /*************************************************************************/
5924 
5925     pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5926     inter_pu_results_t as_inter_pu_results[4];
5927     inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5928 
5929     /*************************************************************************/
5930     /* Config parameter structures for varius ME submodules                  */
5931     /*************************************************************************/
5932     hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5933     hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5934     hme_merge_prms_t s_merge_prms_64x64;
5935     hme_search_prms_t s_search_prms_blk;
5936     mvbank_update_prms_t s_mv_update_prms;
5937     hme_ctb_prms_t s_ctb_prms;
5938     hme_subpel_prms_t s_subpel_prms;
5939     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5940     ctb_cluster_info_t *ps_ctb_cluster_info;
5941     fpel_srch_cand_init_data_t s_srch_cand_init_data;
5942 
5943     /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5944     S32 en_merge_32x32;
5945     /* 5 lsb's specify whether or not merge algorithm is required */
5946     /* to be executed or not. Relevant only in PQ. Ought to be */
5947     /* used in conjunction with en_merge_32x32 and */
5948     /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5949     /* required when all children are deemed to be intras */
5950     S32 en_merge_execution;
5951 
5952     /*************************************************************************/
5953     /* All types of search candidates for predictor based search.            */
5954     /*************************************************************************/
5955     S32 num_init_candts = 0;
5956     S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5957     S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5958     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5959     search_node_t as_top_neighbours[4], as_left_neighbours[3];
5960 
5961     pf_get_wt_inp fp_get_wt_inp;
5962 
5963     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5964     U32 au4_unique_node_map[MAP_X_MAX * 2];
5965 
5966     /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5967     ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5968 
5969     /*************************************************************************/
5970     /* points ot the search results for the blk level search (8x8/16x16)     */
5971     /*************************************************************************/
5972     search_results_t *ps_search_results;
5973 
5974     /*************************************************************************/
5975     /* Coordinates                                                           */
5976     /*************************************************************************/
5977     S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5978     S32 pos_x, pos_y;
5979     S32 blk_id_in_full_ctb;
5980 
5981     /*************************************************************************/
5982     /* Related to dimensions of block being searched and pic dimensions      */
5983     /*************************************************************************/
5984     S32 blk_4x4_to_16x16;
5985     S32 blk_wd, blk_ht, blk_size_shift;
5986     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5987     S32 num_results_prev_layer;
5988 
5989     /*************************************************************************/
5990     /* Size of a basic unit for this layer. For non encode layers, we search */
5991     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5992     /* basic unit size is the ctb size.                                      */
5993     /*************************************************************************/
5994     S32 unit_size;
5995 
5996     /*************************************************************************/
5997     /* Local variable storing results of any 4 CU merge to bigger CU         */
5998     /*************************************************************************/
5999     CU_MERGE_RESULT_T e_merge_result;
6000 
6001     /*************************************************************************/
6002     /* This mv grid stores results during and after fpel search, during      */
6003     /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
6004     /* meant for the 2 directions of search (l0 and l1).                     */
6005     /*************************************************************************/
6006     mv_grid_t *aps_mv_grid[2];
6007 
6008     /*************************************************************************/
6009     /* Pointers to context in current and coarser layers                     */
6010     /*************************************************************************/
6011     layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
6012 
6013     /*************************************************************************/
6014     /* to store mv range per blk, and picture limit, allowed search range    */
6015     /* range prms in hpel and qpel units as well                             */
6016     /*************************************************************************/
6017     range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6018     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6019     range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6020 
6021     /*************************************************************************/
6022     /* These variables are used to track number of references at different   */
6023     /* stages of ME.                                                         */
6024     /*************************************************************************/
6025     S32 i4_num_pred_dir;
6026     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6027     S32 lambda_recon = ps_refine_prms->lambda_recon;
6028 
6029     /* Counts successful merge to 32x32 every CTB (0-4) */
6030     S32 merge_count_32x32;
6031 
6032     S32 ai4_id_coloc[14], ai4_id_Z[2];
6033     U08 au1_search_candidate_list_index[2];
6034     S32 ai4_num_coloc_cands[2];
6035     U08 u1_pred_dir, u1_pred_dir_ctr;
6036 
6037     /*************************************************************************/
6038     /* Input pointer and stride                                              */
6039     /*************************************************************************/
6040     U08 *pu1_inp;
6041     S32 i4_inp_stride;
6042     S32 end_of_frame;
6043     S32 num_sync_units_in_row, num_sync_units_in_tile;
6044 
6045     /*************************************************************************/
6046     /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
6047     /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6048     /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
6049     /*************************************************************************/
6050     S32 blk_8x8_mask;
6051     S32 ai4_blk_8x8_mask[16];
6052     U08 au1_is_64x64Blk_noisy[1];
6053     U08 au1_is_32x32Blk_noisy[4];
6054     U08 au1_is_16x16Blk_noisy[16];
6055 
6056     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6057         ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6058     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6059         ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6060 
6061     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6062 
6063     /*************************************************************************/
6064     /* Pointers to current and coarse layer are needed for projection */
6065     /* Pointer to prev layer are needed for other candts like coloc   */
6066     /*************************************************************************/
6067     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6068 
6069     ps_prev_layer = hme_get_past_layer_ctxt(
6070         ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6071 
6072     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6073 
6074     /* Function pointer is selected based on the C vc X86 macro */
6075 
6076     fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6077 
6078     i4_inp_stride = ps_curr_layer->i4_inp_stride;
6079     i4_pic_wd = ps_curr_layer->i4_wd;
6080     i4_pic_ht = ps_curr_layer->i4_ht;
6081     e_search_complexity = ps_refine_prms->e_search_complexity;
6082     end_of_frame = 0;
6083 
6084     /* This points to all the initial candts */
6085     ps_search_candts = &as_search_candts[0];
6086 
6087     /* mv grid being huge strucutre is part of context */
6088     aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6089     aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6090 
6091     /*************************************************************************/
6092     /* If the current layer is encoded (since it may be multicast or final   */
6093     /* layer (finest)), then we use 16x16 blk size with some selected parts  */
6094     /* If the current layer is not encoded, then we use 8x8 blk size, with   */
6095     /* enable or disable of 4x4 partitions depending on the input prms       */
6096     /*************************************************************************/
6097     e_search_blk_size = BLK_16x16;
6098     blk_wd = blk_ht = 16;
6099     blk_size_shift = 4;
6100     e_result_blk_size = BLK_8x8;
6101     s_mv_update_prms.i4_shift = 1;
6102 
6103     if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6104     {
6105         blk_4x4_to_16x16 = 1;
6106     }
6107     else
6108     {
6109         blk_4x4_to_16x16 = 0;
6110     }
6111 
6112     unit_size = 1 << ps_ctxt->log_ctb_size;
6113     s_search_prms_blk.i4_inp_stride = unit_size;
6114 
6115     /* This is required to properly update the layer mv bank */
6116     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6117     s_search_prms_blk.e_blk_size = e_search_blk_size;
6118 
6119     /*************************************************************************/
6120     /* If current layer is explicit, then the number of ref frames are to    */
6121     /* be same as previous layer. Else it will be 2                          */
6122     /*************************************************************************/
6123     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6124     i4_num_pred_dir =
6125         (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6126         1;
6127 
6128 #if USE_MODIFIED == 1
6129     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6130 #else
6131     s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6132 #endif
6133 
6134     i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6135     if(i4_num_ref_prev_layer <= 2)
6136     {
6137         i4_num_ref_each_dir = 1;
6138     }
6139     else
6140     {
6141         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6142     }
6143 
6144     s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6145     s_mv_update_prms.i4_num_results_to_store =
6146         MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6147                                                 : (i4_num_act_ref_l0 > 1) + 1,
6148             ps_refine_prms->i4_num_results_per_part);
6149 
6150     /*************************************************************************/
6151     /* Initialization of merge params for 16x16 to 32x32 merge.              */
6152     /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
6153     /*************************************************************************/
6154     {
6155         hme_merge_prms_t *aps_merge_prms[4];
6156         aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6157         aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6158         aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6159         aps_merge_prms[3] = &s_merge_prms_32x32_br;
6160         for(i = 0; i < 4; i++)
6161         {
6162             hme_merge_prms_init(
6163                 aps_merge_prms[i],
6164                 ps_curr_layer,
6165                 ps_refine_prms,
6166                 ps_ctxt,
6167                 as_range_prms_rec,
6168                 as_range_prms_inp,
6169                 &aps_mv_grid[0],
6170                 &s_common_frm_prms,
6171                 i4_num_pred_dir,
6172                 i,
6173                 BLK_32x32,
6174                 e_me_quality_presets);
6175         }
6176     }
6177 
6178     /*************************************************************************/
6179     /* Initialization of merge params for 32x32 to 64x64 merge.              */
6180     /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
6181     /*************************************************************************/
6182     {
6183         hme_merge_prms_init(
6184             &s_merge_prms_64x64,
6185             ps_curr_layer,
6186             ps_refine_prms,
6187             ps_ctxt,
6188             as_range_prms_rec,
6189             as_range_prms_inp,
6190             &aps_mv_grid[0],
6191             &s_common_frm_prms,
6192             i4_num_pred_dir,
6193             0,
6194             BLK_64x64,
6195             e_me_quality_presets);
6196     }
6197 
6198     /* Pointers to cu_results are initialised here */
6199     {
6200         WORD32 i;
6201 
6202         ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6203 
6204         for(i = 0; i < 4; i++)
6205         {
6206             ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6207         }
6208 
6209         for(i = 0; i < 16; i++)
6210         {
6211             ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6212         }
6213     }
6214 
6215     /*************************************************************************/
6216     /* SUBPEL Params initialized here                                        */
6217     /*************************************************************************/
6218     {
6219         s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6220         s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6221         s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6222 
6223         s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6224         s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6225         s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6226 
6227         s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6228         s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6229 
6230         s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6231 
6232         s_subpel_prms.i4_inp_stride = unit_size;
6233 
6234         s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6235         s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6236         s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6237 
6238         s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6239 
6240         {
6241             WORD32 ref_ctr;
6242             for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6243             {
6244                 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6245                 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6246             }
6247         }
6248         s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6249 
6250 #if USE_MODIFIED == 0
6251         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6252 #else
6253         s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6254 #endif
6255         s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6256 
6257         /* BI Refinement done only if this field is 1 */
6258         s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6259 
6260         s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6261 
6262         s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6263         s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6264         s_subpel_prms.u1_max_num_subpel_refine_centers =
6265             ps_refine_prms->u1_max_num_subpel_refine_centers;
6266     }
6267 
6268     /* inter_ctb_prms_t struct initialisation */
6269     {
6270         inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6271         hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6272 
6273         ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6274         ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6275         ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6276         ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6277         ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6278         ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6279         ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6280         ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6281         ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6282         ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6283         ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6284         ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6285         ps_inter_ctb_prms->i4_lamda = lambda_recon;
6286         ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6287         ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6288         ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6289         ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6290         ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6291         ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6292         ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6293             ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6294     }
6295 
6296     for(i = 0; i < MAX_INIT_CANDTS; i++)
6297     {
6298         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6299         ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6300 
6301         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6302     }
6303     num_act_ref_pics =
6304         ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6305 
6306     if(num_act_ref_pics)
6307     {
6308         hme_search_cand_data_init(
6309             ai4_id_Z,
6310             ai4_id_coloc,
6311             ai4_num_coloc_cands,
6312             au1_search_candidate_list_index,
6313             i4_num_act_ref_l0,
6314             i4_num_act_ref_l1,
6315             ps_ctxt->s_frm_prms.bidir_enabled,
6316             blk_4x4_to_16x16);
6317     }
6318 
6319     if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6320     {
6321         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6322         ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6323     }
6324     else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6325     {
6326         ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6327     }
6328 
6329     for(i = 0; i < 3; i++)
6330     {
6331         search_node_t *ps_search_node;
6332         ps_search_node = &as_left_neighbours[i];
6333         INIT_SEARCH_NODE(ps_search_node, 0);
6334         ps_search_node = &as_top_neighbours[i];
6335         INIT_SEARCH_NODE(ps_search_node, 0);
6336     }
6337 
6338     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6339     as_left_neighbours[2].u1_is_avail = 0;
6340 
6341     /*************************************************************************/
6342     /* Initialize all the search results structure here. We update all the   */
6343     /* search results to default values, and configure things like blk sizes */
6344     /*************************************************************************/
6345     if(num_act_ref_pics)
6346     {
6347         S32 i4_x, i4_y;
6348         /* 16x16 results */
6349         for(i = 0; i < 16; i++)
6350         {
6351             search_results_t *ps_search_results;
6352             S32 pred_lx;
6353             ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6354             i4_x = (S32)gau1_encode_to_raster_x[i];
6355             i4_y = (S32)gau1_encode_to_raster_y[i];
6356             i4_x <<= 4;
6357             i4_y <<= 4;
6358 
6359             hme_init_search_results(
6360                 ps_search_results,
6361                 i4_num_pred_dir,
6362                 ps_refine_prms->i4_num_fpel_results,
6363                 ps_refine_prms->i4_num_results_per_part,
6364                 e_search_blk_size,
6365                 i4_x,
6366                 i4_y,
6367                 &ps_ctxt->au1_is_past[0]);
6368 
6369             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6370             {
6371                 pred_ctxt_t *ps_pred_ctxt;
6372 
6373                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6374 
6375                 hme_init_pred_ctxt_encode(
6376                     ps_pred_ctxt,
6377                     ps_search_results,
6378                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6379                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6380                     aps_mv_grid[pred_lx],
6381                     pred_lx,
6382                     lambda_recon,
6383                     ps_refine_prms->lambda_q_shift,
6384                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6385                     &ps_ctxt->ai2_ref_scf[0]);
6386             }
6387         }
6388 
6389         for(i = 0; i < 4; i++)
6390         {
6391             search_results_t *ps_search_results;
6392             S32 pred_lx;
6393             ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6394 
6395             i4_x = (S32)gau1_encode_to_raster_x[i];
6396             i4_y = (S32)gau1_encode_to_raster_y[i];
6397             i4_x <<= 5;
6398             i4_y <<= 5;
6399 
6400             hme_init_search_results(
6401                 ps_search_results,
6402                 i4_num_pred_dir,
6403                 ps_refine_prms->i4_num_32x32_merge_results,
6404                 ps_refine_prms->i4_num_results_per_part,
6405                 BLK_32x32,
6406                 i4_x,
6407                 i4_y,
6408                 &ps_ctxt->au1_is_past[0]);
6409 
6410             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6411             {
6412                 pred_ctxt_t *ps_pred_ctxt;
6413 
6414                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6415 
6416                 hme_init_pred_ctxt_encode(
6417                     ps_pred_ctxt,
6418                     ps_search_results,
6419                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6420                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6421                     aps_mv_grid[pred_lx],
6422                     pred_lx,
6423                     lambda_recon,
6424                     ps_refine_prms->lambda_q_shift,
6425                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6426                     &ps_ctxt->ai2_ref_scf[0]);
6427             }
6428         }
6429 
6430         {
6431             search_results_t *ps_search_results;
6432             S32 pred_lx;
6433             ps_search_results = &ps_ctxt->s_search_results_64x64;
6434 
6435             hme_init_search_results(
6436                 ps_search_results,
6437                 i4_num_pred_dir,
6438                 ps_refine_prms->i4_num_64x64_merge_results,
6439                 ps_refine_prms->i4_num_results_per_part,
6440                 BLK_64x64,
6441                 0,
6442                 0,
6443                 &ps_ctxt->au1_is_past[0]);
6444 
6445             for(pred_lx = 0; pred_lx < 2; pred_lx++)
6446             {
6447                 pred_ctxt_t *ps_pred_ctxt;
6448 
6449                 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6450 
6451                 hme_init_pred_ctxt_encode(
6452                     ps_pred_ctxt,
6453                     ps_search_results,
6454                     ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6455                     ps_search_candts[ai4_id_Z[0]].ps_search_node,
6456                     aps_mv_grid[pred_lx],
6457                     pred_lx,
6458                     lambda_recon,
6459                     ps_refine_prms->lambda_q_shift,
6460                     &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6461                     &ps_ctxt->ai2_ref_scf[0]);
6462             }
6463         }
6464     }
6465 
6466     /* Initialise the structure used in clustering  */
6467     if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6468     {
6469         ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6470 
6471         ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6472         ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6473         ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6474         ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6475         ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6476         ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6477         ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6478     }
6479 
6480     /*********************************************************************/
6481     /* Initialize the dyn. search range params. for each reference index */
6482     /* in current layer ctxt                                             */
6483     /*********************************************************************/
6484 
6485     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6486     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6487     {
6488         WORD32 ref_ctr;
6489         /* set no. of act ref in L0 for further use at frame level */
6490         ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6491             ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6492 
6493         for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6494         {
6495             INIT_DYN_SEARCH_PRMS(
6496                 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6497                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6498         }
6499     }
6500     /*************************************************************************/
6501     /* Now that the candidates have been ordered, to choose the right number */
6502     /* of initial candidates.                                                */
6503     /*************************************************************************/
6504     if(blk_4x4_to_16x16)
6505     {
6506         if(i4_num_ref_prev_layer > 2)
6507         {
6508             if(e_search_complexity == SEARCH_CX_LOW)
6509                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510             else if(e_search_complexity == SEARCH_CX_MED)
6511                 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6512             else if(e_search_complexity == SEARCH_CX_HIGH)
6513                 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6514             else
6515                 ASSERT(0);
6516         }
6517         else if(i4_num_ref_prev_layer == 2)
6518         {
6519             if(e_search_complexity == SEARCH_CX_LOW)
6520                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6521             else if(e_search_complexity == SEARCH_CX_MED)
6522                 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6523             else if(e_search_complexity == SEARCH_CX_HIGH)
6524                 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6525             else
6526                 ASSERT(0);
6527         }
6528         else
6529         {
6530             if(e_search_complexity == SEARCH_CX_LOW)
6531                 num_init_candts = 5;
6532             else if(e_search_complexity == SEARCH_CX_MED)
6533                 num_init_candts = 12;
6534             else if(e_search_complexity == SEARCH_CX_HIGH)
6535                 num_init_candts = 19;
6536             else
6537                 ASSERT(0);
6538         }
6539     }
6540     else
6541     {
6542         if(i4_num_ref_prev_layer > 2)
6543         {
6544             if(e_search_complexity == SEARCH_CX_LOW)
6545                 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546             else if(e_search_complexity == SEARCH_CX_MED)
6547                 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6548             else if(e_search_complexity == SEARCH_CX_HIGH)
6549                 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6550             else
6551                 ASSERT(0);
6552         }
6553         else if(i4_num_ref_prev_layer == 2)
6554         {
6555             if(e_search_complexity == SEARCH_CX_LOW)
6556                 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6557             else if(e_search_complexity == SEARCH_CX_MED)
6558                 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6559             else if(e_search_complexity == SEARCH_CX_HIGH)
6560                 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6561             else
6562                 ASSERT(0);
6563         }
6564         else
6565         {
6566             if(e_search_complexity == SEARCH_CX_LOW)
6567                 num_init_candts = 5;
6568             else if(e_search_complexity == SEARCH_CX_MED)
6569                 num_init_candts = 11;
6570             else if(e_search_complexity == SEARCH_CX_HIGH)
6571                 num_init_candts = 16;
6572             else
6573                 ASSERT(0);
6574         }
6575     }
6576 
6577     /*************************************************************************/
6578     /* The following search parameters are fixed throughout the search across*/
6579     /* all blks. So these are configured outside processing loop             */
6580     /*************************************************************************/
6581     s_search_prms_blk.i4_num_init_candts = num_init_candts;
6582     s_search_prms_blk.i4_start_step = 1;
6583     s_search_prms_blk.i4_use_satd = 0;
6584     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6585     /* we use recon only for encoded layers, otherwise it is not available */
6586     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6587 
6588     s_search_prms_blk.ps_search_candts = ps_search_candts;
6589     if(s_search_prms_blk.i4_use_rec)
6590     {
6591         WORD32 ref_ctr;
6592         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6593             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6594     }
6595     else
6596     {
6597         WORD32 ref_ctr;
6598         for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6599             s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6600     }
6601 
6602     /*************************************************************************/
6603     /* Initialize coordinates. Meaning as follows                            */
6604     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
6605     /* blk_y : same as above, y coord.                                       */
6606     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
6607     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
6608     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
6609     /* corner of the picture. Always multiple of 64.                         */
6610     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
6611     /*************************************************************************/
6612     blk_y = 0;
6613     blk_id_in_ctb = 0;
6614     i4_ctb_y = 0;
6615 
6616     /*************************************************************************/
6617     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
6618     /* every block given its coordinate. Note this assumes that the min amt  */
6619     /* of padding to right of pic is equal to the blk size. If we go all the */
6620     /* way up to 64x64, then the min padding on right side of picture should */
6621     /* be 64, and also on bottom side of picture.                            */
6622     /*************************************************************************/
6623     SET_PIC_LIMIT(
6624         s_pic_limit_inp,
6625         ps_curr_layer->i4_pad_x_rec,
6626         ps_curr_layer->i4_pad_y_rec,
6627         ps_curr_layer->i4_wd,
6628         ps_curr_layer->i4_ht,
6629         s_search_prms_blk.i4_num_steps_post_refine);
6630 
6631     SET_PIC_LIMIT(
6632         s_pic_limit_rec,
6633         ps_curr_layer->i4_pad_x_rec,
6634         ps_curr_layer->i4_pad_y_rec,
6635         ps_curr_layer->i4_wd,
6636         ps_curr_layer->i4_ht,
6637         s_search_prms_blk.i4_num_steps_post_refine);
6638 
6639     /*************************************************************************/
6640     /* set the MV limit per ref. pic.                                        */
6641     /*    - P pic. : Based on the config params.                             */
6642     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6643     /*************************************************************************/
6644     hme_set_mv_limit_using_dvsr_data(
6645         ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6646     s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6647     s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6648     s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6649     s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6650     s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6651     s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6652     s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6653     s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6654     s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6655     s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6656     s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6657 
6658     while(0 == end_of_frame)
6659     {
6660         job_queue_t *ps_job;
6661         frm_ctb_ctxt_t *ps_frm_ctb_prms;
6662         ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6663 
6664         WORD32 i4_max_mv_x_in_ctb;
6665         WORD32 i4_max_mv_y_in_ctb;
6666         void *pv_dep_mngr_encloop_dep_me;
6667         WORD32 offset_val, check_dep_pos, set_dep_pos;
6668         WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6669 
6670         pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6671 
6672         ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6673 
6674         /* Get the current row from the job queue */
6675         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6676             ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6677 
6678         /* If all rows are done, set the end of process flag to 1, */
6679         /* and the current row to -1 */
6680         if(NULL == ps_job)
6681         {
6682             blk_y = -1;
6683             i4_ctb_y = -1;
6684             tile_col_idx = -1;
6685             end_of_frame = 1;
6686 
6687             continue;
6688         }
6689 
6690         /* set the output dependency after picking up the row */
6691         ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6692 
6693         /* Obtain the current row's details from the job */
6694         {
6695             ihevce_tile_params_t *ps_col_tile_params;
6696 
6697             i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6698             /* Obtain the current column tile index from the job */
6699             tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6700 
6701             /* in encode layer blocks are 16x16 and CTB is 64 x 64 */
6702             /* note if ctb is 32x32 then this calc needs to be changed */
6703             num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6704                                     ps_ctxt->log_ctb_size;
6705 
6706             /* The tile parameter for the col. idx. Use only the properties
6707             that are the same for all the bottom tiles like width, start_x, etc.
6708             Don't use height, start_y, etc.                                  */
6709             ps_col_tile_params =
6710                 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6711             /* in encode layer blocks are 16x16 and CTB is 64 x 64 */
6712             /* note if ctb is 32x32 then this calc needs to be changed */
6713             num_sync_units_in_tile =
6714                 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6715                 ps_ctxt->log_ctb_size;
6716 
6717             i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6718             i4_ctb_x = i4_first_ctb_x;
6719 
6720             if(!num_act_ref_pics)
6721             {
6722                 for(i4_ctb_x = i4_first_ctb_x;
6723                     i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6724                     i4_ctb_x++)
6725                 {
6726                     S32 blk_i = 0, blk_j = 0;
6727                     /* set the dependency for the corresponding row in enc loop */
6728                     ihevce_dmgr_set_row_row_sync(
6729                         pv_dep_mngr_encloop_dep_me,
6730                         (i4_ctb_x + 1),
6731                         i4_ctb_y,
6732                         tile_col_idx /* Col Tile No. */);
6733                 }
6734 
6735                 continue;
6736             }
6737 
6738             /* increment the number of rows proc */
6739             num_rows_proc++;
6740 
6741             /* Set Variables for Dep. Checking and Setting */
6742             set_dep_pos = i4_ctb_y + 1;
6743             if(i4_ctb_y > 0)
6744             {
6745                 offset_val = 2;
6746                 check_dep_pos = i4_ctb_y - 1;
6747             }
6748             else
6749             {
6750                 /* First row should run without waiting */
6751                 offset_val = -1;
6752                 check_dep_pos = 0;
6753             }
6754 
6755             /* row ctb out pointer  */
6756             ps_ctxt->ps_ctb_analyse_curr_row =
6757                 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6758 
6759             /* Row level CU Tree buffer */
6760             ps_ctxt->ps_cu_tree_curr_row =
6761                 ps_ctxt->ps_cu_tree_base +
6762                 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6763 
6764             ps_ctxt->ps_me_ctb_data_curr_row =
6765                 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6766         }
6767 
6768         /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6769         left_ctb_in_diff_tile = 1;
6770 
6771         /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                    */
6772         /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6773         {
6774             S32 i4_ref_id, i4_bits_req;
6775 
6776             for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6777                                             ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6778                 i4_ref_id++)
6779             {
6780                 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6781 
6782                 if(i4_bits_req > 12)
6783                 {
6784                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6785                 }
6786                 else
6787                 {
6788                     ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6789                 }
6790             }
6791 
6792             s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6793         }
6794 
6795         /* if non-encode layer then i4_ctb_x will be same as blk_x */
6796         /* loop over all the units in a row                        */
6797         for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6798             i4_ctb_x++)
6799         {
6800             ihevce_ctb_noise_params *ps_ctb_noise_params =
6801                 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6802 
6803             s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6804             s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6805 
6806             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6807             ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6808             /* Initialize ptr to current IPE CTB */
6809             ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6810                              i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6811             {
6812                 ps_ctb_bound_attrs =
6813                     get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6814 
6815                 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6816                 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6817             }
6818 
6819             /* Block to initialise pointers to part_type_results_t */
6820             /* in each size-specific inter_cu_results_t  */
6821             {
6822                 WORD32 i;
6823 
6824                 for(i = 0; i < 64; i++)
6825                 {
6826                     ps_ctxt->as_cu8x8_results[i].ps_best_results =
6827                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828                             .as_8x8_block_data[i]
6829                             .as_best_results;
6830                     ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6831                 }
6832 
6833                 for(i = 0; i < 16; i++)
6834                 {
6835                     ps_ctxt->as_cu16x16_results[i].ps_best_results =
6836                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6837                     ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6838                 }
6839 
6840                 for(i = 0; i < 4; i++)
6841                 {
6842                     ps_ctxt->as_cu32x32_results[i].ps_best_results =
6843                         ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6844                             .as_32x32_block_data[i]
6845                             .as_best_results;
6846                     ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6847                 }
6848 
6849                 ps_ctxt->s_cu64x64_results.ps_best_results =
6850                     ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6851                 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6852             }
6853 
6854             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6855             {
6856                 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6857                 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6858                 ps_ctb_cluster_info->ps_cu_tree_root =
6859                     ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6860                 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6861             }
6862 
6863             if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6864             {
6865                 S32 i4_nodes_created_in_cu_tree = 1;
6866 
6867                 ihevce_cu_tree_init(
6868                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6869                     (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6870                     &i4_nodes_created_in_cu_tree,
6871                     0,
6872                     POS_NA,
6873                     POS_NA,
6874                     POS_NA);
6875             }
6876 
6877             memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6878 
6879             if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6880             {
6881                 S32 j;
6882 
6883                 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6884 
6885                 ps_cur_ipe_ctb =
6886                     ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6887                 lambda_recon =
6888                     hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6889 
6890                 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6891 
6892                 for(i = 0; i < 4; i++)
6893                 {
6894                     ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6895 
6896                     for(j = 0; j < 2; j++)
6897                     {
6898                         ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6899                     }
6900                 }
6901                 ps_search_results = &ps_ctxt->s_search_results_64x64;
6902 
6903                 for(j = 0; j < 2; j++)
6904                 {
6905                     ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6906                 }
6907 
6908                 s_common_frm_prms.i4_lamda = lambda_recon;
6909             }
6910             else
6911             {
6912                 lambda_recon = ps_refine_prms->lambda_recon;
6913             }
6914 
6915             /*********************************************************************/
6916             /* replicate the inp buffer at blk or ctb level for each ref id,     */
6917             /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6918             /* thereby avoiding a bloat up of memory. If we did all references   */
6919             /* weighted pred, we will end up with a duplicate copy of each ref   */
6920             /* at each layer, since we need to preserve the original reference.  */
6921             /* ToDo: Need to observe performance with this mechanism and compare */
6922             /* with case where ref is weighted.                                  */
6923             /*********************************************************************/
6924             fp_get_wt_inp(
6925                 ps_curr_layer,
6926                 &ps_ctxt->s_wt_pred,
6927                 unit_size,
6928                 s_common_frm_prms.i4_ctb_x_off,
6929                 s_common_frm_prms.i4_ctb_y_off,
6930                 unit_size,
6931                 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6932                 ps_ctxt->i4_wt_pred_enable_flag);
6933 
6934             if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6935             {
6936 #if TEMPORAL_NOISE_DETECT
6937                 {
6938                     WORD32 had_block_size = 16;
6939                     WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6940                                            ? 64
6941                                            : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6942                     WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6943                                             ? 64
6944                                             : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6945                     WORD32 num_pred_dir = i4_num_pred_dir;
6946                     WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6947                     WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6948 
6949                     WORD32 i;
6950                     WORD32 noise_detected;
6951                     WORD32 ctb_size;
6952                     WORD32 num_comp_had_blocks;
6953                     WORD32 noisy_block_cnt;
6954                     WORD32 index_8x8_block;
6955                     WORD32 num_8x8_in_ctb_row;
6956 
6957                     WORD32 ht_offset;
6958                     WORD32 wd_offset;
6959                     WORD32 block_ht;
6960                     WORD32 block_wd;
6961 
6962                     WORD32 num_horz_blocks;
6963                     WORD32 num_vert_blocks;
6964 
6965                     WORD32 mean;
6966                     UWORD32 variance_8x8;
6967 
6968                     WORD32 hh_energy_percent;
6969 
6970                     /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6971                     WORD32 min_noisy_block_cnt;
6972                     WORD32 min_coeffs_above_avg;
6973                     WORD32 min_coeff_avg_energy;
6974 
6975                     /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. */
6976                     WORD32 i4_cu_x_off, i4_cu_y_off;
6977                     WORD32 is_noisy;
6978 
6979                     /* initialise the variables holding the constants */
6980                     if(had_block_size == 8)
6981                     {
6982                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
6983                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6984                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6985                     }
6986                     else
6987                     {
6988                         min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
6989                         min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6990                         min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6991                     }
6992 
6993                     /* initialize the variables */
6994                     noise_detected = 0;
6995                     noisy_block_cnt = 0;
6996                     hh_energy_percent = 0;
6997                     variance_8x8 = 0;
6998                     block_ht = ctb_height;
6999                     block_wd = ctb_width;
7000 
7001                     mean = 0;
7002 
7003                     ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
7004                     num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
7005 
7006                     num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
7007                     num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
7008 
7009                     ht_offset = -had_block_size;
7010                     wd_offset = -had_block_size;
7011 
7012                     num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
7013                     for(i = 0; i < num_comp_had_blocks; i++)
7014                     {
7015                         if(i % num_horz_blocks == 0)
7016                         {
7017                             wd_offset = -had_block_size;
7018                             ht_offset += had_block_size;
7019                         }
7020                         wd_offset += had_block_size;
7021 
7022                         /* CU level offsets */
7023                         i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
7024                         i4_cu_y_off = i4_y_off + (i / 4) * 16;
7025 
7026                         /* if 50 % or more of the CU is noisy then the return value is 1 */
7027                         is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7028                             ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7029                             (i % 4) * 16,
7030                             (i / 4) * 16,
7031                             16);
7032 
7033                         /* the temporal noise detect call is made on the CU only if the CU is noisy */
7034                         if(is_noisy)
7035                         {
7036                             index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7037                                               (i % num_horz_blocks) * 2;
7038                             noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7039                                 16,
7040                                 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7041                                     ? 64
7042                                     : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7043                                 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7044                                     ? 64
7045                                     : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7046                                 ps_ctb_noise_params,
7047                                 &s_srch_cand_init_data,
7048                                 &s_search_prms_blk,
7049                                 ps_ctxt,
7050                                 num_pred_dir,
7051                                 i4_num_act_ref_l0,
7052                                 i4_num_act_ref_l1,
7053                                 i4_cu_x_off,
7054                                 i4_cu_y_off,
7055                                 &ps_ctxt->s_wt_pred,
7056                                 unit_size,
7057                                 index_8x8_block,
7058                                 num_horz_blocks,
7059                                 /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
7060                                 i);
7061                         } /* if 16x16 is noisy */
7062                     } /* loop over for all 16x16*/
7063 
7064                     if(noisy_block_cnt >= min_noisy_block_cnt)
7065                     {
7066                         noise_detected = 1;
7067                     }
7068 
7069                     /* write back the noise presence detected for the current CTB to the structure */
7070                     ps_ctb_noise_params->i4_noise_present = noise_detected;
7071                 }
7072 #endif
7073 
7074 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7075                 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7076                    ps_ctb_noise_params->i4_noise_present)
7077                 {
7078                     memset(
7079                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7080                         1,
7081                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7082                 }
7083 #endif
7084 
7085                 for(i = 0; i < 16; i++)
7086                 {
7087                     au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7088                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7089                 }
7090 
7091                 for(i = 0; i < 4; i++)
7092                 {
7093                     au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7094                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7095                 }
7096 
7097                 for(i = 0; i < 1; i++)
7098                 {
7099                     au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7100                         ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7101                 }
7102 
7103                 if(ps_ctxt->s_frm_prms.bidir_enabled &&
7104                    (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7105                     MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7106                 {
7107                     ps_ctb_noise_params->i4_noise_present = 0;
7108                     memset(
7109                         ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7110                         0,
7111                         sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7112                 }
7113 
7114 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7115                 for(i = 0; i < 4; i++)
7116                 {
7117                     S32 j;
7118                     S32 lambda;
7119 
7120                     if(au1_is_32x32Blk_noisy[i])
7121                     {
7122                         lambda = lambda_recon;
7123                         lambda =
7124                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7125 
7126                         ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7127 
7128                         for(j = 0; j < 2; j++)
7129                         {
7130                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
7131                         }
7132                     }
7133                 }
7134 
7135                 {
7136                     S32 j;
7137                     S32 lambda;
7138 
7139                     if(au1_is_64x64Blk_noisy[0])
7140                     {
7141                         lambda = lambda_recon;
7142                         lambda =
7143                             ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7144 
7145                         ps_search_results = &ps_ctxt->s_search_results_64x64;
7146 
7147                         for(j = 0; j < 2; j++)
7148                         {
7149                             ps_search_results->as_pred_ctxt[j].lambda = lambda;
7150                         }
7151                     }
7152                 }
7153 #endif
7154                 if(au1_is_64x64Blk_noisy[0])
7155                 {
7156                     U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7157                                                              (s_common_frm_prms.i4_ctb_y_off *
7158                                                               ps_curr_layer->i4_inp_stride));
7159 
7160                     hme_compute_sigmaX_and_sigmaXSquared(
7161                         pu1_inp,
7162                         ps_curr_layer->i4_inp_stride,
7163                         ps_ctxt->au4_4x4_src_sigmaX,
7164                         ps_ctxt->au4_4x4_src_sigmaXSquared,
7165                         4,
7166                         4,
7167                         64,
7168                         64,
7169                         1,
7170                         16);
7171                 }
7172                 else
7173                 {
7174                     for(i = 0; i < 4; i++)
7175                     {
7176                         if(au1_is_32x32Blk_noisy[i])
7177                         {
7178                             U08 *pu1_inp =
7179                                 ps_curr_layer->pu1_inp +
7180                                 (s_common_frm_prms.i4_ctb_x_off +
7181                                  (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7182 
7183                             U08 u1_cu_size = 32;
7184                             WORD32 i4_inp_buf_offset =
7185                                 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7186                                  ((i % 2) * u1_cu_size));
7187 
7188                             U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7189                             U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7190                             S32 i4_sigma_arr_offset =
7191                                 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7192                                  ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7193 
7194                             hme_compute_sigmaX_and_sigmaXSquared(
7195                                 pu1_inp + i4_inp_buf_offset,
7196                                 ps_curr_layer->i4_inp_stride,
7197                                 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7198                                 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7199                                 4,
7200                                 4,
7201                                 32,
7202                                 32,
7203                                 1,
7204                                 16);
7205                         }
7206                         else
7207                         {
7208                             S32 j;
7209 
7210                             U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7211                             U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7212                             S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7213                                 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7214                                  ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7215 
7216                             for(j = 0; j < 4; j++)
7217                             {
7218                                 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7219                                 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7220                                 S32 i4_16x16_blk_index_in_ctb =
7221                                     i4_16x16_blk_start_index_in_i_th_32x32_blk +
7222                                     ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7223                                     ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7224 
7225                                 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7226 
7227                                 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7228                                 {
7229                                     U08 *pu1_inp =
7230                                         ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7231                                                                   (s_common_frm_prms.i4_ctb_y_off *
7232                                                                    ps_curr_layer->i4_inp_stride));
7233 
7234                                     U08 u1_cu_size = 16;
7235                                     WORD32 i4_inp_buf_offset =
7236                                         (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7237                                          ((i4_16x16_blk_index_in_ctb / 4) *
7238                                           (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7239 
7240                                     U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7241                                     U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7242                                     S32 i4_sigma_arr_offset =
7243                                         (((i4_16x16_blk_index_in_ctb % 4) *
7244                                           u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7245                                          ((i4_16x16_blk_index_in_ctb / 4) *
7246                                           u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7247 
7248                                     hme_compute_sigmaX_and_sigmaXSquared(
7249                                         pu1_inp + i4_inp_buf_offset,
7250                                         ps_curr_layer->i4_inp_stride,
7251                                         (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7252                                         (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7253                                         4,
7254                                         4,
7255                                         16,
7256                                         16,
7257                                         1,
7258                                         16);
7259                                 }
7260                             }
7261                         }
7262                     }
7263                 }
7264             }
7265             else
7266             {
7267                 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7268 
7269                 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7270 
7271                 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7272             }
7273 
7274             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7275             {
7276                 S32 ref_ctr;
7277                 U08 au1_pred_dir_searched[2];
7278                 U08 u1_is_cu_noisy;
7279                 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7280 
7281                 {
7282                     blk_x = (i4_ctb_x << 2) +
7283                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7284                     blk_y = (i4_ctb_y << 2) +
7285                             (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7286 
7287                     blk_id_in_full_ctb =
7288                         ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7289                     blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7290                     ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7291                     s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7292                     s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7293                 }
7294 
7295                 /* get the current input blk point */
7296                 pos_x = blk_x << blk_size_shift;
7297                 pos_y = blk_y << blk_size_shift;
7298                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7299 
7300                 /*********************************************************************/
7301                 /* For every blk in the picture, the search range needs to be derived*/
7302                 /* Any blk can have any mv, but practical search constraints are     */
7303                 /* imposed by the picture boundary and amt of padding.               */
7304                 /*********************************************************************/
7305                 /* MV limit is different based on ref. PIC */
7306                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7307                 {
7308                     if(!s_search_prms_blk.i4_use_rec)
7309                     {
7310                         hme_derive_search_range(
7311                             &as_range_prms_inp[ref_ctr],
7312                             &s_pic_limit_inp,
7313                             &as_mv_limit[ref_ctr],
7314                             pos_x,
7315                             pos_y,
7316                             blk_wd,
7317                             blk_ht);
7318                     }
7319                     else
7320                     {
7321                         hme_derive_search_range(
7322                             &as_range_prms_rec[ref_ctr],
7323                             &s_pic_limit_rec,
7324                             &as_mv_limit[ref_ctr],
7325                             pos_x,
7326                             pos_y,
7327                             blk_wd,
7328                             blk_ht);
7329                     }
7330                 }
7331                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7332                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7333                 /* Select search results from a suitable search result in the context */
7334                 {
7335                     ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7336 
7337                     if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7338                     {
7339                         S32 i;
7340 
7341                         for(i = 0; i < 2; i++)
7342                         {
7343                             ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7344                         }
7345                     }
7346                 }
7347 
7348                 u1_is_cu_noisy = au1_is_16x16Blk_noisy
7349                     [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7350 
7351                 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7352 
7353 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7354                 if(u1_is_cu_noisy)
7355                 {
7356                     S32 j;
7357                     S32 lambda;
7358 
7359                     lambda = lambda_recon;
7360                     lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7361 
7362                     for(j = 0; j < 2; j++)
7363                     {
7364                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
7365                     }
7366                 }
7367                 else
7368                 {
7369                     S32 j;
7370                     S32 lambda;
7371 
7372                     lambda = lambda_recon;
7373 
7374                     for(j = 0; j < 2; j++)
7375                     {
7376                         ps_search_results->as_pred_ctxt[j].lambda = lambda;
7377                     }
7378                 }
7379 #endif
7380 
7381                 s_search_prms_blk.ps_search_results = ps_search_results;
7382 
7383                 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7384                     pu1_inp,
7385                     i4_inp_stride,
7386                     ps_refine_prms->limit_active_partitions,
7387                     ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7388                     ps_ctxt->u1_is_curFrame_a_refFrame,
7389                     blk_8x8_mask,
7390                     e_me_quality_presets);
7391 
7392                 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7393                 {
7394                     ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7395                         s_search_prms_blk.i4_part_mask;
7396                 }
7397 
7398                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7399                 {
7400                     /* Temporarily setting u1_num_active_ref to 2 (bidir) or 1 */
7401                     /* for the sole purpose of the */
7402                     /* function called below */
7403                     ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7404 
7405                     hme_reset_search_results(
7406                         ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7407 
7408                     ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7409                 }
7410 
7411                 if(0 == blk_id_in_ctb)
7412                 {
7413                     UWORD8 u1_ctr;
7414                     for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7415                                               ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7416                         u1_ctr++)
7417                     {
7418                         WORD32 i4_max_dep_ctb_y;
7419                         WORD32 i4_max_dep_ctb_x;
7420 
7421                         /* Set max mv in ctb units */
7422                         i4_max_mv_x_in_ctb =
7423                             (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7424                             ps_ctxt->log_ctb_size;
7425 
7426                         i4_max_mv_y_in_ctb =
7427                             (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7428                             ps_ctxt->log_ctb_size;
7429                         /********************************************************************/
7430                         /* Set max ctb_x and ctb_y dependency on reference picture          */
7431                         /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
7432                         /********************************************************************/
7433                         i4_max_dep_ctb_x = CLIP3(
7434                             (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7435                             0,
7436                             ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7437                         i4_max_dep_ctb_y = CLIP3(
7438                             (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7439                             0,
7440                             ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7441 
7442                         ihevce_dmgr_map_chk_sync(
7443                             ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7444                             ps_ctxt->thrd_id,
7445                             i4_ctb_x,
7446                             i4_ctb_y,
7447                             i4_max_mv_x_in_ctb,
7448                             i4_max_mv_y_in_ctb);
7449                     }
7450                 }
7451 
7452                 /* Loop across different Ref IDx */
7453                 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7454                 {
7455                     S32 resultid;
7456                     S08 u1_default_ref_id;
7457                     S32 i4_num_srch_cands = 0;
7458                     S32 i4_num_refinement_iterations;
7459                     S32 i4_refine_iter_ctr;
7460 
7461                     if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7462                        (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7463                     {
7464                         u1_pred_dir = u1_pred_dir_ctr;
7465                     }
7466                     else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7467                     {
7468                         u1_pred_dir = 1;
7469                     }
7470 
7471                     u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7472                                                            : ps_ctxt->ai1_future_list[0];
7473                     au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7474 
7475                     i4_num_srch_cands = 0;
7476                     resultid = 0;
7477 
7478                     /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7479                     if(0 == blk_id_in_ctb)
7480                     {
7481                         /*****************************************************************/
7482                         /* Initialize the mv grid with results of neighbours for the next*/
7483                         /* ctb.                                                          */
7484                         /*****************************************************************/
7485                         hme_fill_ctb_neighbour_mvs(
7486                             ps_curr_layer,
7487                             blk_x,
7488                             blk_y,
7489                             aps_mv_grid[u1_pred_dir],
7490                             u1_pred_dir_ctr,
7491                             u1_default_ref_id,
7492                             ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7493                     }
7494 
7495                     s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7496 
7497                     {
7498                         if((blk_id_in_full_ctb % 4) == 0)
7499                         {
7500                             ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7501                                 .as_pred_ctxt[u1_pred_dir]
7502                                 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7503                         }
7504 
7505                         if(blk_id_in_full_ctb == 0)
7506                         {
7507                             ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7508                         }
7509 
7510                         ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7511                             !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7512                     }
7513 
7514                     {
7515                         S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7516                         S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7517                         U08 u1_is_blk_at_ctb_boundary = !y;
7518 
7519                         s_srch_cand_init_data.u1_is_left_available =
7520                             !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7521 
7522                         if(u1_is_blk_at_ctb_boundary)
7523                         {
7524                             s_srch_cand_init_data.u1_is_topRight_available = 0;
7525                             s_srch_cand_init_data.u1_is_topLeft_available = 0;
7526                             s_srch_cand_init_data.u1_is_top_available = 0;
7527                         }
7528                         else
7529                         {
7530                             s_srch_cand_init_data.u1_is_topRight_available =
7531                                 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7532                             s_srch_cand_init_data.u1_is_top_available = 1;
7533                             s_srch_cand_init_data.u1_is_topLeft_available =
7534                                 s_srch_cand_init_data.u1_is_left_available;
7535                         }
7536                     }
7537 
7538                     s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7539                     s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7540                     s_srch_cand_init_data.i4_pos_x = pos_x;
7541                     s_srch_cand_init_data.i4_pos_y = pos_y;
7542                     s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7543                     s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7544                     s_srch_cand_init_data.u1_search_candidate_list_index =
7545                         au1_search_candidate_list_index[u1_pred_dir];
7546 
7547                     i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7548 
7549                     /* Note this block also clips the MV range for all candidates */
7550                     {
7551                         S08 i1_check_for_mult_refs;
7552 
7553                         i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7554                                                              : (ps_ctxt->num_ref_past > 1);
7555 
7556                         ps_me_optimised_function_list->pf_mv_clipper(
7557                             &s_search_prms_blk,
7558                             i4_num_srch_cands,
7559                             i1_check_for_mult_refs,
7560                             ps_refine_prms->i4_num_steps_fpel_refine,
7561                             ps_refine_prms->i4_num_steps_hpel_refine,
7562                             ps_refine_prms->i4_num_steps_qpel_refine);
7563                     }
7564 
7565 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7566                     i4_num_refinement_iterations =
7567                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7568                             ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7569                             : 1;
7570 #else
7571                     i4_num_refinement_iterations =
7572                         ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7573 #endif
7574 
7575 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7576                     if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7577                     {
7578                         i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7579                                                                           : i4_num_act_ref_l1;
7580                     }
7581 #endif
7582 
7583                     for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7584                         i4_refine_iter_ctr++)
7585                     {
7586                         S32 center_x;
7587                         S32 center_y;
7588                         S32 center_ref_idx;
7589 
7590                         S08 *pi1_pred_dir_to_ref_idx =
7591                             (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7592 
7593                         {
7594                             WORD32 i4_i;
7595 
7596                             for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7597                             {
7598                                 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7599                                 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7600                                 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7601                                     MAX_SIGNED_16BIT_VAL;
7602                                 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7603                                 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7604                                 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7605 
7606                                 if(ps_refine_prms->i4_num_results_per_part == 2)
7607                                 {
7608                                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7609                                         MAX_SIGNED_16BIT_VAL;
7610                                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7611                                         MAX_SIGNED_16BIT_VAL;
7612                                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7613                                         MAX_SIGNED_16BIT_VAL;
7614                                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7615                                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7616                                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7617                                 }
7618                             }
7619 
7620                             s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7621                             s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7622                         }
7623 
7624                         {
7625                             search_node_t *ps_coloc_node;
7626 
7627                             S32 i = 0;
7628 
7629                             if(i4_num_refinement_iterations > 1)
7630                             {
7631                                 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7632                                 {
7633                                     ps_coloc_node =
7634                                         s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7635                                             .ps_search_node;
7636 
7637                                     if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7638                                        ps_coloc_node->i1_ref_idx)
7639                                     {
7640                                         break;
7641                                     }
7642                                 }
7643 
7644                                 if(i == ai4_num_coloc_cands[u1_pred_dir])
7645                                 {
7646                                     i = 0;
7647                                 }
7648                             }
7649                             else
7650                             {
7651                                 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7652                                                     .ps_search_node;
7653                             }
7654 
7655                             hme_set_mvp_node(
7656                                 ps_search_results,
7657                                 ps_coloc_node,
7658                                 u1_pred_dir,
7659                                 (i4_num_refinement_iterations > 1)
7660                                     ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7661                                     : u1_default_ref_id);
7662 
7663                             center_x = ps_coloc_node->ps_mv->i2_mvx;
7664                             center_y = ps_coloc_node->ps_mv->i2_mvy;
7665                             center_ref_idx = ps_coloc_node->i1_ref_idx;
7666                         }
7667 
7668                         /* Full-Pel search */
7669                         {
7670                             S32 num_unique_nodes;
7671 
7672                             memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7673 
7674                             num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7675                                 as_unique_search_nodes,
7676                                 s_search_prms_blk.ps_search_candts,
7677                                 au4_unique_node_map,
7678                                 pi1_pred_dir_to_ref_idx,
7679                                 i4_num_srch_cands,
7680                                 s_search_prms_blk.i4_num_init_candts,
7681                                 i4_refine_iter_ctr,
7682                                 i4_num_refinement_iterations,
7683                                 i4_num_act_ref_l0,
7684                                 center_ref_idx,
7685                                 center_x,
7686                                 center_y,
7687                                 ps_ctxt->s_frm_prms.bidir_enabled,
7688                                 e_me_quality_presets);
7689 
7690                             /*************************************************************************/
7691                             /* This array stores the ids of the partitions whose                     */
7692                             /* SADs are updated. Since the partitions whose SADs are updated may not */
7693                             /* be in contiguous order, we supply another level of indirection.       */
7694                             /*************************************************************************/
7695                             ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7696                                 s_search_prms_blk.i4_part_mask,
7697                                 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7698 
7699                             if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7700                             {
7701                                 S32 i;
7702                                 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7703                                 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7704                                                             (s_search_prms_blk.i4_cu_y_off * 4);
7705 
7706                                 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7707                                 {
7708                                     S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7709 
7710                                     hme_compute_final_sigma_of_pu_from_base_blocks(
7711                                         ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7712                                         ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7713                                         au8_final_src_sigmaX,
7714                                         au8_final_src_sigmaXSquared,
7715                                         16,
7716                                         4,
7717                                         i4_part_id,
7718                                         16);
7719                                 }
7720 
7721                                 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7722                                 s_common_frm_prms.pu8_part_src_sigmaXSquared =
7723                                     au8_final_src_sigmaXSquared;
7724 
7725                                 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7726                                 s_search_prms_blk.pu8_part_src_sigmaXSquared =
7727                                     au8_final_src_sigmaXSquared;
7728                             }
7729 
7730                             if(0 == num_unique_nodes)
7731                             {
7732                                 continue;
7733                             }
7734 
7735                             if(num_unique_nodes >= 2)
7736                             {
7737                                 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7738                                 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7739                                 if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7740                                 {
7741                                     if(ps_ctxt->i4_temporal_layer == 1)
7742                                     {
7743                                         hme_fullpel_cand_sifter(
7744                                             &s_search_prms_blk,
7745                                             ps_curr_layer,
7746                                             &ps_ctxt->s_wt_pred,
7747                                             ALPHA_FOR_NOISE_TERM_IN_ME,
7748                                             u1_is_cu_noisy,
7749                                             ps_me_optimised_function_list);
7750                                     }
7751                                     else
7752                                     {
7753                                         hme_fullpel_cand_sifter(
7754                                             &s_search_prms_blk,
7755                                             ps_curr_layer,
7756                                             &ps_ctxt->s_wt_pred,
7757                                             ALPHA_FOR_NOISE_TERM_IN_ME,
7758                                             u1_is_cu_noisy,
7759                                             ps_me_optimised_function_list);
7760                                     }
7761                                 }
7762                                 else
7763                                 {
7764                                     hme_fullpel_cand_sifter(
7765                                         &s_search_prms_blk,
7766                                         ps_curr_layer,
7767                                         &ps_ctxt->s_wt_pred,
7768                                         ALPHA_FOR_NOISE_TERM_IN_ME_P,
7769                                         u1_is_cu_noisy,
7770                                         ps_me_optimised_function_list);
7771                                 }
7772                             }
7773 
7774                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7775 
7776                             hme_fullpel_refine(
7777                                 ps_refine_prms,
7778                                 &s_search_prms_blk,
7779                                 ps_curr_layer,
7780                                 &ps_ctxt->s_wt_pred,
7781                                 au4_unique_node_map,
7782                                 num_unique_nodes,
7783                                 blk_8x8_mask,
7784                                 center_x,
7785                                 center_y,
7786                                 center_ref_idx,
7787                                 e_me_quality_presets,
7788                                 ps_me_optimised_function_list);
7789                         }
7790 
7791                         /* Sub-Pel search */
7792                         {
7793                             hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7794 
7795                             s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7796                                 &ps_ctxt->s_buf_mgr,
7797                                 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7798                             /* MV limit is different based on ref. PIC */
7799                             for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7800                             {
7801                                 SCALE_RANGE_PRMS(
7802                                     as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7803                                 SCALE_RANGE_PRMS(
7804                                     as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7805                             }
7806                             s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7807                             s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7808 
7809                             hme_subpel_refine_cu_hs(
7810                                 &s_subpel_prms,
7811                                 ps_curr_layer,
7812                                 ps_search_results,
7813                                 u1_pred_dir,
7814                                 &ps_ctxt->s_wt_pred,
7815                                 blk_8x8_mask,
7816                                 ps_ctxt->ps_func_selector,
7817                                 ps_cmn_utils_optimised_function_list,
7818                                 ps_me_optimised_function_list);
7819                         }
7820                     }
7821                 }
7822                 /* Populate the new PU struct with the results post subpel refinement*/
7823                 {
7824                     inter_cu_results_t *ps_cu_results;
7825                     WORD32 best_inter_cost, intra_cost, posx, posy;
7826 
7827                     UWORD8 intra_8x8_enabled = 0;
7828 
7829                     /*  cost of 16x16 cu parent  */
7830                     WORD32 parent_cost = MAX_32BIT_VAL;
7831 
7832                     /*  cost of 8x8 cu children  */
7833                     /*********************************************************************/
7834                     /* Assuming parent is not split, then we signal 1 bit for this parent*/
7835                     /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7836                     /* So, 4*lambda is extra for children cost.                          */
7837                     /*********************************************************************/
7838                     WORD32 child_cost = 0;
7839 
7840                     ps_cu_results = ps_search_results->ps_cu_results;
7841 
7842                     /* Initialize the pu_results pointers to the first struct in the stack array */
7843                     ps_pu_results = as_inter_pu_results;
7844 
7845                     hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7846 
7847                     hme_populate_pus(
7848                         ps_thrd_ctxt,
7849                         ps_ctxt,
7850                         &s_subpel_prms,
7851                         ps_search_results,
7852                         ps_cu_results,
7853                         ps_pu_results,
7854                         &(as_pu_results[0][0][0]),
7855                         &s_common_frm_prms,
7856                         &ps_ctxt->s_wt_pred,
7857                         ps_curr_layer,
7858                         au1_pred_dir_searched,
7859                         i4_num_pred_dir);
7860 
7861                     ps_cu_results->i4_inp_offset =
7862                         (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7863 
7864                     hme_decide_part_types(
7865                         ps_cu_results,
7866                         ps_pu_results,
7867                         &s_common_frm_prms,
7868                         ps_ctxt,
7869                         ps_cmn_utils_optimised_function_list,
7870                         ps_me_optimised_function_list
7871 
7872                     );
7873 
7874                     /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
7875                     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
7876                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7877                     {
7878                         WORD32 res_ctr;
7879 
7880                         for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7881                         {
7882                             WORD32 num_part = 2, part_ctr;
7883                             part_type_results_t *ps_best_results =
7884                                 &ps_cu_results->ps_best_results[res_ctr];
7885 
7886                             if(PRT_2Nx2N == ps_best_results->u1_part_type)
7887                                 num_part = 1;
7888 
7889                             for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7890                             {
7891                                 pu_result_t *ps_pu_results =
7892                                     &ps_best_results->as_pu_results[part_ctr];
7893 
7894                                 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7895 
7896                                 hme_update_dynamic_search_params(
7897                                     &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7898                                          .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7899                                     ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7900 
7901                                 /* Sanity Check */
7902                                 ASSERT(
7903                                     ps_pu_results->pu.mv.i1_l0_ref_idx <
7904                                     ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7905 
7906                                 /* No L1 for P Pic. */
7907                                 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7908                                 /* No BI for P Pic. */
7909                                 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7910                             }
7911                         }
7912                     }
7913 
7914                     /*****************************************************************/
7915                     /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
7916                     /*****************************************************************/
7917 
7918 #if DISABLE_INTRA_IN_BPICS
7919                     if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7920                              (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7921 #endif
7922                     {
7923                         if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7924                         {
7925                             hme_insert_intra_nodes_post_bipred(
7926                                 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7927                         }
7928                     }
7929 
7930 #if DISABLE_INTRA_IN_BPICS
7931                     if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7932                        (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7933                     {
7934                         intra_8x8_enabled = 0;
7935                     }
7936                     else
7937 #endif
7938                     {
7939                         /*TRAQO intra flag updation*/
7940                         if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7941                         {
7942                             best_inter_cost =
7943                                 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7944                             intra_cost =
7945                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7946                             /*@16x16 level*/
7947                             posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7948                                     << 2) >>
7949                                    4;
7950                             posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7951                                     << 2) >>
7952                                    4;
7953                         }
7954                         else
7955                         {
7956                             best_inter_cost =
7957                                 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7958                             posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7959                                     << 2) >>
7960                                    3;
7961                             posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7962                                     << 2) >>
7963                                    3;
7964                         }
7965 
7966                         /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7967                         if(ps_cur_ipe_ctb->u1_split_flag)
7968                         {
7969                             /* Id of the 32x32 block, 16x16 block in a CTB */
7970                             WORD32 i4_32x32_id =
7971                                 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7972                             WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7973                                                  ((ps_cu_results->u1_x_off >> 4) & 0x1);
7974 
7975                             if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7976                             {
7977                                 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978                                        .as_intra16_analyse[i4_16x16_id]
7979                                        .b1_split_flag)
7980                                 {
7981                                     intra_8x8_enabled =
7982                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983                                             .as_intra16_analyse[i4_16x16_id]
7984                                             .as_intra8_analyse[0]
7985                                             .b1_valid_cu;
7986                                     intra_8x8_enabled &=
7987                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7988                                             .as_intra16_analyse[i4_16x16_id]
7989                                             .as_intra8_analyse[1]
7990                                             .b1_valid_cu;
7991                                     intra_8x8_enabled &=
7992                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7993                                             .as_intra16_analyse[i4_16x16_id]
7994                                             .as_intra8_analyse[2]
7995                                             .b1_valid_cu;
7996                                     intra_8x8_enabled &=
7997                                         ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7998                                             .as_intra16_analyse[i4_16x16_id]
7999                                             .as_intra8_analyse[3]
8000                                             .b1_valid_cu;
8001                                 }
8002                             }
8003                         }
8004                     }
8005 
8006                     if(blk_8x8_mask == 0xf)
8007                     {
8008                         parent_cost =
8009                             ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
8010                         ps_search_results->u1_split_flag = 0;
8011                     }
8012                     else
8013                     {
8014                         ps_search_results->u1_split_flag = 1;
8015                     }
8016 
8017                     ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8018 
8019                     if(s_common_frm_prms.u1_is_cu_noisy)
8020                     {
8021                         intra_8x8_enabled = 0;
8022                     }
8023 
8024                     /* Evaluate 8x8 if NxN part id is enabled */
8025                     if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8026                     {
8027                         /* Populates the PU's for the 4 8x8's in one call */
8028                         hme_populate_pus_8x8_cu(
8029                             ps_thrd_ctxt,
8030                             ps_ctxt,
8031                             &s_subpel_prms,
8032                             ps_search_results,
8033                             ps_cu_results,
8034                             ps_pu_results,
8035                             &(as_pu_results[0][0][0]),
8036                             &s_common_frm_prms,
8037                             au1_pred_dir_searched,
8038                             i4_num_pred_dir,
8039                             blk_8x8_mask);
8040 
8041                         /* Re-initialize the pu_results pointers to the first struct in the stack array */
8042                         ps_pu_results = as_inter_pu_results;
8043 
8044                         for(i = 0; i < 4; i++)
8045                         {
8046                             if((blk_8x8_mask & (1 << i)))
8047                             {
8048                                 if(ps_cu_results->i4_part_mask)
8049                                 {
8050                                     hme_decide_part_types(
8051                                         ps_cu_results,
8052                                         ps_pu_results,
8053                                         &s_common_frm_prms,
8054                                         ps_ctxt,
8055                                         ps_cmn_utils_optimised_function_list,
8056                                         ps_me_optimised_function_list
8057 
8058                                     );
8059                                 }
8060                                 /*****************************************************************/
8061                                 /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
8062                                 /*****************************************************************/
8063 #if DISABLE_INTRA_IN_BPICS
8064                                 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8065                                          (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8066                                           TEMPORAL_LAYER_DISABLE)))
8067 #endif
8068                                 {
8069                                     if(!(DISABLE_INTRA_WHEN_NOISY &&
8070                                          s_common_frm_prms.u1_is_cu_noisy))
8071                                     {
8072                                         hme_insert_intra_nodes_post_bipred(
8073                                             ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8074                                     }
8075                                 }
8076 
8077                                 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8078                             }
8079 
8080                             ps_cu_results++;
8081                             ps_pu_results++;
8082                         }
8083 
8084                         /* Compare 16x16 vs 8x8 cost */
8085                         if(child_cost < parent_cost)
8086                         {
8087                             ps_search_results->best_cu_cost = child_cost;
8088                             ps_search_results->u1_split_flag = 1;
8089                         }
8090                     }
8091                 }
8092 
8093                 hme_update_mv_bank_encode(
8094                     ps_search_results,
8095                     ps_curr_layer->ps_layer_mvbank,
8096                     blk_x,
8097                     blk_y,
8098                     &s_mv_update_prms,
8099                     au1_pred_dir_searched,
8100                     i4_num_act_ref_l0);
8101 
8102                 /*********************************************************************/
8103                 /* Map the best results to an MV Grid. This is a 18x18 grid that is  */
8104                 /* useful for doing things like predictor for cost calculation or    */
8105                 /* also for merge calculations if need be.                           */
8106                 /*********************************************************************/
8107                 hme_map_mvs_to_grid(
8108                     &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8109             }
8110 
8111             /* Set the CU tree nodes appropriately */
8112             if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8113             {
8114                 WORD32 i, j;
8115 
8116                 for(i = 0; i < 16; i++)
8117                 {
8118                     cur_ctb_cu_tree_t *ps_tree_node =
8119                         ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8120                     search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8121 
8122                     switch(i >> 2)
8123                     {
8124                     case 0:
8125                     {
8126                         ps_tree_node = ps_tree_node->ps_child_node_tl;
8127 
8128                         break;
8129                     }
8130                     case 1:
8131                     {
8132                         ps_tree_node = ps_tree_node->ps_child_node_tr;
8133 
8134                         break;
8135                     }
8136                     case 2:
8137                     {
8138                         ps_tree_node = ps_tree_node->ps_child_node_bl;
8139 
8140                         break;
8141                     }
8142                     case 3:
8143                     {
8144                         ps_tree_node = ps_tree_node->ps_child_node_br;
8145 
8146                         break;
8147                     }
8148                     }
8149 
8150                     switch(i % 4)
8151                     {
8152                     case 0:
8153                     {
8154                         ps_tree_node = ps_tree_node->ps_child_node_tl;
8155 
8156                         break;
8157                     }
8158                     case 1:
8159                     {
8160                         ps_tree_node = ps_tree_node->ps_child_node_tr;
8161 
8162                         break;
8163                     }
8164                     case 2:
8165                     {
8166                         ps_tree_node = ps_tree_node->ps_child_node_bl;
8167 
8168                         break;
8169                     }
8170                     case 3:
8171                     {
8172                         ps_tree_node = ps_tree_node->ps_child_node_br;
8173 
8174                         break;
8175                     }
8176                     }
8177 
8178                     if(ai4_blk_8x8_mask[i] == 15)
8179                     {
8180                         if(!ps_results->u1_split_flag)
8181                         {
8182                             ps_tree_node->is_node_valid = 1;
8183                             NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8184                         }
8185                         else
8186                         {
8187                             ps_tree_node->is_node_valid = 0;
8188                             ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8189                         }
8190                     }
8191                     else
8192                     {
8193                         cur_ctb_cu_tree_t *ps_tree_child;
8194 
8195                         ps_tree_node->is_node_valid = 0;
8196 
8197                         for(j = 0; j < 4; j++)
8198                         {
8199                             switch(j)
8200                             {
8201                             case 0:
8202                             {
8203                                 ps_tree_child = ps_tree_node->ps_child_node_tl;
8204 
8205                                 break;
8206                             }
8207                             case 1:
8208                             {
8209                                 ps_tree_child = ps_tree_node->ps_child_node_tr;
8210 
8211                                 break;
8212                             }
8213                             case 2:
8214                             {
8215                                 ps_tree_child = ps_tree_node->ps_child_node_bl;
8216 
8217                                 break;
8218                             }
8219                             case 3:
8220                             {
8221                                 ps_tree_child = ps_tree_node->ps_child_node_br;
8222 
8223                                 break;
8224                             }
8225                             }
8226 
8227                             ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8228                         }
8229                     }
8230                 }
8231             }
8232 
8233             if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8234             {
8235                 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8236 
8237                 hme_analyse_mv_clustering(
8238                     ps_ctxt->as_search_results_16x16,
8239                     ps_ctxt->as_cu16x16_results,
8240                     ps_ctxt->as_cu8x8_results,
8241                     ps_ctxt->ps_ctb_cluster_info,
8242                     ps_ctxt->ai1_future_list,
8243                     ps_ctxt->ai1_past_list,
8244                     ps_ctxt->s_frm_prms.bidir_enabled,
8245                     e_me_quality_presets);
8246 
8247 #if DISABLE_BLK_MERGE_WHEN_NOISY
8248                 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8249                 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8250                 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8251                 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8252                 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8253                 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8254                 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8255                 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8256                 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8257                 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8258 #endif
8259 
8260                 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8261                                  (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8262                                  (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8263                                  (ps_tree->ps_child_node_br->is_node_valid << 3);
8264 
8265                 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8266                                      (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8267                                      (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8268                                      (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8269                                      (ps_tree->u1_inter_eval_enable << 4);
8270             }
8271             else
8272             {
8273                 en_merge_execution = 0x1f;
8274 
8275 #if DISABLE_BLK_MERGE_WHEN_NOISY
8276                 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8277                                  ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8278                                  ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8279                                  ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8280 #endif
8281             }
8282 
8283             /* Re-initialize the pu_results pointers to the first struct in the stack array */
8284             ps_pu_results = as_inter_pu_results;
8285 
8286             {
8287                 WORD32 ref_ctr;
8288 
8289                 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8290                 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8291 
8292                 /* MV limit is different based on ref. PIC */
8293                 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8294                 {
8295                     SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8296                     SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8297                 }
8298 
8299                 e_merge_result = CU_SPLIT;
8300                 merge_count_32x32 = 0;
8301 
8302                 if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8303                 {
8304                     range_prms_t *ps_pic_limit;
8305                     if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8306                     {
8307                         ps_pic_limit = &s_pic_limit_rec;
8308                     }
8309                     else
8310                     {
8311                         ps_pic_limit = &s_pic_limit_inp;
8312                     }
8313                     /* MV limit is different based on ref. PIC */
8314                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8315                     {
8316                         hme_derive_search_range(
8317                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8318                             ps_pic_limit,
8319                             &as_mv_limit[ref_ctr],
8320                             i4_ctb_x << 6,
8321                             i4_ctb_y << 6,
8322                             32,
8323                             32);
8324 
8325                         SCALE_RANGE_PRMS_POINTERS(
8326                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8327                             s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8328                             2);
8329                     }
8330                     s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8331                     s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8332                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8333 
8334                     e_merge_result = hme_try_merge_high_speed(
8335                         ps_thrd_ctxt,
8336                         ps_ctxt,
8337                         ps_cur_ipe_ctb,
8338                         &s_subpel_prms,
8339                         &s_merge_prms_32x32_tl,
8340                         ps_pu_results,
8341                         &as_pu_results[0][0][0]);
8342 
8343                     if(e_merge_result == CU_MERGED)
8344                     {
8345                         inter_cu_results_t *ps_cu_results =
8346                             s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8347 
8348                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8349                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8350                         {
8351                             hme_map_mvs_to_grid(
8352                                 &aps_mv_grid[0],
8353                                 s_merge_prms_32x32_tl.ps_results_merge,
8354                                 s_merge_prms_32x32_tl.au1_pred_dir_searched,
8355                                 s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8356                         }
8357 
8358                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8359                         {
8360                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8361                                 .ps_child_node_tl->is_node_valid = 1;
8362                             NULLIFY_THE_CHILDREN_NODES(
8363                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8364                                     .ps_child_node_tl);
8365                         }
8366 
8367                         merge_count_32x32++;
8368                         e_merge_result = CU_SPLIT;
8369                     }
8370                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8371                     {
8372 #if ENABLE_CU_TREE_CULLING
8373                         cur_ctb_cu_tree_t *ps_tree =
8374                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375 
8376                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8377                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8378                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8379                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8380                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8381                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8382                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8383 #endif
8384                     }
8385                 }
8386                 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8387                 {
8388 #if ENABLE_CU_TREE_CULLING
8389                     cur_ctb_cu_tree_t *ps_tree =
8390                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8391 
8392                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8393                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8394                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8395                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8396                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8397 #endif
8398 
8399                     if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8400                     {
8401                         ps_tree->is_node_valid = 0;
8402                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8403                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8404                     }
8405                 }
8406 
8407                 if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8408                 {
8409                     range_prms_t *ps_pic_limit;
8410                     if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8411                     {
8412                         ps_pic_limit = &s_pic_limit_rec;
8413                     }
8414                     else
8415                     {
8416                         ps_pic_limit = &s_pic_limit_inp;
8417                     }
8418                     /* MV limit is different based on ref. PIC */
8419                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8420                     {
8421                         hme_derive_search_range(
8422                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8423                             ps_pic_limit,
8424                             &as_mv_limit[ref_ctr],
8425                             (i4_ctb_x << 6) + 32,
8426                             i4_ctb_y << 6,
8427                             32,
8428                             32);
8429                         SCALE_RANGE_PRMS_POINTERS(
8430                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8431                             s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8432                             2);
8433                     }
8434                     s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8435                     s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8436                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8437 
8438                     e_merge_result = hme_try_merge_high_speed(
8439                         ps_thrd_ctxt,
8440                         ps_ctxt,
8441                         ps_cur_ipe_ctb,
8442                         &s_subpel_prms,
8443                         &s_merge_prms_32x32_tr,
8444                         ps_pu_results,
8445                         &as_pu_results[0][0][0]);
8446 
8447                     if(e_merge_result == CU_MERGED)
8448                     {
8449                         inter_cu_results_t *ps_cu_results =
8450                             s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8451 
8452                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8453                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8454                         {
8455                             hme_map_mvs_to_grid(
8456                                 &aps_mv_grid[0],
8457                                 s_merge_prms_32x32_tr.ps_results_merge,
8458                                 s_merge_prms_32x32_tr.au1_pred_dir_searched,
8459                                 s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8460                         }
8461 
8462                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8463                         {
8464                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8465                                 .ps_child_node_tr->is_node_valid = 1;
8466                             NULLIFY_THE_CHILDREN_NODES(
8467                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8468                                     .ps_child_node_tr);
8469                         }
8470 
8471                         merge_count_32x32++;
8472                         e_merge_result = CU_SPLIT;
8473                     }
8474                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8475                     {
8476 #if ENABLE_CU_TREE_CULLING
8477                         cur_ctb_cu_tree_t *ps_tree =
8478                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479 
8480                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8481                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8482                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8483                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8484                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8485                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8486                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8487 #endif
8488                     }
8489                 }
8490                 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8491                 {
8492 #if ENABLE_CU_TREE_CULLING
8493                     cur_ctb_cu_tree_t *ps_tree =
8494                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8495 
8496                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8497                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8498                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8499                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8500                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8501 #endif
8502 
8503                     if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8504                     {
8505                         ps_tree->is_node_valid = 0;
8506                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8507                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8508                     }
8509                 }
8510 
8511                 if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8512                 {
8513                     range_prms_t *ps_pic_limit;
8514                     if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8515                     {
8516                         ps_pic_limit = &s_pic_limit_rec;
8517                     }
8518                     else
8519                     {
8520                         ps_pic_limit = &s_pic_limit_inp;
8521                     }
8522                     /* MV limit is different based on ref. PIC */
8523                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8524                     {
8525                         hme_derive_search_range(
8526                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8527                             ps_pic_limit,
8528                             &as_mv_limit[ref_ctr],
8529                             i4_ctb_x << 6,
8530                             (i4_ctb_y << 6) + 32,
8531                             32,
8532                             32);
8533                         SCALE_RANGE_PRMS_POINTERS(
8534                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8535                             s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8536                             2);
8537                     }
8538                     s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8539                     s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8540                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8541 
8542                     e_merge_result = hme_try_merge_high_speed(
8543                         ps_thrd_ctxt,
8544                         ps_ctxt,
8545                         ps_cur_ipe_ctb,
8546                         &s_subpel_prms,
8547                         &s_merge_prms_32x32_bl,
8548                         ps_pu_results,
8549                         &as_pu_results[0][0][0]);
8550 
8551                     if(e_merge_result == CU_MERGED)
8552                     {
8553                         inter_cu_results_t *ps_cu_results =
8554                             s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8555 
8556                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8557                              (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8558                         {
8559                             hme_map_mvs_to_grid(
8560                                 &aps_mv_grid[0],
8561                                 s_merge_prms_32x32_bl.ps_results_merge,
8562                                 s_merge_prms_32x32_bl.au1_pred_dir_searched,
8563                                 s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8564                         }
8565 
8566                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8567                         {
8568                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8569                                 .ps_child_node_bl->is_node_valid = 1;
8570                             NULLIFY_THE_CHILDREN_NODES(
8571                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8572                                     .ps_child_node_bl);
8573                         }
8574 
8575                         merge_count_32x32++;
8576                         e_merge_result = CU_SPLIT;
8577                     }
8578                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8579                     {
8580 #if ENABLE_CU_TREE_CULLING
8581                         cur_ctb_cu_tree_t *ps_tree =
8582                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583 
8584                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8585                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8586                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8587                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8588                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8589                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8590                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8591 #endif
8592                     }
8593                 }
8594                 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8595                 {
8596 #if ENABLE_CU_TREE_CULLING
8597                     cur_ctb_cu_tree_t *ps_tree =
8598                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8599 
8600                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8601                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8602                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8603                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8604                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8605 #endif
8606 
8607                     if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8608                     {
8609                         ps_tree->is_node_valid = 0;
8610                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8611                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8612                     }
8613                 }
8614 
8615                 if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8616                 {
8617                     range_prms_t *ps_pic_limit;
8618                     if(s_merge_prms_32x32_br.i4_use_rec == 1)
8619                     {
8620                         ps_pic_limit = &s_pic_limit_rec;
8621                     }
8622                     else
8623                     {
8624                         ps_pic_limit = &s_pic_limit_inp;
8625                     }
8626                     /* MV limit is different based on ref. PIC */
8627                     for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8628                     {
8629                         hme_derive_search_range(
8630                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8631                             ps_pic_limit,
8632                             &as_mv_limit[ref_ctr],
8633                             (i4_ctb_x << 6) + 32,
8634                             (i4_ctb_y << 6) + 32,
8635                             32,
8636                             32);
8637 
8638                         SCALE_RANGE_PRMS_POINTERS(
8639                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8640                             s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8641                             2);
8642                     }
8643                     s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8644                     s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8645                     s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8646 
8647                     e_merge_result = hme_try_merge_high_speed(
8648                         ps_thrd_ctxt,
8649                         ps_ctxt,
8650                         ps_cur_ipe_ctb,
8651                         &s_subpel_prms,
8652                         &s_merge_prms_32x32_br,
8653                         ps_pu_results,
8654                         &as_pu_results[0][0][0]);
8655 
8656                     if(e_merge_result == CU_MERGED)
8657                     {
8658                         /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8659 
8660                         if(!((ps_cu_results->u1_num_best_results == 1) &&
8661                         (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8662                         {
8663                         hme_map_mvs_to_grid
8664                         (
8665                         &aps_mv_grid[0],
8666                         s_merge_prms_32x32_br.ps_results_merge,
8667                         s_merge_prms_32x32_br.au1_pred_dir_searched,
8668                         s_merge_prms_32x32_br.i4_num_pred_dir_actual
8669                         );
8670                         }*/
8671 
8672                         if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8673                         {
8674                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8675                                 .ps_child_node_br->is_node_valid = 1;
8676                             NULLIFY_THE_CHILDREN_NODES(
8677                                 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8678                                     .ps_child_node_br);
8679                         }
8680 
8681                         merge_count_32x32++;
8682                         e_merge_result = CU_SPLIT;
8683                     }
8684                     else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8685                     {
8686 #if ENABLE_CU_TREE_CULLING
8687                         cur_ctb_cu_tree_t *ps_tree =
8688                             ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689 
8690                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8691                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8692                         ENABLE_THE_CHILDREN_NODES(ps_tree);
8693                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8694                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8695                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8696                         ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8697 #endif
8698                     }
8699                 }
8700                 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8701                 {
8702 #if ENABLE_CU_TREE_CULLING
8703                     cur_ctb_cu_tree_t *ps_tree =
8704                         ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8705 
8706                     ENABLE_THE_CHILDREN_NODES(ps_tree);
8707                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8708                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8709                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8710                     ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8711 #endif
8712 
8713                     if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8714                     {
8715                         ps_tree->is_node_valid = 0;
8716                         ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8717                         en_merge_execution = (en_merge_execution & (~(1 << 4)));
8718                     }
8719                 }
8720 
8721                 /* Try merging all four 32x32 blocks into a single 64x64 candidate */
8722                 if(((en_merge_32x32 & 0xf) == 0xf) &&
8723                    (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8724                     ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8725                     if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8726                          !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8727                         (e_me_quality_presets != ME_XTREME_SPEED_25)))
8728                     {
8729                         range_prms_t *ps_pic_limit;
8730                         if(s_merge_prms_64x64.i4_use_rec == 1)
8731                         {
8732                             ps_pic_limit = &s_pic_limit_rec;
8733                         }
8734                         else
8735                         {
8736                             ps_pic_limit = &s_pic_limit_inp;
8737                         }
8738                         /* MV limit is different based on ref. PIC */
8739                         for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8740                         {
8741                             hme_derive_search_range(
8742                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8743                                 ps_pic_limit,
8744                                 &as_mv_limit[ref_ctr],
8745                                 i4_ctb_x << 6,
8746                                 i4_ctb_y << 6,
8747                                 64,
8748                                 64);
8749 
8750                             SCALE_RANGE_PRMS_POINTERS(
8751                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8752                                 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8753                                 2);
8754                         }
8755                         s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8756                         s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8757                         s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8758 
8759                         e_merge_result = hme_try_merge_high_speed(
8760                             ps_thrd_ctxt,
8761                             ps_ctxt,
8762                             ps_cur_ipe_ctb,
8763                             &s_subpel_prms,
8764                             &s_merge_prms_64x64,
8765                             ps_pu_results,
8766                             &as_pu_results[0][0][0]);
8767 
8768                         if((e_merge_result == CU_MERGED) &&
8769                            (ME_PRISTINE_QUALITY != e_me_quality_presets))
8770                         {
8771                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8772                                 .is_node_valid = 1;
8773                             NULLIFY_THE_CHILDREN_NODES(
8774                                 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8775                         }
8776                         else if(
8777                             (e_merge_result == CU_SPLIT) &&
8778                             (ME_PRISTINE_QUALITY == e_me_quality_presets))
8779                         {
8780                             ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8781                                 .is_node_valid = 0;
8782                         }
8783                     }
8784 
8785                 /*****************************************************************/
8786                 /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
8787                 /*****************************************************************/
8788                 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8789 
8790                 {
8791 #ifdef _DEBUG
8792                     S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8793                                  ? 64
8794                                  : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8795                     S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8796                                  ? 64
8797                                  : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8798                     ASSERT(
8799                         (wd * ht) ==
8800                         ihevce_compute_area_of_valid_cus_in_ctb(
8801                             &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8802 #endif
8803                 }
8804             }
8805 
8806             /* set the dependency for the corresponding row in enc loop */
8807             ihevce_dmgr_set_row_row_sync(
8808                 pv_dep_mngr_encloop_dep_me,
8809                 (i4_ctb_x + 1),
8810                 i4_ctb_y,
8811                 tile_col_idx /* Col Tile No. */);
8812 
8813             left_ctb_in_diff_tile = 0;
8814         }
8815     }
8816 }
8817 
8818 /**
8819 ********************************************************************************
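8820 *  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, refine_prms_t *ps_refine_prms,
8821 *            multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)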
8822 *
8823 *  @brief  Top level entry point for refinement ME
8824 *
8825 *  @param[in,out]  ps_ctxt: ME Handle
8826 *
8827 *  @param[in]  ps_refine_prms : refinement layer prms
8828 *
8829 *  @return None
8830 ********************************************************************************
8831 */
hme_refine_no_encode(coarse_me_ctxt_t * ps_ctxt,refine_prms_t * ps_refine_prms,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,S32 lyr_job_type,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)8832 void hme_refine_no_encode(
8833     coarse_me_ctxt_t *ps_ctxt,
8834     refine_prms_t *ps_refine_prms,
8835     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8836     S32 lyr_job_type,
8837     WORD32 i4_ping_pong,
8838     void **ppv_dep_mngr_hme_sync)
8839 {
8840     BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8841     ME_QUALITY_PRESETS_T e_me_quality_presets =
8842         ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8843 
8844     /*************************************************************************/
8845     /* Complexity of search: Low to High                                     */
8846     /*************************************************************************/
8847     SEARCH_COMPLEXITY_T e_search_complexity;
8848 
8849     /*************************************************************************/
8850     /* Config parameter structures for various ME submodules                 */
8851     /*************************************************************************/
8852     hme_search_prms_t s_search_prms_blk;
8853     mvbank_update_prms_t s_mv_update_prms;
8854 
8855     /*************************************************************************/
8856     /* All types of search candidates for predictor based search.            */
8857     /*************************************************************************/
8858     S32 num_init_candts = 0;
8859     search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8860     search_node_t as_top_neighbours[4], as_left_neighbours[3];
8861     search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8862     search_node_t *ps_candt_l, *ps_candt_t;
8863     search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8864     search_node_t *ps_candt_prj_bl[2];
8865     search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8866     search_node_t *ps_candt_prj_coloc[2];
8867 
8868     pf_get_wt_inp fp_get_wt_inp;
8869 
8870     search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8871     U32 au4_unique_node_map[MAP_X_MAX * 2];
8872 
8873     /*EIID */
8874     WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
8875     WORD32 i4_num_comparisions = 0;  //debug code
8876     WORD32 i4_threshold_multiplier;
8877     WORD32 i4_threshold_divider;
8878     WORD32 i4_temporal_layer =
8879         ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8880 
8881     /*************************************************************************/
8882     /* points to the search results for the blk level search (8x8/16x16)     */
8883     /*************************************************************************/
8884     search_results_t *ps_search_results;
8885 
8886     /*************************************************************************/
8887     /* Coordinates                                                           */
8888     /*************************************************************************/
8889     S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8890     //S32 i4_ctb_y;
8891     S32 pos_x, pos_y;
8892     S32 blk_id_in_full_ctb;
8893     S32 i4_num_srch_cands;
8894 
8895     S32 blk_y;
8896 
8897     /*************************************************************************/
8898     /* Related to dimensions of block being searched and pic dimensions      */
8899     /*************************************************************************/
8900     S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8901     S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8902     S32 num_results_prev_layer;
8903 
8904     /*************************************************************************/
8905     /* Size of a basic unit for this layer. For non encode layers, we search */
8906     /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8907     /* basic unit size is the ctb size.                                      */
8908     /*************************************************************************/
8909     S32 unit_size;
8910 
8911     /*************************************************************************/
8912     /* Pointers to context in current and coarser layers                     */
8913     /*************************************************************************/
8914     layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8915 
8916     /*************************************************************************/
8917     /* to store mv range per blk, and picture limit, allowed search range    */
8918     /* range prms in hpel and qpel units as well                             */
8919     /*************************************************************************/
8920     range_prms_t s_range_prms_inp, s_range_prms_rec;
8921     range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8922     /*************************************************************************/
8923     /* These variables are used to track number of references at different   */
8924     /* stages of ME.                                                         */
8925     /*************************************************************************/
8926     S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8927     S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8928     S32 lambda_inp = ps_refine_prms->lambda_inp;
8929 
8930     /*************************************************************************/
8931     /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8932     /* Explicit means it searches on all active ref idx.                     */
8933     /*************************************************************************/
8934     S32 curr_layer_implicit, prev_layer_implicit;
8935 
8936     /*************************************************************************/
8937     /* Variables for loop counts                                             */
8938     /*************************************************************************/
8939     S32 id;
8940     S08 i1_ref_idx;
8941 
8942     /*************************************************************************/
8943     /* Input pointer and stride                                              */
8944     /*************************************************************************/
8945     U08 *pu1_inp;
8946     S32 i4_inp_stride;
8947 
8948     S32 end_of_frame;
8949 
8950     S32 num_sync_units_in_row;
8951 
8952     PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8953     ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8954 
8955     /*************************************************************************/
8956     /* Pointers to current and coarse layer are needed for projection */
8957     /* Pointer to prev layer are needed for other candts like coloc   */
8958     /*************************************************************************/
8959     ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8960 
8961     ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8962 
8963     num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8964 
8965     /* Function pointer is selected based on the C vs X86 macro */
8966 
8967     fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8968                         ->pf_get_wt_inp_8x8;
8969 
8970     i4_inp_stride = ps_curr_layer->i4_inp_stride;
8971     i4_pic_wd = ps_curr_layer->i4_wd;
8972     i4_pic_ht = ps_curr_layer->i4_ht;
8973     e_search_complexity = ps_refine_prms->e_search_complexity;
8974 
8975     end_of_frame = 0;
8976 
8977     /* If the previous layer is non-encode layer, then use dyadic projection */
8978     if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8979         pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8980     else
8981         pf_hme_project_coloc_candt = hme_project_coloc_candt;
8982 
8983     /* This points to all the initial candts */
8984     ps_search_candts = &as_search_candts[0];
8985 
8986     {
8987         e_search_blk_size = BLK_8x8;
8988         blk_wd = blk_ht = 8;
8989         blk_size_shift = 3;
8990         s_mv_update_prms.i4_shift = 0;
8991         /*********************************************************************/
8992         /* In case we do not encode this layer, we search 8x8 with or without*/
8993         /* enable 4x4 SAD.                                                   */
8994         /*********************************************************************/
8995         {
8996             S32 i4_mask = (ENABLE_2Nx2N);
8997 
8998             e_result_blk_size = BLK_8x8;
8999             if(ps_refine_prms->i4_enable_4x4_part)
9000             {
9001                 i4_mask |= (ENABLE_NxN);
9002                 e_result_blk_size = BLK_4x4;
9003                 s_mv_update_prms.i4_shift = 1;
9004             }
9005 
9006             s_search_prms_blk.i4_part_mask = i4_mask;
9007         }
9008 
9009         unit_size = blk_wd;
9010         s_search_prms_blk.i4_inp_stride = unit_size;
9011     }
9012 
9013     /* This is required to properly update the layer mv bank */
9014     s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9015     s_search_prms_blk.e_blk_size = e_search_blk_size;
9016 
9017     /*************************************************************************/
9018     /* If current layer is explicit, then the number of ref frames are to    */
9019     /* be same as previous layer. Else it will be 2                          */
9020     /*************************************************************************/
9021     i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9022     if(ps_refine_prms->explicit_ref)
9023     {
9024         curr_layer_implicit = 0;
9025         i4_num_ref_fpel = i4_num_ref_prev_layer;
9026         /* 100578 : Using same mv cost fun. for all presets. */
9027         s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9028     }
9029     else
9030     {
9031         i4_num_ref_fpel = 2;
9032         curr_layer_implicit = 1;
9033         {
9034             if(ME_MEDIUM_SPEED > e_me_quality_presets)
9035             {
9036                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9037             }
9038             else
9039             {
9040 #if USE_MODIFIED == 1
9041                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9042 #else
9043                 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9044 #endif
9045             }
9046         }
9047     }
9048 
9049     i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9050     if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9051            IV_IDR_FRAME ||
9052        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9053     {
9054         i4_num_ref_fpel = 1;
9055     }
9056     if(i4_num_ref_prev_layer <= 2)
9057     {
9058         prev_layer_implicit = 1;
9059         curr_layer_implicit = 1;
9060         i4_num_ref_each_dir = 1;
9061     }
9062     else
9063     {
9064         /* It is assumed that we have equal number of references in each dir */
9065         //ASSERT(!(i4_num_ref_prev_layer & 1));
9066         prev_layer_implicit = 0;
9067         i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9068     }
9069     s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9070     s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9071     s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9072 
9073     /* this can be kept to 1 or 2 */
9074     i4_num_ref_before_merge = 2;
9075     i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9076 
9077     /* Set up place holders to hold the search nodes of each initial candt */
9078     for(i = 0; i < MAX_INIT_CANDTS; i++)
9079     {
9080         ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9081         INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9082     }
9083 
9084     /* redundant, but doing it here since it is used in pred ctxt init */
9085     ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9086     for(i = 0; i < 3; i++)
9087     {
9088         search_node_t *ps_search_node;
9089         ps_search_node = &as_left_neighbours[i];
9090         INIT_SEARCH_NODE(ps_search_node, 0);
9091         ps_search_node = &as_top_neighbours[i];
9092         INIT_SEARCH_NODE(ps_search_node, 0);
9093     }
9094 
9095     INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9096     /* bottom left node always not available for the blk being searched */
9097     as_left_neighbours[2].u1_is_avail = 0;
9098     /*************************************************************************/
9099     /* Initialize all the search results structure here. We update all the   */
9100     /* search results to default values, and configure things like blk sizes */
9101     /*************************************************************************/
9102     if(ps_refine_prms->i4_encode == 0)
9103     {
9104         S32 pred_lx;
9105         search_results_t *ps_search_results;
9106 
9107         ps_search_results = &ps_ctxt->s_search_results_8x8;
9108         hme_init_search_results(
9109             ps_search_results,
9110             i4_num_ref_fpel,
9111             ps_refine_prms->i4_num_fpel_results,
9112             ps_refine_prms->i4_num_results_per_part,
9113             e_search_blk_size,
9114             0,
9115             0,
9116             &ps_ctxt->au1_is_past[0]);
9117         for(pred_lx = 0; pred_lx < 2; pred_lx++)
9118         {
9119             hme_init_pred_ctxt_no_encode(
9120                 &ps_search_results->as_pred_ctxt[pred_lx],
9121                 ps_search_results,
9122                 &as_top_neighbours[0],
9123                 &as_left_neighbours[0],
9124                 &ps_candt_prj_coloc[0],
9125                 ps_candt_zeromv,
9126                 ps_candt_zeromv,
9127                 pred_lx,
9128                 lambda_inp,
9129                 ps_refine_prms->lambda_q_shift,
9130                 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9131                 &ps_ctxt->ai2_ref_scf[0]);
9132         }
9133     }
9134 
9135     /*********************************************************************/
9136     /* Initialize the dyn. search range params. for each reference index */
9137     /* in current layer ctxt                                             */
9138     /*********************************************************************/
9139     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9140     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9141     {
9142         WORD32 ref_ctr;
9143 
9144         for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9145         {
9146             INIT_DYN_SEARCH_PRMS(
9147                 &ps_ctxt->s_coarse_dyn_range_prms
9148                      .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9149                 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9150         }
9151     }
9152 
9153     /* Next set up initial candidates according to a given set of rules.   */
9154     /* The number of initial candidates affects the quality of ME in the   */
9155     /* case of motion with multiple degrees of freedom. In case of simple  */
9156     /* translational motion, a current and a few causal and non causal     */
9157     /* candts would suffice. More candidates help to cover more complex    */
9158     /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9159     /* where multiple ref helps etc.                                       */
9160     /* The candidate choice also depends on the following parameters.      */
9161     /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
9162     /* Whether we encode or not, and the type of search across reference   */
9163     /* i.e. the previous layer may have been explicit/implicit and curr    */
9164     /* layer may be explicit/implicit                                      */
9165 
9166     /* 0, 0, L, T, projected coloc best always present by default */
9167     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9168     ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9169     ps_search_candts[id].u1_num_steps_refine = 0;
9170     ps_candt_zeromv->s_mv.i2_mvx = 0;
9171     ps_candt_zeromv->s_mv.i2_mvy = 0;
9172 
9173     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9174     ps_candt_l = ps_search_candts[id].ps_search_node;
9175     ps_search_candts[id].u1_num_steps_refine = 0;
9176 
9177     /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9178     /* not at the CTB boundary use the causal T and */
9179     /* not the projected T, although the candidate is */
9180     /* still pointed to by ps_candt_prj_t[0] */
9181     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9182     {
9183         /* Using Projected top to eliminate sync */
9184         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9185             PROJECTED_TOP0, e_me_quality_presets);
9186         ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9187         ps_search_candts[id].u1_num_steps_refine = 1;
9188     }
9189     else
9190     {
9191         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9192             SPATIAL_TOP0, e_me_quality_presets);
9193         ps_candt_t = ps_search_candts[id].ps_search_node;
9194         ps_search_candts[id].u1_num_steps_refine = 0;
9195     }
9196 
9197     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9198         PROJECTED_COLOC0, e_me_quality_presets);
9199     ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9200     ps_search_candts[id].u1_num_steps_refine = 1;
9201 
9202     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9203         PROJECTED_COLOC1, e_me_quality_presets);
9204     ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9205     ps_search_candts[id].u1_num_steps_refine = 1;
9206 
9207     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9208     {
9209         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9210             PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9211         ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9212         ps_search_candts[id].u1_num_steps_refine = 1;
9213 
9214         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9215             PROJECTED_TOP_LEFT0, e_me_quality_presets);
9216         ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9217         ps_search_candts[id].u1_num_steps_refine = 1;
9218     }
9219     else
9220     {
9221         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222             SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9223         ps_candt_tr = ps_search_candts[id].ps_search_node;
9224         ps_search_candts[id].u1_num_steps_refine = 0;
9225 
9226         id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227             SPATIAL_TOP_LEFT0, e_me_quality_presets);
9228         ps_candt_tl = ps_search_candts[id].ps_search_node;
9229         ps_search_candts[id].u1_num_steps_refine = 0;
9230     }
9231 
9232     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233         PROJECTED_RIGHT0, e_me_quality_presets);
9234     ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9235     ps_search_candts[id].u1_num_steps_refine = 1;
9236 
9237     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238         PROJECTED_BOTTOM0, e_me_quality_presets);
9239     ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9240     ps_search_candts[id].u1_num_steps_refine = 1;
9241 
9242     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243         PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9244     ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9245     ps_search_candts[id].u1_num_steps_refine = 1;
9246 
9247     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248         PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9249     ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9250     ps_search_candts[id].u1_num_steps_refine = 1;
9251 
9252     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253         PROJECTED_RIGHT1, e_me_quality_presets);
9254     ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9255     ps_search_candts[id].u1_num_steps_refine = 1;
9256 
9257     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9258         PROJECTED_BOTTOM1, e_me_quality_presets);
9259     ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9260     ps_search_candts[id].u1_num_steps_refine = 1;
9261 
9262     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9263         PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9264     ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9265     ps_search_candts[id].u1_num_steps_refine = 1;
9266 
9267     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9268         PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9269     ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9270     ps_search_candts[id].u1_num_steps_refine = 1;
9271 
9272     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9273     ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9274     ps_search_candts[id].u1_num_steps_refine = 1;
9275 
9276     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9277         PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9278     ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9279     ps_search_candts[id].u1_num_steps_refine = 1;
9280 
9281     id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9282         PROJECTED_TOP_LEFT1, e_me_quality_presets);
9283     ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9284     ps_search_candts[id].u1_num_steps_refine = 1;
9285 
9286     /*************************************************************************/
9287     /* Now that the candidates have been ordered, to choose the right number */
9288     /* of initial candidates.                                                */
9289     /*************************************************************************/
9290     if(curr_layer_implicit && !prev_layer_implicit)
9291     {
9292         if(e_search_complexity == SEARCH_CX_LOW)
9293             num_init_candts = 7;
9294         else if(e_search_complexity == SEARCH_CX_MED)
9295             num_init_candts = 13;
9296         else if(e_search_complexity == SEARCH_CX_HIGH)
9297             num_init_candts = 18;
9298         else
9299             ASSERT(0);
9300     }
9301     else
9302     {
9303         if(e_search_complexity == SEARCH_CX_LOW)
9304             num_init_candts = 5;
9305         else if(e_search_complexity == SEARCH_CX_MED)
9306             num_init_candts = 11;
9307         else if(e_search_complexity == SEARCH_CX_HIGH)
9308             num_init_candts = 16;
9309         else
9310             ASSERT(0);
9311     }
9312 
9313     if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9314     {
9315         num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9316     }
9317 
9318     /*************************************************************************/
9319     /* The following search parameters are fixed throughout the search across*/
9320     /* all blks. So these are configured outside processing loop             */
9321     /*************************************************************************/
9322     s_search_prms_blk.i4_num_init_candts = num_init_candts;
9323     s_search_prms_blk.i4_start_step = 1;
9324     s_search_prms_blk.i4_use_satd = 0;
9325     s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9326     /* we use recon only for encoded layers, otherwise it is not available */
9327     s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9328 
9329     s_search_prms_blk.ps_search_candts = ps_search_candts;
9330     /* We use the same mv_range for all ref. pic. So assign to member 0 */
9331     if(s_search_prms_blk.i4_use_rec)
9332         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9333     else
9334         s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9335     /*************************************************************************/
9336     /* Initialize coordinates. Meaning as follows                            */
9337     /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
9338     /* blk_y : same as above, y coord.                                       */
9339     /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
9340     /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
9341     /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9342     /* corner of the picture. Always multiple of 64.                         */
9343     /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
9344     /*************************************************************************/
9345     blk_y = 0;
9346     blk_id_in_ctb = 0;
9347 
9348     GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9349 
9350     /* Get the number of sync units in a row based on encode/non encode layer */
9351     num_sync_units_in_row = num_blks_in_row;
9352 
9353     /*************************************************************************/
9354     /* Picture limit on all 4 sides. This will be used to set mv limits for  */
9355     /* every block given its coordinate. Note this assumes that the min amt  */
9356     /* of padding to right of pic is equal to the blk size. If we go all the */
9357     /* way upto 64x64, then the min padding on right size of picture should  */
9358     /* be 64, and also on bottom side of picture.                            */
9359     /*************************************************************************/
9360     SET_PIC_LIMIT(
9361         s_pic_limit_inp,
9362         ps_curr_layer->i4_pad_x_inp,
9363         ps_curr_layer->i4_pad_y_inp,
9364         ps_curr_layer->i4_wd,
9365         ps_curr_layer->i4_ht,
9366         s_search_prms_blk.i4_num_steps_post_refine);
9367 
9368     SET_PIC_LIMIT(
9369         s_pic_limit_rec,
9370         ps_curr_layer->i4_pad_x_rec,
9371         ps_curr_layer->i4_pad_y_rec,
9372         ps_curr_layer->i4_wd,
9373         ps_curr_layer->i4_ht,
9374         s_search_prms_blk.i4_num_steps_post_refine);
9375 
9376     /*************************************************************************/
9377     /* set the MV limit per ref. pic.                                        */
9378     /*    - P pic. : Based on the config params.                             */
9379     /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9380     /*************************************************************************/
9381     {
9382         WORD32 ref_ctr;
9383         /* Only for B/b pic. */
9384         if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9385         {
9386             WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9387             WORD32 cur_poc, ref_poc, abs_poc_diff;
9388 
9389             cur_poc = ps_ctxt->i4_curr_poc;
9390 
9391             /* Get abs MAX for symmetric search */
9392             i2_mv_y_per_poc = MAX(
9393                 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9394                 (ABS(ps_ctxt->s_coarse_dyn_range_prms
9395                          .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9396 
9397             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9398             {
9399                 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9400                 abs_poc_diff = ABS((cur_poc - ref_poc));
9401                 /* Get the cur. max MV based on POC distance */
9402                 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9403                 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9404 
9405                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9406                 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9407                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9408                 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9409             }
9410         }
9411         else
9412         {
9413             /* Set the Config. File Params for P pic. */
9414             for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9415             {
9416                 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9417                 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9418                 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9419                 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9420             }
9421         }
9422     }
9423 
9424     /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9425     if(e_me_quality_presets == ME_MEDIUM_SPEED)
9426     {
9427         i4_threshold_multiplier = 1;
9428         i4_threshold_divider = 4;
9429     }
9430     else if(e_me_quality_presets == ME_HIGH_SPEED)
9431     {
9432         i4_threshold_multiplier = 1;
9433         i4_threshold_divider = 2;
9434     }
9435     else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9436     {
9437 #if OLD_XTREME_SPEED
9438         /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9439         i4_temporal_layer = 1;
9440 #endif
9441         if(i4_temporal_layer == 0)
9442         {
9443             i4_threshold_multiplier = 3;
9444             i4_threshold_divider = 4;
9445         }
9446         else if(i4_temporal_layer == 1)
9447         {
9448             i4_threshold_multiplier = 3;
9449             i4_threshold_divider = 4;
9450         }
9451         else if(i4_temporal_layer == 2)
9452         {
9453             i4_threshold_multiplier = 1;
9454             i4_threshold_divider = 1;
9455         }
9456         else
9457         {
9458             i4_threshold_multiplier = 5;
9459             i4_threshold_divider = 4;
9460         }
9461     }
9462     else if(e_me_quality_presets == ME_HIGH_QUALITY)
9463     {
9464         i4_threshold_multiplier = 1;
9465         i4_threshold_divider = 1;
9466     }
9467 
9468     /*************************************************************************/
9469     /*************************************************************************/
9470     /*************************************************************************/
9471     /* START OF THE CORE LOOP                                                */
9472     /* If Encode is 0, then we just loop over each blk                       */
9473     /*************************************************************************/
9474     /*************************************************************************/
9475     /*************************************************************************/
9476     while(0 == end_of_frame)
9477     {
9478         job_queue_t *ps_job;
9479         ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
9480         WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
9481         WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
9482         //+3 to get ceil values when divided by 4
9483         WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9484             8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
9485         //if there is variable for ctb size use that and this variable can be derived
9486         WORD32 offset_val, check_dep_pos, set_dep_pos;
9487         void *pv_hme_dep_mngr;
9488         ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9489 
9490         /* Get the current layer HME Dep Mngr       */
9491         /* Note : Use layer_id - 1 in HME layers    */
9492 
9493         pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9494 
9495         /* Get the current row from the job queue */
9496         ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9497             ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9498 
9499         /* If all rows are done, set the end of process flag to 1, */
9500         /* and the current row to -1 */
9501         if(NULL == ps_job)
9502         {
9503             blk_y = -1;
9504             end_of_frame = 1;
9505 
9506             continue;
9507         }
9508 
9509         if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9510         {
9511             /* set the output dependency of current row */
9512             ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9513             continue;
9514         }
9515 
9516         blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9517         blk_x = 0;
9518         i4_ctb_x = 0;
9519 
9520         /* wait for Corresponding Pre intra Job to be completed */
9521         if(1 == ps_refine_prms->i4_layer_id)
9522         {
9523             volatile UWORD32 i4_l1_done;
9524             volatile UWORD32 *pi4_l1_done;
9525             pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9526                               ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9527             i4_l1_done = *pi4_l1_done;
9528             while(!i4_l1_done)
9529             {
9530                 i4_l1_done = *pi4_l1_done;
9531             }
9532         }
9533         /* Set Variables for Dep. Checking and Setting */
9534         set_dep_pos = blk_y + 1;
9535         if(blk_y > 0)
9536         {
9537             offset_val = 2;
9538             check_dep_pos = blk_y - 1;
9539         }
9540         else
9541         {
9542             /* First row should run without waiting */
9543             offset_val = -1;
9544             check_dep_pos = 0;
9545         }
9546 
9547         /* EIID: calculate ed_blk_ctxt pointer for current row */
9548         /* valid for only layer-1. not verified and used for other layers */
9549         i4_ctb_row_ctr = blk_y / 4;
9550         ps_ed_blk_ctxt_curr_row =
9551             ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9552                                   i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
9553         ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9554 
9555         /* if non-encode layer then i4_ctb_x will be same as blk_x */
9556         /* loop over all the units in a row                        */
9557         for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9558         {
9559             ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
9560             ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9561             WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9562 
9563             /* Wait till top row block is processed   */
9564             /* Currently checking till top right block*/
9565 
9566             /* Disabled since all candidates, except for */
9567             /* L and C, are projected from the coarser layer, */
9568             /* only in ME_HIGH_SPEED mode */
9569             if((ME_MEDIUM_SPEED > e_me_quality_presets))
9570             {
9571                 if(i4_ctb_x < (num_sync_units_in_row - 1))
9572                 {
9573                     ihevce_dmgr_chk_row_row_sync(
9574                         pv_hme_dep_mngr,
9575                         i4_ctb_x,
9576                         offset_val,
9577                         check_dep_pos,
9578                         0, /* Col Tile No. : Not supported in PreEnc*/
9579                         ps_ctxt->thrd_id);
9580                 }
9581             }
9582 
9583             {
9584                 /* for non encoder layer only one block is processed */
9585                 num_blks_in_this_ctb = 1;
9586             }
9587 
9588             /* EIID: derive ed_ctxt ptr for current CTB */
9589             ps_ed_blk_ctxt_curr_ctb =
9590                 ps_ed_blk_ctxt_curr_row +
9591                 (i4_ctb_blk_ctr *
9592                  i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
9593             ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9594 
9595             /* loop over all the blocks in CTB will always be 1 */
9596             for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9597             {
9598                 {
9599                     /* non encode layer */
9600                     blk_x = i4_ctb_x;
9601                     blk_id_in_full_ctb = 0;
9602                     s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9603                 }
9604 
9605                 /* get the current input blk point */
9606                 pos_x = blk_x << blk_size_shift;
9607                 pos_y = blk_y << blk_size_shift;
9608                 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9609 
9610                 /*********************************************************************/
9611                 /* replicate the inp buffer at blk or ctb level for each ref id,     */
9612                 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9613                 /* thereby avoiding a bloat up of memory. If we did all references   */
9614                 /* weighted pred, we will end up with a duplicate copy of each ref   */
9615                 /* at each layer, since we need to preserve the original reference.  */
9616                 /* ToDo: Need to observe performance with this mechanism and compare */
9617                 /* with case where ref is weighted.                                  */
9618                 /*********************************************************************/
9619                 if(blk_id_in_ctb == 0)
9620                 {
9621                     fp_get_wt_inp(
9622                         ps_curr_layer,
9623                         &ps_ctxt->s_wt_pred,
9624                         unit_size,
9625                         pos_x,
9626                         pos_y,
9627                         unit_size,
9628                         ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9629                         ps_ctxt->i4_wt_pred_enable_flag);
9630                 }
9631 
9632                 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9633                 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9634                 /* Select search results from a suitable search result in the context */
9635                 {
9636                     ps_search_results = &ps_ctxt->s_search_results_8x8;
9637                 }
9638 
9639                 s_search_prms_blk.ps_search_results = ps_search_results;
9640 
9641                 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9642                 hme_reset_search_results(
9643                     ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9644 
9645                 /* Loop across different Ref IDx */
9646                 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9647                 {
9648                     S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9649                     S32 prev_blk_offset = 6;
9650                     S32 resultid;
9651 
9652                     /*********************************************************************/
9653                     /* For every blk in the picture, the search range needs to be derived*/
9654                     /* Any blk can have any mv, but practical search constraints are     */
9655                     /* imposed by the picture boundary and amt of padding.               */
9656                     /*********************************************************************/
9657                     /* MV limit is different based on ref. PIC */
9658                     hme_derive_search_range(
9659                         &s_range_prms_inp,
9660                         &s_pic_limit_inp,
9661                         &as_mv_limit[i1_ref_idx],
9662                         pos_x,
9663                         pos_y,
9664                         blk_wd,
9665                         blk_ht);
9666                     hme_derive_search_range(
9667                         &s_range_prms_rec,
9668                         &s_pic_limit_rec,
9669                         &as_mv_limit[i1_ref_idx],
9670                         pos_x,
9671                         pos_y,
9672                         blk_wd,
9673                         blk_ht);
9674 
9675                     s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9676                     ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9677 
9678                     i4_num_srch_cands = 1;
9679 
9680                     if(1 != ps_refine_prms->i4_layer_id)
9681                     {
9682                         S32 x, y;
9683                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9684                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9685 
9686                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
9687                         {
9688                             hme_get_spatial_candt(
9689                                 ps_curr_layer,
9690                                 e_search_blk_size,
9691                                 blk_x,
9692                                 blk_y,
9693                                 i1_ref_idx,
9694                                 &as_top_neighbours[0],
9695                                 &as_left_neighbours[0],
9696                                 0,
9697                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9698                                 0,
9699                                 ps_refine_prms->i4_encode);
9700 
9701                             *ps_candt_tr = as_top_neighbours[3];
9702                             *ps_candt_t = as_top_neighbours[1];
9703                             *ps_candt_tl = as_top_neighbours[0];
9704                             i4_num_srch_cands += 3;
9705                         }
9706                         else
9707                         {
9708                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9709                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9710                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9711                             search_node_t *ps_search_node;
9712                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9713                             hme_mv_t *ps_mv, *ps_mv_base;
9714                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
9715                             S32 jump = 1, mvs_in_blk, mvs_in_row;
9716                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9717 
9718                             if(i4_blk_size1 != i4_blk_size2)
9719                             {
9720                                 blk_x_temp <<= 1;
9721                                 blk_y_temp <<= 1;
9722                                 jump = 2;
9723                                 if((i4_blk_size1 << 2) == i4_blk_size2)
9724                                 {
9725                                     blk_x_temp <<= 1;
9726                                     blk_y_temp <<= 1;
9727                                     jump = 4;
9728                                 }
9729                             }
9730 
9731                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9732                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9733 
9734                             /* Adjust the blk coord to point to top left locn */
9735                             blk_x_temp -= 1;
9736                             blk_y_temp -= 1;
9737 
9738                             /* Pick up the mvs from the location */
9739                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9740                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9741 
9742                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9743                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9744 
9745                             ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9746                             pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9747 
9748                             ps_mv_base = ps_mv;
9749                             pi1_ref_idx_base = pi1_ref_idx;
9750 
9751                             ps_search_node = &as_left_neighbours[0];
9752                             ps_mv = ps_mv_base + mvs_in_row;
9753                             pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9754                             COPY_MV_TO_SEARCH_NODE(
9755                                 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9756 
9757                             i4_num_srch_cands++;
9758                         }
9759                     }
9760                     else
9761                     {
9762                         S32 x, y;
9763                         x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9764                         y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9765 
9766                         if(ME_MEDIUM_SPEED > e_me_quality_presets)
9767                         {
9768                             hme_get_spatial_candt_in_l1_me(
9769                                 ps_curr_layer,
9770                                 e_search_blk_size,
9771                                 blk_x,
9772                                 blk_y,
9773                                 i1_ref_idx,
9774                                 !ps_search_results->pu1_is_past[i1_ref_idx],
9775                                 &as_top_neighbours[0],
9776                                 &as_left_neighbours[0],
9777                                 0,
9778                                 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9779                                 0,
9780                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9781                                 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9782 
9783                             *ps_candt_tr = as_top_neighbours[3];
9784                             *ps_candt_t = as_top_neighbours[1];
9785                             *ps_candt_tl = as_top_neighbours[0];
9786 
9787                             i4_num_srch_cands += 3;
9788                         }
9789                         else
9790                         {
9791                             layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9792                             S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9793                             S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9794                             S32 i4_mv_pos_in_implicit_array;
9795                             search_node_t *ps_search_node;
9796                             S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9797                             hme_mv_t *ps_mv, *ps_mv_base;
9798                             S08 *pi1_ref_idx, *pi1_ref_idx_base;
9799                             S32 jump = 1, mvs_in_blk, mvs_in_row;
9800                             S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9801                             U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9802                             S32 i4_num_results_in_given_dir =
9803                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9804                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9805                                                     : (ps_layer_mvbank->i4_num_mvs_per_ref *
9806                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9807 
9808                             if(i4_blk_size1 != i4_blk_size2)
9809                             {
9810                                 blk_x_temp <<= 1;
9811                                 blk_y_temp <<= 1;
9812                                 jump = 2;
9813                                 if((i4_blk_size1 << 2) == i4_blk_size2)
9814                                 {
9815                                     blk_x_temp <<= 1;
9816                                     blk_y_temp <<= 1;
9817                                     jump = 4;
9818                                 }
9819                             }
9820 
9821                             mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9822                             mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9823 
9824                             /* Adjust the blk coord to point to top left locn */
9825                             blk_x_temp -= 1;
9826                             blk_y_temp -= 1;
9827 
9828                             /* Pick up the mvs from the location */
9829                             i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9830                             i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9831 
9832                             i4_offset +=
9833                                 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9834                                                        ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9835                                                     : 0);
9836 
9837                             ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9838                             pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9839 
9840                             ps_mv_base = ps_mv;
9841                             pi1_ref_idx_base = pi1_ref_idx;
9842 
9843                             {
9844                                 /* ps_mv and pi1_ref_idx now point to the top left locn */
9845                                 ps_search_node = &as_left_neighbours[0];
9846                                 ps_mv = ps_mv_base + mvs_in_row;
9847                                 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9848 
9849                                 i4_mv_pos_in_implicit_array =
9850                                     hme_find_pos_of_implicitly_stored_ref_id(
9851                                         pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9852 
9853                                 if(-1 != i4_mv_pos_in_implicit_array)
9854                                 {
9855                                     COPY_MV_TO_SEARCH_NODE(
9856                                         ps_search_node,
9857                                         &ps_mv[i4_mv_pos_in_implicit_array],
9858                                         &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9859                                         i1_ref_idx,
9860                                         shift);
9861                                 }
9862                                 else
9863                                 {
9864                                     ps_search_node->u1_is_avail = 0;
9865                                     ps_search_node->s_mv.i2_mvx = 0;
9866                                     ps_search_node->s_mv.i2_mvy = 0;
9867                                     ps_search_node->i1_ref_idx = i1_ref_idx;
9868                                 }
9869 
9870                                 i4_num_srch_cands++;
9871                             }
9872                         }
9873                     }
9874 
9875                     *ps_candt_l = as_left_neighbours[0];
9876 
9877                     /* when 16x16 is searched in an encode layer, and the prev layer */
9878                     /* stores results for 4x4 blks, we project 5 candts corresponding */
9879                     /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9880                     /* However in other cases, only 2,2 best and 2nd best reqd */
9881                     resultid = 0;
9882                     pf_hme_project_coloc_candt(
9883                         ps_candt_prj_coloc[0],
9884                         ps_curr_layer,
9885                         ps_coarse_layer,
9886                         pos_x + 2,
9887                         pos_y + 2,
9888                         i1_ref_idx,
9889                         resultid);
9890 
9891                     i4_num_srch_cands++;
9892 
9893                     resultid = 1;
9894                     if(num_results_prev_layer > 1)
9895                     {
9896                         pf_hme_project_coloc_candt(
9897                             ps_candt_prj_coloc[1],
9898                             ps_curr_layer,
9899                             ps_coarse_layer,
9900                             pos_x + 2,
9901                             pos_y + 2,
9902                             i1_ref_idx,
9903                             resultid);
9904 
9905                         i4_num_srch_cands++;
9906                     }
9907 
9908                     resultid = 0;
9909 
9910                     if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9911                     {
9912                         pf_hme_project_coloc_candt(
9913                             ps_candt_prj_t[0],
9914                             ps_curr_layer,
9915                             ps_coarse_layer,
9916                             pos_x,
9917                             pos_y - prev_blk_offset,
9918                             i1_ref_idx,
9919                             resultid);
9920 
9921                         i4_num_srch_cands++;
9922                     }
9923 
9924                     {
9925                         pf_hme_project_coloc_candt(
9926                             ps_candt_prj_br[0],
9927                             ps_curr_layer,
9928                             ps_coarse_layer,
9929                             pos_x + next_blk_offset,
9930                             pos_y + next_blk_offset,
9931                             i1_ref_idx,
9932                             resultid);
9933                         pf_hme_project_coloc_candt(
9934                             ps_candt_prj_bl[0],
9935                             ps_curr_layer,
9936                             ps_coarse_layer,
9937                             pos_x - prev_blk_offset,
9938                             pos_y + next_blk_offset,
9939                             i1_ref_idx,
9940                             resultid);
9941                         pf_hme_project_coloc_candt(
9942                             ps_candt_prj_r[0],
9943                             ps_curr_layer,
9944                             ps_coarse_layer,
9945                             pos_x + next_blk_offset,
9946                             pos_y,
9947                             i1_ref_idx,
9948                             resultid);
9949                         pf_hme_project_coloc_candt(
9950                             ps_candt_prj_b[0],
9951                             ps_curr_layer,
9952                             ps_coarse_layer,
9953                             pos_x,
9954                             pos_y + next_blk_offset,
9955                             i1_ref_idx,
9956                             resultid);
9957 
9958                         i4_num_srch_cands += 4;
9959 
9960                         if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9961                         {
9962                             pf_hme_project_coloc_candt(
9963                                 ps_candt_prj_tr[0],
9964                                 ps_curr_layer,
9965                                 ps_coarse_layer,
9966                                 pos_x + next_blk_offset,
9967                                 pos_y - prev_blk_offset,
9968                                 i1_ref_idx,
9969                                 resultid);
9970                             pf_hme_project_coloc_candt(
9971                                 ps_candt_prj_tl[0],
9972                                 ps_curr_layer,
9973                                 ps_coarse_layer,
9974                                 pos_x - prev_blk_offset,
9975                                 pos_y - prev_blk_offset,
9976                                 i1_ref_idx,
9977                                 resultid);
9978 
9979                             i4_num_srch_cands += 2;
9980                         }
9981                     }
9982                     if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9983                     {
9984                         resultid = 1;
9985                         pf_hme_project_coloc_candt(
9986                             ps_candt_prj_br[1],
9987                             ps_curr_layer,
9988                             ps_coarse_layer,
9989                             pos_x + next_blk_offset,
9990                             pos_y + next_blk_offset,
9991                             i1_ref_idx,
9992                             resultid);
9993                         pf_hme_project_coloc_candt(
9994                             ps_candt_prj_bl[1],
9995                             ps_curr_layer,
9996                             ps_coarse_layer,
9997                             pos_x - prev_blk_offset,
9998                             pos_y + next_blk_offset,
9999                             i1_ref_idx,
10000                             resultid);
10001                         pf_hme_project_coloc_candt(
10002                             ps_candt_prj_r[1],
10003                             ps_curr_layer,
10004                             ps_coarse_layer,
10005                             pos_x + next_blk_offset,
10006                             pos_y,
10007                             i1_ref_idx,
10008                             resultid);
10009                         pf_hme_project_coloc_candt(
10010                             ps_candt_prj_b[1],
10011                             ps_curr_layer,
10012                             ps_coarse_layer,
10013                             pos_x,
10014                             pos_y + next_blk_offset,
10015                             i1_ref_idx,
10016                             resultid);
10017 
10018                         i4_num_srch_cands += 4;
10019 
10020                         pf_hme_project_coloc_candt(
10021                             ps_candt_prj_tr[1],
10022                             ps_curr_layer,
10023                             ps_coarse_layer,
10024                             pos_x + next_blk_offset,
10025                             pos_y - prev_blk_offset,
10026                             i1_ref_idx,
10027                             resultid);
10028                         pf_hme_project_coloc_candt(
10029                             ps_candt_prj_tl[1],
10030                             ps_curr_layer,
10031                             ps_coarse_layer,
10032                             pos_x - prev_blk_offset,
10033                             pos_y - prev_blk_offset,
10034                             i1_ref_idx,
10035                             resultid);
10036                         pf_hme_project_coloc_candt(
10037                             ps_candt_prj_t[1],
10038                             ps_curr_layer,
10039                             ps_coarse_layer,
10040                             pos_x,
10041                             pos_y - prev_blk_offset,
10042                             i1_ref_idx,
10043                             resultid);
10044 
10045                         i4_num_srch_cands += 3;
10046                     }
10047 
10048                     /* Debug-only sanity check that every candidate has a valid ref idx; the MV range clipping for all candidates is done in the de-duplication loop that follows */
10049 #ifdef _DEBUG
10050                     {
10051                         S32 candt;
10052                         range_prms_t *ps_range_prms;
10053 
10054                         S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10055                         for(candt = 0; candt < i4_num_srch_cands; candt++)
10056                         {
10057                             search_node_t *ps_search_node;
10058 
10059                             ps_search_node =
10060                                 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10061 
10062                             ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10063 
10064                             if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10065                                (ps_search_node->i1_ref_idx < 0))
10066                             {
10067                                 ASSERT(0);
10068                             }
10069                         }
10070                     }
10071 #endif
10072 
10073                     {
10074                         S32 srch_cand;
10075                         S32 num_unique_nodes = 0;
10076                         S32 num_nodes_searched = 0;
10077                         S32 num_best_cand = 0;
10078                         S08 i1_grid_enable = 0;
10079                         search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10080                         /* has list of valid partition to search terminated by -1 */
10081                         S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10082                         S32 center_x;
10083                         S32 center_y;
10084 
10085                         /* indicates if the centre point of grid needs to be explicitly added for search */
10086                         S32 add_centre = 0;
10087 
10088                         memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10089                         center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10090                         center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10091 
10092                         for(srch_cand = 0;
10093                             (srch_cand < i4_num_srch_cands) &&
10094                             (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10095                             srch_cand++)
10096                         {
10097                             search_node_t s_search_node_temp =
10098                                 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10099 
10100                             s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
10101 
10102                             /* Clip the motion vectors as well here since after clipping
10103                             two candidates can become same and they will be removed during deduplication */
10104                             CLIP_MV_WITHIN_RANGE(
10105                                 s_search_node_temp.s_mv.i2_mvx,
10106                                 s_search_node_temp.s_mv.i2_mvy,
10107                                 s_search_prms_blk.aps_mv_range[0],
10108                                 ps_refine_prms->i4_num_steps_fpel_refine,
10109                                 ps_refine_prms->i4_num_steps_hpel_refine,
10110                                 ps_refine_prms->i4_num_steps_qpel_refine);
10111 
10112                             /* PT_C */
10113                             INSERT_NEW_NODE(
10114                                 as_unique_search_nodes,
10115                                 num_unique_nodes,
10116                                 s_search_node_temp,
10117                                 0,
10118                                 au4_unique_node_map,
10119                                 center_x,
10120                                 center_y,
10121                                 1);
10122 
10123                             num_nodes_searched += 1;
10124                         }
10125                         num_unique_nodes =
10126                             MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10127 
10128                         /* If number of candidates projected/number of candidates to be refined is 2 or more,
10129                         then filter out and choose the best two here */
10130                         if(num_unique_nodes >= 2)
10131                         {
10132                             S32 num_results;
10133                             S32 cnt;
10134                             S32 *pi4_valid_part_ids;
10135                             s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10136                             s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10137                             pi4_valid_part_ids = &ai4_valid_part_ids[0];
10138 
10139                             /* pi4_valid_part_ids is updated inside */
10140                             hme_pred_search_no_encode(
10141                                 &s_search_prms_blk,
10142                                 ps_curr_layer,
10143                                 &ps_ctxt->s_wt_pred,
10144                                 pi4_valid_part_ids,
10145                                 1,
10146                                 e_me_quality_presets,
10147                                 i1_grid_enable,
10148                                 (ihevce_me_optimised_function_list_t *)
10149                                     ps_ctxt->pv_me_optimised_function_list
10150 
10151                             );
10152 
10153                             num_best_cand = 0;
10154                             cnt = 0;
10155                             num_results = ps_search_results->u1_num_results_per_part;
10156 
10157                             while((id = pi4_valid_part_ids[cnt++]) >= 0)
10158                             {
10159                                 num_results =
10160                                     MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10161 
10162                                 for(i = 0; i < num_results; i++)
10163                                 {
10164                                     search_node_t s_search_node_temp;
10165                                     s_search_node_temp =
10166                                         *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10167                                     if(s_search_node_temp.i1_ref_idx >= 0)
10168                                     {
10169                                         INSERT_NEW_NODE_NOMAP(
10170                                             as_best_two_proj_node,
10171                                             num_best_cand,
10172                                             s_search_node_temp,
10173                                             0);
10174                                     }
10175                                 }
10176                             }
10177                         }
10178                         else
10179                         {
10180                             add_centre = 1;
10181                             num_best_cand = num_unique_nodes;
10182                             as_best_two_proj_node[0] = as_unique_search_nodes[0];
10183                         }
10184 
10185                         num_unique_nodes = 0;
10186                         num_nodes_searched = 0;
10187 
10188                         if(1 == num_best_cand)
10189                         {
10190                             search_node_t s_search_node_temp = as_best_two_proj_node[0];
10191                             S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10192                             S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10193                             S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10194 
10195                             i1_grid_enable = 1;
10196 
10197                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10198                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10199                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200 
10201                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10202                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10203                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204 
10205                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10206                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10207                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208 
10209                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10210                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10211                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212 
10213                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10214                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10215                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10216 
10217                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10218                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10219                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10220 
10221                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10222                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10223                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10224 
10225                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10226                             as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10227                             as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10228 
10229                             if(add_centre)
10230                             {
10231                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10232                                 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10233                                 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10234                             }
10235                         }
10236                         else
10237                         {
10238                             /* For the candidates where refinement was required, choose the best two */
10239                             for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10240                             {
10241                                 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10242                                 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10243                                 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10244 
10245                                 /* Because there may not be two best unique candidates (because of clipping),
10246                                 second best candidate can be uninitialized, ignore that */
10247                                 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10248                                    s_search_node_temp.i1_ref_idx < 0)
10249                                 {
10250                                     num_nodes_searched++;
10251                                     continue;
10252                                 }
10253 
10254                                 /* PT_C */
10255                                 /* Since the center point has already been evaluated and best results are persistent,
10256                                 it will not be evaluated again */
10257                                 if(add_centre) /* centre point added explicitly again if search results is not updated */
10258                                 {
10259                                     INSERT_NEW_NODE(
10260                                         as_unique_search_nodes,
10261                                         num_unique_nodes,
10262                                         s_search_node_temp,
10263                                         0,
10264                                         au4_unique_node_map,
10265                                         center_x,
10266                                         center_y,
10267                                         1);
10268                                 }
10269 
10270                                 /* PT_L */
10271                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10272                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
10273                                 INSERT_NEW_NODE(
10274                                     as_unique_search_nodes,
10275                                     num_unique_nodes,
10276                                     s_search_node_temp,
10277                                     0,
10278                                     au4_unique_node_map,
10279                                     center_x,
10280                                     center_y,
10281                                     1);
10282 
10283                                 /* PT_T */
10284                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
10285                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10286                                 INSERT_NEW_NODE(
10287                                     as_unique_search_nodes,
10288                                     num_unique_nodes,
10289                                     s_search_node_temp,
10290                                     0,
10291                                     au4_unique_node_map,
10292                                     center_x,
10293                                     center_y,
10294                                     1);
10295 
10296                                 /* PT_R */
10297                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10298                                 s_search_node_temp.s_mv.i2_mvy = mv_y;
10299                                 INSERT_NEW_NODE(
10300                                     as_unique_search_nodes,
10301                                     num_unique_nodes,
10302                                     s_search_node_temp,
10303                                     0,
10304                                     au4_unique_node_map,
10305                                     center_x,
10306                                     center_y,
10307                                     1);
10308 
10309                                 /* PT_B */
10310                                 s_search_node_temp.s_mv.i2_mvx = mv_x;
10311                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10312                                 INSERT_NEW_NODE(
10313                                     as_unique_search_nodes,
10314                                     num_unique_nodes,
10315                                     s_search_node_temp,
10316                                     0,
10317                                     au4_unique_node_map,
10318                                     center_x,
10319                                     center_y,
10320                                     1);
10321 
10322                                 /* PT_TL */
10323                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10324                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10325                                 INSERT_NEW_NODE(
10326                                     as_unique_search_nodes,
10327                                     num_unique_nodes,
10328                                     s_search_node_temp,
10329                                     0,
10330                                     au4_unique_node_map,
10331                                     center_x,
10332                                     center_y,
10333                                     1);
10334 
10335                                 /* PT_TR */
10336                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10337                                 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10338                                 INSERT_NEW_NODE(
10339                                     as_unique_search_nodes,
10340                                     num_unique_nodes,
10341                                     s_search_node_temp,
10342                                     0,
10343                                     au4_unique_node_map,
10344                                     center_x,
10345                                     center_y,
10346                                     1);
10347 
10348                                 /* PT_BL */
10349                                 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10350                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10351                                 INSERT_NEW_NODE(
10352                                     as_unique_search_nodes,
10353                                     num_unique_nodes,
10354                                     s_search_node_temp,
10355                                     0,
10356                                     au4_unique_node_map,
10357                                     center_x,
10358                                     center_y,
10359                                     1);
10360 
10361                                 /* PT_BR */
10362                                 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10363                                 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10364                                 INSERT_NEW_NODE(
10365                                     as_unique_search_nodes,
10366                                     num_unique_nodes,
10367                                     s_search_node_temp,
10368                                     0,
10369                                     au4_unique_node_map,
10370                                     center_x,
10371                                     center_y,
10372                                     1);
10373                             }
10374                         }
10375 
10376                         s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10377                         s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10378 
10379                         /*****************************************************************/
10380                         /* Call the search algorithm, this includes:                     */
10381                         /* Pre-Search-Refinement (for coarse candts)                     */
10382                         /* Search on each candidate                                      */
10383                         /* Post Search Refinement on winners/other new candidates        */
10384                         /*****************************************************************/
10385 
10386                         hme_pred_search_no_encode(
10387                             &s_search_prms_blk,
10388                             ps_curr_layer,
10389                             &ps_ctxt->s_wt_pred,
10390                             ai4_valid_part_ids,
10391                             0,
10392                             e_me_quality_presets,
10393                             i1_grid_enable,
10394                             (ihevce_me_optimised_function_list_t *)
10395                                 ps_ctxt->pv_me_optimised_function_list);
10396 
10397                         i1_grid_enable = 0;
10398                     }
10399                 }
10400 
10401                 /* for non encode layer update MV and end processing for block */
10402                 {
10403                     WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10404                     search_node_t *ps_search_node;
10405                     /* now update the reqd results back to the layer mv bank. */
10406                     if(1 == ps_refine_prms->i4_layer_id)
10407                     {
10408                         hme_update_mv_bank_in_l1_me(
10409                             ps_search_results,
10410                             ps_curr_layer->ps_layer_mvbank,
10411                             blk_x,
10412                             blk_y,
10413                             &s_mv_update_prms);
10414                     }
10415                     else
10416                     {
10417                         hme_update_mv_bank_noencode(
10418                             ps_search_results,
10419                             ps_curr_layer->ps_layer_mvbank,
10420                             blk_x,
10421                             blk_y,
10422                             &s_mv_update_prms);
10423                     }
10424 
10425                     /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
10426                     /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
10427                     if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10428                     {
10429                         WORD32 i4_j;
10430                         layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10431 
10432                         //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10433                         /* Not considering this for Dyn. Search Update */
10434                         {
10435                             for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10436                                 i4_ref_id++)
10437                             {
10438                                 ps_search_node =
10439                                     ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10440 
10441                                 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10442                                 {
10443                                     hme_update_dynamic_search_params(
10444                                         &ps_ctxt->s_coarse_dyn_range_prms
10445                                              .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10446                                                                [i4_ref_id],
10447                                         ps_search_node->s_mv.i2_mvy);
10448 
10449                                     ps_search_node++;
10450                                 }
10451                             }
10452                         }
10453                     }
10454 
10455                     if(1 == ps_refine_prms->i4_layer_id)
10456                     {
10457                         WORD32 wt_pred_val, log_wt_pred_val;
10458                         WORD32 ref_id_of_nearest_poc = 0;
10459                         WORD32 max_val = 0x7fffffff;
10460                         WORD32 max_l0_val = 0x7fffffff;
10461                         WORD32 max_l1_val = 0x7fffffff;
10462                         WORD32 cur_val;
10463                         WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10464 
10465                         WORD32 bestl0_sad = 0x7fffffff;
10466                         WORD32 bestl1_sad = 0x7fffffff;
10467                         search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10468 
10469                         for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10470                             i4_ref_id++)
10471                         {
10472                             wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10473                             log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10474 
10475                             ps_search_node =
10476                                 ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10477 
10478                             i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10479                                                      ((1 << log_wt_pred_val) >> 1)) >>
10480                                                     log_wt_pred_val;
10481 
10482                             i4_local_cost_weighted_pred =
10483                                 i4_local_weighted_sad +
10484                                 (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10485                             //the loop is redundant as the results are already sorted based on total cost
10486                             //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10487                             {
10488                                 if(i4_local_cost_weighted_pred < min_cost)
10489                                 {
10490                                     min_cost = i4_local_cost_weighted_pred;
10491                                     min_sad = i4_local_weighted_sad;
10492                                 }
10493                             }
10494 
10495                             /* For P frame, calculate the nearest poc which is either P or I frame*/
10496                             if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497                             {
10498                                 if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10499                                 {
10500                                     cur_val =
10501                                         ABS(ps_ctxt->i4_curr_poc -
10502                                             ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10503                                     if(cur_val < max_val)
10504                                     {
10505                                         max_val = cur_val;
10506                                         ref_id_of_nearest_poc = i4_ref_id;
10507                                     }
10508                                 }
10509                             }
10510                         }
10511                         /* Store ME cost w.r.t. past frame, only for P frame */
10512                         if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10513                         {
10514                             if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10515                             {
10516                                 WORD16 i2_mvx, i2_mvy;
10517 
10518                                 WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10519                                 WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10520                                 WORD32 z_scan_idx =
10521                                     gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10522                                 WORD32 wt, log_wt;
10523 
10524                                 /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10525                                 <= (1 + ps_ctxt->num_b_frms));*/
10526 
10527                                 /*obtain mvx and mvy */
10528                                 i2_mvx =
10529                                     ps_search_results
10530                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10531                                         ->s_mv.i2_mvx;
10532                                 i2_mvy =
10533                                     ps_search_results
10534                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10535                                         ->s_mv.i2_mvy;
10536 
10537                                 /* fetch the weighted-prediction weight and log shift for the nearest-POC reference */
10538                                 wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10539                                 log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10540 
10541                                 /*register the min cost for l1 me in blk context */
10542                                 ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10543                                     ((ps_search_results
10544                                           ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10545                                           ->i4_sad *
10546                                       wt) +
10547                                      ((1 << log_wt) >> 1)) >>
10548                                     log_wt;
10549                                 ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10550                                     ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10551                                     (ps_search_results
10552                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10553                                          ->i4_tot_cost -
10554                                      ps_search_results
10555                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10556                                          ->i4_sad);
10557                                 /*for complexity change detection*/
10558                                 ps_ctxt->i4_num_blks++;
10559                                 if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10560                                    (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10561                                 {
10562                                     ps_ctxt->i4_num_blks_high_sad++;
10563                                 }
10564                             }
10565                         }
10566                     }
10567 
10568                     /* EIID: Early inter intra decisions */
10569                     /* tap L1 level SAD for inter intra decisions */
10570                     if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10571                        (!ps_ctxt->s_frm_prms
10572                              .is_i_pic))  //for high-quality preset->disable early decisions
10573                     {
10574                         if(1 == ps_refine_prms->i4_layer_id)
10575                         {
10576                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
10577                             ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10578                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10579                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10580                             WORD32 z_scan_idx =
10581                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10582                             ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10583 
10584                             /*register the min cost for l1 me in blk context */
10585                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10586                                 i4_min_sad_cost_8x8_block;
10587                             i4_num_comparisions++;
10588 
10589                             /* take early inter-intra decision here */
10590                             ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10591 #if DISABLE_INTRA_IN_BPICS
10592                             if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10593                                (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10594                             {
10595                                 ps_curr_ed_blk_ctxt->intra_or_inter =
10596                                     2; /*eval only inter if inter cost is less */
10597                                 i4_num_inter_wins++;
10598                             }
10599                             else
10600 #endif
10601                             {
10602                                 if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10603                                    ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10604                                      i4_threshold_multiplier) /
10605                                     i4_threshold_divider))
10606                                 {
10607                                     ps_curr_ed_blk_ctxt->intra_or_inter =
10608                                         2; /*eval only inter if inter cost is less */
10609                                     i4_num_inter_wins++;
10610                                 }
10611                             }
10612 
10613                             //{
10614                             //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10615                             //      blk_x,blk_y,
10616                             //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
10617                             //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10618                             //      i4_min_sad_cost_8x8_block
10619                             //      );
10620                             //}
10621 
10622                         }  //end of layer-1
10623                     }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10624                     else
10625                     {
10626                         if(1 == ps_refine_prms->i4_layer_id)
10627                         {
10628                             WORD32 i4_min_sad_cost_8x8_block = min_cost;
10629                             WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10630                             WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10631                             WORD32 z_scan_idx =
10632                                 gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10633 
10634                             /*register the min cost for l1 me in blk context */
10635                             ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10636                                 i4_min_sad_cost_8x8_block;
10637                         }
10638                     }
10639                     if(1 == ps_refine_prms->i4_layer_id)
10640                     {
10641                         WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10642                         WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10643                         WORD32 z_scan_idx =
10644                             gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10645 
10646                         ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10647                             min_sad;
10648 
10649                         if(min_cost <
10650                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10651                         {
10652                             ps_ctxt->i4_L1_hme_best_cost += min_cost;
10653                             ps_ctxt->i4_L1_hme_sad += min_sad;
10654                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10655                         }
10656                         else
10657                         {
10658                             ps_ctxt->i4_L1_hme_best_cost +=
10659                                 ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10660                             ps_ctxt->i4_L1_hme_sad +=
10661                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10662                             ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10663                                 ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10664                         }
10665                     }
10666                 }
10667             }
10668 
10669             /* Update the number of blocks processed in the current row */
10670             if((ME_MEDIUM_SPEED > e_me_quality_presets))
10671             {
10672                 ihevce_dmgr_set_row_row_sync(
10673                     pv_hme_dep_mngr,
10674                     (i4_ctb_x + 1),
10675                     blk_y,
10676                     0 /* Col Tile No. : Not supported in PreEnc*/);
10677             }
10678         }
10679 
10680         /* set the output dependency after completion of row */
10681         ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10682     }
10683 }
10684