1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_refine.c
23 *
24 * @brief
25 * Contains the implementation of the refinement layer searches and related
26 * functionality like CU merge.
27 *
28 * @author
29 * Ittiam
30 *
31 *
32 * List of Functions
33 *
34 *
35 ******************************************************************************
36 */
37
38 /*****************************************************************************/
39 /* File Includes */
40 /*****************************************************************************/
41 /* System include files */
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <stdarg.h>
47 #include <math.h>
48 #include <limits.h>
49
50 /* User include files */
51 #include "ihevc_typedefs.h"
52 #include "itt_video_api.h"
53 #include "ihevce_api.h"
54
55 #include "rc_cntrl_param.h"
56 #include "rc_frame_info_collector.h"
57 #include "rc_look_ahead_params.h"
58
59 #include "ihevc_defs.h"
60 #include "ihevc_structs.h"
61 #include "ihevc_platform_macros.h"
62 #include "ihevc_deblk.h"
63 #include "ihevc_itrans_recon.h"
64 #include "ihevc_chroma_itrans_recon.h"
65 #include "ihevc_chroma_intra_pred.h"
66 #include "ihevc_intra_pred.h"
67 #include "ihevc_inter_pred.h"
68 #include "ihevc_mem_fns.h"
69 #include "ihevc_padding.h"
70 #include "ihevc_weighted_pred.h"
71 #include "ihevc_sao.h"
72 #include "ihevc_resi_trans.h"
73 #include "ihevc_quant_iquant_ssd.h"
74 #include "ihevc_cabac_tables.h"
75
76 #include "ihevce_defs.h"
77 #include "ihevce_lap_enc_structs.h"
78 #include "ihevce_multi_thrd_structs.h"
79 #include "ihevce_multi_thrd_funcs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_bs_compute_ctb.h"
92 #include "ihevce_global_tables.h"
93 #include "ihevce_dep_mngr_interface.h"
94 #include "hme_datatype.h"
95 #include "hme_interface.h"
96 #include "hme_common_defs.h"
97 #include "hme_defs.h"
98 #include "ihevce_me_instr_set_router.h"
99 #include "hme_globals.h"
100 #include "hme_utils.h"
101 #include "hme_coarse.h"
102 #include "hme_fullpel.h"
103 #include "hme_subpel.h"
104 #include "hme_refine.h"
105 #include "hme_err_compute.h"
106 #include "hme_common_utils.h"
107 #include "hme_search_algo.h"
108 #include "ihevce_stasino_helpers.h"
109 #include "ihevce_common_utils.h"
110
111 /*****************************************************************************/
112 /* Globals */
113 /*****************************************************************************/
114
115 /* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
116 UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117 { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118 };
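/* Example (assumed [row][col] indexing): for the 16x16 block at raster      */
/* position (row 1, col 2) within the 64x64 ctb, the table yields 24, which  */
/* appears to be the z-scan index of that block's first 8x8 unit.            */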
119
120 /*****************************************************************************/
121 /* Extern Function declaration                                              */
122 /*****************************************************************************/
123 extern ctb_boundary_attrs_t *
124 get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125
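/* Function pointer types for routines that project co-located candidates    */
/* from the coarser layer into the current layer's candidate list; the       */
/* concrete routine bound to these types is presumably selected elsewhere    */
/* based on configuration.                                                    */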
126 typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127 search_node_t *ps_search_node,
128 layer_ctxt_t *ps_curr_layer,
129 layer_ctxt_t *ps_coarse_layer,
130 S32 i4_pos_x,
131 S32 i4_pos_y,
132 S08 i1_ref_id,
133 S32 i4_result_id);
134
135 typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136 search_node_t *ps_search_node,
137 layer_ctxt_t *ps_curr_layer,
138 layer_ctxt_t *ps_coarse_layer,
139 S32 i4_pos_x,
140 S32 i4_pos_y,
141 S32 i4_num_act_ref_l0,
142 U08 u1_pred_dir,
143 U08 u1_default_ref_id,
144 S32 i4_result_id);
145
146 /*****************************************************************************/
147 /* Function Definitions */
148 /*****************************************************************************/
149
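/* Plain (non-weighted) copy of the reference block addressed by the PU's    */
/* integer MV into the temporary prediction buffer; used when weighted       */
/* prediction is not applied.                                                 */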
150 void ihevce_no_wt_copy(
151 coarse_me_ctxt_t *ps_ctxt,
152 layer_ctxt_t *ps_curr_layer,
153 pu_t *ps_pu,
154 UWORD8 *pu1_temp_pred,
155 WORD32 temp_stride,
156 WORD32 blk_x,
157 WORD32 blk_y)
158 {
159 UWORD8 *pu1_ref;
160 WORD32 ref_stride, ref_offset;
161 WORD32 row, col, i4_tmp;
162
163 ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
164
165 if(ps_pu->b2_pred_mode == PRED_L0)
166 {
167 WORD8 i1_ref_idx;
168
169 i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
170 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
171
172 ref_stride = ps_curr_layer->i4_inp_stride;
173
174 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
175 ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
176
177 pu1_ref += ref_offset;
178
179 for(row = 0; row < temp_stride; row++)
180 {
181 for(col = 0; col < temp_stride; col++)
182 {
183 i4_tmp = pu1_ref[col];
184 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
185 }
186
187 pu1_ref += ref_stride;
188 pu1_temp_pred += temp_stride;
189 }
190 }
191 else
192 {
193 WORD8 i1_ref_idx;
194
195 i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
196 pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
197
198 ref_stride = ps_curr_layer->i4_inp_stride;
199
200 ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
201 ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
202
203 pu1_ref += ref_offset;
204
205 for(row = 0; row < temp_stride; row++)
206 {
207 for(col = 0; col < temp_stride; col++)
208 {
209 i4_tmp = pu1_ref[col];
210 pu1_temp_pred[col] = CLIP_U8(i4_tmp);
211 }
212
213 pu1_ref += ref_stride;
214 pu1_temp_pred += temp_stride;
215 }
216 }
217 }
218
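/* Adds the MVs of the given clusters as merge candidates for the requested  */
/* prediction direction: each MV is clipped to the allowed range and is      */
/* inserted only if the (MV, ref id) pair is not already in the list.        */
/* Returns the number of candidates added.                                   */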
219 static WORD32 hme_add_clustered_mvs_as_merge_cands(
220 cluster_data_t *ps_cluster_base,
221 search_node_t *ps_merge_cand,
222 range_prms_t **pps_range_prms,
223 U08 *pu1_refid_to_pred_dir_list,
224 WORD32 i4_num_clusters,
225 U08 u1_pred_dir)
226 {
227 WORD32 i, j, k;
228 WORD32 i4_num_cands_added = 0;
229 WORD32 i4_num_mvs_in_cluster;
230
231 for(i = 0; i < i4_num_clusters; i++)
232 {
233 cluster_data_t *ps_data = &ps_cluster_base[i];
234
235 if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
236 {
237 i4_num_mvs_in_cluster = ps_data->num_mvs;
238
239 for(j = 0; j < i4_num_mvs_in_cluster; j++)
240 {
241 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
242 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
243 ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
244
245 CLIP_MV_WITHIN_RANGE(
246 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
247 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
248 pps_range_prms[ps_data->ref_id],
249 0,
250 0,
251 0);
252
253 for(k = 0; k < i4_num_cands_added; k++)
254 {
255 if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
256 (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
257 (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
258 {
259 break;
260 }
261 }
262
263 if(k == i4_num_cands_added)
264 {
265 i4_num_cands_added++;
266 }
267 }
268 }
269 }
270
271 return i4_num_cands_added;
272 }
273
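/* Appends the best ME results of the four child CUs (or of their 8x8        */
/* grandchildren when a child is split) as merge candidates for the given    */
/* prediction direction, after clipping to the MV range and de-duplication.  */
/* The list is capped at a limit derived from the quality preset; returns    */
/* the updated candidate count.                                               */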
274 static WORD32 hme_add_me_best_as_merge_cands(
275 search_results_t **pps_child_data_array,
276 inter_cu_results_t *ps_8x8cu_results,
277 search_node_t *ps_merge_cand,
278 range_prms_t **pps_range_prms,
279 U08 *pu1_refid_to_pred_dir_list,
280 S08 *pi1_past_list,
281 S08 *pi1_future_list,
282 BLK_SIZE_T e_blk_size,
283 ME_QUALITY_PRESETS_T e_quality_preset,
284 S32 i4_num_cands_added,
285 U08 u1_pred_dir)
286 {
287 WORD32 i, j, k;
288 WORD32 i4_max_cands_to_add;
289
290 WORD32 i4_result_id = 0;
291
292 ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293 ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294 ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295 ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296
297 switch(e_quality_preset)
298 {
299 case ME_PRISTINE_QUALITY:
300 {
301 i4_max_cands_to_add = MAX_MERGE_CANDTS;
302
303 break;
304 }
305 case ME_HIGH_QUALITY:
306 {
307 /* All 4 children are split and each grandchild contributes an MV */
308 /* and 2 best results per grandchild */
309 i4_max_cands_to_add = 4 * 4 * 2;
310
311 break;
312 }
313 case ME_MEDIUM_SPEED:
314 {
315 i4_max_cands_to_add = 4 * 2 * 2;
316
317 break;
318 }
319 case ME_HIGH_SPEED:
320 case ME_XTREME_SPEED:
321 case ME_XTREME_SPEED_25:
322 {
323 i4_max_cands_to_add = 4 * 2 * 1;
324
325 break;
326 }
327 }
328
329 while(i4_result_id < 4)
330 {
331 for(i = 0; i < 4; i++)
332 {
333 inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334 inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335
336 if(!pps_child_data_array[i]->u1_split_flag)
337 {
338 part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339
340 if(ps_child_data->u1_num_best_results <= i4_result_id)
341 {
342 continue;
343 }
344
345 if(ps_data->as_pu_results->pu.b1_intra_flag)
346 {
347 continue;
348 }
349
350 for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351 {
352 mv_t *ps_mv;
353
354 S08 i1_ref_idx;
355
356 pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357
358 if(u1_pred_dir !=
359 ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360 {
361 continue;
362 }
363
364 if(u1_pred_dir)
365 {
366 ps_mv = &ps_pu->mv.s_l1_mv;
367 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368 }
369 else
370 {
371 ps_mv = &ps_pu->mv.s_l0_mv;
372 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373 }
374
375 if(-1 == i1_ref_idx)
376 {
377 continue;
378 }
379
380 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383
384 CLIP_MV_WITHIN_RANGE(
385 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387 pps_range_prms[i1_ref_idx],
388 0,
389 0,
390 0);
391
392 for(k = 0; k < i4_num_cands_added; k++)
393 {
394 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397 {
398 break;
399 }
400 }
401
402 if(k == i4_num_cands_added)
403 {
404 i4_num_cands_added++;
405
406 if(i4_max_cands_to_add <= i4_num_cands_added)
407 {
408 return i4_num_cands_added;
409 }
410 }
411 }
412 }
413 else
414 {
415 for(j = 0; j < 4; j++)
416 {
417 mv_t *ps_mv;
418
419 S08 i1_ref_idx;
420
421 part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422 pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423
424 ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425
426 if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427 {
428 continue;
429 }
430
431 if(ps_data->as_pu_results->pu.b1_intra_flag)
432 {
433 continue;
434 }
435
436 if(u1_pred_dir !=
437 ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438 {
439 continue;
440 }
441
442 if(u1_pred_dir)
443 {
444 ps_mv = &ps_pu->mv.s_l1_mv;
445 i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446 }
447 else
448 {
449 ps_mv = &ps_pu->mv.s_l0_mv;
450 i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451 }
452
453 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455 ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456
457 CLIP_MV_WITHIN_RANGE(
458 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459 ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460 pps_range_prms[i1_ref_idx],
461 0,
462 0,
463 0);
464
465 for(k = 0; k < i4_num_cands_added; k++)
466 {
467 if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468 (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469 (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470 {
471 break;
472 }
473 }
474
475 if(k == i4_num_cands_added)
476 {
477 i4_num_cands_added++;
478
479 if(i4_max_cands_to_add <= i4_num_cands_added)
480 {
481 return i4_num_cands_added;
482 }
483 }
484 }
485 }
486 }
487
488 i4_result_id++;
489 }
490
491 return i4_num_cands_added;
492 }
493
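/* Gathers the merge candidate list for CU merge evaluation: for the         */
/* pristine quality preset the clustered MVs of the 32x32/64x64 block are    */
/* added first, followed by the best ME results of the child CUs.            */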
494 WORD32 hme_add_cands_for_merge_eval(
495 ctb_cluster_info_t *ps_cluster_info,
496 search_results_t **pps_child_data_array,
497 inter_cu_results_t *ps_8x8cu_results,
498 range_prms_t **pps_range_prms,
499 search_node_t *ps_merge_cand,
500 U08 *pu1_refid_to_pred_dir_list,
501 S08 *pi1_past_list,
502 S08 *pi1_future_list,
503 ME_QUALITY_PRESETS_T e_quality_preset,
504 BLK_SIZE_T e_blk_size,
505 U08 u1_pred_dir,
506 U08 u1_blk_id)
507 {
508 WORD32 i4_num_cands_added = 0;
509
510 if(ME_PRISTINE_QUALITY == e_quality_preset)
511 {
512 cluster_data_t *ps_cluster_primo;
513
514 WORD32 i4_num_clusters;
515
516 if(BLK_32x32 == e_blk_size)
517 {
518 ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519 i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520 }
521 else
522 {
523 ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524 i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525 }
526
527 i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528 ps_cluster_primo,
529 ps_merge_cand,
530 pps_range_prms,
531 pu1_refid_to_pred_dir_list,
532 i4_num_clusters,
533 u1_pred_dir);
534 }
535
536 i4_num_cands_added = hme_add_me_best_as_merge_cands(
537 pps_child_data_array,
538 ps_8x8cu_results,
539 ps_merge_cand,
540 pps_range_prms,
541 pu1_refid_to_pred_dir_list,
542 pi1_past_list,
543 pi1_future_list,
544 e_blk_size,
545 e_quality_preset,
546 i4_num_cands_added,
547 u1_pred_dir);
548
549 return i4_num_cands_added;
550 }
551
552 /**
553 ********************************************************************************
554 * @fn WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555 * S32 i4_search_idx,
556 * S32 i4_best_part_type,
557 * S32 i4_is_vert)
558 *
559 * @brief Given a target partition orientation in the merged CU, and the
560 * partition type of the most likely partition, this fxn picks up
561 * candidates from the 4 constituent CUs and does a refinement search
562 * to identify the best results for the merged CU across active partitions
563 *
564 * @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565 * these params, the search result structure is also derived and
566 * updated during the search
567 *
568 * @param[in] i4_search_idx : ID of the buffer within the search results to update.
569 * Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570 *
571 * @param[in] i4_best_part_type : partition type of potential partition in the
572 * merged CU, -1 if the merge process has not yet been able to
573 * determine this.
574 *
575 * @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576 * orientation or horizontal orientation.
577 *
578 * @return Number of merge candidates
579 ********************************************************************************
580 */
581 WORD32 hme_pick_eval_merge_candts(
582 hme_merge_prms_t *ps_merge_prms,
583 hme_subpel_prms_t *ps_subpel_prms,
584 S32 i4_search_idx,
585 S32 i4_best_part_type,
586 S32 i4_is_vert,
587 wgt_pred_ctxt_t *ps_wt_inp_prms,
588 S32 i4_frm_qstep,
589 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591 {
592 S32 x_off, y_off;
593 search_node_t *ps_search_node;
594 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595 S32 i4_num_valid_parts;
596 pred_ctxt_t *ps_pred_ctxt;
597
598 search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599 S32 num_unique_nodes_cu_merge = 0;
600
601 search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602 CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603 S32 i4_part_mask = ps_search_results->i4_part_mask;
604
605 search_results_t *aps_child_results[4];
606 layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607
608 S32 i4_ref_stride, i, j;
609 result_upd_prms_t s_result_prms;
610
611 BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612 S32 i4_offset;
613
614 /*************************************************************************/
615 /* Function pointer for SAD/SATD, along with the array and prms structure   */
616 /* to be passed to that function                                             */
617 /*************************************************************************/
618 PF_SAD_FXN_T pf_err_compute;
619 S32 ai4_sad_grid[9][17];
620 err_prms_t s_err_prms;
621
622 /*************************************************************************/
623 /* Allowed MV RANGE */
624 /*************************************************************************/
625 range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626 PF_INTERP_FXN_T pf_qpel_interp;
627 PF_MV_COST_FXN pf_mv_cost_compute;
628 WORD32 pred_lx;
629 U08 *apu1_hpel_ref[4];
630
631 interp_prms_t s_interp_prms;
632 S32 i4_interp_buf_id;
633
634 S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635 S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636
637 /* Sanity checks */
638 ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639
640 s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641
642 /* Initialize all the ptrs to child CUs for merge decision */
643 aps_child_results[0] = ps_merge_prms->ps_results_tl;
644 aps_child_results[1] = ps_merge_prms->ps_results_tr;
645 aps_child_results[2] = ps_merge_prms->ps_results_bl;
646 aps_child_results[3] = ps_merge_prms->ps_results_br;
647
648 num_unique_nodes_cu_merge = 0;
649
650 pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651
652 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653 {
654 num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655 ps_merge_prms->ps_cluster_info,
656 aps_child_results,
657 ps_merge_prms->ps_8x8_cu_results,
658 pps_range_prms,
659 as_merge_unique_node,
660 ps_search_results->pu1_is_past,
661 ps_merge_prms->pi1_past_list,
662 ps_merge_prms->pi1_future_list,
663 ps_merge_prms->e_quality_preset,
664 e_blk_size,
665 i4_search_idx,
666 (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667 (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668 }
669 else
670 {
671 /*************************************************************************/
672 /* Populate the list of unique search nodes in the child CUs for merge */
673 /* evaluation */
674 /*************************************************************************/
675 for(i = 0; i < 4; i++)
676 {
677 search_node_t s_search_node;
678
679 PART_TYPE_T e_part_type;
680 PART_ID_T e_part_id;
681
682 WORD32 part_num;
683
684 search_results_t *ps_child = aps_child_results[i];
685
686 if(ps_child->ps_cu_results->u1_num_best_results)
687 {
688 if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689 (1 == ps_child->ps_cu_results->u1_num_best_results)))
690 {
691 e_part_type =
692 (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693
694 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695
696 /* Insert mvs of NxN partitions. */
697 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698 part_num++)
699 {
700 e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701
702 if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703 {
704 s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705 if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706 {
707 CLIP_MV_WITHIN_RANGE(
708 s_search_node.s_mv.i2_mvx,
709 s_search_node.s_mv.i2_mvy,
710 pps_range_prms[s_search_node.i1_ref_idx],
711 0,
712 0,
713 0);
714
715 INSERT_NEW_NODE_NOMAP(
716 as_merge_unique_node,
717 num_unique_nodes_cu_merge,
718 s_search_node,
719 1);
720 }
721 }
722 }
723 }
724 }
725 else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726 .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727 (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728 .ps_cu_results->u1_num_best_results)))
729 {
730 search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731
732 for(j = 0; j < 4; j++)
733 {
734 e_part_type = (PART_TYPE_T)ps_results_root[j]
735 .ps_cu_results->ps_best_results[0]
736 .u1_part_type;
737
738 ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739
740 /* Insert mvs of NxN partitions. */
741 for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742 part_num++)
743 {
744 e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745
746 if((ps_results_root[j]
747 .aps_part_results[i4_search_idx][e_part_id]
748 ->i1_ref_idx != -1) &&
749 (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750 .b1_intra_flag))
751 {
752 s_search_node =
753 *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754 if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755 {
756 CLIP_MV_WITHIN_RANGE(
757 s_search_node.s_mv.i2_mvx,
758 s_search_node.s_mv.i2_mvy,
759 pps_range_prms[s_search_node.i1_ref_idx],
760 0,
761 0,
762 0);
763
764 INSERT_NEW_NODE_NOMAP(
765 as_merge_unique_node,
766 num_unique_nodes_cu_merge,
767 s_search_node,
768 1);
769 }
770 }
771 }
772 }
773 }
774 }
775 }
776
777 if(0 == num_unique_nodes_cu_merge)
778 {
779 return 0;
780 }
781
782 /*************************************************************************/
783 /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784 /* fixed through this subpel refinement for this partition. */
785 /* Note, we do not enable grid sads since one pt is evaluated per node */
786 /* Hence, part mask is also nearly a don't care and we use 2Nx2N enabled.   */
787 /*************************************************************************/
788 i4_part_mask = ps_search_results->i4_part_mask;
789
790 /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791 if(ps_subpel_prms->i4_use_satd)
792 {
793 if(BLK_32x32 == e_blk_size)
794 {
795 pf_err_compute = hme_evalsatd_pt_pu_32x32;
796 }
797 else
798 {
799 pf_err_compute = hme_evalsatd_pt_pu_64x64;
800 }
801 }
802 else
803 {
804 pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805 }
806
807 i4_ref_stride = ps_curr_layer->i4_rec_stride;
808
809 x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810 y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811 i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812
813 /*************************************************************************/
814 /* This array stores the ids of the partitions whose */
815 /* SADs are updated. Since the partitions whose SADs are updated may not */
816 /* be in contiguous order, we supply another level of indirection. */
817 /*************************************************************************/
818 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819
820 /* Initialize result params used for partition update */
821 s_result_prms.pf_mv_cost_compute = NULL;
822 s_result_prms.ps_search_results = ps_search_results;
823 s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824 s_result_prms.i1_ref_idx = i4_search_idx;
825 s_result_prms.i4_part_mask = i4_part_mask;
826 s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827 s_result_prms.i4_grid_mask = 1;
828
829 /* One time Initialization of error params used for SAD/SATD compute */
830 s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831 s_err_prms.i4_ref_stride = i4_ref_stride;
832 s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833 s_err_prms.i4_grid_mask = 1;
834 s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835 s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836 s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837 s_err_prms.i4_step = 1;
838
839 /*************************************************************************/
840 /* One time preparation of non changing interpolation params. */
841 /*************************************************************************/
842 s_interp_prms.i4_ref_stride = i4_ref_stride;
843 s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844 s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845 s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846 s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847 i4_interp_buf_id = 0;
848
849 pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850
851 /***************************************************************************/
852 /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853 /* results */
854 /***************************************************************************/
855 for(i = 0; i < num_unique_nodes_cu_merge; i++)
856 {
857 WORD8 i1_ref_idx;
858 ps_search_node = &as_merge_unique_node[i];
859
860 /*********************************************************************/
861 /* Compute the base pointer for input, interpolated buffers */
862 /* The base pointers point as follows: */
863 /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, hy: 0.5, 0.5 */
864 /* To these, we need to add the offset of the current node */
865 /*********************************************************************/
866 i1_ref_idx = ps_search_node->i1_ref_idx;
867 apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868 apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869 apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870 apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871
872 s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873
874 pf_qpel_interp(
875 &s_interp_prms,
876 ps_search_node->s_mv.i2_mvx,
877 ps_search_node->s_mv.i2_mvy,
878 i4_interp_buf_id);
879
880 pred_lx = i4_search_idx;
881 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882
883 s_result_prms.u1_pred_lx = pred_lx;
884 s_result_prms.ps_search_node_base = ps_search_node;
885 s_err_prms.pu1_inp =
886 ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887 s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888 s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889
890 /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891 Here the tu recursion logic is restricted by the size of the PU*/
892 pf_err_compute(&s_err_prms);
893
894 if(ps_subpel_prms->u1_is_cu_noisy &&
895 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896 {
897 ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898 s_err_prms.pu1_ref,
899 s_err_prms.i4_ref_stride,
900 ai4_valid_part_ids,
901 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903 s_err_prms.pi4_sad_grid,
904 ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905 ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906 ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907 i4_num_valid_parts,
908 ps_wt_inp_prms->wpred_log_wdc,
909 (BLK_32x32 == e_blk_size) ? 32 : 64);
910 }
911
912 /* Update the mv's */
913 s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914 s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915
916 /* Update best results */
917 hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918 }
919
920 /************************************************************************/
921 /* Update mv cost and total cost for each valid partition in the CU */
922 /************************************************************************/
923 for(i = 0; i < TOT_NUM_PARTS; i++)
924 {
925 if(i4_part_mask & (1 << i))
926 {
927 WORD32 j;
928 WORD32 i4_mv_cost;
929
930 ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931
932 for(j = 0;
933 j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934 j++)
935 {
936 if(ps_search_node->i1_ref_idx != -1)
937 {
938 pred_lx = i4_search_idx;
939 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940
941 /* Prediction context should now deal with qpel units */
942 HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943
944 ps_search_node->u1_subpel_done = 1;
945 ps_search_node->u1_is_avail = 1;
946
947 i4_mv_cost =
948 pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949
950 ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951 ps_search_node->i4_mv_cost = i4_mv_cost;
952
953 ps_search_node++;
954 }
955 }
956 }
957 }
958
959 return num_unique_nodes_cu_merge;
960 }
961
962 #define CU_MERGE_MAX_INTRA_PARTS 4
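/* Threshold on the number of NxN intra units (out of 16 in the CU being     */
/* merged) beyond which the children are treated as intra dominated during   */
/* the merge decision in hme_try_merge_high_speed().                         */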
963
964 /**
965 ********************************************************************************
966 * @fn hme_try_merge_high_speed
967 *
968 * @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969 entity or with partitions for the high speed preset
970 *
971 * @param[in,out] hme_merge_prms_t: Params for CU merge
972 *
973 * @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974 ********************************************************************************
975 */
976 CU_MERGE_RESULT_T hme_try_merge_high_speed(
977 me_ctxt_t *ps_thrd_ctxt,
978 me_frm_ctxt_t *ps_ctxt,
979 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980 hme_subpel_prms_t *ps_subpel_prms,
981 hme_merge_prms_t *ps_merge_prms,
982 inter_pu_results_t *ps_pu_results,
983 pu_result_t *ps_pu_result)
984 {
985 search_results_t *ps_results_tl, *ps_results_tr;
986 search_results_t *ps_results_bl, *ps_results_br;
987
988 S32 i;
989 S32 i4_search_idx;
990 S32 i4_cost_parent;
991 S32 intra_cu_size;
992 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993
994 search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995 wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996
997 S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998 S32 is_vert = 0, i4_best_part_type = -1;
999 S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000 S32 i4_cost_children = 0;
1001 S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002 S32 i4_num_merge_cands_evaluated = 0;
1003 U08 u1_x_off = ps_results_merge->u1_x_off;
1004 U08 u1_y_off = ps_results_merge->u1_y_off;
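/* Index (0 to 3) of the 32x32 quadrant within the CTB: 0-TL, 1-TR, 2-BL, 3-BR */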
1005 S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006
1007 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011 ps_results_tl = ps_merge_prms->ps_results_tl;
1012 ps_results_tr = ps_merge_prms->ps_results_tr;
1013 ps_results_bl = ps_merge_prms->ps_results_bl;
1014 ps_results_br = ps_merge_prms->ps_results_br;
1015
1016 if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017 {
1018 i4_part_mask &= ~ENABLE_AMP;
1019 }
1020
1021 if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022 {
1023 i4_part_mask &= ~ENABLE_AMP;
1024
1025 i4_part_mask &= ~ENABLE_SMP;
1026 }
1027
1028 ps_merge_prms->i4_num_pred_dir_actual = 0;
1029
1030 /*************************************************************************/
1031 /* The logic for High speed CU merge goes as follows: */
1032 /* */
1033 /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034 /* exceed 7 */
1035 /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036 /* are identical */
1037 /* 3. Find the all unique mvs of best partitions of children CUs and */
1038 /* evaluate partial SATDs (all 17 partitions) for each unique mv. If */
1039 /* best parent cost is lower than sum of the best children costs */
1040 /* return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041 /* */
1042 /*************************************************************************/
1043
1044 /* Count the number of best partitions in child CUs, early exit if > 7 */
1045 if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046 (CU_32x32 == ps_results_merge->e_cu_size))
1047 {
1048 S32 num_parts_in_32x32 = 0;
1049 WORD32 i4_part_type;
1050
1051 if(ps_results_tl->u1_split_flag)
1052 {
1053 num_parts_in_32x32 += 4;
1054
1055 #define COST_INTERCHANGE 0
1056 i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057 ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058 ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059 ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060 }
1061 else
1062 {
1063 i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065 i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066 }
1067
1068 if(ps_results_tr->u1_split_flag)
1069 {
1070 num_parts_in_32x32 += 4;
1071
1072 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073 ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074 ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075 ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076 }
1077 else
1078 {
1079 i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081 i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082 }
1083
1084 if(ps_results_bl->u1_split_flag)
1085 {
1086 num_parts_in_32x32 += 4;
1087
1088 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089 ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090 ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091 ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092 }
1093 else
1094 {
1095 i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097 i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098 }
1099
1100 if(ps_results_br->u1_split_flag)
1101 {
1102 num_parts_in_32x32 += 4;
1103
1104 i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105 ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106 ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107 ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108 }
1109 else
1110 {
1111 i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112 num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113 i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114 }
1115
1116 if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117 {
1118 return CU_SPLIT;
1119 }
1120
1121 if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122 (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123 {
1124 return CU_SPLIT;
1125 }
1126 }
1127
1128 /* Accumulate intra percentage before merge for early CU_SPLIT decision */
1129 /* Note : Each intra part represents an NxN unit of the children CUs */
1130 /* This is essentially 1/16th of the CUsize under consideration for merge */
1131 if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132 {
1133 if(CU_64x64 == ps_results_merge->e_cu_size)
1134 {
1135 i4_intra_parts =
1136 (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137 ? 16
1138 : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139 }
1140 else
1141 {
1142 switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143 {
1144 case 0:
1145 {
1146 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147 ->u1_inter_eval_enable)
1148 ? 16
1149 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150 ->ps_child_node_tl->u1_intra_eval_enable);
1151
1152 break;
1153 }
1154 case 1:
1155 {
1156 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157 ->u1_inter_eval_enable)
1158 ? 16
1159 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160 ->ps_child_node_tr->u1_intra_eval_enable);
1161
1162 break;
1163 }
1164 case 2:
1165 {
1166 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167 ->u1_inter_eval_enable)
1168 ? 16
1169 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170 ->ps_child_node_bl->u1_intra_eval_enable);
1171
1172 break;
1173 }
1174 case 3:
1175 {
1176 i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177 ->u1_inter_eval_enable)
1178 ? 16
1179 : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180 ->ps_child_node_br->u1_intra_eval_enable);
1181
1182 break;
1183 }
1184 }
1185 }
1186 }
1187 else
1188 {
1189 for(i = 0; i < 4; i++)
1190 {
1191 search_results_t *ps_results =
1192 (i == 0) ? ps_results_tl
1193 : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194
1195 part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196
1197 if(ps_results->u1_split_flag)
1198 {
1199 U08 u1_x_off = ps_results->u1_x_off;
1200 U08 u1_y_off = ps_results->u1_y_off;
1201 U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202 2;
1203
1204 /* Special case to handle 8x8 CUs when 16x16 is split */
1205 ASSERT(ps_results->e_cu_size == CU_16x16);
1206
1207 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208
1209 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210 i4_intra_parts += 1;
1211
1212 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213
1214 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215 i4_intra_parts += 1;
1216
1217 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218
1219 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220 i4_intra_parts += 1;
1221
1222 ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223
1224 if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225 i4_intra_parts += 1;
1226 }
1227 else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228 {
1229 i4_intra_parts += 4;
1230 }
1231 }
1232 }
1233
1234 /* Determine the max intra CU size indicated by IPE */
1235 intra_cu_size = CU_64x64;
1236 if(ps_cur_ipe_ctb->u1_split_flag)
1237 {
1238 intra_cu_size = CU_32x32;
1239 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240 {
1241 intra_cu_size = CU_16x16;
1242 }
1243 }
1244
1245 if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246 (intra_cu_size < ps_results_merge->e_cu_size) &&
1247 (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248 (i4_intra_parts == 16))
1249 {
1250 S32 i4_merge_outcome;
1251
1252 i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253 ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255 : (!ps_cur_ipe_ctb->u1_split_flag);
1256
1257 i4_merge_outcome = i4_merge_outcome ||
1258 (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259
1260 i4_merge_outcome = i4_merge_outcome &&
1261 !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262
1263 if(i4_merge_outcome)
1264 {
1265 inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266 part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267 pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268
1269 ps_cu_results->u1_num_best_results = 1;
1270 ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271 ps_cu_results->u1_x_off = u1_x_off;
1272 ps_cu_results->u1_y_off = u1_y_off;
1273
1274 ps_best_result->u1_part_type = PRT_2Nx2N;
1275 ps_best_result->ai4_tu_split_flag[0] = 0;
1276 ps_best_result->ai4_tu_split_flag[1] = 0;
1277 ps_best_result->ai4_tu_split_flag[2] = 0;
1278 ps_best_result->ai4_tu_split_flag[3] = 0;
1279 ps_best_result->i4_tot_cost =
1280 (CU_64x64 == ps_results_merge->e_cu_size)
1281 ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282 : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283
1284 ps_pu->b1_intra_flag = 1;
1285 ps_pu->b4_pos_x = u1_x_off >> 2;
1286 ps_pu->b4_pos_y = u1_y_off >> 2;
1287 ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288 ps_pu->b4_ht = ps_pu->b4_wd;
1289 ps_pu->mv.i1_l0_ref_idx = -1;
1290 ps_pu->mv.i1_l1_ref_idx = -1;
1291 ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292 ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293 ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294 ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295
1296 return CU_MERGED;
1297 }
1298 else
1299 {
1300 return CU_SPLIT;
1301 }
1302 }
1303
1304 if(i4_intra_parts)
1305 {
1306 i4_part_mask = ENABLE_2Nx2N;
1307 }
1308
1309 ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310
1311 hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312
1313 ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314 ps_merge_prms->i4_num_pred_dir_actual = 0;
1315
1316 if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317 {
1318 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319 S32 i4_num_valid_parts;
1320 S32 i4_sigma_array_offset;
1321
1322 i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323
1324 /*********************************************************************************************************************************************/
1325 /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */
1326 /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327 /* increment as there will be 256 4x4 blocks in a CTB */
1328 /*********************************************************************************************************************************************/
1329 i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330 (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331
1332 for(i = 0; i < i4_num_valid_parts; i++)
1333 {
1334 S32 i4_part_id = ai4_valid_part_ids[i];
1335
1336 hme_compute_final_sigma_of_pu_from_base_blocks(
1337 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339 au8_final_src_sigmaX,
1340 au8_final_src_sigmaXSquared,
1341 (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342 4,
1343 i4_part_id,
1344 16);
1345 }
1346
1347 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348 ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349 }
1350
1351 /*************************************************************************/
1352 /* Loop through all ref idx and pick the merge candts and refine based */
1353 /* on the active partitions. At this stage num ref will be 1 or 2 */
1354 /*************************************************************************/
1355 for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356 {
1357 S32 i4_cands;
1358 U08 u1_pred_dir = 0;
1359
1360 if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361 {
1362 u1_pred_dir = i4_search_idx;
1363 }
1364 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365 {
1366 u1_pred_dir = 1;
1367 }
1368 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369 {
1370 u1_pred_dir = 0;
1371 }
1372 else
1373 {
1374 ASSERT(0);
1375 }
1376
1377 /* call the function to pick and evaluate the merge candts, given */
1378 /* a ref id and a part mask. */
1379 i4_cands = hme_pick_eval_merge_candts(
1380 ps_merge_prms,
1381 ps_subpel_prms,
1382 u1_pred_dir,
1383 i4_best_part_type,
1384 is_vert,
1385 ps_wt_inp_prms,
1386 i4_frm_qstep,
1387 ps_cmn_utils_optimised_function_list,
1388 ps_me_optimised_function_list);
1389
1390 if(i4_cands)
1391 {
1392 ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393 u1_pred_dir;
1394 ps_merge_prms->i4_num_pred_dir_actual++;
1395 }
1396
1397 i4_num_merge_cands_evaluated += i4_cands;
1398 }
1399
1400 /* Call the decide_part_types function here */
1401 /* Populate the new PU struct with the results post subpel refinement*/
1402 if(i4_num_merge_cands_evaluated)
1403 {
1404 inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405
1406 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407
1408 ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409 ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410
1411 hme_populate_pus(
1412 ps_thrd_ctxt,
1413 ps_ctxt,
1414 ps_subpel_prms,
1415 ps_results_merge,
1416 ps_cu_results,
1417 ps_pu_results,
1418 ps_pu_result,
1419 ps_merge_prms->ps_inter_ctb_prms,
1420 &ps_ctxt->s_wt_pred,
1421 ps_merge_prms->ps_layer_ctxt,
1422 ps_merge_prms->au1_pred_dir_searched,
1423 ps_merge_prms->i4_num_pred_dir_actual);
1424
1425 ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426
1427 hme_decide_part_types(
1428 ps_cu_results,
1429 ps_pu_results,
1430 ps_merge_prms->ps_inter_ctb_prms,
1431 ps_ctxt,
1432 ps_cmn_utils_optimised_function_list,
1433 ps_me_optimised_function_list
1434
1435 );
1436
1437 /*****************************************************************/
1438 /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */
1439 /*****************************************************************/
1440 #if DISABLE_INTRA_IN_BPICS
1441 if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443 #endif
1444 {
1445 if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446 {
1447 hme_insert_intra_nodes_post_bipred(
1448 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449 }
1450 }
1451 }
1452 else
1453 {
1454 return CU_SPLIT;
1455 }
1456
1457 /* We check the best result of ref idx 0 and compare for parent vs child */
1458 if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459 (CU_32x32 == ps_results_merge->e_cu_size))
1460 {
1461 i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462 /*********************************************************************/
1463 /* Add the cost of signaling the CU tree bits. */
1464 /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465 /* CU. If split, then 1 bit for the parent CU + 1 bit for each child CU*/
1466 /* So, 4*lambda is extra for children cost. :Lokesh */
1467 /*********************************************************************/
1468 {
1469 pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470
1471 i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472 }
1473
1474 if(i4_cost_parent < i4_cost_children)
1475 {
1476 return CU_MERGED;
1477 }
1478
1479 return CU_SPLIT;
1480 }
1481 else
1482 {
1483 return CU_MERGED;
1484 }
1485 }
1486
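/* Copies a search node's MV and reference index into an MV-bank entry; the  */
/* 'shift' argument right-shifts the MV components (it is 0 in the uses      */
/* below, i.e. the MV resolution is stored unchanged).                       */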
1487 #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \
1488 { \
1489 (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift); \
1490 (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift); \
1491 *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx; \
1492 }
1493
1494 /**
1495 ********************************************************************************
1496 * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497 * layer_mv_t *ps_layer_mv,
1498 * S32 i4_search_blk_x,
1499 * S32 i4_search_blk_y,
1500 * mvbank_update_prms_t *ps_prms)
1501 *
1502 * @brief Updates the mv bank in case there is no further encoding to be done
1503 *
1504 * @param[in] ps_search_results: contains results for the block just searched
1505 *
1506 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1507 *
1508 * @param[in] i4_search_blk_x : col num of blk being searched
1509 *
1510 * @param[in] i4_search_blk_y : row num of blk being searched
1511 *
1512 * @param[in] ps_prms : contains certain parameters which govern how the update is done
1513 *
1514 * @return None
1515 ********************************************************************************
1516 */
1517
1518 void hme_update_mv_bank_noencode(
1519 search_results_t *ps_search_results,
1520 layer_mv_t *ps_layer_mv,
1521 S32 i4_search_blk_x,
1522 S32 i4_search_blk_y,
1523 mvbank_update_prms_t *ps_prms)
1524 {
1525 hme_mv_t *ps_mv;
1526 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528 S32 i4_blk_x, i4_blk_y, i4_offset;
1529 S32 i4_j, i4_ref_id;
1530 search_node_t *ps_search_node;
1531 search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532 search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533 search_node_t *ps_search_node_4x4_4;
1534
1535 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538
1539 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540
1541 /* Identify the correct offset in the mvbank and the reference id buf */
1542 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544
1545 /*************************************************************************/
1546 /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1547 /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1548 /* do a straightforward single update of results. This will have a 1-1 */
1549 /* correspondence. */
1550 /*************************************************************************/
1551 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552 {
1553 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554 {
1555 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557 {
1558 COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559 ps_mv++;
1560 pi1_ref_idx++;
1561 ps_search_node++;
1562 }
1563 }
1564 return;
1565 }
1566
1567 /*************************************************************************/
1568 /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569 /* case, we need to have NxN partitions enabled in search. */
1570 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571 /* NxN partition. We also update the 8x8 result into each of the 4x4 banks */
1572 /*************************************************************************/
1573 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574 ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576
1577 /*************************************************************************/
1578 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579 /* hence the below check. */
1580 /*************************************************************************/
1581 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582
1583 ps_mv1 = ps_mv;
1584 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587 pi1_ref_idx1 = pi1_ref_idx;
1588 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591
1592 for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593 {
1594 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595
1596 ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597
1598 ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599
1600 ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601
1602 ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603
1604 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605 ps_mv1++;
1606 pi1_ref_idx1++;
1607 ps_search_node_4x4_1++;
1608 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609 ps_mv2++;
1610 pi1_ref_idx2++;
1611 ps_search_node_4x4_2++;
1612 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613 ps_mv3++;
1614 pi1_ref_idx3++;
1615 ps_search_node_4x4_3++;
1616 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617 ps_mv4++;
1618 pi1_ref_idx4++;
1619 ps_search_node_4x4_4++;
1620
1621 if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622 {
1623 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624 ps_mv1++;
1625 pi1_ref_idx1++;
1626 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627 ps_mv2++;
1628 pi1_ref_idx2++;
1629 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630 ps_mv3++;
1631 pi1_ref_idx3++;
1632 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633 ps_mv4++;
1634 pi1_ref_idx4++;
1635 }
1636
1637 for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638 {
1639 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640 ps_mv1++;
1641 pi1_ref_idx1++;
1642 ps_search_node_4x4_1++;
1643 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644 ps_mv2++;
1645 pi1_ref_idx2++;
1646 ps_search_node_4x4_2++;
1647 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648 ps_mv3++;
1649 pi1_ref_idx3++;
1650 ps_search_node_4x4_3++;
1651 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652 ps_mv4++;
1653 pi1_ref_idx4++;
1654 ps_search_node_4x4_4++;
1655 }
1656 }
1657 }
1658
1659 void hme_update_mv_bank_encode(
1660 search_results_t *ps_search_results,
1661 layer_mv_t *ps_layer_mv,
1662 S32 i4_search_blk_x,
1663 S32 i4_search_blk_y,
1664 mvbank_update_prms_t *ps_prms,
1665 U08 *pu1_pred_dir_searched,
1666 S32 i4_num_act_ref_l0)
1667 {
1668 hme_mv_t *ps_mv;
1669 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671 S32 i4_blk_x, i4_blk_y, i4_offset;
1672 S32 j, i, num_parts;
1673 search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674 search_node_t *ps_search_node_bl, *ps_search_node_br;
1675 search_node_t s_zero_mv;
1676 WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677
1678 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681
1682 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683
1684 /* Identify the correct offset in the mvbank and the reference id buf */
1685 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
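    /*************************************************************************/
    /* Illustrative example (numbers chosen arbitrarily): with 40 blks per   */
    /* row and 2 mvs per blk, search blk (3, 2) with i4_shift = 1 maps to    */
    /* blk (6, 4), so i4_offset = (6 + 4 * 40) * 2 = 332 entries into        */
    /* ps_mv / pi1_ref_idx.                                                  */
    /*************************************************************************/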
1687
1688 ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689 ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690
1691     /*************************************************************************/
1692     /* Each 8x8 blk gets results from the corresponding partition of the best */
1693     /* 16x16 CU results, one mv per stored result; hence the below check.     */
1694     /*************************************************************************/
1695 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696
1697 ps_mv1 = ps_mv;
1698 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701 pi1_ref_idx1 = pi1_ref_idx;
1702 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1705
1706 /* Initialize zero mv: default mv used for intra mvs */
1707 s_zero_mv.s_mv.i2_mvx = 0;
1708 s_zero_mv.s_mv.i2_mvy = 0;
1709 s_zero_mv.i1_ref_idx = 0;
1710
1711 if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712 (ps_search_results->i4_part_mask & ENABLE_NxN))
1713 {
1714 i4_part_type = PRT_NxN;
1715 }
1716
1717 for(i = 0; i < ps_prms->i4_num_ref; i++)
1718 {
1719 for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720 {
1721 WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722
1723 num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724
1725 ps_search_node_tl =
1726 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727
1728 if(num_parts == 1)
1729 {
1730 ps_search_node_tr = ps_search_node_tl;
1731 ps_search_node_bl = ps_search_node_tl;
1732 ps_search_node_br = ps_search_node_tl;
1733 }
1734 else if(num_parts == 2)
1735 {
1736 /* For vertically oriented partitions, tl, bl pt to same result */
1737 /* For horizontally oriented partition, tl, tr pt to same result */
1738 /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1739 /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1740 /* and right 2 8x8 have 12x16R partition */
1741 if(gau1_is_vert_part[i4_part_type])
1742 {
1743 ps_search_node_tr =
1744 ps_search_results
1745 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746 ps_search_node_bl = ps_search_node_tl;
1747 }
1748 else
1749 {
1750 ps_search_node_tr = ps_search_node_tl;
1751 ps_search_node_bl =
1752 ps_search_results
1753 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754 }
1755 ps_search_node_br =
1756 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757 }
1758 else
1759 {
1760 /* 4 unique results */
1761 ps_search_node_tr =
1762 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763 ps_search_node_bl =
1764 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765 ps_search_node_br =
1766 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767 }
1768
1769 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770 ps_search_node_tl++;
1771 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772 ps_search_node_tr++;
1773 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774 ps_search_node_bl++;
1775 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776 ps_search_node_br++;
1777
1778 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779 ps_mv1++;
1780 pi1_ref_idx1++;
1781 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782 ps_mv2++;
1783 pi1_ref_idx2++;
1784 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785 ps_mv3++;
1786 pi1_ref_idx3++;
1787 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788 ps_mv4++;
1789 pi1_ref_idx4++;
1790
1791 if(ps_prms->i4_num_results_to_store > 1)
1792 {
1793 ps_search_node_tl =
1794 &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795
1796 if(num_parts == 1)
1797 {
1798 ps_search_node_tr = ps_search_node_tl;
1799 ps_search_node_bl = ps_search_node_tl;
1800 ps_search_node_br = ps_search_node_tl;
1801 }
1802 else if(num_parts == 2)
1803 {
1804 /* For vertically oriented partitions, tl, bl pt to same result */
1805 /* For horizontally oriented partition, tl, tr pt to same result */
1806 /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1807 /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1808 /* and right 2 8x8 have 12x16R partition */
1809 if(gau1_is_vert_part[i4_part_type])
1810 {
1811 ps_search_node_tr =
1812 &ps_search_results
1813 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814 ps_search_node_bl = ps_search_node_tl;
1815 }
1816 else
1817 {
1818 ps_search_node_tr = ps_search_node_tl;
1819 ps_search_node_bl =
1820 &ps_search_results
1821 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822 }
1823 ps_search_node_br =
1824 &ps_search_results
1825 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826 }
1827 else
1828 {
1829 /* 4 unique results */
1830 ps_search_node_tr =
1831 &ps_search_results
1832 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833 ps_search_node_bl =
1834 &ps_search_results
1835 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836 ps_search_node_br =
1837 &ps_search_results
1838 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839 }
1840
1841 if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842 ps_search_node_tl++;
1843 if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844 ps_search_node_tr++;
1845 if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846 ps_search_node_bl++;
1847 if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848 ps_search_node_br++;
1849
1850 COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851 ps_mv1++;
1852 pi1_ref_idx1++;
1853 COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854 ps_mv2++;
1855 pi1_ref_idx2++;
1856 COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857 ps_mv3++;
1858 pi1_ref_idx3++;
1859 COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860 ps_mv4++;
1861 pi1_ref_idx4++;
1862 }
1863 }
1864 }
1865 }
1866
1867 /**
1868 ********************************************************************************
1869 * @fn hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
1870 * layer_mv_t *ps_layer_mv,
1871 * S32 i4_search_blk_x,
1872 * S32 i4_search_blk_y,
1873 * mvbank_update_prms_t *ps_prms)
1874 *
1875 * @brief Updates the mv bank in case there is no further encoding to be done
1876 *
1877 * @param[in] ps_search_results: contains results for the block just searched
1878 *
1879 * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1880 *
1881 * @param[in] i4_search_blk_x : col num of blk being searched
1882 *
1883 * @param[in] i4_search_blk_y : row num of blk being searched
1884 *
1885 * @param[in] ps_prms : contains certain parameters which govern how the update is done
1886 *
1887 * @return None
1888 ********************************************************************************
1889 */
1890
1891 void hme_update_mv_bank_in_l1_me(
1892 search_results_t *ps_search_results,
1893 layer_mv_t *ps_layer_mv,
1894 S32 i4_search_blk_x,
1895 S32 i4_search_blk_y,
1896 mvbank_update_prms_t *ps_prms)
1897 {
1898 hme_mv_t *ps_mv;
1899 hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900 S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901 S32 i4_blk_x, i4_blk_y, i4_offset;
1902 S32 i4_j, i4_ref_id;
1903 search_node_t *ps_search_node;
1904 search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905
1906 i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907 i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908 i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909
1910 i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911
1912 /* Identify the correct offset in the mvbank and the reference id buf */
1913 ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914 pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915
1916 /*************************************************************************/
1917 /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1918 /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1919 /* do a straightforward single update of results. This will have a 1-1 */
1920 /* correspondence. */
1921 /*************************************************************************/
1922 if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923 {
1924 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925
1926 hme_mv_t *ps_mv_l0_root = ps_mv;
1927 hme_mv_t *ps_mv_l1_root =
1928 ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929
1930 U32 u4_num_l0_results_updated = 0;
1931 U32 u4_num_l1_results_updated = 0;
1932
1933 S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934 S08 *pi1_ref_idx_l1_root =
1935 pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
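        /*********************************************************************/
        /* Per-blk bank layout: the first (num_active_ref_l0 *               */
        /* num_mvs_per_ref) entries hold the L0 results and the L1 results   */
        /* follow immediately after them; the sorted candidates of each      */
        /* direction are written back into the corresponding half below.     */
        /*********************************************************************/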
1936
1937 for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938 {
1939 U32 *pu4_num_results_updated;
1940 search_node_t **pps_result_nodes;
1941
1942 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943
1944 if(u1_pred_dir_of_cur_ref)
1945 {
1946 pu4_num_results_updated = &u4_num_l1_results_updated;
1947 pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948 }
1949 else
1950 {
1951 pu4_num_results_updated = &u4_num_l0_results_updated;
1952 pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953 }
1954
1955 ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956
1957 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958 {
1959 hme_add_new_node_to_a_sorted_array(
1960 &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961
1962 ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963 (*pu4_num_results_updated)++;
1964 }
1965 }
1966
1967 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968 {
1969 COPY_SEARCH_RESULT(
1970 &ps_mv_l0_root[i4_j],
1971 &pi1_ref_idx_l0_root[i4_j],
1972 aps_result_nodes_sorted[0][i4_j],
1973 0);
1974 }
1975
1976 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977 {
1978 COPY_SEARCH_RESULT(
1979 &ps_mv_l1_root[i4_j],
1980 &pi1_ref_idx_l1_root[i4_j],
1981 aps_result_nodes_sorted[1][i4_j],
1982 0);
1983 }
1984
1985 return;
1986 }
1987
1988 /*************************************************************************/
1989 /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990 /* case, we need to have NxN partitions enabled in search. */
1991 /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992 /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993 /*************************************************************************/
1994 ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995 ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996 ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997
1998 /*************************************************************************/
1999 /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000 /* hence the below check. */
2001 /*************************************************************************/
2002 ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003
2004 ps_mv1 = ps_mv;
2005 ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006 ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007 ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008 pi1_ref_idx1 = pi1_ref_idx;
2009 pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010 pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011 pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012
2013 {
2014 /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015 search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016 U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017
2018 S32 i;
2019
2020 hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021 hme_mv_t *ps_mv1_l1_root =
2022 ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023 hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024 hme_mv_t *ps_mv2_l1_root =
2025 ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026 hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027 hme_mv_t *ps_mv3_l1_root =
2028 ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029 hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030 hme_mv_t *ps_mv4_l1_root =
2031 ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032
2033 U32 u4_num_l0_results_updated = 0;
2034 U32 u4_num_l1_results_updated = 0;
2035
2036 S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037 S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038 ps_layer_mv->i4_num_mvs_per_ref);
2039 S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040 S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041 ps_layer_mv->i4_num_mvs_per_ref);
2042 S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043 S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044 ps_layer_mv->i4_num_mvs_per_ref);
2045 S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046 S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047 ps_layer_mv->i4_num_mvs_per_ref);
2048
2049 for(i = 0; i < 4; i++)
2050 {
2051 hme_mv_t *ps_mv_l0_root;
2052 hme_mv_t *ps_mv_l1_root;
2053
2054 S08 *pi1_ref_idx_l0_root;
2055 S08 *pi1_ref_idx_l1_root;
2056
2057 for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058 {
2059 U32 *pu4_num_results_updated;
2060 search_node_t **pps_result_nodes;
2061 U08 *pu1_cost_shifts_for_sorted_node;
2062
2063 U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064
2065 if(u1_pred_dir_of_cur_ref)
2066 {
2067 pu4_num_results_updated = &u4_num_l1_results_updated;
2068 pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069 pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070 }
2071 else
2072 {
2073 pu4_num_results_updated = &u4_num_l0_results_updated;
2074 pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075                     pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[0][0];
2076 }
2077
2078 ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079
2080 ps_search_node_4x4 =
2081 ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082
2083 for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084 {
2085 hme_add_new_node_to_a_sorted_array(
2086 &ps_search_node_4x4[i4_j],
2087 pps_result_nodes,
2088 pu1_cost_shifts_for_sorted_node,
2089 *pu4_num_results_updated,
2090 0);
2091
2092 (*pu4_num_results_updated)++;
2093
2094 hme_add_new_node_to_a_sorted_array(
2095 &ps_search_node_8x8[i4_j],
2096 pps_result_nodes,
2097 pu1_cost_shifts_for_sorted_node,
2098 *pu4_num_results_updated,
2099 2);
2100
2101 (*pu4_num_results_updated)++;
2102 }
2103 }
2104
2105 switch(i)
2106 {
2107 case 0:
2108 {
2109 ps_mv_l0_root = ps_mv1_l0_root;
2110 ps_mv_l1_root = ps_mv1_l1_root;
2111
2112 pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113 pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114
2115 break;
2116 }
2117 case 1:
2118 {
2119 ps_mv_l0_root = ps_mv2_l0_root;
2120 ps_mv_l1_root = ps_mv2_l1_root;
2121
2122 pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123 pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124
2125 break;
2126 }
2127 case 2:
2128 {
2129 ps_mv_l0_root = ps_mv3_l0_root;
2130 ps_mv_l1_root = ps_mv3_l1_root;
2131
2132 pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133 pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134
2135 break;
2136 }
2137 case 3:
2138 {
2139 ps_mv_l0_root = ps_mv4_l0_root;
2140 ps_mv_l1_root = ps_mv4_l1_root;
2141
2142 pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143 pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144
2145 break;
2146 }
2147 }
2148
2149 u4_num_l0_results_updated =
2150 MIN((S32)u4_num_l0_results_updated,
2151 ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152
2153 u4_num_l1_results_updated =
2154 MIN((S32)u4_num_l1_results_updated,
2155 ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156
2157 for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158 {
2159 COPY_SEARCH_RESULT(
2160 &ps_mv_l0_root[i4_j],
2161 &pi1_ref_idx_l0_root[i4_j],
2162 aps_result_nodes_sorted[0][i4_j],
2163 0);
2164 }
2165
2166 for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167 {
2168 COPY_SEARCH_RESULT(
2169 &ps_mv_l1_root[i4_j],
2170 &pi1_ref_idx_l1_root[i4_j],
2171 aps_result_nodes_sorted[1][i4_j],
2172 0);
2173 }
2174 }
2175 }
2176 }
2177
2178 /**
2179 ******************************************************************************
2180 * @brief Scales a motion vector component projected from a different layer of the
2181 * same picture (so no ref-id-related delta POC scaling is required)
2182 ******************************************************************************
2183 */
2184
2185 #define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p) \
2186 ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
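/*
 * Example with illustrative dimensions: scaling mv component 7 from a 960-wide
 * layer to a 1280-wide layer gives ((7 * 1280) + (960 >> 1)) / 960 = 9440 / 960
 * = 9, i.e. (7 * 1280 / 960) = 9.33 rounded to the nearest integer.
 */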
2187 /**
2188 ********************************************************************************
2189 * @fn hme_project_coloc_candt(search_node_t *ps_search_node,
2190 * layer_ctxt_t *ps_curr_layer,
2191 * layer_ctxt_t *ps_coarse_layer,
2192 * S32 i4_pos_x,
2193 * S32 i4_pos_y,
2194 * S08 i1_ref_id,
2195 * S08 i1_result_id)
2196 *
2197 * @brief From a coarser layer, projects a candidate situated at the "colocated"
2198 * position in the picture (e.g. given x, y it will be x/2, y/2 for dyadic scaling)
2199 *
2200 * @param[out] ps_search_node : contains the projected result
2201 *
2202 * @param[in] ps_curr_layer : current layer context
2203 *
2204 * @param[in] ps_coarse_layer : coarser layer context
2205 *
2206 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2207 *
2208 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2209 *
2210 * @param[in] i1_ref_id : reference id for which the candidate required
2211 *
2212 * @param[in] i4_result_id : result id for which the candidate required
2213 * (0 : best result, 1 : next best)
2214 *
2215 * @return None
2216 ********************************************************************************
2217 */
2218
2219 void hme_project_coloc_candt(
2220 search_node_t *ps_search_node,
2221 layer_ctxt_t *ps_curr_layer,
2222 layer_ctxt_t *ps_coarse_layer,
2223 S32 i4_pos_x,
2224 S32 i4_pos_y,
2225 S08 i1_ref_id,
2226 S32 i4_result_id)
2227 {
2228 S32 wd_c, ht_c, wd_p, ht_p;
2229 S32 blksize_p, blk_x, blk_y, i4_offset;
2230 layer_mv_t *ps_layer_mvbank;
2231 hme_mv_t *ps_mv;
2232 S08 *pi1_ref_idx;
2233
2234 /* Width and ht of current and prev layers */
2235 wd_c = ps_curr_layer->i4_wd;
2236 ht_c = ps_curr_layer->i4_ht;
2237 wd_p = ps_coarse_layer->i4_wd;
2238 ht_p = ps_coarse_layer->i4_ht;
2239
2240 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241 blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242
2243 /* Safety check to avoid uninitialized access across temporal layers */
2244 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246
2247 /* Project the positions to prev layer */
2248 /* TODO: convert these to scale factors at pic level */
2249 blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250 blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
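    /*************************************************************************/
    /* E.g. (illustrative sizes): wd_c = 1920, wd_p = 960 and 8-wide coarse  */
    /* blks give blk_x = (640 * 960) / (1920 * 8) = 40 for i4_pos_x = 640,   */
    /* i.e. scale the position down to the coarse layer and then divide by   */
    /* the coarse-layer blk width.                                           */
    /*************************************************************************/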
2251
2252 /* Pick up the mvs from the location */
2253 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255
2256 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258
2259 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261
2262 ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263 ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265 ps_search_node->u1_subpel_done = 0;
2266 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267 {
2268 ps_search_node->i1_ref_idx = i1_ref_id;
2269 ps_search_node->s_mv.i2_mvx = 0;
2270 ps_search_node->s_mv.i2_mvy = 0;
2271 }
2272 }
2273
2274 /**
2275 ********************************************************************************
2276 * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277 * layer_ctxt_t *ps_curr_layer,
2278 * layer_ctxt_t *ps_coarse_layer,
2279 * S32 i4_pos_x,
2280 * S32 i4_pos_y,
2281 * S08 i1_ref_id,
2282 * S08 i1_result_id)
2283 *
2284 * @brief From a coarser layer, projects a candidate situated at the "colocated"
2285 * position in the picture when the ratios are dyadic
2286 *
2287 * @param[out] ps_search_node : contains the projected result
2288 *
2289 * @param[in] ps_curr_layer : current layer context
2290 *
2291 * @param[in] ps_coarse_layer : coarser layer context
2292 *
2293 * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2294 *
2295 * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2296 *
2297 * @param[in] i1_ref_id : reference id for which the candidate required
2298 *
2299 * @param[in] i4_result_id : result id for which the candidate required
2300 * (0 : best result, 1 : next best)
2301 *
2302 * @return None
2303 ********************************************************************************
2304 */
2305
2306 void hme_project_coloc_candt_dyadic(
2307 search_node_t *ps_search_node,
2308 layer_ctxt_t *ps_curr_layer,
2309 layer_ctxt_t *ps_coarse_layer,
2310 S32 i4_pos_x,
2311 S32 i4_pos_y,
2312 S08 i1_ref_id,
2313 S32 i4_result_id)
2314 {
2315 S32 wd_c, ht_c, wd_p, ht_p;
2316 S32 blksize_p, blk_x, blk_y, i4_offset;
2317 layer_mv_t *ps_layer_mvbank;
2318 hme_mv_t *ps_mv;
2319 S08 *pi1_ref_idx;
2320
2321 /* Width and ht of current and prev layers */
2322 wd_c = ps_curr_layer->i4_wd;
2323 ht_c = ps_curr_layer->i4_ht;
2324 wd_p = ps_coarse_layer->i4_wd;
2325 ht_p = ps_coarse_layer->i4_ht;
2326
2327 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328 /* blksize_p = log2(wd) + 1 */
2329 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330
2331 /* ASSERT for valid sizes */
2332 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333
2334 /* Safety check to avoid uninitialized access across temporal layers */
2335 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337
2338 /* Project the positions to prev layer */
2339 /* TODO: convert these to scale factors at pic level */
2340 blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2341 blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
2342
2343 /* Pick up the mvs from the location */
2344 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346
2347 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349
2350 ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351 pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352
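    /*************************************************************************/
    /* Dyadic layers differ by a factor of 2, so the coarse-layer mv is      */
    /* simply doubled, e.g. a coarse mv of (3, -2) projects to (6, -4).      */
    /*************************************************************************/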
2353 ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354 ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357 {
2358 ps_search_node->i1_ref_idx = i1_ref_id;
2359 ps_search_node->s_mv.i2_mvx = 0;
2360 ps_search_node->s_mv.i2_mvy = 0;
2361 }
2362 }
2363
2364 void hme_project_coloc_candt_dyadic_implicit(
2365 search_node_t *ps_search_node,
2366 layer_ctxt_t *ps_curr_layer,
2367 layer_ctxt_t *ps_coarse_layer,
2368 S32 i4_pos_x,
2369 S32 i4_pos_y,
2370 S32 i4_num_act_ref_l0,
2371 U08 u1_pred_dir,
2372 U08 u1_default_ref_id,
2373 S32 i4_result_id)
2374 {
2375 S32 wd_c, ht_c, wd_p, ht_p;
2376 S32 blksize_p, blk_x, blk_y, i4_offset;
2377 layer_mv_t *ps_layer_mvbank;
2378 hme_mv_t *ps_mv;
2379 S08 *pi1_ref_idx;
2380
2381 /* Width and ht of current and prev layers */
2382 wd_c = ps_curr_layer->i4_wd;
2383 ht_c = ps_curr_layer->i4_ht;
2384 wd_p = ps_coarse_layer->i4_wd;
2385 ht_p = ps_coarse_layer->i4_ht;
2386
2387 ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2388 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2389
2390 /* ASSERT for valid sizes */
2391 ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2392
2393 /* Safety check to avoid uninitialized access across temporal layers */
2394 i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2395 i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2396 /* Project the positions to prev layer */
2397 /* TODO: convert these to scale factors at pic level */
2398 blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2399 blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
2400
2401 /* Pick up the mvs from the location */
2402 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2403 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2404
2405 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2406 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2407
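    /*************************************************************************/
    /* The coarse mv bank stores all L0 refs' candidates first, followed by  */
    /* the L1 refs' candidates; for an L1 request, skip past the L0 entries. */
    /*************************************************************************/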
2408 if(u1_pred_dir == 1)
2409 {
2410 ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2411 pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2412 }
2413
2414 ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2415 ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2416 ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2417 if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2418 {
2419 ps_search_node->i1_ref_idx = u1_default_ref_id;
2420 ps_search_node->s_mv.i2_mvx = 0;
2421 ps_search_node->s_mv.i2_mvy = 0;
2422 }
2423 }
2424
2425 #define SCALE_RANGE_PRMS(prm1, prm2, shift) \
2426 { \
2427 prm1.i2_min_x = prm2.i2_min_x << shift; \
2428 prm1.i2_max_x = prm2.i2_max_x << shift; \
2429 prm1.i2_min_y = prm2.i2_min_y << shift; \
2430 prm1.i2_max_y = prm2.i2_max_y << shift; \
2431 }
2432
2433 #define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift) \
2434 { \
2435 prm1->i2_min_x = prm2->i2_min_x << shift; \
2436 prm1->i2_max_x = prm2->i2_max_x << shift; \
2437 prm1->i2_min_y = prm2->i2_min_y << shift; \
2438 prm1->i2_max_y = prm2->i2_max_y << shift; \
2439 }
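/*
 * Both macros scale a search range taken from a coarser layer to the current
 * one; the shift is presumably log2 of the resolution ratio between the two
 * layers (1 for dyadic refinement), so each bound is simply left-shifted.
 */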
2440
2441 /**
2442 ********************************************************************************
2443 * @fn void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
2444 *           refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2445 *
2446 * @brief Frame init of refinement layers in ME
2447 *
2448 * @param[in,out] ps_curr_layer : current layer context whose MV bank is initialized
2449 *
2450 * @param[in] ps_refine_prms : refinement layer prms
2451 *
2452 * @return None
2453 ********************************************************************************
2454 */
2455 void hme_refine_frm_init(
2456 layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457 {
2458 /* local variables */
2459 BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460 S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461
2462 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463
2464 if(ps_refine_prms->explicit_ref)
2465 {
2466 i4_num_ref_fpel = i4_num_ref_prev_layer;
2467 }
2468 else
2469 {
2470 i4_num_ref_fpel = 2;
2471 }
2472
2473 if(ps_refine_prms->i4_enable_4x4_part)
2474 {
2475 e_result_blk_size = BLK_4x4;
2476 }
2477
2478 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479
2480 hme_init_mv_bank(
2481 ps_curr_layer,
2482 e_result_blk_size,
2483 i4_num_ref_fpel,
2484 ps_refine_prms->i4_num_mvbank_results,
2485 ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486 }
2487
2488 #if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489 /**
2490 ********************************************************************************
2491 * @fn void hme_init_clusters_16x16
2492 * (
2493 * cluster_16x16_blk_t *ps_cluster_blk_16x16
2494 * )
2495 *
2496 * @brief Initialisations for the structs used in the clustering algorithm
2497 *
2498 * @param[in/out] ps_cluster_blk_16x16: pointer to structure containing clusters
2499 * of 16x16 block
2500 *
2501 * @return None
2502 ********************************************************************************
2503 */
2504 static __inline void
2505 hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506 {
2507 S32 i;
2508
2509 ps_cluster_blk_16x16->num_clusters = 0;
2510 ps_cluster_blk_16x16->intra_mv_area = 0;
2511 ps_cluster_blk_16x16->best_inter_cost = 0;
2512
2513 for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514 {
2515 ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517
2518 ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519
2520 ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521 ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522 }
2523 for(i = 0; i < MAX_NUM_REF; i++)
2524 {
2525 ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526 }
2527 }
2528
2529 /**
2530 ********************************************************************************
2531 * @fn void hme_init_clusters_32x32
2532 * (
2533 * cluster_32x32_blk_t *ps_cluster_blk_32x32
2534 * )
2535 *
2536 * @brief Initialisations for the structs used in the clustering algorithm
2537 *
2538 * @param[in/out] ps_cluster_blk_32x32: pointer to structure containing clusters
2539 * of 32x32 block
2540 *
2541 * @return None
2542 ********************************************************************************
2543 */
2544 static __inline void
2545 hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546 {
2547 S32 i;
2548
2549 ps_cluster_blk_32x32->num_clusters = 0;
2550 ps_cluster_blk_32x32->intra_mv_area = 0;
2551 ps_cluster_blk_32x32->best_alt_ref = -1;
2552 ps_cluster_blk_32x32->best_uni_ref = -1;
2553 ps_cluster_blk_32x32->best_inter_cost = 0;
2554 ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555
2556 for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557 {
2558 ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560 ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561
2562 ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563 ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564 }
2565 for(i = 0; i < MAX_NUM_REF; i++)
2566 {
2567 ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568 }
2569 }
2570
2571 /**
2572 ********************************************************************************
2573 * @fn void hme_init_clusters_64x64
2574 * (
2575 * cluster_64x64_blk_t *ps_cluster_blk_64x64
2576 * )
2577 *
2578 * @brief Initialisations for the structs used in the clustering algorithm
2579 *
2580 * @param[in/out] ps_cluster_blk_64x64: pointer to structure containing clusters
2581 * of 64x64 block
2582 *
2583 * @return None
2584 ********************************************************************************
2585 */
2586 static __inline void
2587 hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588 {
2589 S32 i;
2590
2591 ps_cluster_blk_64x64->num_clusters = 0;
2592 ps_cluster_blk_64x64->intra_mv_area = 0;
2593 ps_cluster_blk_64x64->best_alt_ref = -1;
2594 ps_cluster_blk_64x64->best_uni_ref = -1;
2595 ps_cluster_blk_64x64->best_inter_cost = 0;
2596
2597 for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598 {
2599 ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600 bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601 ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602
2603 ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604 ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605 }
2606 for(i = 0; i < MAX_NUM_REF; i++)
2607 {
2608 ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609 }
2610 }
2611
2612 /**
2613 ********************************************************************************
2614 * @fn void hme_sort_and_assign_top_ref_ids_areawise
2615 * (
2616 * ctb_cluster_info_t *ps_ctb_cluster_info
2617 * )
2618 *
2619 * @brief Finds best_uni_ref and best_alt_ref
2620 *
2621 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2622 *
2623 * @param[in] bidir_enabled: flag that indicates whether or not bi-pred is
2624 * enabled
2625 *
2626 * @param[in] block_width: width of the block in pels
2627 *
2628 * @param[in] e_cu_pos: position of the block within the CTB
2629 *
2630 * @return None
2631 ********************************************************************************
2632 */
2633 void hme_sort_and_assign_top_ref_ids_areawise(
2634 ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635 {
2636 cluster_32x32_blk_t *ps_32x32 = NULL;
2637 cluster_64x64_blk_t *ps_64x64 = NULL;
2638 cluster_data_t *ps_data;
2639
2640 S32 j, k;
2641
2642 S32 ai4_uni_area[MAX_NUM_REF];
2643 S32 ai4_bi_area[MAX_NUM_REF];
2644 S32 ai4_ref_id_found[MAX_NUM_REF];
2645 S32 ai4_ref_id[MAX_NUM_REF];
2646
2647 S32 best_uni_ref = -1, best_alt_ref = -1;
2648 S32 num_clusters;
2649 S32 num_ref = 0;
2650 S32 num_clusters_evaluated = 0;
2651 S32 is_cur_blk_valid;
2652
2653 if(32 == block_width)
2654 {
2655 is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656 ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657 num_clusters = ps_32x32->num_clusters;
2658 ps_data = &ps_32x32->as_cluster_data[0];
2659 }
2660 else
2661 {
2662 is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663 ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664 num_clusters = ps_64x64->num_clusters;
2665 ps_data = &ps_64x64->as_cluster_data[0];
2666 }
2667
2668 #if !ENABLE_4CTB_EVALUATION
2669 if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670 {
2671 return;
2672 }
2673 #endif
2674 if(num_clusters == 0)
2675 {
2676 return;
2677 }
2678 else if(!is_cur_blk_valid)
2679 {
2680 return;
2681 }
2682
2683 memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684 memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685 memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686 memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
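    /*************************************************************************/
    /* Accumulate, per reference, the pixel areas covered by uni-directional */
    /* and bi-directional mvs across all valid clusters; best_uni_ref is the */
    /* ref with the largest uni area, and best_alt_ref is picked from the bi */
    /* areas further below.                                                  */
    /*************************************************************************/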
2687
2688 for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689 {
2690 S32 ref_id;
2691
2692 if(!ps_data->is_valid_cluster)
2693 {
2694 continue;
2695 }
2696
2697 ref_id = ps_data->ref_id;
2698
2699 num_clusters_evaluated++;
2700
2701 ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702 ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703
2704 if(!ai4_ref_id_found[ref_id])
2705 {
2706 ai4_ref_id[ref_id] = ref_id;
2707 ai4_ref_id_found[ref_id] = 1;
2708 num_ref++;
2709 }
2710 }
2711
2712 {
2713 S32 ai4_ref_id_temp[MAX_NUM_REF];
2714
2715 memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716
2717 for(k = 1; k < MAX_NUM_REF; k++)
2718 {
2719 if(ai4_uni_area[k] > ai4_uni_area[0])
2720 {
2721 SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722 SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723 }
2724 }
2725
2726 best_uni_ref = ai4_ref_id_temp[0];
2727 }
2728
2729 if(bidir_enabled)
2730 {
2731 for(k = 1; k < MAX_NUM_REF; k++)
2732 {
2733 if(ai4_bi_area[k] > ai4_bi_area[0])
2734 {
2735 SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736 SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737 }
2738 }
2739
2740 if(!ai4_bi_area[0])
2741 {
2742 best_alt_ref = -1;
2743
2744 if(32 == block_width)
2745 {
2746 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747 }
2748 else
2749 {
2750 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751 }
2752
2753 return;
2754 }
2755
2756 if(best_uni_ref == ai4_ref_id[0])
2757 {
2758 for(k = 2; k < MAX_NUM_REF; k++)
2759 {
2760 if(ai4_bi_area[k] > ai4_bi_area[1])
2761 {
2762 SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763 SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764 }
2765 }
2766
2767 best_alt_ref = ai4_ref_id[1];
2768 }
2769 else
2770 {
2771 best_alt_ref = ai4_ref_id[0];
2772 }
2773 }
2774
2775 if(32 == block_width)
2776 {
2777 SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778 }
2779 else
2780 {
2781 SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782 }
2783 }
2784
2785 /**
2786 ********************************************************************************
2787 * @fn void hme_find_top_ref_ids
2788 * (
2789 * ctb_cluster_info_t *ps_ctb_cluster_info
2790 * )
2791 *
2792 * @brief Finds best_uni_ref and best_alt_ref
2793 *
2794 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2795 *
2796 * @return None
2797 ********************************************************************************
2798 */
2799 void hme_find_top_ref_ids(
2800 ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801 {
2802 S32 i;
2803
2804 if(32 == block_width)
2805 {
2806 for(i = 0; i < 4; i++)
2807 {
2808 hme_sort_and_assign_top_ref_ids_areawise(
2809 ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810 }
2811 }
2812 else if(64 == block_width)
2813 {
2814 hme_sort_and_assign_top_ref_ids_areawise(
2815 ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816 }
2817 }
2818
2819 /**
2820 ********************************************************************************
2821 * @fn void hme_boot_out_outlier
2822 * (
2823 * ctb_cluster_info_t *ps_ctb_cluster_info
2824 * )
2825 *
2826 * @brief Removes outlier clusters before CU tree population
2827 *
2828 * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2829 *
2830 * @return None
2831 ********************************************************************************
2832 */
2833 void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834 {
2835 cluster_32x32_blk_t *ps_32x32;
2836
2837 S32 i;
2838
2839 cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840
2841 S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842
2843 if(32 == blk_width)
2844 {
2845 /* 32x32 clusters */
2846 for(i = 0; i < 4; i++)
2847 {
2848 ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849
2850 if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851 {
2852 BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853 }
2854 }
2855 }
2856 else if(64 == blk_width)
2857 {
2858 /* 64x64 clusters */
2859 if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860 {
2861 BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862 }
2863 }
2864 }
2865
2866 /**
2867 ********************************************************************************
2868 * @fn void hme_update_cluster_attributes
2869 * (
2870 * cluster_data_t *ps_cluster_data,
2871 * S32 mvx,
2872 * S32 mvy,
2873 * PART_ID_T e_part_id
2874 * )
2875 *
2876 * @brief Implementation of the clustering algorithm
2877 *
2878 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2879 *
2880 * @param[in] mvx : x co-ordinate of the motion vector
2881 *
2882 * @param[in] mvy : y co-ordinate of the motion vector
2883 *
2884 * @param[in] ref_idx : ref_id of the motion vector
2885 *
2886 * @param[in] e_part_id : partition id of the motion vector
2887 *
2888 * @return None
2889 ********************************************************************************
2890 */
2891 static __inline void hme_update_cluster_attributes(
2892 cluster_data_t *ps_cluster_data,
2893 S32 mvx,
2894 S32 mvy,
2895 S32 mvdx,
2896 S32 mvdy,
2897 S32 ref_id,
2898 S32 sdi,
2899 U08 is_part_of_bi,
2900 PART_ID_T e_part_id)
2901 {
2902 LWORD64 i8_mvx_sum_q8;
2903 LWORD64 i8_mvy_sum_q8;
2904
2905 S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2906 S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2907
2908 if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2909 {
2910 ps_cluster_data->min_x = mvx;
2911 }
2912 else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2913 {
2914 ps_cluster_data->max_x = mvx;
2915 }
2916
2917 if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2918 {
2919 ps_cluster_data->min_y = mvy;
2920 }
2921 else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2922 {
2923 ps_cluster_data->max_y = mvy;
2924 }
2925
2926 {
2927 S32 num_mvs = ps_cluster_data->num_mvs;
2928
2929 ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2930 ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2931 ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2932
2933 /***************************/
2934 ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2935 ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2936 /**************************/
2937 }
2938
2939 /* Updation of centroid */
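    /* Running mean in Q8: centroid_new = (centroid_old * N + (mv << 8)) / (N + 1), */
    /* e.g. a centroid of 4.0 over 3 mvs absorbing mv = 8 moves to 5.0              */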
2940 {
2941 i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2942 i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2943
2944 ps_cluster_data->num_mvs++;
2945
2946 ps_cluster_data->s_centroid.i4_pos_x_q8 =
2947 (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2948 ps_cluster_data->s_centroid.i4_pos_y_q8 =
2949 (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2950 }
2951
2952 ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2953
2954 if(is_part_of_bi)
2955 {
2956 ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2957 }
2958 else
2959 {
2960 ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2961 }
2962 }
2963
2964 /**
2965 ********************************************************************************
2966 * @fn void hme_try_cluster_merge
2967 * (
2968 * cluster_data_t *ps_cluster_data,
2969 * S32 *pi4_num_clusters,
2970 * S32 idx_of_updated_cluster
2971 * )
2972 *
2973 * @brief Implementation of the clustering algorithm
2974 *
2975 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2976 *
2977 * @param[in/out] pi4_num_clusters : pointer to number of clusters
2978 *
2979 * @param[in] idx_of_updated_cluster : index of the cluster most recently
2980 * updated
2981 *
2982 * @return Nothing
2983 ********************************************************************************
2984 */
2985 void hme_try_cluster_merge(
2986 cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987 {
2988 centroid_t *ps_centroid;
2989
2990 S32 cur_pos_x_q8;
2991 S32 cur_pos_y_q8;
2992 S32 i;
2993 S32 max_dist_from_centroid;
2994 S32 mvd;
2995 S32 mvdx_q8;
2996 S32 mvdx;
2997 S32 mvdy_q8;
2998 S32 mvdy;
2999 S32 num_clusters, num_clusters_evaluated;
3000 S32 other_pos_x_q8;
3001 S32 other_pos_y_q8;
3002
3003 cluster_data_t *ps_root = ps_cluster_data;
3004 cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005 centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006
3007 /* Merge is superfluous if num_clusters is 1 */
3008 if(*pu1_num_clusters == 1)
3009 {
3010 return;
3011 }
3012
3013 cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014 cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015
3016 max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017
3018 num_clusters = *pu1_num_clusters;
3019 num_clusters_evaluated = 0;
3020
3021 for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022 {
3023 if(!ps_cluster_data->is_valid_cluster)
3024 {
3025 continue;
3026 }
3027 if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028 {
3029 num_clusters_evaluated++;
3030 continue;
3031 }
3032
3033 ps_centroid = &ps_cluster_data->s_centroid;
3034
3035 other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036 other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037
3038 mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039 mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042
3043 mvd = ABS(mvdx) + ABS(mvdy);
3044
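        /*****************************************************************/
        /* Merge criterion: two same-ref clusters are fused when the L1  */
        /* distance between their centroids is within half of the        */
        /* current cluster's allowed spread from its centroid.           */
        /*****************************************************************/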
3045 if(mvd <= (max_dist_from_centroid >> 1))
3046 {
3047 /* 0 => no updates */
3048 /* 1 => min updated */
3049 /* 2 => max updated */
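            /* The switch below packs the two ids as (y_id << 2) + x_id,  */
            /* e.g. case 6 = (1 << 2) + 2 => min_y and max_x get updated  */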
3050 S32 minmax_x_update_id;
3051 S32 minmax_y_update_id;
3052
3053 LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054 LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055 LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056 LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057
3058 (*pu1_num_clusters)--;
3059
3060 ps_cluster_data->is_valid_cluster = 0;
3061
3062 memcpy(
3063 &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064 ps_cluster_data->as_mv,
3065 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066
3067 ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068 ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069 ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070 ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071 i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072 i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073
3074 ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075 ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076
3077 minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078 ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079 : 1;
3080 minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081 ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082 : 1;
3083
3084 /* Updation of centroid spread */
3085 switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086 {
3087 case 1:
3088 {
3089 S32 mvd, mvd_q8;
3090
3091 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092
3093 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094 mvd = (mvd_q8 + (1 << 7)) >> 8;
3095
3096 if(mvd > (max_dist_from_centroid))
3097 {
3098 ps_cluster_data->max_dist_from_centroid = mvd;
3099 }
3100 break;
3101 }
3102 case 2:
3103 {
3104 S32 mvd, mvd_q8;
3105
3106 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107
3108 mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109 mvd = (mvd_q8 + (1 << 7)) >> 8;
3110
3111 if(mvd > (max_dist_from_centroid))
3112 {
3113 ps_cluster_data->max_dist_from_centroid = mvd;
3114 }
3115 break;
3116 }
3117 case 4:
3118 {
3119 S32 mvd, mvd_q8;
3120
3121 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122
3123 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124 mvd = (mvd_q8 + (1 << 7)) >> 8;
3125
3126 if(mvd > (max_dist_from_centroid))
3127 {
3128 ps_cluster_data->max_dist_from_centroid = mvd;
3129 }
3130 break;
3131 }
3132 case 5:
3133 {
3134 S32 mvd;
3135 S32 mvdx, mvdx_q8;
3136 S32 mvdy, mvdy_q8;
3137
3138 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140
3141 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143
3144 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145
3146 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148
3149 if(mvd > max_dist_from_centroid)
3150 {
3151 ps_cluster_data->max_dist_from_centroid = mvd;
3152 }
3153 break;
3154 }
3155 case 6:
3156 {
3157 S32 mvd;
3158 S32 mvdx, mvdx_q8;
3159 S32 mvdy, mvdy_q8;
3160
3161 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163
3164 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166
3167 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168
3169 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170 ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171
3172 if(mvd > max_dist_from_centroid)
3173 {
3174 ps_cluster_data->max_dist_from_centroid = mvd;
3175 }
3176 break;
3177 }
3178 case 8:
3179 {
3180 S32 mvd, mvd_q8;
3181
3182 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183
3184 mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185 mvd = (mvd_q8 + (1 << 7)) >> 8;
3186
3187 if(mvd > (max_dist_from_centroid))
3188 {
3189 ps_cluster_data->max_dist_from_centroid = mvd;
3190 }
3191 break;
3192 }
3193 case 9:
3194 {
3195 S32 mvd;
3196 S32 mvdx, mvdx_q8;
3197 S32 mvdy, mvdy_q8;
3198
3199 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201
3202 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204
3205 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206
3207 ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209
3210 if(mvd > max_dist_from_centroid)
3211 {
3212 ps_cluster_data->max_dist_from_centroid = mvd;
3213 }
3214 break;
3215 }
3216 case 10:
3217 {
3218 S32 mvd;
3219 S32 mvdx, mvdx_q8;
3220 S32 mvdy, mvdy_q8;
3221
3222 mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224
3225 mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227
3228 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229
3230 ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231 ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232
3233 if(mvd > ps_cluster_data->max_dist_from_centroid)
3234 {
3235 ps_cluster_data->max_dist_from_centroid = mvd;
3236 }
3237 break;
3238 }
3239 default:
3240 {
3241 break;
3242 }
3243 }
3244
3245 hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246
3247 return;
3248 }
3249
3250 num_clusters_evaluated++;
3251 }
3252 }
3253
3254 /**
3255 ********************************************************************************
3256 * @fn void hme_find_and_update_clusters
3257 * (
3258 * cluster_data_t *ps_cluster_data,
3259 * S32 *pi4_num_clusters,
3260 * S32 mvx,
3261 * S32 mvy,
3262 * S32 ref_idx,
3263 * PART_ID_T e_part_id
3264 * )
3265 *
3266 * @brief Implementation of the clustering algorithm
3267 *
3268 * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
3269 *
3270 * @param[in/out] pi4_num_clusters : pointer to number of clusters
3271 *
3272 * @param[in] mvx : x co-ordinate of the motion vector
3273 *
3274 * @param[in] mvy : y co-ordinate of the motion vector
3275 *
3276 * @param[in] ref_idx : ref_id of the motion vector
3277 *
3278 * @param[in] e_part_id : partition id of the motion vector
3279 *
3280 * @return None
3281 ********************************************************************************
3282 */
3283 void hme_find_and_update_clusters(
3284 cluster_data_t *ps_cluster_data,
3285 U08 *pu1_num_clusters,
3286 S16 i2_mv_x,
3287 S16 i2_mv_y,
3288 U08 i1_ref_idx,
3289 S32 i4_sdi,
3290 PART_ID_T e_part_id,
3291 U08 is_part_of_bi)
3292 {
3293 S32 i;
3294 S32 min_mvd_cluster_id = -1;
3295 S32 mvd, mvd_limit, mvdx, mvdy;
3296 S32 min_mvdx, min_mvdy;
3297
3298 S32 min_mvd = MAX_32BIT_VAL;
3299 S32 num_clusters = *pu1_num_clusters;
3300
3301 S32 mvx = i2_mv_x;
3302 S32 mvy = i2_mv_y;
3303 S32 ref_idx = i1_ref_idx;
3304 S32 sdi = i4_sdi;
3305 S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
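    /*************************************************************************/
    /* MAX_NUM_CLUSTERS_16x16 acts as a sentinel meaning "no invalidated     */
    /* slot was found"; if it survives the search below, a new cluster is    */
    /* appended at index num_clusters instead of reusing a freed slot.       */
    /*************************************************************************/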
3306
3307 if(num_clusters == 0)
3308 {
3309 cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310
3311 ps_data->num_mvs = 1;
3312 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314 ps_data->ref_id = ref_idx;
3315 ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317 ps_data->as_mv[0].mvx = mvx;
3318 ps_data->as_mv[0].mvy = mvy;
3319
3320 /***************************/
3321 ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322 ps_data->as_mv[0].sdi = sdi;
3323 if(is_part_of_bi)
3324 {
3325 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326 }
3327 else
3328 {
3329 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330 }
3331 /**************************/
3332 ps_data->max_x = mvx;
3333 ps_data->min_x = mvx;
3334 ps_data->max_y = mvy;
3335 ps_data->min_y = mvy;
3336
3337 ps_data->is_valid_cluster = 1;
3338
3339 *pu1_num_clusters = 1;
3340 }
3341 else
3342 {
3343 S32 num_clusters_evaluated = 0;
3344
3345 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346 {
3347 cluster_data_t *ps_data = &ps_cluster_data[i];
3348
3349 centroid_t *ps_centroid;
3350
3351 S32 mvx_q8;
3352 S32 mvy_q8;
3353 S32 posx_q8;
3354 S32 posy_q8;
3355 S32 mvdx_q8;
3356 S32 mvdy_q8;
3357
3358 /* In anticipation of a possible merging of clusters */
3359 if(ps_data->is_valid_cluster == 0)
3360 {
3361 new_cluster_idx = i;
3362 continue;
3363 }
3364
3365 if(ref_idx != ps_data->ref_id)
3366 {
3367 num_clusters_evaluated++;
3368 continue;
3369 }
3370
3371 ps_centroid = &ps_data->s_centroid;
3372 posx_q8 = ps_centroid->i4_pos_x_q8;
3373 posy_q8 = ps_centroid->i4_pos_y_q8;
3374
3375 mvx_q8 = mvx << 8;
3376 mvy_q8 = mvy << 8;
3377
3378 mvdx_q8 = posx_q8 - mvx_q8;
3379 mvdy_q8 = posy_q8 - mvy_q8;
3380
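            /* Round the Q8 differences to the nearest integer: add 0.5 in Q8  */
            /* (1 << 7) before shifting right by 8. For example, a Q8 value of */
            /* 896 (i.e. 3.5) rounds to 4.                                     */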
3381 mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382 mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383
3384 mvd = ABS(mvdx) + ABS(mvdy);
3385
3386 if(mvd < min_mvd)
3387 {
3388 min_mvd = mvd;
3389 min_mvdx = mvdx;
3390 min_mvdy = mvdy;
3391 min_mvd_cluster_id = i;
3392 }
3393
3394 num_clusters_evaluated++;
3395 }
3396
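        /* The admission threshold is the radius of the closest cluster with a */
        /* matching ref_idx. If no such cluster was found, min_mvd is still    */
        /* MAX_32BIT_VAL, the comparison below fails and a new cluster is      */
        /* opened instead.                                                     */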
3397 mvd_limit = (min_mvd_cluster_id == -1)
3398 ? ps_cluster_data[0].max_dist_from_centroid
3399 : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400
3401 /* This condition implies that min_mvd has been updated */
3402 if(min_mvd <= mvd_limit)
3403 {
3404 hme_update_cluster_attributes(
3405 &ps_cluster_data[min_mvd_cluster_id],
3406 mvx,
3407 mvy,
3408 min_mvdx,
3409 min_mvdy,
3410 ref_idx,
3411 sdi,
3412 is_part_of_bi,
3413 e_part_id);
3414
3415 if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416 {
3417 hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418 }
3419 }
3420 else
3421 {
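            /* No existing cluster is close enough: open a new cluster, reusing */
            /* a slot invalidated by an earlier merge if one was noted above.   */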
3422 cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423 ? &ps_cluster_data[num_clusters]
3424 : &ps_cluster_data[new_cluster_idx];
3425
3426 ps_data->num_mvs = 1;
3427 ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428 ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429 ps_data->ref_id = ref_idx;
3430 ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431 ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432 ps_data->as_mv[0].mvx = mvx;
3433 ps_data->as_mv[0].mvy = mvy;
3434
3435 /***************************/
3436 ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437 ps_data->as_mv[0].sdi = sdi;
3438 if(is_part_of_bi)
3439 {
3440 ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441 }
3442 else
3443 {
3444 ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445 }
3446 /**************************/
3447 ps_data->max_x = mvx;
3448 ps_data->min_x = mvx;
3449 ps_data->max_y = mvy;
3450 ps_data->min_y = mvy;
3451
3452 ps_data->is_valid_cluster = 1;
3453
3454 num_clusters++;
3455 *pu1_num_clusters = num_clusters;
3456 }
3457 }
3458 }
3459
3460 /**
3461 ********************************************************************************
3462 * @fn void hme_update_32x32_cluster_attributes
3463 * (
3464 * cluster_32x32_blk_t *ps_blk_32x32,
3465 * cluster_data_t *ps_cluster_data
3466 * )
3467 *
3468 * @brief Updates attributes for 32x32 clusters based on the attributes of
3469 * the constituent 16x16 clusters
3470 *
3471 * @param[out] ps_blk_32x32: structure containing 32x32 block results
3472 *
3473 * @param[in] ps_cluster_data : structure containing 16x16 block results
3474 *
3475 * @return None
3476 ********************************************************************************
3477 */
3478 void hme_update_32x32_cluster_attributes(
3479 cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480 {
3481 cluster_data_t *ps_cur_cluster_32;
3482
3483 S32 i;
3484 S32 mvd_limit;
3485
3486 S32 num_clusters = ps_blk_32x32->num_clusters;
3487
3488 if(0 == num_clusters)
3489 {
3490 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491
3492 ps_blk_32x32->num_clusters++;
3493 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494
3495 ps_cur_cluster_32->is_valid_cluster = 1;
3496
3497 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500
3501 memcpy(
3502 ps_cur_cluster_32->as_mv,
3503 ps_cluster_data->as_mv,
3504 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505
3506 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507
3508 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509
3510 ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511 ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512 ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513 ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514
3515 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516 }
3517 else
3518 {
3519 centroid_t *ps_centroid;
3520
3521 S32 cur_posx_q8, cur_posy_q8;
3522 S32 min_mvd_cluster_id = -1;
3523 S32 mvd;
3524 S32 mvdx;
3525 S32 mvdy;
3526 S32 mvdx_min;
3527 S32 mvdy_min;
3528 S32 mvdx_q8;
3529 S32 mvdy_q8;
3530
3531 S32 num_clusters_evaluated = 0;
3532
3533 S32 mvd_min = MAX_32BIT_VAL;
3534
3535 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537
3538 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539 {
3540 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541
3542 if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543 {
3544 num_clusters_evaluated++;
3545 continue;
3546 }
3547             if(!ps_cur_cluster_32->is_valid_cluster)
3548 {
3549 continue;
3550 }
3551
3552 num_clusters_evaluated++;
3553
3554 ps_centroid = &ps_cur_cluster_32->s_centroid;
3555
3556 cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557 cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558
3559 mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560 mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561
3562 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564
3565 mvd = ABS(mvdx) + ABS(mvdy);
3566
3567 if(mvd < mvd_min)
3568 {
3569 mvd_min = mvd;
3570 mvdx_min = mvdx;
3571 mvdy_min = mvdy;
3572 min_mvd_cluster_id = i;
3573 }
3574 }
3575
3576 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577
3578 mvd_limit = (min_mvd_cluster_id == -1)
3579 ? ps_cur_cluster_32[0].max_dist_from_centroid
3580 : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581
3582 if(mvd_min <= mvd_limit)
3583 {
3584 LWORD64 i8_updated_posx;
3585 LWORD64 i8_updated_posy;
3586 WORD32 minmax_updated_x = 0;
3587 WORD32 minmax_updated_y = 0;
3588
3589 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590
3591 ps_centroid = &ps_cur_cluster_32->s_centroid;
3592
3593 ps_cur_cluster_32->is_valid_cluster = 1;
3594
3595 ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598
3599 memcpy(
3600 &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601 ps_cluster_data->as_mv,
3602 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603
3604 if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605 {
3606 ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607 minmax_updated_x = 1;
3608 }
3609 else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610 {
3611 ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612 minmax_updated_x = 2;
3613 }
3614
3615 if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616 {
3617 ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618 minmax_updated_y = 1;
3619 }
3620 else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621 {
3622 ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623 minmax_updated_y = 2;
3624 }
3625
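            /* The switch index packs the two update flags: bits 0-1 hold       */
            /* minmax_updated_x (1 => min_x lowered, 2 => max_x raised) and     */
            /* bits 2-3 hold minmax_updated_y. E.g. case 9 = (2 << 2) + 1 means */
            /* min_x and max_y both moved, so the radius candidate is the       */
            /* larger of the two distances from the centroid.                   */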
3626 switch((minmax_updated_y << 2) + minmax_updated_x)
3627 {
3628 case 1:
3629 {
3630 S32 mvd, mvd_q8;
3631
3632 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633 mvd = (mvd_q8 + (1 << 7)) >> 8;
3634
3635 if(mvd > (mvd_limit))
3636 {
3637 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638 }
3639 break;
3640 }
3641 case 2:
3642 {
3643 S32 mvd, mvd_q8;
3644
3645 mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646 mvd = (mvd_q8 + (1 << 7)) >> 8;
3647
3648 if(mvd > (mvd_limit))
3649 {
3650 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651 }
3652 break;
3653 }
3654 case 4:
3655 {
3656 S32 mvd, mvd_q8;
3657
3658 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659 mvd = (mvd_q8 + (1 << 7)) >> 8;
3660
3661 if(mvd > (mvd_limit))
3662 {
3663 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664 }
3665 break;
3666 }
3667 case 5:
3668 {
3669 S32 mvd;
3670 S32 mvdx, mvdx_q8;
3671 S32 mvdy, mvdy_q8;
3672
3673 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675
3676 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678
3679 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680
3681 if(mvd > mvd_limit)
3682 {
3683 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684 }
3685 break;
3686 }
3687 case 6:
3688 {
3689 S32 mvd;
3690 S32 mvdx, mvdx_q8;
3691 S32 mvdy, mvdy_q8;
3692
3693 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695
3696 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698
3699 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700
3701 if(mvd > mvd_limit)
3702 {
3703 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704 }
3705 break;
3706 }
3707 case 8:
3708 {
3709 S32 mvd, mvd_q8;
3710
3711 mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712 mvd = (mvd_q8 + (1 << 7)) >> 8;
3713
3714 if(mvd > (mvd_limit))
3715 {
3716 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717 }
3718 break;
3719 }
3720 case 9:
3721 {
3722 S32 mvd;
3723 S32 mvdx, mvdx_q8;
3724 S32 mvdy, mvdy_q8;
3725
3726 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728
3729 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731
3732 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733
3734 if(mvd > mvd_limit)
3735 {
3736 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737 }
3738 break;
3739 }
3740 case 10:
3741 {
3742 S32 mvd;
3743 S32 mvdx, mvdx_q8;
3744 S32 mvdy, mvdy_q8;
3745
3746 mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748
3749 mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751
3752 mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753
3754 if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755 {
3756 ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757 }
3758 break;
3759 }
3760 default:
3761 {
3762 break;
3763 }
3764 }
3765
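            /* Update the centroid as the MV-count weighted average of the two  */
            /* centroids, accumulating the products in 64 bits before dividing. */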
3766 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770
3771 ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772
3773 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775 }
3776 else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777 {
3778 ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779
3780 ps_blk_32x32->num_clusters++;
3781 ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782
3783 ps_cur_cluster_32->is_valid_cluster = 1;
3784
3785 ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786 ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787 ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788
3789 memcpy(
3790 ps_cur_cluster_32->as_mv,
3791 ps_cluster_data->as_mv,
3792 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793
3794 ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795
3796 ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797
3798 ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799 ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800 ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801 ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802
3803 ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804 }
3805 }
3806 }
3807
3808 /**
3809 ********************************************************************************
3810 * @fn void hme_update_64x64_cluster_attributes
3811 * (
3812  *    cluster_64x64_blk_t *ps_blk_64x64,
3813 * cluster_data_t *ps_cluster_data
3814 * )
3815 *
3816 * @brief Updates attributes for 64x64 clusters based on the attributes of
3817  *           the constituent 32x32 clusters
3818 *
3819 * @param[out] ps_blk_64x64: structure containing 64x64 block results
3820 *
3821 * @param[in] ps_cluster_data : structure containing 32x32 block results
3822 *
3823 * @return None
3824 ********************************************************************************
3825 */
3826 void hme_update_64x64_cluster_attributes(
3827 cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828 {
3829 cluster_data_t *ps_cur_cluster_64;
3830
3831 S32 i;
3832 S32 mvd_limit;
3833
3834 S32 num_clusters = ps_blk_64x64->num_clusters;
3835
3836 if(0 == num_clusters)
3837 {
3838 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839
3840 ps_blk_64x64->num_clusters++;
3841 ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842
3843 ps_cur_cluster_64->is_valid_cluster = 1;
3844
3845 ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848
3849 memcpy(
3850 ps_cur_cluster_64->as_mv,
3851 ps_cluster_data->as_mv,
3852 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853
3854 ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855
3856 ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857
3858 ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859 ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860 ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861 ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862
3863 ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864 }
3865 else
3866 {
3867 centroid_t *ps_centroid;
3868
3869 S32 cur_posx_q8, cur_posy_q8;
3870 S32 min_mvd_cluster_id = -1;
3871 S32 mvd;
3872 S32 mvdx;
3873 S32 mvdy;
3874 S32 mvdx_min;
3875 S32 mvdy_min;
3876 S32 mvdx_q8;
3877 S32 mvdy_q8;
3878
3879 S32 num_clusters_evaluated = 0;
3880
3881 S32 mvd_min = MAX_32BIT_VAL;
3882
3883 S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884 S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885
3886 for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887 {
3888 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889
3890 if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891 {
3892 num_clusters_evaluated++;
3893 continue;
3894 }
3895
3896 if(!ps_cur_cluster_64->is_valid_cluster)
3897 {
3898 continue;
3899 }
3900
3901 num_clusters_evaluated++;
3902
3903 ps_centroid = &ps_cur_cluster_64->s_centroid;
3904
3905 cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906 cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907
3908 mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909 mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910
3911 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913
3914 mvd = ABS(mvdx) + ABS(mvdy);
3915
3916 if(mvd < mvd_min)
3917 {
3918 mvd_min = mvd;
3919 mvdx_min = mvdx;
3920 mvdy_min = mvdy;
3921 min_mvd_cluster_id = i;
3922 }
3923 }
3924
3925 ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926
3927 mvd_limit = (min_mvd_cluster_id == -1)
3928 ? ps_cur_cluster_64[0].max_dist_from_centroid
3929 : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930
3931 if(mvd_min <= mvd_limit)
3932 {
3933 LWORD64 i8_updated_posx;
3934 LWORD64 i8_updated_posy;
3935 WORD32 minmax_updated_x = 0;
3936 WORD32 minmax_updated_y = 0;
3937
3938 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939
3940 ps_centroid = &ps_cur_cluster_64->s_centroid;
3941
3942 ps_cur_cluster_64->is_valid_cluster = 1;
3943
3944 ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947
3948 memcpy(
3949 &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950 ps_cluster_data->as_mv,
3951 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952
3953 if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954 {
3955 ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956 minmax_updated_x = 1;
3957 }
3958 else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959 {
3960 ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961 minmax_updated_x = 2;
3962 }
3963
3964 if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965 {
3966 ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967 minmax_updated_y = 1;
3968 }
3969 else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970 {
3971 ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972 minmax_updated_y = 2;
3973 }
3974
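            /* Same min/max and radius bookkeeping as in                        */
            /* hme_update_32x32_cluster_attributes, applied to the 64x64 block. */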
3975 switch((minmax_updated_y << 2) + minmax_updated_x)
3976 {
3977 case 1:
3978 {
3979 S32 mvd, mvd_q8;
3980
3981 mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982 mvd = (mvd_q8 + (1 << 7)) >> 8;
3983
3984 if(mvd > (mvd_limit))
3985 {
3986 ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987 }
3988 break;
3989 }
3990 case 2:
3991 {
3992 S32 mvd, mvd_q8;
3993
3994 mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995 mvd = (mvd_q8 + (1 << 7)) >> 8;
3996
3997 if(mvd > (mvd_limit))
3998 {
3999 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000 }
4001 break;
4002 }
4003 case 4:
4004 {
4005 S32 mvd, mvd_q8;
4006
4007 mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008 mvd = (mvd_q8 + (1 << 7)) >> 8;
4009
4010 if(mvd > (mvd_limit))
4011 {
4012 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013 }
4014 break;
4015 }
4016 case 5:
4017 {
4018 S32 mvd;
4019 S32 mvdx, mvdx_q8;
4020 S32 mvdy, mvdy_q8;
4021
4022 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024
4025 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027
4028 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029
4030 if(mvd > mvd_limit)
4031 {
4032 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033 }
4034 break;
4035 }
4036 case 6:
4037 {
4038 S32 mvd;
4039 S32 mvdx, mvdx_q8;
4040 S32 mvdy, mvdy_q8;
4041
4042 mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044
4045 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047
4048 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049
4050 if(mvd > mvd_limit)
4051 {
4052 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053 }
4054 break;
4055 }
4056 case 8:
4057 {
4058 S32 mvd, mvd_q8;
4059
4060 mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061 mvd = (mvd_q8 + (1 << 7)) >> 8;
4062
4063 if(mvd > (mvd_limit))
4064 {
4065 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066 }
4067 break;
4068 }
4069 case 9:
4070 {
4071 S32 mvd;
4072 S32 mvdx, mvdx_q8;
4073 S32 mvdy, mvdy_q8;
4074
4075 mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077
4078 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080
4081 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082
4083 if(mvd > mvd_limit)
4084 {
4085 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086 }
4087 break;
4088 }
4089 case 10:
4090 {
4091 S32 mvd;
4092 S32 mvdx, mvdx_q8;
4093 S32 mvdy, mvdy_q8;
4094
4095 mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097
4098 mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100
4101 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102
4103 if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104 {
4105 ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106 }
4107 break;
4108 }
4109 default:
4110 {
4111 break;
4112 }
4113 }
4114
4115 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116 ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118 ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119
4120 ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121
4122 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124 }
4125 else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126 {
4127 ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128
4129 ps_blk_64x64->num_clusters++;
4130 ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131
4132 ps_cur_cluster_64->is_valid_cluster = 1;
4133
4134 ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135 ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136 ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137
4138 memcpy(
4139 &ps_cur_cluster_64->as_mv[0],
4140 ps_cluster_data->as_mv,
4141 sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142
4143 ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144
4145 ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146
4147 ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148 ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149 ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150 ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151
4152 ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153 }
4154 }
4155 }
4156
4157 /**
4158 ********************************************************************************
4159 * @fn void hme_update_32x32_clusters
4160 * (
4161 * cluster_32x32_blk_t *ps_blk_32x32,
4162 * cluster_16x16_blk_t *ps_blk_16x16
4163 * )
4164 *
4165 * @brief Updates attributes for 32x32 clusters based on the attributes of
4166 * the constituent 16x16 clusters
4167 *
4168 * @param[out] ps_blk_32x32: structure containing 32x32 block results
4169 *
4170 * @param[in] ps_blk_16x16 : structure containing 16x16 block results
4171 *
4172 * @return None
4173 ********************************************************************************
4174 */
4175 static __inline void
4176 hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177 {
4178 cluster_16x16_blk_t *ps_blk_16x16_cur;
4179 cluster_data_t *ps_cur_cluster;
4180
4181 S32 i, j;
4182 S32 num_clusters_cur_16x16_blk;
4183
4184 for(i = 0; i < 4; i++)
4185 {
4186 S32 num_clusters_evaluated = 0;
4187
4188 ps_blk_16x16_cur = &ps_blk_16x16[i];
4189
4190 num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191
4192 ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193
4194 ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195
4196 for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197 {
4198 ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199
4200 if(!ps_cur_cluster->is_valid_cluster)
4201 {
4202 continue;
4203 }
4204
4205 hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206
4207 num_clusters_evaluated++;
4208 }
4209 }
4210 }
4211
4212 /**
4213 ********************************************************************************
4214 * @fn void hme_update_64x64_clusters
4215 * (
4216 * cluster_64x64_blk_t *ps_blk_64x64,
4217 * cluster_32x32_blk_t *ps_blk_32x32
4218 * )
4219 *
4220 * @brief Updates attributes for 64x64 clusters based on the attributes of
4221  *           the constituent 32x32 clusters
4222  *
4223  * @param[out]  ps_blk_64x64: structure containing 64x64 block results
4224  *
4225  * @param[in]  ps_blk_32x32 : structure containing 32x32 block results
4226 *
4227 * @return None
4228 ********************************************************************************
4229 */
4230 static __inline void
4231 hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232 {
4233 cluster_32x32_blk_t *ps_blk_32x32_cur;
4234 cluster_data_t *ps_cur_cluster;
4235
4236 S32 i, j;
4237 S32 num_clusters_cur_32x32_blk;
4238
4239 for(i = 0; i < 4; i++)
4240 {
4241 S32 num_clusters_evaluated = 0;
4242
4243 ps_blk_32x32_cur = &ps_blk_32x32[i];
4244
4245 num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246
4247 ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248 ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249
4250 for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251 {
4252 ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253
4254 if(!ps_cur_cluster->is_valid_cluster)
4255 {
4256 continue;
4257 }
4258
4259 hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260
4261 num_clusters_evaluated++;
4262 }
4263 }
4264 }
4265
4266 /**
4267 ********************************************************************************
4268 * @fn void hme_try_merge_clusters_blksize_gt_16
4269 * (
4270 * cluster_data_t *ps_cluster_data,
4271 * S32 num_clusters
4272 * )
4273 *
4274 * @brief Merging clusters from blocks of size 32x32 and greater
4275 *
4276 * @param[in/out] ps_cluster_data: structure containing cluster data
4277 *
4278  * @param[in]  num_clusters : number of clusters in the block
4279  *
4280  * @return Number of cluster merges performed
4281 ********************************************************************************
4282 */
4283 S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284 {
4285 centroid_t *ps_cur_centroid;
4286 cluster_data_t *ps_cur_cluster;
4287
4288 S32 i, mvd;
4289 S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290
4291 centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292
4293 S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294 S32 ref_id = ps_cluster_data->ref_id;
4295
4296 S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297 S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298 S32 num_clusters_evaluated = 1;
4299 S32 ret_value = 0;
4300
4301 if(1 >= num_clusters)
4302 {
4303 return ret_value;
4304 }
4305
4306 for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307 {
4308 S32 cur_posx_q8;
4309 S32 cur_posy_q8;
4310
4311 ps_cur_cluster = &ps_cluster_data[i];
4312
4313 if((ref_id != ps_cur_cluster->ref_id))
4314 {
4315 num_clusters_evaluated++;
4316 continue;
4317 }
4318
4319 if((!ps_cur_cluster->is_valid_cluster))
4320 {
4321 continue;
4322 }
4323
4324 num_clusters_evaluated++;
4325
4326 ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327
4328 cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329 cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330
4331 mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332 mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333
4334 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336
4337 mvd = ABS(mvdx) + ABS(mvdy);
4338
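        /* Merge criterion: the candidate centroid must lie within half the     */
        /* radius of the first cluster. On a merge the candidate is marked      */
        /* invalid, its MVs and pixel areas are folded into the first cluster,  */
        /* and the first cluster's min/max extents and radius are updated.      */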
4339 if(mvd <= (mvd_limit >> 1))
4340 {
4341 LWORD64 i8_updated_posx;
4342 LWORD64 i8_updated_posy;
4343 WORD32 minmax_updated_x = 0;
4344 WORD32 minmax_updated_y = 0;
4345
4346 ps_cur_cluster->is_valid_cluster = 0;
4347
4348 ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349 ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350 ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351
4352 memcpy(
4353 &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354 ps_cur_cluster->as_mv,
4355 sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356
4357 if(mvdx > 0)
4358 {
4359 ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360 minmax_updated_x = 1;
4361 }
4362 else
4363 {
4364 ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365 minmax_updated_x = 2;
4366 }
4367
4368 if(mvdy > 0)
4369 {
4370 ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371 minmax_updated_y = 1;
4372 }
4373 else
4374 {
4375 ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376 minmax_updated_y = 2;
4377 }
4378
4379 switch((minmax_updated_y << 2) + minmax_updated_x)
4380 {
4381 case 1:
4382 {
4383 S32 mvd, mvd_q8;
4384
4385 mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386 mvd = (mvd_q8 + (1 << 7)) >> 8;
4387
4388 if(mvd > (mvd_limit))
4389 {
4390 ps_cluster_data->max_dist_from_centroid = mvd;
4391 }
4392 break;
4393 }
4394 case 2:
4395 {
4396 S32 mvd, mvd_q8;
4397
4398 mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399 mvd = (mvd_q8 + (1 << 7)) >> 8;
4400
4401 if(mvd > (mvd_limit))
4402 {
4403 ps_cluster_data->max_dist_from_centroid = mvd;
4404 }
4405 break;
4406 }
4407 case 4:
4408 {
4409 S32 mvd, mvd_q8;
4410
4411 mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412 mvd = (mvd_q8 + (1 << 7)) >> 8;
4413
4414 if(mvd > (mvd_limit))
4415 {
4416 ps_cluster_data->max_dist_from_centroid = mvd;
4417 }
4418 break;
4419 }
4420 case 5:
4421 {
4422 S32 mvd;
4423 S32 mvdx, mvdx_q8;
4424 S32 mvdy, mvdy_q8;
4425
4426 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428
4429 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431
4432 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433
4434 if(mvd > mvd_limit)
4435 {
4436 ps_cluster_data->max_dist_from_centroid = mvd;
4437 }
4438 break;
4439 }
4440 case 6:
4441 {
4442 S32 mvd;
4443 S32 mvdx, mvdx_q8;
4444 S32 mvdy, mvdy_q8;
4445
4446 mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448
4449 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451
4452 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453
4454 if(mvd > mvd_limit)
4455 {
4456 ps_cluster_data->max_dist_from_centroid = mvd;
4457 }
4458 break;
4459 }
4460 case 8:
4461 {
4462 S32 mvd, mvd_q8;
4463
4464 mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465 mvd = (mvd_q8 + (1 << 7)) >> 8;
4466
4467 if(mvd > (mvd_limit))
4468 {
4469 ps_cluster_data->max_dist_from_centroid = mvd;
4470 }
4471 break;
4472 }
4473 case 9:
4474 {
4475 S32 mvd;
4476 S32 mvdx, mvdx_q8;
4477 S32 mvdy, mvdy_q8;
4478
4479 mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481
4482 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484
4485 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486
4487 if(mvd > mvd_limit)
4488 {
4489 ps_cluster_data->max_dist_from_centroid = mvd;
4490 }
4491 break;
4492 }
4493 case 10:
4494 {
4495 S32 mvd;
4496 S32 mvdx, mvdx_q8;
4497 S32 mvdy, mvdy_q8;
4498
4499 mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500 mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501
4502 mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503 mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504
4505 mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506
4507 if(mvd > ps_cluster_data->max_dist_from_centroid)
4508 {
4509 ps_cluster_data->max_dist_from_centroid = mvd;
4510 }
4511 break;
4512 }
4513 default:
4514 {
4515 break;
4516 }
4517 }
4518
4519 i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520 ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521 i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522 ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523
4524 ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525
4526 ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527 ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528
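            /* If the total cluster count is within the 64x64 limit, restart    */
            /* the scan (i is reset so the loop resumes at index 1), since the  */
            /* enlarged cluster may now absorb candidates rejected earlier;     */
            /* otherwise report the number of merges performed so far.          */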
4529 if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530 {
4531 num_clusters--;
4532 num_clusters_evaluated = 1;
4533 i = 0;
4534 ret_value++;
4535 }
4536 else
4537 {
4538 ret_value++;
4539
4540 return ret_value;
4541 }
4542 }
4543 }
4544
4545 if(ret_value)
4546 {
4547 for(i = 1; i < (num_clusters + ret_value); i++)
4548 {
4549 if(ps_cluster_data[i].is_valid_cluster)
4550 {
4551 break;
4552 }
4553 }
4554 if(i == (num_clusters + ret_value))
4555 {
4556 return ret_value;
4557 }
4558 }
4559 else
4560 {
4561 i = 1;
4562 }
4563
4564 return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565 ret_value;
4566 }
4567
4568 /**
4569 ********************************************************************************
4570 * @fn S32 hme_determine_validity_32x32
4571 * (
4572 * ctb_cluster_info_t *ps_ctb_cluster_info
4573 * )
4574 *
4575  * @brief    Determines whether the current 32x32 block needs to be evaluated in enc_loop
4576  *           while recursing through the CU tree or not
4577  *
4578  * @param[in]  ps_ctb_cluster_info: structure containing cluster data for the current CTB
4579  *
4580  * @return 1 if the 32x32 block is to be evaluated as a CU, else 0
4581 ********************************************************************************
4582 */
4583 __inline S32 hme_determine_validity_32x32(
4584 ctb_cluster_info_t *ps_ctb_cluster_info,
4585 S32 *pi4_children_nodes_required,
4586 S32 blk_validity_wrt_pic_bndry,
4587 S32 parent_blk_validity_wrt_pic_bndry)
4588 {
4589 cluster_data_t *ps_data;
4590
4591 cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592 cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593
4594 S32 num_clusters = ps_32x32_blk->num_clusters;
4595 S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596
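    /* A 32x32 node is rejected (and its children evaluated) when it straddles  */
    /* the picture boundary or exceeds the 32x32 cluster limit; it is accepted  */
    /* without descending when the parent 64x64 block is below its own limit.   */
    /* In the borderline case, where both counts sit at their limits, the       */
    /* smallest cluster must also cover at least 1/16th of the 64x64 area.      */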
4597 if(!blk_validity_wrt_pic_bndry)
4598 {
4599 *pi4_children_nodes_required = 1;
4600 return 0;
4601 }
4602
4603 if(!parent_blk_validity_wrt_pic_bndry)
4604 {
4605 *pi4_children_nodes_required = 1;
4606 return 1;
4607 }
4608
4609 if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610 {
4611 *pi4_children_nodes_required = 1;
4612 return 0;
4613 }
4614
4615 if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616 {
4617 *pi4_children_nodes_required = 1;
4618
4619 return 1;
4620 }
4621 else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622 {
4623 *pi4_children_nodes_required = 0;
4624
4625 return 1;
4626 }
4627 else
4628 {
4629 if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630 {
4631 *pi4_children_nodes_required = 0;
4632 return 1;
4633 }
4634 else
4635 {
4636 S32 i;
4637
4638 S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639 S32 min_area = MAX_32BIT_VAL;
4640 S32 num_clusters_evaluated = 0;
4641
4642 for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643 {
4644 ps_data = &ps_32x32_blk->as_cluster_data[i];
4645
4646 if(!ps_data->is_valid_cluster)
4647 {
4648 continue;
4649 }
4650
4651 num_clusters_evaluated++;
4652
4653 if(ps_data->area_in_pixels < min_area)
4654 {
4655 min_area = ps_data->area_in_pixels;
4656 }
4657 }
4658
4659 if((min_area << 4) < area_of_parent)
4660 {
4661 *pi4_children_nodes_required = 1;
4662 return 0;
4663 }
4664 else
4665 {
4666 *pi4_children_nodes_required = 0;
4667 return 1;
4668 }
4669 }
4670 }
4671 }
4672
4673 /**
4674 ********************************************************************************
4675 * @fn S32 hme_determine_validity_16x16
4676 * (
4677 * ctb_cluster_info_t *ps_ctb_cluster_info
4678 * )
4679 *
4680  * @brief    Determines whether the current 16x16 block needs to be evaluated in enc_loop
4681  *           while recursing through the CU tree or not
4682  *
4683  * @param[in]  ps_ctb_cluster_info: structure containing cluster data for the current CTB
4684  *
4685  * @return 1 if the 16x16 block is to be evaluated as a CU, else 0
4686 ********************************************************************************
4687 */
4688 __inline S32 hme_determine_validity_16x16(
4689 ctb_cluster_info_t *ps_ctb_cluster_info,
4690 S32 *pi4_children_nodes_required,
4691 S32 blk_validity_wrt_pic_bndry,
4692 S32 parent_blk_validity_wrt_pic_bndry)
4693 {
4694 cluster_data_t *ps_data;
4695
4696 cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697 cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698 cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699
4700 S32 num_clusters = ps_16x16_blk->num_clusters;
4701 S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702 S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703
4704 if(!blk_validity_wrt_pic_bndry)
4705 {
4706 *pi4_children_nodes_required = 1;
4707 return 0;
4708 }
4709
4710 if(!parent_blk_validity_wrt_pic_bndry)
4711 {
4712 *pi4_children_nodes_required = 1;
4713 return 1;
4714 }
4715
4716 if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717 (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718 {
4719 *pi4_children_nodes_required = 1;
4720 return 1;
4721 }
4722
4723 /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724 /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725 if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726 {
4727 if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728 {
4729 *pi4_children_nodes_required = 0;
4730
4731 return 1;
4732 }
4733 else
4734 {
4735 *pi4_children_nodes_required = 1;
4736
4737 return 0;
4738 }
4739 }
4740 /* Implies nc_64 >= 3 */
4741 else
4742 {
4743 if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744 {
4745 *pi4_children_nodes_required = 0;
4746 return 1;
4747 }
4748 else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749 {
4750 *pi4_children_nodes_required = 1;
4751 return 0;
4752 }
4753 else
4754 {
4755 S32 i;
4756
4757 S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758 S32 min_area = MAX_32BIT_VAL;
4759 S32 num_clusters_evaluated = 0;
4760
4761 for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762 {
4763 ps_data = &ps_16x16_blk->as_cluster_data[i];
4764
4765 if(!ps_data->is_valid_cluster)
4766 {
4767 continue;
4768 }
4769
4770 num_clusters_evaluated++;
4771
4772 if(ps_data->area_in_pixels < min_area)
4773 {
4774 min_area = ps_data->area_in_pixels;
4775 }
4776 }
4777
4778 if((min_area << 4) < area_of_parent)
4779 {
4780 *pi4_children_nodes_required = 1;
4781 return 0;
4782 }
4783 else
4784 {
4785 *pi4_children_nodes_required = 0;
4786 return 1;
4787 }
4788 }
4789 }
4790 }
4791
4792 /**
4793 ********************************************************************************
4794 * @fn void hme_build_cu_tree
4795 * (
4796 * ctb_cluster_info_t *ps_ctb_cluster_info,
4797 * cur_ctb_cu_tree_t *ps_cu_tree,
4798 * S32 tree_depth,
4799 * CU_POS_T e_grand_parent_blk_pos,
4800 * CU_POS_T e_parent_blk_pos,
4801 * CU_POS_T e_cur_blk_pos
4802 * )
4803 *
4804 * @brief Recursive function for CU tree initialisation
4805 *
4806 * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4807 * corresponding to all block sizes from 64x64
4808 * to 16x16
4809 *
4810 * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4811 * applicable
4812 *
4813 * @param[in] e_cur_blk_pos: position of current block wrt parent
4814 *
4815 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4816 *
4817 * @param[in] tree_depth : specifies depth of the CU tree
4818 *
4819 * @return Nothing
4820 ********************************************************************************
4821 */
4822 void hme_build_cu_tree(
4823 ctb_cluster_info_t *ps_ctb_cluster_info,
4824 cur_ctb_cu_tree_t *ps_cu_tree,
4825 S32 tree_depth,
4826 CU_POS_T e_grandparent_blk_pos,
4827 CU_POS_T e_parent_blk_pos,
4828 CU_POS_T e_cur_blk_pos)
4829 {
4830 ihevce_cu_tree_init(
4831 ps_cu_tree,
4832 ps_ctb_cluster_info->ps_cu_tree_root,
4833 &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834 tree_depth,
4835 e_grandparent_blk_pos,
4836 e_parent_blk_pos,
4837 e_cur_blk_pos);
4838 }
4839
4840 /**
4841 ********************************************************************************
4842 * @fn S32 hme_sdi_based_cluster_spread_eligibility
4843 * (
4844  *    cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold
4845 * )
4846 *
4847  * @brief  Determines whether the spread of high SDI MVs around each cluster
4848 * center is below a pre-determined threshold
4849 *
4850  * @param[in]  ps_blk_32x32: structure containing cluster data for the 32x32 block
4851  *
4852  * @param[in]  sdi_threshold: SDI value at or above which an MV is counted as a high SDI MV
4853 *
4854 * @return 1 if the spread is constrained, else 0
4855 ********************************************************************************
4856 */
4857 __inline S32
4858 hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859 {
4860 S32 cumulative_mv_distance;
4861 S32 i, j;
4862 S32 num_high_sdi_mvs;
4863
4864 S32 num_clusters = ps_blk_32x32->num_clusters;
4865
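    /* For each cluster, accumulate via COMPUTE_MVD the distance from the       */
    /* centroid of every MV whose sdi is at or above the threshold. The 32x32   */
    /* block is eligible only if, in every cluster, the average such distance   */
    /* does not exceed half the cluster radius.                                 */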
4866 for(i = 0; i < num_clusters; i++)
4867 {
4868 cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869
4870 num_high_sdi_mvs = 0;
4871 cumulative_mv_distance = 0;
4872
4873 for(j = 0; j < ps_data->num_mvs; j++)
4874 {
4875 mv_data_t *ps_mv = &ps_data->as_mv[j];
4876
4877 if(ps_mv->sdi >= sdi_threshold)
4878 {
4879 num_high_sdi_mvs++;
4880
4881 COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882 }
4883 }
4884
4885 if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886 {
4887 return 0;
4888 }
4889 }
4890
4891 return 1;
4892 }
4893
4894 /**
4895 ********************************************************************************
4896 * @fn S32 hme_populate_cu_tree
4897 * (
4898 * ctb_cluster_info_t *ps_ctb_cluster_info,
4899  *    cur_ctb_cu_tree_t *ps_cu_tree,
4900  *    S32 tree_depth,
4901  *    ME_QUALITY_PRESETS_T e_quality_preset,
4902  *    CU_POS_T e_grandparent_blk_pos,
4903  *    CU_POS_T e_parent_blk_pos, CU_POS_T e_cur_blk_pos
4904 * )
4905 *
4906 * @brief Recursive function for CU tree population based on output of
4907 * clustering algorithm
4908 *
4909 * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4910 * corresponding to all block sizes from 64x64
4911 * to 16x16
4912 *
4913 * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4914  *                               applicable
4915 *
4916 * @param[in] e_cur_blk_pos: position of current block wrt parent
4917 *
4918  * @param[in]  e_quality_preset : ME quality preset in use
4919 *
4920 * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4921 *
4922 * @param[in] tree_depth : specifies depth of the CU tree
4923 *
4924  * Note : ipe_decision_precedence (computed locally) specifies whether precedence
4925  *        is given to decisions made by IPE (1) or by the clustering algo (0)
4926 *
4927 * @return 1 if re-evaluation of parent node's validity is not required,
4928  *         else 0
4929 ********************************************************************************
4930 */
4931 void hme_populate_cu_tree(
4932 ctb_cluster_info_t *ps_ctb_cluster_info,
4933 cur_ctb_cu_tree_t *ps_cu_tree,
4934 S32 tree_depth,
4935 ME_QUALITY_PRESETS_T e_quality_preset,
4936 CU_POS_T e_grandparent_blk_pos,
4937 CU_POS_T e_parent_blk_pos,
4938 CU_POS_T e_cur_blk_pos)
4939 {
4940 S32 area_of_cur_blk;
4941 S32 area_limit_for_me_decision_precedence;
4942 S32 children_nodes_required;
4943 S32 intra_mv_area;
4944 S32 intra_eval_enable;
4945 S32 inter_eval_enable;
4946 S32 ipe_decision_precedence;
4947 S32 node_validity;
4948 S32 num_clusters;
4949
4950 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951
4952 if(NULL == ps_cu_tree)
4953 {
4954 return;
4955 }
4956
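    /* tree_depth maps directly to block size: 0 => 64x64, 1 => 32x32,          */
    /* 2 => 16x16, 3 => 8x8. Each case decides the node's validity and whether  */
    /* its four child nodes need to be populated as well.                       */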
4957 switch(tree_depth)
4958 {
4959 case 0:
4960 {
4961 /* 64x64 block */
4962 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963
4964 cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965
4966 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968 children_nodes_required = 0;
4969 intra_mv_area = ps_blk_64x64->intra_mv_area;
4970
4971 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972
4973 intra_eval_enable = ipe_decision_precedence;
4974 inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975
4976 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977 if(e_quality_preset >= ME_HIGH_QUALITY)
4978 {
4979 inter_eval_enable = 1;
4980 node_validity = (blk_32x32_mask == 0xf);
4981 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983 #endif
4984 break;
4985 }
4986 #endif
4987
4988 #if ENABLE_4CTB_EVALUATION
4989 node_validity = (blk_32x32_mask == 0xf);
4990
4991 break;
4992 #else
4993 {
4994 S32 i;
4995
4996 num_clusters = ps_blk_64x64->num_clusters;
4997
4998 node_validity = (ipe_decision_precedence)
4999 ? (!ps_cur_ipe_ctb->u1_split_flag)
5000 : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001
5002 for(i = 0; i < MAX_NUM_REF; i++)
5003 {
5004 node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006 }
5007
5008 node_validity = node_validity && (blk_32x32_mask == 0xf);
5009 }
5010 break;
5011 #endif
5012 }
5013 case 1:
5014 {
5015 /* 32x32 block */
5016 S32 is_percent_intra_area_gt_threshold;
5017
5018 cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019
5020 S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021
5022 #if !ENABLE_4CTB_EVALUATION
5023 S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024 S32 best_intra_cost =
5025 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026 ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027 4) < 0)
5028 ? MAX_32BIT_VAL
5029 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030 ps_ctb_cluster_info->i4_frame_qstep *
5031 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032 S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033 S32 cost_differential = (best_inter_cost - best_cost);
5034 #endif
5035
5036 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038 intra_mv_area = ps_blk_32x32->intra_mv_area;
5039 is_percent_intra_area_gt_threshold =
5040 (intra_mv_area > area_limit_for_me_decision_precedence);
5041 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042
5043 intra_eval_enable = ipe_decision_precedence;
5044 inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045 children_nodes_required = 1;
5046
5047 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048 if(e_quality_preset >= ME_HIGH_QUALITY)
5049 {
5050 inter_eval_enable = 1;
5051 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053 ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054 #endif
5055 break;
5056 }
5057 #endif
5058
5059 #if ENABLE_4CTB_EVALUATION
5060 node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061
5062 break;
5063 #else
5064 {
5065 S32 i;
5066 num_clusters = ps_blk_32x32->num_clusters;
5067
5068 if(ipe_decision_precedence)
5069 {
5070 node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071 node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072 }
5073 else
5074 {
5075 node_validity =
5076 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077 (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078 (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079
5080 for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081 {
5082 node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083 MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084 }
5085
5086 if(node_validity)
5087 {
5088 node_validity = node_validity &&
5089 hme_sdi_based_cluster_spread_eligibility(
5090 ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091 }
5092 }
5093 }
5094
5095 break;
5096 #endif
5097 }
5098 case 2:
5099 {
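        /* 16x16 block */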
5100 cluster_16x16_blk_t *ps_blk_16x16 =
5101 &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102
5103 S32 blk_8x8_mask =
5104 ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105
5106 area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107 area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108 children_nodes_required = 1;
5109 intra_mv_area = ps_blk_16x16->intra_mv_area;
5110 ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111 num_clusters = ps_blk_16x16->num_clusters;
5112
5113 intra_eval_enable = ipe_decision_precedence;
5114 inter_eval_enable = 1;
5115
5116 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117 if(e_quality_preset >= ME_HIGH_QUALITY)
5118 {
5119 node_validity =
5120 !ps_ctb_cluster_info
5121 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122 children_nodes_required = !node_validity;
5123 break;
5124 }
5125 #endif
5126
5127 #if ENABLE_4CTB_EVALUATION
5128 node_validity = (blk_8x8_mask == 0xf);
5129
5130 #if ENABLE_CU_TREE_CULLING
5131 {
5132 cur_ctb_cu_tree_t *ps_32x32_root = NULL;
5133
5134 switch(e_parent_blk_pos)
5135 {
5136 case POS_TL:
5137 {
5138 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139
5140 break;
5141 }
5142 case POS_TR:
5143 {
5144 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145
5146 break;
5147 }
5148 case POS_BL:
5149 {
5150 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151
5152 break;
5153 }
5154 case POS_BR:
5155 {
5156 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157
5158 break;
5159 }
5160 default:
5161 {
5162 DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5163 break;
5164 }
5165 }
5166
5167 if(ps_32x32_root->is_node_valid)
5168 {
5169 node_validity =
5170 node_validity &&
5171 !ps_ctb_cluster_info
5172 ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5173 children_nodes_required = !node_validity;
5174 }
5175 }
5176 #endif
5177
5178 break;
5179 #else
5180
5181 if(ipe_decision_precedence)
5182 {
5183 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5184 .as_intra16_analyse[e_cur_blk_pos]
5185 .b1_merge_flag);
5186 S32 valid_flag = (blk_8x8_mask == 0xf);
5187
5188 node_validity = merge_flag_16 && valid_flag;
5189 }
5190 else
5191 {
5192 node_validity = (blk_8x8_mask == 0xf);
5193 }
5194
5195 break;
5196 #endif
5197 }
5198 case 3:
5199 {
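        /* 8x8 block */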
5200 S32 blk_8x8_mask =
5201 ps_ctb_cluster_info
5202 ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5203 S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5204 .as_intra16_analyse[e_parent_blk_pos]
5205 .b1_merge_flag);
5206 S32 merge_flag_32 =
5207 (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5208
5209 intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5210 inter_eval_enable = 1;
5211 children_nodes_required = 0;
5212
5213 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5214 if(e_quality_preset >= ME_HIGH_QUALITY)
5215 {
5216 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217 break;
5218 }
5219 #endif
5220
5221 #if ENABLE_4CTB_EVALUATION
5222 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5223
5224 break;
5225 #else
5226 {
5227 cur_ctb_cu_tree_t *ps_32x32_root;
5228 cur_ctb_cu_tree_t *ps_16x16_root;
5229 cluster_32x32_blk_t *ps_32x32_blk;
5230
5231 switch(e_grandparent_blk_pos)
5232 {
5233 case POS_TL:
5234 {
5235 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5236
5237 break;
5238 }
5239 case POS_TR:
5240 {
5241 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5242
5243 break;
5244 }
5245 case POS_BL:
5246 {
5247 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5248
5249 break;
5250 }
5251 case POS_BR:
5252 {
5253 ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5254
5255 break;
5256 }
5257 default:
5258 {
5259 DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos);
5260 break;
5261 }
5262 }
5263
5264 switch(e_parent_blk_pos)
5265 {
5266 case POS_TL:
5267 {
5268 ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5269
5270 break;
5271 }
5272 case POS_TR:
5273 {
5274 ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5275
5276 break;
5277 }
5278 case POS_BL:
5279 {
5280 ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5281
5282 break;
5283 }
5284 case POS_BR:
5285 {
5286 ps_16x16_root = ps_32x32_root->ps_child_node_br;
5287
5288 break;
5289 }
5290 default:
5291 {
5292 DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5293 break;
5294 }
5295 }
5296
5297 ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5298
5299 node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5300 ((!ps_32x32_root->is_node_valid) ||
5301 (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5302 (!ps_16x16_root->is_node_valid));
5303
5304 break;
5305 }
5306 #endif
5307 }
5308 }
5309
5310 /* Fill the current cu_tree node */
5311 ps_cu_tree->is_node_valid = node_validity;
5312 ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5313 ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5314
5315 if(children_nodes_required)
5316 {
5317 tree_depth++;
5318
5319 hme_populate_cu_tree(
5320 ps_ctb_cluster_info,
5321 ps_cu_tree->ps_child_node_tl,
5322 tree_depth,
5323 e_quality_preset,
5324 e_parent_blk_pos,
5325 e_cur_blk_pos,
5326 POS_TL);
5327
5328 hme_populate_cu_tree(
5329 ps_ctb_cluster_info,
5330 ps_cu_tree->ps_child_node_tr,
5331 tree_depth,
5332 e_quality_preset,
5333 e_parent_blk_pos,
5334 e_cur_blk_pos,
5335 POS_TR);
5336
5337 hme_populate_cu_tree(
5338 ps_ctb_cluster_info,
5339 ps_cu_tree->ps_child_node_bl,
5340 tree_depth,
5341 e_quality_preset,
5342 e_parent_blk_pos,
5343 e_cur_blk_pos,
5344 POS_BL);
5345
5346 hme_populate_cu_tree(
5347 ps_ctb_cluster_info,
5348 ps_cu_tree->ps_child_node_br,
5349 tree_depth,
5350 e_quality_preset,
5351 e_parent_blk_pos,
5352 e_cur_blk_pos,
5353 POS_BR);
5354 }
5355 }
5356
5357 /**
5358 ********************************************************************************
5359  * @fn    void hme_analyse_mv_clustering
5360  *    (
5361  *        search_results_t *ps_search_results,
5362  *        inter_cu_results_t *ps_16x16_cu_results,
5363  *        inter_cu_results_t *ps_8x8_cu_results,
5364  *        ctb_cluster_info_t *ps_ctb_cluster_info,
5365  *        S08 *pi1_future_list, S08 *pi1_past_list,
5366  *        S32 bidir_enabled, ME_QUALITY_PRESETS_T e_quality_preset
5367  *    )
5368  *
5369  *  @brief  Implementation for the clustering algorithm
5370  *
5371  *  @param[in]  ps_search_results: structure containing 16x16 block results
5372  *
5373  *  @param[in/out]  ps_ctb_cluster_info : cluster data and CU tree root for the current CTB
5374 * @return None
5375 ********************************************************************************
5376 */
5377 void hme_analyse_mv_clustering(
5378 search_results_t *ps_search_results,
5379 inter_cu_results_t *ps_16x16_cu_results,
5380 inter_cu_results_t *ps_8x8_cu_results,
5381 ctb_cluster_info_t *ps_ctb_cluster_info,
5382 S08 *pi1_future_list,
5383 S08 *pi1_past_list,
5384 S32 bidir_enabled,
5385 ME_QUALITY_PRESETS_T e_quality_preset)
5386 {
5387 cluster_16x16_blk_t *ps_blk_16x16;
5388 cluster_32x32_blk_t *ps_blk_32x32;
5389 cluster_64x64_blk_t *ps_blk_64x64;
5390
5391 part_type_results_t *ps_best_result;
5392 pu_result_t *aps_part_result[MAX_NUM_PARTS];
5393 pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5394
5395 PART_ID_T e_part_id;
5396 PART_TYPE_T e_part_type;
5397
5398 S32 enable_64x64_merge;
5399 S32 i, j, k;
5400 S32 mvx, mvy;
5401 S32 num_parts;
5402 S32 ref_idx;
5403 S32 ai4_pred_mode[MAX_NUM_PARTS];
5404
5405 S32 num_32x32_merges = 0;
5406
5407 /*****************************************/
5408 /*****************************************/
5409 /********* Enter ye who is HQ ************/
5410 /*****************************************/
5411 /*****************************************/
5412
5413 ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5414
5415 /* Initialise data in each of the clusters */
5416 for(i = 0; i < 16; i++)
5417 {
5418 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5419
5420 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5421 if(e_quality_preset < ME_HIGH_QUALITY)
5422 {
5423 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5424 }
5425 else
5426 {
5427 ps_blk_16x16->best_inter_cost = 0;
5428 ps_blk_16x16->intra_mv_area = 0;
5429 }
5430 #else
5431 hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5432 #endif
5433 }
5434
5435 for(i = 0; i < 4; i++)
5436 {
5437 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5438
5439 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440 if(e_quality_preset < ME_HIGH_QUALITY)
5441 {
5442 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5443 }
5444 else
5445 {
5446 ps_blk_32x32->best_inter_cost = 0;
5447 ps_blk_32x32->intra_mv_area = 0;
5448 }
5449 #else
5450 hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5451 #endif
5452 }
5453
5454 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5455 if(e_quality_preset < ME_HIGH_QUALITY)
5456 {
5457 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5458 }
5459 else
5460 {
5461 ps_blk_64x64->best_inter_cost = 0;
5462 ps_blk_64x64->intra_mv_area = 0;
5463 }
5464 #else
5465 hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5466 #endif
5467
5468 /* Initialise data for all nodes in the CU tree */
5469 hme_build_cu_tree(
5470 ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5471
5472 if(e_quality_preset >= ME_HIGH_QUALITY)
5473 {
5474 memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5475 }
5476
5477 #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5478 return;
5479 #endif
5480
5481 for(i = 0; i < 16; i++)
5482 {
5483 S32 blk_8x8_mask;
5484 S32 is_16x16_blk_valid;
5485 S32 num_clusters_updated;
5486 S32 num_clusters;
5487
5488 blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5489
5490 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5491
5492 is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5493
5494 if(is_16x16_blk_valid)
5495 {
5496 /* Use 8x8 data when 16x16 CU is split */
5497 if(ps_search_results[i].u1_split_flag)
5498 {
5499 S32 blk_8x8_idx = i << 2;
5500
5501 num_parts = 4;
5502 e_part_type = PRT_NxN;
5503
5504 for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5505 {
5506 /* Only 2Nx2N partition supported for 8x8 block */
5507 ASSERT(
5508 ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5509 ((PART_TYPE_T)PRT_2Nx2N));
5510
5511 aps_part_result[j] =
5512 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5513 aps_inferior_parts[j] =
5514 &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5515 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5516 }
5517 }
5518 else
5519 {
5520 ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5521
5522 e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5523 num_parts = gau1_num_parts_in_part_type[e_part_type];
5524
5525 for(j = 0; j < num_parts; j++)
5526 {
5527 aps_part_result[j] = &ps_best_result->as_pu_results[j];
5528 aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5529 ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5530 }
5531
5532 ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5533 }
5534
5535 for(j = 0; j < num_parts; j++)
5536 {
5537 pu_result_t *ps_part_result = aps_part_result[j];
5538
5539 S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
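/* A bi-predicted PU (ai4_pred_mode[j] == 2) contributes two MVs; uni-predicted PUs (L0 or L1 only) contribute one */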
5540
5541 e_part_id = ge_part_type_to_part_id[e_part_type][j];
5542
5543 /* Skip clustering if best mode is intra */
5544 if((ps_part_result->pu.b1_intra_flag))
5545 {
5546 ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5547 ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5548 continue;
5549 }
5550 else
5551 {
5552 ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5553 }
5554
5555 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5556 if(e_quality_preset >= ME_HIGH_QUALITY)
5557 {
5558 continue;
5559 }
5560 #endif
5561
5562 for(k = 0; k < num_mvs; k++)
5563 {
5564 mv_t *ps_mv;
5565
5566 pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5567
5568 S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
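/* The L0 MV is taken for the first MV of a BI PU (k == 0) and for an L0-only PU; otherwise the L1 MV is used below */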
5569
5570 ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5571
5572 mvx = ps_mv->i2_mvx;
5573 mvy = ps_mv->i2_mvy;
5574
5575 ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5576 : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5577
5578 num_clusters = ps_blk_16x16->num_clusters;
5579
5580 hme_find_and_update_clusters(
5581 ps_blk_16x16->as_cluster_data,
5582 &(ps_blk_16x16->num_clusters),
5583 mvx,
5584 mvy,
5585 ref_idx,
5586 ps_part_result->i4_sdi,
5587 e_part_id,
5588 (ai4_pred_mode[j] == 2));
5589
5590 num_clusters_updated = (ps_blk_16x16->num_clusters);
5591
5592 ps_blk_16x16->au1_num_clusters[ref_idx] +=
5593 (num_clusters_updated - num_clusters);
5594 }
5595 }
5596 }
5597 }
5598
5599 /* Search for 32x32 clusters */
5600 for(i = 0; i < 4; i++)
5601 {
5602 S32 num_clusters_merged;
5603
5604 S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5605
5606 if(is_32x32_blk_valid)
5607 {
5608 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5609 ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5610
5611 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5612 if(e_quality_preset >= ME_HIGH_QUALITY)
5613 {
5614 for(j = 0; j < 4; j++, ps_blk_16x16++)
5615 {
5616 ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5617
5618 ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5619 }
5620 continue;
5621 }
5622 #endif
5623
5624 hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5625
5626 if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5627 {
5628 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5629 ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5630
5631 if(num_clusters_merged)
5632 {
5633 ps_blk_32x32->num_clusters -= num_clusters_merged;
5634
5635 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5636 }
5637 }
5638 }
5639 }
5640
5641 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5642 /* Eliminate outlier 32x32 clusters */
5643 if(e_quality_preset < ME_HIGH_QUALITY)
5644 #endif
5645 {
5646 hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5647
5648 /* Find best_uni_ref and best_alt_ref */
5649 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5650 }
5651
5652 /* Populate the CU tree for depths 1 and higher */
5653 {
5654 cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5655 cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5656 cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5657 cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5658 cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5659
5660 hme_populate_cu_tree(
5661 ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5662
5663 num_32x32_merges += (ps_tl->is_node_valid == 1);
5664
5665 hme_populate_cu_tree(
5666 ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5667
5668 num_32x32_merges += (ps_tr->is_node_valid == 1);
5669
5670 hme_populate_cu_tree(
5671 ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5672
5673 num_32x32_merges += (ps_bl->is_node_valid == 1);
5674
5675 hme_populate_cu_tree(
5676 ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5677
5678 num_32x32_merges += (ps_br->is_node_valid == 1);
5679 }
5680
5681 #if !ENABLE_4CTB_EVALUATION
5682 if(e_quality_preset < ME_HIGH_QUALITY)
5683 {
5684 enable_64x64_merge = (num_32x32_merges >= 3);
5685 }
5686 #else
5687 if(e_quality_preset < ME_HIGH_QUALITY)
5688 {
5689 enable_64x64_merge = 1;
5690 }
5691 #endif
5692
5693 #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5694 if(e_quality_preset >= ME_HIGH_QUALITY)
5695 {
5696 enable_64x64_merge = 1;
5697 }
5698 #else
5699 if(e_quality_preset >= ME_HIGH_QUALITY)
5700 {
5701 enable_64x64_merge = (num_32x32_merges >= 3);
5702 }
5703 #endif
5704
5705 if(enable_64x64_merge)
5706 {
5707 S32 num_clusters_merged;
5708
5709 ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5710
5711 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5712 if(e_quality_preset >= ME_HIGH_QUALITY)
5713 {
5714 for(j = 0; j < 4; j++, ps_blk_32x32++)
5715 {
5716 ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5717
5718 ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5719 }
5720 }
5721 else
5722 #endif
5723 {
5724 hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5725
5726 if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5727 {
5728 num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5729 ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5730
5731 if(num_clusters_merged)
5732 {
5733 ps_blk_64x64->num_clusters -= num_clusters_merged;
5734
5735 UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5736 }
5737 }
5738 }
5739
5740 #if !ENABLE_4CTB_EVALUATION
5741 if(e_quality_preset < ME_HIGH_QUALITY)
5742 {
5743 S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5744 S32 best_intra_cost =
5745 ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5746 ps_ctb_cluster_info->i4_frame_qstep *
5747 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5748 ? MAX_32BIT_VAL
5749 : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5750 ps_ctb_cluster_info->i4_frame_qstep *
5751 ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5752 S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5753 S32 cost_differential = (best_inter_cost - best_cost);
5754
5755 enable_64x64_merge =
5756 ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
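/* i.e. the 64x64 merge is retained only when the all-inter cost exceeds the best of the inter/intra costs by no more than ALL_INTER_COST_DIFF_THR percent */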
5757 }
5758 #endif
5759 }
5760
5761 if(enable_64x64_merge)
5762 {
5763 #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5764 if(e_quality_preset < ME_HIGH_QUALITY)
5765 #endif
5766 {
5767 hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5768
5769 hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5770 }
5771
5772 hme_populate_cu_tree(
5773 ps_ctb_cluster_info,
5774 ps_ctb_cluster_info->ps_cu_tree_root,
5775 0,
5776 e_quality_preset,
5777 POS_NA,
5778 POS_NA,
5779 POS_NA);
5780 }
5781 }
5782 #endif
5783
5784 static __inline void hme_merge_prms_init(
5785 hme_merge_prms_t *ps_prms,
5786 layer_ctxt_t *ps_curr_layer,
5787 refine_prms_t *ps_refine_prms,
5788 me_frm_ctxt_t *ps_me_ctxt,
5789 range_prms_t *ps_range_prms_rec,
5790 range_prms_t *ps_range_prms_inp,
5791 mv_grid_t **pps_mv_grid,
5792 inter_ctb_prms_t *ps_inter_ctb_prms,
5793 S32 i4_num_pred_dir,
5794 S32 i4_32x32_id,
5795 BLK_SIZE_T e_blk_size,
5796 ME_QUALITY_PRESETS_T e_me_quality_presets)
5797 {
5798 S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
5799 S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0;
5800
5801 /* Currently not enabling segmentation info from prev layers */
5802 ps_prms->i4_seg_info_avail = 0;
5803 ps_prms->i4_part_mask = 0;
5804
5805 /* Number of reference pics in which to do merge */
5806 ps_prms->i4_num_ref = i4_num_pred_dir;
5807
5808 /* Layer ctxt info */
5809 ps_prms->ps_layer_ctxt = ps_curr_layer;
5810
5811 ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
5812
5813 /* Top left, top right, bottom left and bottom right 16x16 units */
5814 if(BLK_32x32 == e_blk_size)
5815 {
5816 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16];
5817 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1];
5818 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2];
5819 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3];
5820
5821 /* Merge results stored here */
5822 ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
5823
5824 /* This could be less than the number of 16x16 results generated */
5825 /* For now, keeping it the same */
5826 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
5827 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
5828 ps_prms->ps_results_grandchild = NULL;
5829 }
5830 else
5831 {
5832 ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
5833 ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
5834 ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
5835 ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
5836
5837 /* Merge results stored here */
5838 ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
5839
5840 ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
5841 ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
5842 ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
5843 }
5844
5845 if(i4_use_rec)
5846 {
5847 WORD32 ref_ctr;
5848
5849 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5850 {
5851 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr];
5852 }
5853 }
5854 else
5855 {
5856 WORD32 ref_ctr;
5857
5858 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5859 {
5860 ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr];
5861 }
5862 }
5863 ps_prms->i4_use_rec = i4_use_rec;
5864
5865 ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
5866
5867 ps_prms->pps_mv_grid = pps_mv_grid;
5868
5869 ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
5870
5871 ps_prms->e_quality_preset = e_me_quality_presets;
5872 ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
5873 ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
5874 ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
5875 }
5876
5877 /**
5878 ********************************************************************************
5879 * @fn void hme_refine(me_ctxt_t *ps_thrd_ctxt,
5880 * refine_prms_t *ps_refine_prms, ...)
5881 *
5882 * @brief Top level entry point for refinement ME
5883 *
5884 * @param[in,out] ps_thrd_ctxt: ME thread context handle
5885 *
5886 * @param[in] ps_refine_prms : refinement layer prms
5887 *
5888 * @return None
5889 ********************************************************************************
5890 */
5891 void hme_refine(
5892 me_ctxt_t *ps_thrd_ctxt,
5893 refine_prms_t *ps_refine_prms,
5894 PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5895 layer_ctxt_t *ps_coarse_layer,
5896 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5897 S32 lyr_job_type,
5898 S32 thrd_id,
5899 S32 me_frm_id,
5900 pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5901 {
5902 inter_ctb_prms_t s_common_frm_prms;
5903
5904 BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5905 WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5906 me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5907 ME_QUALITY_PRESETS_T e_me_quality_presets =
5908 ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5909
5910 WORD32 num_rows_proc = 0;
5911 WORD32 num_act_ref_pics;
5912 WORD16 i2_prev_enc_frm_max_mv_y;
5913 WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5914
5915 /*************************************************************************/
5916 /* Complexity of search: Low to High */
5917 /*************************************************************************/
5918 SEARCH_COMPLEXITY_T e_search_complexity;
5919
5920 /*************************************************************************/
5921 /* to store the PU results which are passed to decide_part_types */
5922 /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5923 /*************************************************************************/
5924
5925 pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5926 inter_pu_results_t as_inter_pu_results[4];
5927 inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5928
5929 /*************************************************************************/
5930 /* Config parameter structures for various ME submodules */
5931 /*************************************************************************/
5932 hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5933 hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5934 hme_merge_prms_t s_merge_prms_64x64;
5935 hme_search_prms_t s_search_prms_blk;
5936 mvbank_update_prms_t s_mv_update_prms;
5937 hme_ctb_prms_t s_ctb_prms;
5938 hme_subpel_prms_t s_subpel_prms;
5939 fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5940 ctb_cluster_info_t *ps_ctb_cluster_info;
5941 fpel_srch_cand_init_data_t s_srch_cand_init_data;
5942
5943 /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5944 S32 en_merge_32x32;
5945 /* The 5 LSBs specify whether or not the merge algorithm needs */
5946 /* to be executed. Relevant only in PQ. Ought to be */
5947 /* used in conjunction with en_merge_32x32 and */
5948 /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5949 /* required when all children are deemed to be intras */
5950 S32 en_merge_execution;
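/* Illustrative sketch (comment only, not compiled), assuming the same TL/TR/BL/BR */
/* ordering used for the 32x32 merge prms initialised further below: bit i of      */
/* en_merge_32x32 flags whether the i-th 32x32 unit of the CTB may be formed by    */
/* merging its four 16x16 children, e.g.                                           */
/*     if(en_merge_32x32 & (1 << 1))                                               */
/*     {                                                                           */
/*         (attempt the 16x16 -> 32x32 merge for the top-right 32x32 unit)         */
/*     }                                                                           */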
5951
5952 /*************************************************************************/
5953 /* All types of search candidates for predictor based search. */
5954 /*************************************************************************/
5955 S32 num_init_candts = 0;
5956 S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5957 S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5958 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5959 search_node_t as_top_neighbours[4], as_left_neighbours[3];
5960
5961 pf_get_wt_inp fp_get_wt_inp;
5962
5963 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5964 U32 au4_unique_node_map[MAP_X_MAX * 2];
5965
5966 /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5967 ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5968
5969 /*************************************************************************/
5970 /* points to the search results for the blk level search (8x8/16x16) */
5971 /*************************************************************************/
5972 search_results_t *ps_search_results;
5973
5974 /*************************************************************************/
5975 /* Coordinates */
5976 /*************************************************************************/
5977 S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5978 S32 pos_x, pos_y;
5979 S32 blk_id_in_full_ctb;
5980
5981 /*************************************************************************/
5982 /* Related to dimensions of block being searched and pic dimensions */
5983 /*************************************************************************/
5984 S32 blk_4x4_to_16x16;
5985 S32 blk_wd, blk_ht, blk_size_shift;
5986 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5987 S32 num_results_prev_layer;
5988
5989 /*************************************************************************/
5990 /* Size of a basic unit for this layer. For non encode layers, we search */
5991 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5992 /* basic unit size is the ctb size. */
5993 /*************************************************************************/
5994 S32 unit_size;
5995
5996 /*************************************************************************/
5997 /* Local variable storing results of any 4 CU merge to bigger CU */
5998 /*************************************************************************/
5999 CU_MERGE_RESULT_T e_merge_result;
6000
6001 /*************************************************************************/
6002 /* This mv grid stores results during and after fpel search, during */
6003 /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
6004 /* meant for the 2 directions of search (l0 and l1). */
6005 /*************************************************************************/
6006 mv_grid_t *aps_mv_grid[2];
6007
6008 /*************************************************************************/
6009 /* Pointers to context in current and coarser layers */
6010 /*************************************************************************/
6011 layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
6012
6013 /*************************************************************************/
6014 /* to store mv range per blk, and picture limit, allowed search range */
6015 /* range prms in hpel and qpel units as well */
6016 /*************************************************************************/
6017 range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6018 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6019 range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6020
6021 /*************************************************************************/
6022 /* These variables are used to track number of references at different */
6023 /* stages of ME. */
6024 /*************************************************************************/
6025 S32 i4_num_pred_dir;
6026 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6027 S32 lambda_recon = ps_refine_prms->lambda_recon;
6028
6029 /* Counts successful merge to 32x32 every CTB (0-4) */
6030 S32 merge_count_32x32;
6031
6032 S32 ai4_id_coloc[14], ai4_id_Z[2];
6033 U08 au1_search_candidate_list_index[2];
6034 S32 ai4_num_coloc_cands[2];
6035 U08 u1_pred_dir, u1_pred_dir_ctr;
6036
6037 /*************************************************************************/
6038 /* Input pointer and stride */
6039 /*************************************************************************/
6040 U08 *pu1_inp;
6041 S32 i4_inp_stride;
6042 S32 end_of_frame;
6043 S32 num_sync_units_in_row, num_sync_units_in_tile;
6044
6045 /*************************************************************************/
6046 /* Indicates whether all 4 8x8 blks are valid in the 16x16 blk in the */
6047 /* encode layer. If the mask is not 0xF, 1 or more 8x8 blks are not valid, */
6048 /* which means we need to stop merges and force 8x8 CUs for that 16x16 blk */
6049 /*************************************************************************/
6050 S32 blk_8x8_mask;
6051 S32 ai4_blk_8x8_mask[16];
6052 U08 au1_is_64x64Blk_noisy[1];
6053 U08 au1_is_32x32Blk_noisy[4];
6054 U08 au1_is_16x16Blk_noisy[16];
6055
6056 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6057 ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6058 ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6059 ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6060
6061 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6062
6063 /*************************************************************************/
6064 /* Pointers to current and coarse layer are needed for projection */
6065 /* Pointer to prev layer are needed for other candts like coloc */
6066 /*************************************************************************/
6067 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6068
6069 ps_prev_layer = hme_get_past_layer_ctxt(
6070 ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6071
6072 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6073
6074 /* Function pointer is selected based on the C vs X86 macro */
6075
6076 fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6077
6078 i4_inp_stride = ps_curr_layer->i4_inp_stride;
6079 i4_pic_wd = ps_curr_layer->i4_wd;
6080 i4_pic_ht = ps_curr_layer->i4_ht;
6081 e_search_complexity = ps_refine_prms->e_search_complexity;
6082 end_of_frame = 0;
6083
6084 /* This points to all the initial candts */
6085 ps_search_candts = &as_search_candts[0];
6086
6087 /* The mv grid, being a huge structure, is part of the context */
6088 aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6089 aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6090
6091 /*************************************************************************/
6092 /* If the current layer is encoded (since it may be multicast or final */
6093 /* layer (finest)), then we use 16x16 blk size with some selected parts */
6094 /* If the current layer is not encoded, then we use 8x8 blk size, with */
6095 /* 4x4 partitions enabled or disabled depending on the input prms */
6096 /*************************************************************************/
6097 e_search_blk_size = BLK_16x16;
6098 blk_wd = blk_ht = 16;
6099 blk_size_shift = 4;
6100 e_result_blk_size = BLK_8x8;
6101 s_mv_update_prms.i4_shift = 1;
6102
6103 if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6104 {
6105 blk_4x4_to_16x16 = 1;
6106 }
6107 else
6108 {
6109 blk_4x4_to_16x16 = 0;
6110 }
6111
6112 unit_size = 1 << ps_ctxt->log_ctb_size;
6113 s_search_prms_blk.i4_inp_stride = unit_size;
6114
6115 /* This is required to properly update the layer mv bank */
6116 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6117 s_search_prms_blk.e_blk_size = e_search_blk_size;
6118
6119 /*************************************************************************/
6120 /* If current layer is explicit, then the number of ref frames are to */
6121 /* be same as previous layer. Else it will be 2 */
6122 /*************************************************************************/
6123 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6124 i4_num_pred_dir =
6125 (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6126 1;
6127
6128 #if USE_MODIFIED == 1
6129 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6130 #else
6131 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6132 #endif
6133
6134 i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6135 if(i4_num_ref_prev_layer <= 2)
6136 {
6137 i4_num_ref_each_dir = 1;
6138 }
6139 else
6140 {
6141 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6142 }
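/* Worked example (derived from the expressions above): with bidir enabled, at */
/* least one active reference in each list and i4_num_ref_prev_layer = 4,      */
/* i4_num_pred_dir = MIN(2, 4) = 2 and i4_num_ref_each_dir = 4 >> 1 = 2.       */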
6143
6144 s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6145 s_mv_update_prms.i4_num_results_to_store =
6146 MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6147 : (i4_num_act_ref_l0 > 1) + 1,
6148 ps_refine_prms->i4_num_results_per_part);
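/* Worked example (derived from the MIN() above): for a P picture (bidir       */
/* disabled) with 2 active L0 references and i4_num_results_per_part >= 2,     */
/* i4_num_results_to_store = MIN((i4_num_act_ref_l0 > 1) + 1, ...) = 2.        */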
6149
6150 /*************************************************************************/
6151 /* Initialization of merge params for 16x16 to 32x32 merge. */
6152 /* There are 4 32x32 units in a CTB, so 4 param structures initialized */
6153 /*************************************************************************/
6154 {
6155 hme_merge_prms_t *aps_merge_prms[4];
6156 aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6157 aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6158 aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6159 aps_merge_prms[3] = &s_merge_prms_32x32_br;
6160 for(i = 0; i < 4; i++)
6161 {
6162 hme_merge_prms_init(
6163 aps_merge_prms[i],
6164 ps_curr_layer,
6165 ps_refine_prms,
6166 ps_ctxt,
6167 as_range_prms_rec,
6168 as_range_prms_inp,
6169 &aps_mv_grid[0],
6170 &s_common_frm_prms,
6171 i4_num_pred_dir,
6172 i,
6173 BLK_32x32,
6174 e_me_quality_presets);
6175 }
6176 }
6177
6178 /*************************************************************************/
6179 /* Initialization of merge params for 32x32 to 64x64 merge. */
6180 /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */
6181 /*************************************************************************/
6182 {
6183 hme_merge_prms_init(
6184 &s_merge_prms_64x64,
6185 ps_curr_layer,
6186 ps_refine_prms,
6187 ps_ctxt,
6188 as_range_prms_rec,
6189 as_range_prms_inp,
6190 &aps_mv_grid[0],
6191 &s_common_frm_prms,
6192 i4_num_pred_dir,
6193 0,
6194 BLK_64x64,
6195 e_me_quality_presets);
6196 }
6197
6198 /* Pointers to cu_results are initialised here */
6199 {
6200 WORD32 i;
6201
6202 ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6203
6204 for(i = 0; i < 4; i++)
6205 {
6206 ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6207 }
6208
6209 for(i = 0; i < 16; i++)
6210 {
6211 ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6212 }
6213 }
6214
6215 /*************************************************************************/
6216 /* SUBPEL Params initialized here */
6217 /*************************************************************************/
6218 {
6219 s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6220 s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6221 s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6222
6223 s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6224 s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6225 s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6226
6227 s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6228 s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6229
6230 s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6231
6232 s_subpel_prms.i4_inp_stride = unit_size;
6233
6234 s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6235 s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6236 s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6237
6238 s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6239
6240 {
6241 WORD32 ref_ctr;
6242 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6243 {
6244 s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6245 s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6246 }
6247 }
6248 s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6249
6250 #if USE_MODIFIED == 0
6251 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6252 #else
6253 s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6254 #endif
6255 s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6256
6257 /* BI Refinement done only if this field is 1 */
6258 s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6259
6260 s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6261
6262 s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6263 s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6264 s_subpel_prms.u1_max_num_subpel_refine_centers =
6265 ps_refine_prms->u1_max_num_subpel_refine_centers;
6266 }
6267
6268 /* inter_ctb_prms_t struct initialisation */
6269 {
6270 inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6271 hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6272
6273 ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6274 ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6275 ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6276 ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6277 ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6278 ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6279 ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6280 ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6281 ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6282 ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6283 ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6284 ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6285 ps_inter_ctb_prms->i4_lamda = lambda_recon;
6286 ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6287 ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6288 ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6289 ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6290 ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6291 ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6292 ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6293 ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6294 }
6295
6296 for(i = 0; i < MAX_INIT_CANDTS; i++)
6297 {
6298 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6299 ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6300
6301 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6302 }
6303 num_act_ref_pics =
6304 ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6305
6306 if(num_act_ref_pics)
6307 {
6308 hme_search_cand_data_init(
6309 ai4_id_Z,
6310 ai4_id_coloc,
6311 ai4_num_coloc_cands,
6312 au1_search_candidate_list_index,
6313 i4_num_act_ref_l0,
6314 i4_num_act_ref_l1,
6315 ps_ctxt->s_frm_prms.bidir_enabled,
6316 blk_4x4_to_16x16);
6317 }
6318
6319 if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6320 {
6321 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6322 ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6323 }
6324 else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6325 {
6326 ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6327 }
6328
6329 for(i = 0; i < 3; i++)
6330 {
6331 search_node_t *ps_search_node;
6332 ps_search_node = &as_left_neighbours[i];
6333 INIT_SEARCH_NODE(ps_search_node, 0);
6334 ps_search_node = &as_top_neighbours[i];
6335 INIT_SEARCH_NODE(ps_search_node, 0);
6336 }
6337
6338 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6339 as_left_neighbours[2].u1_is_avail = 0;
6340
6341 /*************************************************************************/
6342 /* Initialize all the search results structure here. We update all the */
6343 /* search results to default values, and configure things like blk sizes */
6344 /*************************************************************************/
6345 if(num_act_ref_pics)
6346 {
6347 S32 i4_x, i4_y;
6348 /* 16x16 results */
6349 for(i = 0; i < 16; i++)
6350 {
6351 search_results_t *ps_search_results;
6352 S32 pred_lx;
6353 ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6354 i4_x = (S32)gau1_encode_to_raster_x[i];
6355 i4_y = (S32)gau1_encode_to_raster_y[i];
6356 i4_x <<= 4;
6357 i4_y <<= 4;
6358
6359 hme_init_search_results(
6360 ps_search_results,
6361 i4_num_pred_dir,
6362 ps_refine_prms->i4_num_fpel_results,
6363 ps_refine_prms->i4_num_results_per_part,
6364 e_search_blk_size,
6365 i4_x,
6366 i4_y,
6367 &ps_ctxt->au1_is_past[0]);
6368
6369 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6370 {
6371 pred_ctxt_t *ps_pred_ctxt;
6372
6373 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6374
6375 hme_init_pred_ctxt_encode(
6376 ps_pred_ctxt,
6377 ps_search_results,
6378 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6379 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6380 aps_mv_grid[pred_lx],
6381 pred_lx,
6382 lambda_recon,
6383 ps_refine_prms->lambda_q_shift,
6384 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6385 &ps_ctxt->ai2_ref_scf[0]);
6386 }
6387 }
6388
6389 for(i = 0; i < 4; i++)
6390 {
6391 search_results_t *ps_search_results;
6392 S32 pred_lx;
6393 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6394
6395 i4_x = (S32)gau1_encode_to_raster_x[i];
6396 i4_y = (S32)gau1_encode_to_raster_y[i];
6397 i4_x <<= 5;
6398 i4_y <<= 5;
6399
6400 hme_init_search_results(
6401 ps_search_results,
6402 i4_num_pred_dir,
6403 ps_refine_prms->i4_num_32x32_merge_results,
6404 ps_refine_prms->i4_num_results_per_part,
6405 BLK_32x32,
6406 i4_x,
6407 i4_y,
6408 &ps_ctxt->au1_is_past[0]);
6409
6410 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6411 {
6412 pred_ctxt_t *ps_pred_ctxt;
6413
6414 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6415
6416 hme_init_pred_ctxt_encode(
6417 ps_pred_ctxt,
6418 ps_search_results,
6419 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6420 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6421 aps_mv_grid[pred_lx],
6422 pred_lx,
6423 lambda_recon,
6424 ps_refine_prms->lambda_q_shift,
6425 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6426 &ps_ctxt->ai2_ref_scf[0]);
6427 }
6428 }
6429
6430 {
6431 search_results_t *ps_search_results;
6432 S32 pred_lx;
6433 ps_search_results = &ps_ctxt->s_search_results_64x64;
6434
6435 hme_init_search_results(
6436 ps_search_results,
6437 i4_num_pred_dir,
6438 ps_refine_prms->i4_num_64x64_merge_results,
6439 ps_refine_prms->i4_num_results_per_part,
6440 BLK_64x64,
6441 0,
6442 0,
6443 &ps_ctxt->au1_is_past[0]);
6444
6445 for(pred_lx = 0; pred_lx < 2; pred_lx++)
6446 {
6447 pred_ctxt_t *ps_pred_ctxt;
6448
6449 ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6450
6451 hme_init_pred_ctxt_encode(
6452 ps_pred_ctxt,
6453 ps_search_results,
6454 ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6455 ps_search_candts[ai4_id_Z[0]].ps_search_node,
6456 aps_mv_grid[pred_lx],
6457 pred_lx,
6458 lambda_recon,
6459 ps_refine_prms->lambda_q_shift,
6460 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6461 &ps_ctxt->ai2_ref_scf[0]);
6462 }
6463 }
6464 }
6465
6466 /* Initialise the structure used in clustering */
6467 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6468 {
6469 ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6470
6471 ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6472 ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6473 ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6474 ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6475 ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6476 ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6477 ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6478 }
6479
6480 /*********************************************************************/
6481 /* Initialize the dyn. search range params. for each reference index */
6482 /* in current layer ctxt */
6483 /*********************************************************************/
6484
6485 /* Only for P pic. For P, both flags are 0; I and B have them mutually exclusive */
6486 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6487 {
6488 WORD32 ref_ctr;
6489 /* set no. of act ref in L0 for further use at frame level */
6490 ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6491 ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6492
6493 for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6494 {
6495 INIT_DYN_SEARCH_PRMS(
6496 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6497 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6498 }
6499 }
6500 /*************************************************************************/
6501 /* Now that the candidates have been ordered, choose the right number */
6502 /* of initial candidates. */
6503 /*************************************************************************/
6504 if(blk_4x4_to_16x16)
6505 {
6506 if(i4_num_ref_prev_layer > 2)
6507 {
6508 if(e_search_complexity == SEARCH_CX_LOW)
6509 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510 else if(e_search_complexity == SEARCH_CX_MED)
6511 num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6512 else if(e_search_complexity == SEARCH_CX_HIGH)
6513 num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6514 else
6515 ASSERT(0);
6516 }
6517 else if(i4_num_ref_prev_layer == 2)
6518 {
6519 if(e_search_complexity == SEARCH_CX_LOW)
6520 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6521 else if(e_search_complexity == SEARCH_CX_MED)
6522 num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6523 else if(e_search_complexity == SEARCH_CX_HIGH)
6524 num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6525 else
6526 ASSERT(0);
6527 }
6528 else
6529 {
6530 if(e_search_complexity == SEARCH_CX_LOW)
6531 num_init_candts = 5;
6532 else if(e_search_complexity == SEARCH_CX_MED)
6533 num_init_candts = 12;
6534 else if(e_search_complexity == SEARCH_CX_HIGH)
6535 num_init_candts = 19;
6536 else
6537 ASSERT(0);
6538 }
6539 }
6540 else
6541 {
6542 if(i4_num_ref_prev_layer > 2)
6543 {
6544 if(e_search_complexity == SEARCH_CX_LOW)
6545 num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546 else if(e_search_complexity == SEARCH_CX_MED)
6547 num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6548 else if(e_search_complexity == SEARCH_CX_HIGH)
6549 num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6550 else
6551 ASSERT(0);
6552 }
6553 else if(i4_num_ref_prev_layer == 2)
6554 {
6555 if(e_search_complexity == SEARCH_CX_LOW)
6556 num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6557 else if(e_search_complexity == SEARCH_CX_MED)
6558 num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6559 else if(e_search_complexity == SEARCH_CX_HIGH)
6560 num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6561 else
6562 ASSERT(0);
6563 }
6564 else
6565 {
6566 if(e_search_complexity == SEARCH_CX_LOW)
6567 num_init_candts = 5;
6568 else if(e_search_complexity == SEARCH_CX_MED)
6569 num_init_candts = 11;
6570 else if(e_search_complexity == SEARCH_CX_HIGH)
6571 num_init_candts = 16;
6572 else
6573 ASSERT(0);
6574 }
6575 }
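/* Summary of the ladder above (counts are doubled via the (!bidir_enabled + 1) */
/* factor when bidir is disabled, except in the single-reference branches):     */
/*                                                                              */
/*   blk_4x4_to_16x16 | i4_num_ref_prev_layer | LOW | MED | HIGH                */
/*          1         |          > 2          |  7  | 14  |  21                 */
/*          1         |         == 2          |  5  | 12  |  19                 */
/*          1         |          < 2          |  5  | 12  |  19                 */
/*          0         |          > 2          |  7  | 13  |  18                 */
/*          0         |         == 2          |  5  | 11  |  16                 */
/*          0         |          < 2          |  5  | 11  |  16                 */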
6576
6577 /*************************************************************************/
6578 /* The following search parameters are fixed throughout the search across*/
6579 /* all blks. So these are configured outside processing loop */
6580 /*************************************************************************/
6581 s_search_prms_blk.i4_num_init_candts = num_init_candts;
6582 s_search_prms_blk.i4_start_step = 1;
6583 s_search_prms_blk.i4_use_satd = 0;
6584 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6585 /* we use recon only for encoded layers, otherwise it is not available */
6586 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6587
6588 s_search_prms_blk.ps_search_candts = ps_search_candts;
6589 if(s_search_prms_blk.i4_use_rec)
6590 {
6591 WORD32 ref_ctr;
6592 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6593 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6594 }
6595 else
6596 {
6597 WORD32 ref_ctr;
6598 for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6599 s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6600 }
6601
6602 /*************************************************************************/
6603 /* Initialize coordinates. Meaning as follows */
6604 /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
6605 /* blk_y : same as above, y coord. */
6606 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
6607 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
6608 /* i4_ctb_x, i4_ctb_y: coordinate of the ctb, in CTB units, relative to the */
6609 /* top left corner of the picture (shifted by log_ctb_size to get pixels). */
6610 /* blk_id_in_ctb : encode order id of the blk in the ctb. */
6611 /*************************************************************************/
6612 blk_y = 0;
6613 blk_id_in_ctb = 0;
6614 i4_ctb_y = 0;
6615
6616 /*************************************************************************/
6617 /* Picture limit on all 4 sides. This will be used to set mv limits for */
6618 /* every block given its coordinate. Note this assumes that the min amount */
6619 /* of padding to the right of the pic is equal to the blk size. If we go all */
6620 /* the way up to 64x64, then the min padding on the right side of the picture */
6621 /* should be 64, and also on the bottom side of the picture. */
6622 /*************************************************************************/
6623 SET_PIC_LIMIT(
6624 s_pic_limit_inp,
6625 ps_curr_layer->i4_pad_x_rec,
6626 ps_curr_layer->i4_pad_y_rec,
6627 ps_curr_layer->i4_wd,
6628 ps_curr_layer->i4_ht,
6629 s_search_prms_blk.i4_num_steps_post_refine);
6630
6631 SET_PIC_LIMIT(
6632 s_pic_limit_rec,
6633 ps_curr_layer->i4_pad_x_rec,
6634 ps_curr_layer->i4_pad_y_rec,
6635 ps_curr_layer->i4_wd,
6636 ps_curr_layer->i4_ht,
6637 s_search_prms_blk.i4_num_steps_post_refine);
6638
6639 /*************************************************************************/
6640 /* set the MV limit per ref. pic. */
6641 /* - P pic. : Based on the config params. */
6642 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6643 /*************************************************************************/
6644 hme_set_mv_limit_using_dvsr_data(
6645 ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6646 s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6647 s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6648 s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6649 s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6650 s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6651 s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6652 s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6653 s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6654 s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6655 s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6656 s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6657
6658 while(0 == end_of_frame)
6659 {
6660 job_queue_t *ps_job;
6661 frm_ctb_ctxt_t *ps_frm_ctb_prms;
6662 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6663
6664 WORD32 i4_max_mv_x_in_ctb;
6665 WORD32 i4_max_mv_y_in_ctb;
6666 void *pv_dep_mngr_encloop_dep_me;
6667 WORD32 offset_val, check_dep_pos, set_dep_pos;
6668 WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6669
6670 pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6671
6672 ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6673
6674 /* Get the current row from the job queue */
6675 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6676 ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6677
6678 /* If all rows are done, set the end of process flag to 1, */
6679 /* and the current row to -1 */
6680 if(NULL == ps_job)
6681 {
6682 blk_y = -1;
6683 i4_ctb_y = -1;
6684 tile_col_idx = -1;
6685 end_of_frame = 1;
6686
6687 continue;
6688 }
6689
6690 /* set the output dependency after picking up the row */
6691 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6692
6693 /* Obtain the current row's details from the job */
6694 {
6695 ihevce_tile_params_t *ps_col_tile_params;
6696
6697 i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6698 /* Obtain the current colum tile index from the job */
6699 tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6700
6701 /* in encode layer blocks are 16x16 and CTB is 64 x 64 */
6702 /* note: if ctb is 32x32 then this calc needs to be changed */
6703 num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6704 ps_ctxt->log_ctb_size;
6705
6706 /* The tile parameters for the col. idx. Use only the properties
6707 which are the same for all the bottom tiles, like width, start_x, etc.
6708 Don't use height, start_y, etc. */
6709 ps_col_tile_params =
6710 ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6711 /* in encode layer blocks are 16x16 and CTB is 64 x 64 */
6712 /* note: if ctb is 32x32 then this calc needs to be changed */
6713 num_sync_units_in_tile =
6714 (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6715 ps_ctxt->log_ctb_size;
6716
6717 i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6718 i4_ctb_x = i4_first_ctb_x;
6719
6720 if(!num_act_ref_pics)
6721 {
6722 for(i4_ctb_x = i4_first_ctb_x;
6723 i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6724 i4_ctb_x++)
6725 {
6726 S32 blk_i = 0, blk_j = 0;
6727 /* set the dependency for the corresponding row in enc loop */
6728 ihevce_dmgr_set_row_row_sync(
6729 pv_dep_mngr_encloop_dep_me,
6730 (i4_ctb_x + 1),
6731 i4_ctb_y,
6732 tile_col_idx /* Col Tile No. */);
6733 }
6734
6735 continue;
6736 }
6737
6738 /* increment the number of rows proc */
6739 num_rows_proc++;
6740
6741 /* Set Variables for Dep. Checking and Setting */
6742 set_dep_pos = i4_ctb_y + 1;
6743 if(i4_ctb_y > 0)
6744 {
6745 offset_val = 2;
6746 check_dep_pos = i4_ctb_y - 1;
6747 }
6748 else
6749 {
6750 /* First row should run without waiting */
6751 offset_val = -1;
6752 check_dep_pos = 0;
6753 }
6754
6755 /* row ctb out pointer */
6756 ps_ctxt->ps_ctb_analyse_curr_row =
6757 ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6758
6759 /* Row level CU Tree buffer */
6760 ps_ctxt->ps_cu_tree_curr_row =
6761 ps_ctxt->ps_cu_tree_base +
6762 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6763
6764 ps_ctxt->ps_me_ctb_data_curr_row =
6765 ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6766 }
6767
6768 /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6769 left_ctb_in_diff_tile = 1;
6770
6771 /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */
6772 /* the shift value will be passed on to the functions wherever inv_wt is used, so that inv_wt is appropriately shifted before being multiplied */
6773 {
6774 S32 i4_ref_id, i4_bits_req;
6775
6776 for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6777 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6778 i4_ref_id++)
6779 {
6780 GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6781
6782 if(i4_bits_req > 12)
6783 {
6784 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6785 }
6786 else
6787 {
6788 ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6789 }
6790 }
6791
6792 s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6793 }
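/* Worked example (derived from the loop above): if GETRANGE() reports that an  */
/* inverse weight needs 15 bits, ai4_shift_val for that reference becomes       */
/* 15 - 12 = 3, so downstream code can use the weight right-shifted by 3 (i.e.  */
/* within 12 bits) before multiplying it with the un-normalised source variance.*/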
6794
6795 /* if non-encode layer then i4_ctb_x will be same as blk_x */
6796 /* loop over all the units in a row */
6797 for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6798 i4_ctb_x++)
6799 {
6800 ihevce_ctb_noise_params *ps_ctb_noise_params =
6801 &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6802
6803 s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6804 s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6805
6806 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6807 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6808 /* Initialize ptr to current IPE CTB */
6809 ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6810 i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6811 {
6812 ps_ctb_bound_attrs =
6813 get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6814
6815 en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6816 num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6817 }
6818
6819 /* Block to initialise pointers to part_type_results_t */
6820 /* in each size-specific inter_cu_results_t */
6821 {
6822 WORD32 i;
6823
6824 for(i = 0; i < 64; i++)
6825 {
6826 ps_ctxt->as_cu8x8_results[i].ps_best_results =
6827 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828 .as_8x8_block_data[i]
6829 .as_best_results;
6830 ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6831 }
6832
6833 for(i = 0; i < 16; i++)
6834 {
6835 ps_ctxt->as_cu16x16_results[i].ps_best_results =
6836 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6837 ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6838 }
6839
6840 for(i = 0; i < 4; i++)
6841 {
6842 ps_ctxt->as_cu32x32_results[i].ps_best_results =
6843 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6844 .as_32x32_block_data[i]
6845 .as_best_results;
6846 ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6847 }
6848
6849 ps_ctxt->s_cu64x64_results.ps_best_results =
6850 ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6851 ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6852 }
6853
6854 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6855 {
6856 ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6857 ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6858 ps_ctb_cluster_info->ps_cu_tree_root =
6859 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6860 ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6861 }
6862
6863 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6864 {
6865 S32 i4_nodes_created_in_cu_tree = 1;
6866
6867 ihevce_cu_tree_init(
6868 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6869 (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6870 &i4_nodes_created_in_cu_tree,
6871 0,
6872 POS_NA,
6873 POS_NA,
6874 POS_NA);
6875 }
6876
6877 memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6878
6879 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6880 {
6881 S32 j;
6882
6883 ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6884
6885 ps_cur_ipe_ctb =
6886 ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6887 lambda_recon =
6888 hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6889
6890 lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6891
6892 for(i = 0; i < 4; i++)
6893 {
6894 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6895
6896 for(j = 0; j < 2; j++)
6897 {
6898 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6899 }
6900 }
6901 ps_search_results = &ps_ctxt->s_search_results_64x64;
6902
6903 for(j = 0; j < 2; j++)
6904 {
6905 ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6906 }
6907
6908 s_common_frm_prms.i4_lamda = lambda_recon;
6909 }
6910 else
6911 {
6912 lambda_recon = ps_refine_prms->lambda_recon;
6913 }
6914
6915 /*********************************************************************/
6916 /* replicate the inp buffer at blk or ctb level for each ref id, */
6917 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6918 /* thereby avoiding a bloat up of memory. If we did all references */
6919 /* weighted pred, we will end up with a duplicate copy of each ref */
6920 /* at each layer, since we need to preserve the original reference. */
6921 /* ToDo: Need to observe performance with this mechanism and compare */
6922 /* with case where ref is weighted. */
6923 /*********************************************************************/
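/* Worked example (restating the comment above): for a fixed reference k with  */
/* positive weight wk, the distortion of I/wk against ref(k) is, up to integer */
/* rounding, the distortion of I against wk * ref(k) scaled by 1/wk, so the    */
/* candidate ordering within that reference is unchanged while only the input  */
/* CTB needs a weighted copy per reference id.                                  */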
6924 fp_get_wt_inp(
6925 ps_curr_layer,
6926 &ps_ctxt->s_wt_pred,
6927 unit_size,
6928 s_common_frm_prms.i4_ctb_x_off,
6929 s_common_frm_prms.i4_ctb_y_off,
6930 unit_size,
6931 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6932 ps_ctxt->i4_wt_pred_enable_flag);
6933
6934 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6935 {
6936 #if TEMPORAL_NOISE_DETECT
6937 {
6938 WORD32 had_block_size = 16;
6939 WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6940 ? 64
6941 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6942 WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6943 ? 64
6944 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6945 WORD32 num_pred_dir = i4_num_pred_dir;
6946 WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6947 WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6948
6949 WORD32 i;
6950 WORD32 noise_detected;
6951 WORD32 ctb_size;
6952 WORD32 num_comp_had_blocks;
6953 WORD32 noisy_block_cnt;
6954 WORD32 index_8x8_block;
6955 WORD32 num_8x8_in_ctb_row;
6956
6957 WORD32 ht_offset;
6958 WORD32 wd_offset;
6959 WORD32 block_ht;
6960 WORD32 block_wd;
6961
6962 WORD32 num_horz_blocks;
6963 WORD32 num_vert_blocks;
6964
6965 WORD32 mean;
6966 UWORD32 variance_8x8;
6967
6968 WORD32 hh_energy_percent;
6969
6970                 /* variables to hold the constant thresholds; their values depend on the HAD block size */
6971 WORD32 min_noisy_block_cnt;
6972 WORD32 min_coeffs_above_avg;
6973 WORD32 min_coeff_avg_energy;
6974
6975                 /* the mean and variance of each 8x8 block are stored so that the variance of larger block sizes can be derived later on */
6976 WORD32 i4_cu_x_off, i4_cu_y_off;
6977 WORD32 is_noisy;
6978
6979                 /* initialise the variables holding the constants */
6980 if(had_block_size == 8)
6981 {
6982 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;//
6983 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6984 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6985 }
6986 else
6987 {
6988 min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;//
6989 min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6990 min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6991 }
6992
6993 /* initialize the variables */
6994 noise_detected = 0;
6995 noisy_block_cnt = 0;
6996 hh_energy_percent = 0;
6997 variance_8x8 = 0;
6998 block_ht = ctb_height;
6999 block_wd = ctb_width;
7000
7001 mean = 0;
7002
7003 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
7004 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
7005
7006 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
7007 num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size;
7008
7009 ht_offset = -had_block_size;
7010 wd_offset = -had_block_size;
7011
7012 num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb
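                /* Walk the 16x16 HAD blocks of the CTB in raster order and count */
                /* how many of the spatially noisy ones are also temporally noisy */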
7013 for(i = 0; i < num_comp_had_blocks; i++)
7014 {
7015 if(i % num_horz_blocks == 0)
7016 {
7017 wd_offset = -had_block_size;
7018 ht_offset += had_block_size;
7019 }
7020 wd_offset += had_block_size;
7021
7022 /* CU level offsets */
7023 i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16
7024 i4_cu_y_off = i4_y_off + (i / 4) * 16;
7025
7026 /* if 50 % or more of the CU is noisy then the return value is 1 */
7027 is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7028 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7029 (i % 4) * 16,
7030 (i / 4) * 16,
7031 16);
7032
7033                     /* the temporal noise detection call is made on the CU only if the CU is noisy */
7034 if(is_noisy)
7035 {
7036 index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7037 (i % num_horz_blocks) * 2;
7038 noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7039 16,
7040 ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7041 ? 64
7042 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7043 ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7044 ? 64
7045 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7046 ps_ctb_noise_params,
7047 &s_srch_cand_init_data,
7048 &s_search_prms_blk,
7049 ps_ctxt,
7050 num_pred_dir,
7051 i4_num_act_ref_l0,
7052 i4_num_act_ref_l1,
7053 i4_cu_x_off,
7054 i4_cu_y_off,
7055 &ps_ctxt->s_wt_pred,
7056 unit_size,
7057 index_8x8_block,
7058 num_horz_blocks,
7059                             /*num_8x8_in_ctb_row*/ 8, /* ToDo: pass the num_8x8_in_ctb_row variable instead of the hard-coded 8 */
7060 i);
7061 } /* if 16x16 is noisy */
7062 } /* loop over for all 16x16*/
7063
7064 if(noisy_block_cnt >= min_noisy_block_cnt)
7065 {
7066 noise_detected = 1;
7067 }
7068
7069 /* write back the noise presence detected for the current CTB to the structure */
7070 ps_ctb_noise_params->i4_noise_present = noise_detected;
7071 }
7072 #endif
7073
7074 #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7075 if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7076 ps_ctb_noise_params->i4_noise_present)
7077 {
7078 memset(
7079 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7080 1,
7081 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7082 }
7083 #endif
7084
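            /* Derive the 16x16, 32x32 and 64x64 noise flags from the 8x8 noise map */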
7085 for(i = 0; i < 16; i++)
7086 {
7087 au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7088 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7089 }
7090
7091 for(i = 0; i < 4; i++)
7092 {
7093 au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7094 ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7095 }
7096
7097 for(i = 0; i < 1; i++)
7098 {
7099 au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7100 ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7101 }
7102
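            /* Noise handling is not used for B pictures whose temporal layer id   */
            /* is at or below MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION, so   */
            /* clear the noise flags for such pictures                             */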
7103 if(ps_ctxt->s_frm_prms.bidir_enabled &&
7104 (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7105 MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7106 {
7107 ps_ctb_noise_params->i4_noise_present = 0;
7108 memset(
7109 ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7110 0,
7111 sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7112 }
7113
7114 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7115 for(i = 0; i < 4; i++)
7116 {
7117 S32 j;
7118 S32 lambda;
7119
7120 if(au1_is_32x32Blk_noisy[i])
7121 {
7122 lambda = lambda_recon;
7123 lambda =
7124 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7125
7126 ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7127
7128 for(j = 0; j < 2; j++)
7129 {
7130 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7131 }
7132 }
7133 }
7134
7135 {
7136 S32 j;
7137 S32 lambda;
7138
7139 if(au1_is_64x64Blk_noisy[0])
7140 {
7141 lambda = lambda_recon;
7142 lambda =
7143 ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7144
7145 ps_search_results = &ps_ctxt->s_search_results_64x64;
7146
7147 for(j = 0; j < 2; j++)
7148 {
7149 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7150 }
7151 }
7152 }
7153 #endif
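            /* Pre-compute the 4x4 source sigmaX and sigmaX-squared arrays for the */
            /* noisy regions: once for the whole 64x64 when it is noisy, otherwise */
            /* per noisy 32x32 or 16x16 sub-block                                  */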
7154 if(au1_is_64x64Blk_noisy[0])
7155 {
7156 U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7157 (s_common_frm_prms.i4_ctb_y_off *
7158 ps_curr_layer->i4_inp_stride));
7159
7160 hme_compute_sigmaX_and_sigmaXSquared(
7161 pu1_inp,
7162 ps_curr_layer->i4_inp_stride,
7163 ps_ctxt->au4_4x4_src_sigmaX,
7164 ps_ctxt->au4_4x4_src_sigmaXSquared,
7165 4,
7166 4,
7167 64,
7168 64,
7169 1,
7170 16);
7171 }
7172 else
7173 {
7174 for(i = 0; i < 4; i++)
7175 {
7176 if(au1_is_32x32Blk_noisy[i])
7177 {
7178 U08 *pu1_inp =
7179 ps_curr_layer->pu1_inp +
7180 (s_common_frm_prms.i4_ctb_x_off +
7181 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7182
7183 U08 u1_cu_size = 32;
7184 WORD32 i4_inp_buf_offset =
7185 (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7186 ((i % 2) * u1_cu_size));
7187
7188 U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7189 U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7190 S32 i4_sigma_arr_offset =
7191 (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7192 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7193
7194 hme_compute_sigmaX_and_sigmaXSquared(
7195 pu1_inp + i4_inp_buf_offset,
7196 ps_curr_layer->i4_inp_stride,
7197 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7198 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7199 4,
7200 4,
7201 32,
7202 32,
7203 1,
7204 16);
7205 }
7206 else
7207 {
7208 S32 j;
7209
7210 U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7211 U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7212 S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7213 (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7214 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7215
7216 for(j = 0; j < 4; j++)
7217 {
7218 U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7219 U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7220 S32 i4_16x16_blk_index_in_ctb =
7221 i4_16x16_blk_start_index_in_i_th_32x32_blk +
7222 ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7223 ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7224
7225 //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7226
7227 if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7228 {
7229 U08 *pu1_inp =
7230 ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7231 (s_common_frm_prms.i4_ctb_y_off *
7232 ps_curr_layer->i4_inp_stride));
7233
7234 U08 u1_cu_size = 16;
7235 WORD32 i4_inp_buf_offset =
7236 (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7237 ((i4_16x16_blk_index_in_ctb / 4) *
7238 (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7239
7240 U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7241 U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7242 S32 i4_sigma_arr_offset =
7243 (((i4_16x16_blk_index_in_ctb % 4) *
7244 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7245 ((i4_16x16_blk_index_in_ctb / 4) *
7246 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7247
7248 hme_compute_sigmaX_and_sigmaXSquared(
7249 pu1_inp + i4_inp_buf_offset,
7250 ps_curr_layer->i4_inp_stride,
7251 (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7252 (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7253 4,
7254 4,
7255 16,
7256 16,
7257 1,
7258 16);
7259 }
7260 }
7261 }
7262 }
7263 }
7264 }
7265 else
7266 {
7267 memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7268
7269 memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7270
7271 memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7272 }
7273
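        /* Process each block of this CTB: derive per-reference search ranges,    */
        /* build the candidate list, run full-pel and sub-pel refinement, and     */
        /* then decide the CU/PU partitioning for the block                       */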
7274 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7275 {
7276 S32 ref_ctr;
7277 U08 au1_pred_dir_searched[2];
7278 U08 u1_is_cu_noisy;
7279 ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7280
7281 {
7282 blk_x = (i4_ctb_x << 2) +
7283 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7284 blk_y = (i4_ctb_y << 2) +
7285 (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7286
7287 blk_id_in_full_ctb =
7288 ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7289 blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7290 ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7291 s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7292 s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7293 }
7294
7295 /* get the current input blk point */
7296 pos_x = blk_x << blk_size_shift;
7297 pos_y = blk_y << blk_size_shift;
7298 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7299
7300 /*********************************************************************/
7301             /* For every blk in the picture, the search range needs to be derived.*/
7302             /* Any blk can have any mv, but practical search constraints are      */
7303             /* imposed by the picture boundary and the amount of padding.         */
7304 /*********************************************************************/
7305 /* MV limit is different based on ref. PIC */
7306 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7307 {
7308 if(!s_search_prms_blk.i4_use_rec)
7309 {
7310 hme_derive_search_range(
7311 &as_range_prms_inp[ref_ctr],
7312 &s_pic_limit_inp,
7313 &as_mv_limit[ref_ctr],
7314 pos_x,
7315 pos_y,
7316 blk_wd,
7317 blk_ht);
7318 }
7319 else
7320 {
7321 hme_derive_search_range(
7322 &as_range_prms_rec[ref_ctr],
7323 &s_pic_limit_rec,
7324 &as_mv_limit[ref_ctr],
7325 pos_x,
7326 pos_y,
7327 blk_wd,
7328 blk_ht);
7329 }
7330 }
7331 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7332 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7333             /* Select a suitable search results structure from the context */
7334 {
7335 ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7336
7337 if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7338 {
7339 S32 i;
7340
7341 for(i = 0; i < 2; i++)
7342 {
7343 ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7344 }
7345 }
7346 }
7347
7348 u1_is_cu_noisy = au1_is_16x16Blk_noisy
7349 [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7350
7351 s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7352
7353 #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7354 if(u1_is_cu_noisy)
7355 {
7356 S32 j;
7357 S32 lambda;
7358
7359 lambda = lambda_recon;
7360 lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7361
7362 for(j = 0; j < 2; j++)
7363 {
7364 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7365 }
7366 }
7367 else
7368 {
7369 S32 j;
7370 S32 lambda;
7371
7372 lambda = lambda_recon;
7373
7374 for(j = 0; j < 2; j++)
7375 {
7376 ps_search_results->as_pred_ctxt[j].lambda = lambda;
7377 }
7378 }
7379 #endif
7380
7381 s_search_prms_blk.ps_search_results = ps_search_results;
7382
7383 s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7384 pu1_inp,
7385 i4_inp_stride,
7386 ps_refine_prms->limit_active_partitions,
7387 ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7388 ps_ctxt->u1_is_curFrame_a_refFrame,
7389 blk_8x8_mask,
7390 e_me_quality_presets);
7391
7392 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7393 {
7394 ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7395 s_search_prms_blk.i4_part_mask;
7396 }
7397
7398 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7399 {
7400 /* Setting u1_num_active_refs to 2 */
7401 /* for the sole purpose of the */
7402 /* function called below */
7403 ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7404
7405 hme_reset_search_results(
7406 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7407
7408 ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7409 }
7410
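            /* For the first block of the CTB, wait on the recon dependency manager */
            /* of each active reference so that the reference region addressed by   */
            /* the maximum MV is available before the search starts                  */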
7411 if(0 == blk_id_in_ctb)
7412 {
7413 UWORD8 u1_ctr;
7414 for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7415 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7416 u1_ctr++)
7417 {
7418 WORD32 i4_max_dep_ctb_y;
7419 WORD32 i4_max_dep_ctb_x;
7420
7421 /* Set max mv in ctb units */
7422 i4_max_mv_x_in_ctb =
7423 (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7424 ps_ctxt->log_ctb_size;
7425
7426 i4_max_mv_y_in_ctb =
7427 (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7428 ps_ctxt->log_ctb_size;
7429 /********************************************************************/
7430 /* Set max ctb_x and ctb_y dependency on reference picture */
7431 /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */
7432 /********************************************************************/
7433 i4_max_dep_ctb_x = CLIP3(
7434 (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7435 0,
7436 ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7437 i4_max_dep_ctb_y = CLIP3(
7438 (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7439 0,
7440 ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7441
7442 ihevce_dmgr_map_chk_sync(
7443 ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7444 ps_ctxt->thrd_id,
7445 i4_ctb_x,
7446 i4_ctb_y,
7447 i4_max_mv_x_in_ctb,
7448 i4_max_mv_y_in_ctb);
7449 }
7450 }
7451
7452 /* Loop across different Ref IDx */
7453 for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7454 {
7455 S32 resultid;
7456 S08 u1_default_ref_id;
7457 S32 i4_num_srch_cands = 0;
7458 S32 i4_num_refinement_iterations;
7459 S32 i4_refine_iter_ctr;
7460
7461 if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7462 (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7463 {
7464 u1_pred_dir = u1_pred_dir_ctr;
7465 }
7466 else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7467 {
7468 u1_pred_dir = 1;
7469 }
7470
7471 u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7472 : ps_ctxt->ai1_future_list[0];
7473 au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7474
7475 i4_num_srch_cands = 0;
7476 resultid = 0;
7477
7478                     /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7479 if(0 == blk_id_in_ctb)
7480 {
7481 /*****************************************************************/
7482 /* Initialize the mv grid with results of neighbours for the next*/
7483 /* ctb. */
7484 /*****************************************************************/
7485 hme_fill_ctb_neighbour_mvs(
7486 ps_curr_layer,
7487 blk_x,
7488 blk_y,
7489 aps_mv_grid[u1_pred_dir],
7490 u1_pred_dir_ctr,
7491 u1_default_ref_id,
7492 ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7493 }
7494
7495 s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7496
7497 {
7498 if((blk_id_in_full_ctb % 4) == 0)
7499 {
7500 ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7501 .as_pred_ctxt[u1_pred_dir]
7502 .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7503 }
7504
7505 if(blk_id_in_full_ctb == 0)
7506 {
7507 ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7508 }
7509
7510 ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7511 !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7512 }
7513
7514 {
7515 S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7516 S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7517 U08 u1_is_blk_at_ctb_boundary = !y;
7518
7519 s_srch_cand_init_data.u1_is_left_available =
7520 !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7521
7522 if(u1_is_blk_at_ctb_boundary)
7523 {
7524 s_srch_cand_init_data.u1_is_topRight_available = 0;
7525 s_srch_cand_init_data.u1_is_topLeft_available = 0;
7526 s_srch_cand_init_data.u1_is_top_available = 0;
7527 }
7528 else
7529 {
7530 s_srch_cand_init_data.u1_is_topRight_available =
7531 gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7532 s_srch_cand_init_data.u1_is_top_available = 1;
7533 s_srch_cand_init_data.u1_is_topLeft_available =
7534 s_srch_cand_init_data.u1_is_left_available;
7535 }
7536 }
7537
7538 s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7539 s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7540 s_srch_cand_init_data.i4_pos_x = pos_x;
7541 s_srch_cand_init_data.i4_pos_y = pos_y;
7542 s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7543 s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7544 s_srch_cand_init_data.u1_search_candidate_list_index =
7545 au1_search_candidate_list_index[u1_pred_dir];
7546
7547 i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7548
7549 /* Note this block also clips the MV range for all candidates */
7550 {
7551 S08 i1_check_for_mult_refs;
7552
7553 i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7554 : (ps_ctxt->num_ref_past > 1);
7555
7556 ps_me_optimised_function_list->pf_mv_clipper(
7557 &s_search_prms_blk,
7558 i4_num_srch_cands,
7559 i1_check_for_mult_refs,
7560 ps_refine_prms->i4_num_steps_fpel_refine,
7561 ps_refine_prms->i4_num_steps_hpel_refine,
7562 ps_refine_prms->i4_num_steps_qpel_refine);
7563 }
7564
7565 #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7566 i4_num_refinement_iterations =
7567 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7568 ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7569 : 1;
7570 #else
7571 i4_num_refinement_iterations =
7572 ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7573 #endif
7574
7575 #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7576 if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7577 {
7578 i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7579 : i4_num_act_ref_l1;
7580 }
7581 #endif
7582
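                /* When multiple refinement iterations are used, each iteration    */
                /* re-centres the full-pel search on the colocated candidate of a  */
                /* different reference picture in the active list                  */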
7583 for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7584 i4_refine_iter_ctr++)
7585 {
7586 S32 center_x;
7587 S32 center_y;
7588 S32 center_ref_idx;
7589
7590 S08 *pi1_pred_dir_to_ref_idx =
7591 (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7592
7593 {
7594 WORD32 i4_i;
7595
7596 for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7597 {
7598 ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7599 ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7600 ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7601 MAX_SIGNED_16BIT_VAL;
7602 ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7603 ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7604 ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7605
7606 if(ps_refine_prms->i4_num_results_per_part == 2)
7607 {
7608 ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7609 MAX_SIGNED_16BIT_VAL;
7610 ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7611 MAX_SIGNED_16BIT_VAL;
7612 ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7613 MAX_SIGNED_16BIT_VAL;
7614 ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7615 ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7616 ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7617 }
7618 }
7619
7620 s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7621 s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7622 }
7623
7624 {
7625 search_node_t *ps_coloc_node;
7626
7627 S32 i = 0;
7628
7629 if(i4_num_refinement_iterations > 1)
7630 {
7631 for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7632 {
7633 ps_coloc_node =
7634 s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7635 .ps_search_node;
7636
7637 if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7638 ps_coloc_node->i1_ref_idx)
7639 {
7640 break;
7641 }
7642 }
7643
7644 if(i == ai4_num_coloc_cands[u1_pred_dir])
7645 {
7646 i = 0;
7647 }
7648 }
7649 else
7650 {
7651 ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7652 .ps_search_node;
7653 }
7654
7655 hme_set_mvp_node(
7656 ps_search_results,
7657 ps_coloc_node,
7658 u1_pred_dir,
7659 (i4_num_refinement_iterations > 1)
7660 ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7661 : u1_default_ref_id);
7662
7663 center_x = ps_coloc_node->ps_mv->i2_mvx;
7664 center_y = ps_coloc_node->ps_mv->i2_mvy;
7665 center_ref_idx = ps_coloc_node->i1_ref_idx;
7666 }
7667
7668 /* Full-Pel search */
7669 {
7670 S32 num_unique_nodes;
7671
7672 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7673
7674 num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7675 as_unique_search_nodes,
7676 s_search_prms_blk.ps_search_candts,
7677 au4_unique_node_map,
7678 pi1_pred_dir_to_ref_idx,
7679 i4_num_srch_cands,
7680 s_search_prms_blk.i4_num_init_candts,
7681 i4_refine_iter_ctr,
7682 i4_num_refinement_iterations,
7683 i4_num_act_ref_l0,
7684 center_ref_idx,
7685 center_x,
7686 center_y,
7687 ps_ctxt->s_frm_prms.bidir_enabled,
7688 e_me_quality_presets);
7689
7690 /*************************************************************************/
7691 /* This array stores the ids of the partitions whose */
7692 /* SADs are updated. Since the partitions whose SADs are updated may not */
7693 /* be in contiguous order, we supply another level of indirection. */
7694 /*************************************************************************/
7695 ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7696 s_search_prms_blk.i4_part_mask,
7697 &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7698
7699 if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7700 {
7701 S32 i;
7702 /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7703 S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7704 (s_search_prms_blk.i4_cu_y_off * 4);
7705
7706 for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7707 {
7708 S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7709
7710 hme_compute_final_sigma_of_pu_from_base_blocks(
7711 ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7712 ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7713 au8_final_src_sigmaX,
7714 au8_final_src_sigmaXSquared,
7715 16,
7716 4,
7717 i4_part_id,
7718 16);
7719 }
7720
7721 s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7722 s_common_frm_prms.pu8_part_src_sigmaXSquared =
7723 au8_final_src_sigmaXSquared;
7724
7725 s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7726 s_search_prms_blk.pu8_part_src_sigmaXSquared =
7727 au8_final_src_sigmaXSquared;
7728 }
7729
7730 if(0 == num_unique_nodes)
7731 {
7732 continue;
7733 }
7734
7735 if(num_unique_nodes >= 2)
7736 {
7737 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7738 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7739                             if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7740                             {
7743                                 hme_fullpel_cand_sifter(
7744                                     &s_search_prms_blk,
7745                                     ps_curr_layer,
7746                                     &ps_ctxt->s_wt_pred,
7747                                     ALPHA_FOR_NOISE_TERM_IN_ME,
7748                                     u1_is_cu_noisy,
7749                                     ps_me_optimised_function_list);
7761                             }
7762 else
7763 {
7764 hme_fullpel_cand_sifter(
7765 &s_search_prms_blk,
7766 ps_curr_layer,
7767 &ps_ctxt->s_wt_pred,
7768 ALPHA_FOR_NOISE_TERM_IN_ME_P,
7769 u1_is_cu_noisy,
7770 ps_me_optimised_function_list);
7771 }
7772 }
7773
7774 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7775
7776 hme_fullpel_refine(
7777 ps_refine_prms,
7778 &s_search_prms_blk,
7779 ps_curr_layer,
7780 &ps_ctxt->s_wt_pred,
7781 au4_unique_node_map,
7782 num_unique_nodes,
7783 blk_8x8_mask,
7784 center_x,
7785 center_y,
7786 center_ref_idx,
7787 e_me_quality_presets,
7788 ps_me_optimised_function_list);
7789 }
7790
7791 /* Sub-Pel search */
7792 {
7793 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7794
7795 s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7796 &ps_ctxt->s_buf_mgr,
7797 INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7798 /* MV limit is different based on ref. PIC */
7799 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7800 {
7801 SCALE_RANGE_PRMS(
7802 as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7803 SCALE_RANGE_PRMS(
7804 as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7805 }
7806 s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7807 s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7808
7809 hme_subpel_refine_cu_hs(
7810 &s_subpel_prms,
7811 ps_curr_layer,
7812 ps_search_results,
7813 u1_pred_dir,
7814 &ps_ctxt->s_wt_pred,
7815 blk_8x8_mask,
7816 ps_ctxt->ps_func_selector,
7817 ps_cmn_utils_optimised_function_list,
7818 ps_me_optimised_function_list);
7819 }
7820 }
7821 }
7822 /* Populate the new PU struct with the results post subpel refinement*/
7823 {
7824 inter_cu_results_t *ps_cu_results;
7825 WORD32 best_inter_cost, intra_cost, posx, posy;
7826
7827 UWORD8 intra_8x8_enabled = 0;
7828
7829 /* cost of 16x16 cu parent */
7830 WORD32 parent_cost = MAX_32BIT_VAL;
7831
7832 /* cost of 8x8 cu children */
7833 /*********************************************************************/
7834 /* Assuming parent is not split, then we signal 1 bit for this parent*/
7835 /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7836 /* So, 4*lambda is extra for children cost. */
7837 /*********************************************************************/
7838 WORD32 child_cost = 0;
7839
7840 ps_cu_results = ps_search_results->ps_cu_results;
7841
7842 /* Initialize the pu_results pointers to the first struct in the stack array */
7843 ps_pu_results = as_inter_pu_results;
7844
7845 hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7846
7847 hme_populate_pus(
7848 ps_thrd_ctxt,
7849 ps_ctxt,
7850 &s_subpel_prms,
7851 ps_search_results,
7852 ps_cu_results,
7853 ps_pu_results,
7854 &(as_pu_results[0][0][0]),
7855 &s_common_frm_prms,
7856 &ps_ctxt->s_wt_pred,
7857 ps_curr_layer,
7858 au1_pred_dir_searched,
7859 i4_num_pred_dir);
7860
7861 ps_cu_results->i4_inp_offset =
7862 (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7863
7864 hme_decide_part_types(
7865 ps_cu_results,
7866 ps_pu_results,
7867 &s_common_frm_prms,
7868 ps_ctxt,
7869 ps_cmn_utils_optimised_function_list,
7870 ps_me_optimised_function_list
7871
7872 );
7873
7874                 /* UPDATE the MIN and MAX MVs for the Dynamic Search Range for each ref. pic. */
7875                 /* Done only for P pics: is_i_pic and bidir_enabled are both 0 for P, and mutually exclusive for I and B */
7876 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7877 {
7878 WORD32 res_ctr;
7879
7880 for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7881 {
7882 WORD32 num_part = 2, part_ctr;
7883 part_type_results_t *ps_best_results =
7884 &ps_cu_results->ps_best_results[res_ctr];
7885
7886 if(PRT_2Nx2N == ps_best_results->u1_part_type)
7887 num_part = 1;
7888
7889 for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7890 {
7891 pu_result_t *ps_pu_results =
7892 &ps_best_results->as_pu_results[part_ctr];
7893
7894 ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7895
7896 hme_update_dynamic_search_params(
7897 &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7898 .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7899 ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7900
7901 /* Sanity Check */
7902 ASSERT(
7903 ps_pu_results->pu.mv.i1_l0_ref_idx <
7904 ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7905
7906 /* No L1 for P Pic. */
7907 ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7908 /* No BI for P Pic. */
7909 ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7910 }
7911 }
7912 }
7913
7914 /*****************************************************************/
7915 /* INSERT INTRA RESULTS AT 16x16 LEVEL. */
7916 /*****************************************************************/
7917
7918 #if DISABLE_INTRA_IN_BPICS
7919 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7920 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7921 #endif
7922 {
7923 if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7924 {
7925 hme_insert_intra_nodes_post_bipred(
7926 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7927 }
7928 }
7929
7930 #if DISABLE_INTRA_IN_BPICS
7931 if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7932 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7933 {
7934 intra_8x8_enabled = 0;
7935 }
7936 else
7937 #endif
7938 {
7939                     /* TRAQO intra flag update */
7940 if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7941 {
7942 best_inter_cost =
7943 ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7944 intra_cost =
7945 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7946 /*@16x16 level*/
7947 posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7948 << 2) >>
7949 4;
7950 posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7951 << 2) >>
7952 4;
7953 }
7954 else
7955 {
7956 best_inter_cost =
7957 ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7958 posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7959 << 2) >>
7960 3;
7961 posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7962 << 2) >>
7963 3;
7964 }
7965
7966 /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7967 if(ps_cur_ipe_ctb->u1_split_flag)
7968 {
7969                         /* Ids of the 32x32 block and the 16x16 block within the CTB */
7970 WORD32 i4_32x32_id =
7971 (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7972 WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7973 ((ps_cu_results->u1_x_off >> 4) & 0x1);
7974
7975 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7976 {
7977 if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978 .as_intra16_analyse[i4_16x16_id]
7979 .b1_split_flag)
7980 {
7981 intra_8x8_enabled =
7982 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983 .as_intra16_analyse[i4_16x16_id]
7984 .as_intra8_analyse[0]
7985 .b1_valid_cu;
7986 intra_8x8_enabled &=
7987 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7988 .as_intra16_analyse[i4_16x16_id]
7989 .as_intra8_analyse[1]
7990 .b1_valid_cu;
7991 intra_8x8_enabled &=
7992 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7993 .as_intra16_analyse[i4_16x16_id]
7994 .as_intra8_analyse[2]
7995 .b1_valid_cu;
7996 intra_8x8_enabled &=
7997 ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7998 .as_intra16_analyse[i4_16x16_id]
7999 .as_intra8_analyse[3]
8000 .b1_valid_cu;
8001 }
8002 }
8003 }
8004 }
8005
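                /* When all four 8x8 children are available, the 16x16 parent cost */
                /* competes against the summed 8x8 child costs; otherwise the      */
                /* parent is treated as split                                      */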
8006 if(blk_8x8_mask == 0xf)
8007 {
8008 parent_cost =
8009 ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
8010 ps_search_results->u1_split_flag = 0;
8011 }
8012 else
8013 {
8014 ps_search_results->u1_split_flag = 1;
8015 }
8016
8017 ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8018
8019 if(s_common_frm_prms.u1_is_cu_noisy)
8020 {
8021 intra_8x8_enabled = 0;
8022 }
8023
8024                 /* Evaluate 8x8 if NxN part id is enabled */
8025 if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8026 {
8027 /* Populates the PU's for the 4 8x8's in one call */
8028 hme_populate_pus_8x8_cu(
8029 ps_thrd_ctxt,
8030 ps_ctxt,
8031 &s_subpel_prms,
8032 ps_search_results,
8033 ps_cu_results,
8034 ps_pu_results,
8035 &(as_pu_results[0][0][0]),
8036 &s_common_frm_prms,
8037 au1_pred_dir_searched,
8038 i4_num_pred_dir,
8039 blk_8x8_mask);
8040
8041 /* Re-initialize the pu_results pointers to the first struct in the stack array */
8042 ps_pu_results = as_inter_pu_results;
8043
8044 for(i = 0; i < 4; i++)
8045 {
8046 if((blk_8x8_mask & (1 << i)))
8047 {
8048 if(ps_cu_results->i4_part_mask)
8049 {
8050 hme_decide_part_types(
8051 ps_cu_results,
8052 ps_pu_results,
8053 &s_common_frm_prms,
8054 ps_ctxt,
8055 ps_cmn_utils_optimised_function_list,
8056 ps_me_optimised_function_list
8057
8058 );
8059 }
8060 /*****************************************************************/
8061 /* INSERT INTRA RESULTS AT 8x8 LEVEL. */
8062 /*****************************************************************/
8063 #if DISABLE_INTRA_IN_BPICS
8064 if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8065 (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8066 TEMPORAL_LAYER_DISABLE)))
8067 #endif
8068 {
8069 if(!(DISABLE_INTRA_WHEN_NOISY &&
8070 s_common_frm_prms.u1_is_cu_noisy))
8071 {
8072 hme_insert_intra_nodes_post_bipred(
8073 ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8074 }
8075 }
8076
8077 child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8078 }
8079
8080 ps_cu_results++;
8081 ps_pu_results++;
8082 }
8083
8084 /* Compare 16x16 vs 8x8 cost */
8085 if(child_cost < parent_cost)
8086 {
8087 ps_search_results->best_cu_cost = child_cost;
8088 ps_search_results->u1_split_flag = 1;
8089 }
8090 }
8091 }
8092
8093 hme_update_mv_bank_encode(
8094 ps_search_results,
8095 ps_curr_layer->ps_layer_mvbank,
8096 blk_x,
8097 blk_y,
8098 &s_mv_update_prms,
8099 au1_pred_dir_searched,
8100 i4_num_act_ref_l0);
8101
8102 /*********************************************************************/
8103             /* Map the best results to an MV Grid. This is an 18x18 grid that is */
8104 /* useful for doing things like predictor for cost calculation or */
8105 /* also for merge calculations if need be. */
8106 /*********************************************************************/
8107 hme_map_mvs_to_grid(
8108 &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8109 }
8110
8111 /* Set the CU tree nodes appropriately */
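        /* For each of the 16 blocks in encode order, (i >> 2) selects the 32x32  */
        /* child and (i % 4) the 16x16 child of this CTB's CU tree                */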
8112 if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8113 {
8114 WORD32 i, j;
8115
8116 for(i = 0; i < 16; i++)
8117 {
8118 cur_ctb_cu_tree_t *ps_tree_node =
8119 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8120 search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8121
8122 switch(i >> 2)
8123 {
8124 case 0:
8125 {
8126 ps_tree_node = ps_tree_node->ps_child_node_tl;
8127
8128 break;
8129 }
8130 case 1:
8131 {
8132 ps_tree_node = ps_tree_node->ps_child_node_tr;
8133
8134 break;
8135 }
8136 case 2:
8137 {
8138 ps_tree_node = ps_tree_node->ps_child_node_bl;
8139
8140 break;
8141 }
8142 case 3:
8143 {
8144 ps_tree_node = ps_tree_node->ps_child_node_br;
8145
8146 break;
8147 }
8148 }
8149
8150 switch(i % 4)
8151 {
8152 case 0:
8153 {
8154 ps_tree_node = ps_tree_node->ps_child_node_tl;
8155
8156 break;
8157 }
8158 case 1:
8159 {
8160 ps_tree_node = ps_tree_node->ps_child_node_tr;
8161
8162 break;
8163 }
8164 case 2:
8165 {
8166 ps_tree_node = ps_tree_node->ps_child_node_bl;
8167
8168 break;
8169 }
8170 case 3:
8171 {
8172 ps_tree_node = ps_tree_node->ps_child_node_br;
8173
8174 break;
8175 }
8176 }
8177
8178 if(ai4_blk_8x8_mask[i] == 15)
8179 {
8180 if(!ps_results->u1_split_flag)
8181 {
8182 ps_tree_node->is_node_valid = 1;
8183 NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8184 }
8185 else
8186 {
8187 ps_tree_node->is_node_valid = 0;
8188 ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8189 }
8190 }
8191 else
8192 {
8193 cur_ctb_cu_tree_t *ps_tree_child;
8194
8195 ps_tree_node->is_node_valid = 0;
8196
8197 for(j = 0; j < 4; j++)
8198 {
8199 switch(j)
8200 {
8201 case 0:
8202 {
8203 ps_tree_child = ps_tree_node->ps_child_node_tl;
8204
8205 break;
8206 }
8207 case 1:
8208 {
8209 ps_tree_child = ps_tree_node->ps_child_node_tr;
8210
8211 break;
8212 }
8213 case 2:
8214 {
8215 ps_tree_child = ps_tree_node->ps_child_node_bl;
8216
8217 break;
8218 }
8219 case 3:
8220 {
8221 ps_tree_child = ps_tree_node->ps_child_node_br;
8222
8223 break;
8224 }
8225 }
8226
8227 ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8228 }
8229 }
8230 }
8231 }
8232
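        /* In pristine quality, MV clustering over the 16x16 and 8x8 results       */
        /* decides which CU tree nodes remain valid; the 32x32 merge mask and the  */
        /* merge-execution mask are then read back from the tree                   */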
8233 if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8234 {
8235 cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8236
8237 hme_analyse_mv_clustering(
8238 ps_ctxt->as_search_results_16x16,
8239 ps_ctxt->as_cu16x16_results,
8240 ps_ctxt->as_cu8x8_results,
8241 ps_ctxt->ps_ctb_cluster_info,
8242 ps_ctxt->ai1_future_list,
8243 ps_ctxt->ai1_past_list,
8244 ps_ctxt->s_frm_prms.bidir_enabled,
8245 e_me_quality_presets);
8246
8247 #if DISABLE_BLK_MERGE_WHEN_NOISY
8248 ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8249 ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8250 ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8251 ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8252 ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8253 ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8254 ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8255 ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8256 ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8257 ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8258 #endif
8259
8260 en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8261 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8262 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8263 (ps_tree->ps_child_node_br->is_node_valid << 3);
8264
8265 en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8266 (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8267 (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8268 (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8269 (ps_tree->u1_inter_eval_enable << 4);
8270 }
8271 else
8272 {
8273 en_merge_execution = 0x1f;
8274
8275 #if DISABLE_BLK_MERGE_WHEN_NOISY
8276 en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8277 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8278 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8279 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8280 #endif
8281 }
8282
8283 /* Re-initialize the pu_results pointers to the first struct in the stack array */
8284 ps_pu_results = as_inter_pu_results;
8285
8286 {
8287 WORD32 ref_ctr;
8288
8289 s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8290 s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8291
8292 /* MV limit is different based on ref. PIC */
8293 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8294 {
8295 SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8296 SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8297 }
8298
8299 e_merge_result = CU_SPLIT;
8300 merge_count_32x32 = 0;
8301
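        /* Try to merge the 16x16 results into each 32x32 quadrant (TL, TR, BL, BR). */
        /* For non-pristine presets, a successful merge validates the corresponding  */
        /* 32x32 CU tree node and nullifies its children                             */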
8302 if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8303 {
8304 range_prms_t *ps_pic_limit;
8305 if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8306 {
8307 ps_pic_limit = &s_pic_limit_rec;
8308 }
8309 else
8310 {
8311 ps_pic_limit = &s_pic_limit_inp;
8312 }
8313 /* MV limit is different based on ref. PIC */
8314 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8315 {
8316 hme_derive_search_range(
8317 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8318 ps_pic_limit,
8319 &as_mv_limit[ref_ctr],
8320 i4_ctb_x << 6,
8321 i4_ctb_y << 6,
8322 32,
8323 32);
8324
8325 SCALE_RANGE_PRMS_POINTERS(
8326 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8327 s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8328 2);
8329 }
8330 s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8331 s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8332 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8333
8334 e_merge_result = hme_try_merge_high_speed(
8335 ps_thrd_ctxt,
8336 ps_ctxt,
8337 ps_cur_ipe_ctb,
8338 &s_subpel_prms,
8339 &s_merge_prms_32x32_tl,
8340 ps_pu_results,
8341 &as_pu_results[0][0][0]);
8342
8343 if(e_merge_result == CU_MERGED)
8344 {
8345 inter_cu_results_t *ps_cu_results =
8346 s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8347
8348 if(!((ps_cu_results->u1_num_best_results == 1) &&
8349 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8350 {
8351 hme_map_mvs_to_grid(
8352 &aps_mv_grid[0],
8353 s_merge_prms_32x32_tl.ps_results_merge,
8354 s_merge_prms_32x32_tl.au1_pred_dir_searched,
8355 s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8356 }
8357
8358 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8359 {
8360 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8361 .ps_child_node_tl->is_node_valid = 1;
8362 NULLIFY_THE_CHILDREN_NODES(
8363 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8364 .ps_child_node_tl);
8365 }
8366
8367 merge_count_32x32++;
8368 e_merge_result = CU_SPLIT;
8369 }
8370 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8371 {
8372 #if ENABLE_CU_TREE_CULLING
8373 cur_ctb_cu_tree_t *ps_tree =
8374 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375
8376 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8377 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8378 ENABLE_THE_CHILDREN_NODES(ps_tree);
8379 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8380 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8381 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8382 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8383 #endif
8384 }
8385 }
8386 else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8387 {
8388 #if ENABLE_CU_TREE_CULLING
8389 cur_ctb_cu_tree_t *ps_tree =
8390 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8391
8392 ENABLE_THE_CHILDREN_NODES(ps_tree);
8393 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8394 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8395 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8396 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8397 #endif
8398
8399 if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8400 {
8401 ps_tree->is_node_valid = 0;
8402 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8403 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8404 }
8405 }
8406
8407 if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8408 {
8409 range_prms_t *ps_pic_limit;
8410 if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8411 {
8412 ps_pic_limit = &s_pic_limit_rec;
8413 }
8414 else
8415 {
8416 ps_pic_limit = &s_pic_limit_inp;
8417 }
8418 /* MV limit is different based on ref. PIC */
8419 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8420 {
8421 hme_derive_search_range(
8422 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8423 ps_pic_limit,
8424 &as_mv_limit[ref_ctr],
8425 (i4_ctb_x << 6) + 32,
8426 i4_ctb_y << 6,
8427 32,
8428 32);
8429 SCALE_RANGE_PRMS_POINTERS(
8430 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8431 s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8432 2);
8433 }
8434 s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8435 s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8436 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8437
8438 e_merge_result = hme_try_merge_high_speed(
8439 ps_thrd_ctxt,
8440 ps_ctxt,
8441 ps_cur_ipe_ctb,
8442 &s_subpel_prms,
8443 &s_merge_prms_32x32_tr,
8444 ps_pu_results,
8445 &as_pu_results[0][0][0]);
8446
8447 if(e_merge_result == CU_MERGED)
8448 {
8449 inter_cu_results_t *ps_cu_results =
8450 s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8451
8452 if(!((ps_cu_results->u1_num_best_results == 1) &&
8453 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8454 {
8455 hme_map_mvs_to_grid(
8456 &aps_mv_grid[0],
8457 s_merge_prms_32x32_tr.ps_results_merge,
8458 s_merge_prms_32x32_tr.au1_pred_dir_searched,
8459 s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8460 }
8461
8462 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8463 {
8464 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8465 .ps_child_node_tr->is_node_valid = 1;
8466 NULLIFY_THE_CHILDREN_NODES(
8467 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8468 .ps_child_node_tr);
8469 }
8470
8471 merge_count_32x32++;
8472 e_merge_result = CU_SPLIT;
8473 }
8474 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8475 {
8476 #if ENABLE_CU_TREE_CULLING
8477 cur_ctb_cu_tree_t *ps_tree =
8478 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479
8480 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8481 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8482 ENABLE_THE_CHILDREN_NODES(ps_tree);
8483 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8484 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8485 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8486 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8487 #endif
8488 }
8489 }
8490 else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8491 {
8492 #if ENABLE_CU_TREE_CULLING
8493 cur_ctb_cu_tree_t *ps_tree =
8494 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8495
8496 ENABLE_THE_CHILDREN_NODES(ps_tree);
8497 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8498 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8499 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8500 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8501 #endif
8502
8503 if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8504 {
8505 ps_tree->is_node_valid = 0;
8506 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8507 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8508 }
8509 }
8510
8511 if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8512 {
8513 range_prms_t *ps_pic_limit;
8514 if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8515 {
8516 ps_pic_limit = &s_pic_limit_rec;
8517 }
8518 else
8519 {
8520 ps_pic_limit = &s_pic_limit_inp;
8521 }
8522 /* MV limit is different based on ref. PIC */
8523 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8524 {
8525 hme_derive_search_range(
8526 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8527 ps_pic_limit,
8528 &as_mv_limit[ref_ctr],
8529 i4_ctb_x << 6,
8530 (i4_ctb_y << 6) + 32,
8531 32,
8532 32);
8533 SCALE_RANGE_PRMS_POINTERS(
8534 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8535 s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8536 2);
8537 }
8538 s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8539 s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8540 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8541
8542 e_merge_result = hme_try_merge_high_speed(
8543 ps_thrd_ctxt,
8544 ps_ctxt,
8545 ps_cur_ipe_ctb,
8546 &s_subpel_prms,
8547 &s_merge_prms_32x32_bl,
8548 ps_pu_results,
8549 &as_pu_results[0][0][0]);
8550
8551 if(e_merge_result == CU_MERGED)
8552 {
8553 inter_cu_results_t *ps_cu_results =
8554 s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8555
8556 if(!((ps_cu_results->u1_num_best_results == 1) &&
8557 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8558 {
8559 hme_map_mvs_to_grid(
8560 &aps_mv_grid[0],
8561 s_merge_prms_32x32_bl.ps_results_merge,
8562 s_merge_prms_32x32_bl.au1_pred_dir_searched,
8563 s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8564 }
8565
8566 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8567 {
8568 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8569 .ps_child_node_bl->is_node_valid = 1;
8570 NULLIFY_THE_CHILDREN_NODES(
8571 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8572 .ps_child_node_bl);
8573 }
8574
8575 merge_count_32x32++;
8576 e_merge_result = CU_SPLIT;
8577 }
8578 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8579 {
8580 #if ENABLE_CU_TREE_CULLING
8581 cur_ctb_cu_tree_t *ps_tree =
8582 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583
8584 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8585 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8586 ENABLE_THE_CHILDREN_NODES(ps_tree);
8587 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8588 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8589 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8590 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8591 #endif
8592 }
8593 }
8594 else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8595 {
8596 #if ENABLE_CU_TREE_CULLING
8597 cur_ctb_cu_tree_t *ps_tree =
8598 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8599
8600 ENABLE_THE_CHILDREN_NODES(ps_tree);
8601 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8602 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8603 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8604 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8605 #endif
8606
8607 if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8608 {
8609 ps_tree->is_node_valid = 0;
8610 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8611 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8612 }
8613 }
8614
8615 if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8616 {
8617 range_prms_t *ps_pic_limit;
8618 if(s_merge_prms_32x32_br.i4_use_rec == 1)
8619 {
8620 ps_pic_limit = &s_pic_limit_rec;
8621 }
8622 else
8623 {
8624 ps_pic_limit = &s_pic_limit_inp;
8625 }
8626 /* MV limit is different based on ref. PIC */
8627 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8628 {
8629 hme_derive_search_range(
8630 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8631 ps_pic_limit,
8632 &as_mv_limit[ref_ctr],
8633 (i4_ctb_x << 6) + 32,
8634 (i4_ctb_y << 6) + 32,
8635 32,
8636 32);
8637
8638 SCALE_RANGE_PRMS_POINTERS(
8639 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8640 s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8641 2);
8642 }
8643 s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8644 s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8645 s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8646
8647 e_merge_result = hme_try_merge_high_speed(
8648 ps_thrd_ctxt,
8649 ps_ctxt,
8650 ps_cur_ipe_ctb,
8651 &s_subpel_prms,
8652 &s_merge_prms_32x32_br,
8653 ps_pu_results,
8654 &as_pu_results[0][0][0]);
8655
8656 if(e_merge_result == CU_MERGED)
8657 {
8658 /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8659
8660 if(!((ps_cu_results->u1_num_best_results == 1) &&
8661 (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8662 {
8663 hme_map_mvs_to_grid
8664 (
8665 &aps_mv_grid[0],
8666 s_merge_prms_32x32_br.ps_results_merge,
8667 s_merge_prms_32x32_br.au1_pred_dir_searched,
8668 s_merge_prms_32x32_br.i4_num_pred_dir_actual
8669 );
8670 }*/
8671
8672 if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8673 {
8674 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8675 .ps_child_node_br->is_node_valid = 1;
8676 NULLIFY_THE_CHILDREN_NODES(
8677 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8678 .ps_child_node_br);
8679 }
8680
8681 merge_count_32x32++;
8682 e_merge_result = CU_SPLIT;
8683 }
8684 else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8685 {
8686 #if ENABLE_CU_TREE_CULLING
8687 cur_ctb_cu_tree_t *ps_tree =
8688 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689
8690 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8691 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8692 ENABLE_THE_CHILDREN_NODES(ps_tree);
8693 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8694 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8695 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8696 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8697 #endif
8698 }
8699 }
8700 else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8701 {
8702 #if ENABLE_CU_TREE_CULLING
8703 cur_ctb_cu_tree_t *ps_tree =
8704 ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8705
8706 ENABLE_THE_CHILDREN_NODES(ps_tree);
8707 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8708 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8709 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8710 ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8711 #endif
8712
8713 if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8714 {
8715 ps_tree->is_node_valid = 0;
8716 ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8717 en_merge_execution = (en_merge_execution & (~(1 << 4)));
8718 }
8719 }
8720
8721             /* Try merging all four 32x32 results into a 64x64 */
8722 if(((en_merge_32x32 & 0xf) == 0xf) &&
8723 (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8724 ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8725 if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8726 !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8727 (e_me_quality_presets != ME_XTREME_SPEED_25)))
8728 {
8729 range_prms_t *ps_pic_limit;
8730 if(s_merge_prms_64x64.i4_use_rec == 1)
8731 {
8732 ps_pic_limit = &s_pic_limit_rec;
8733 }
8734 else
8735 {
8736 ps_pic_limit = &s_pic_limit_inp;
8737 }
8738 /* MV limit is different based on ref. PIC */
8739 for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8740 {
8741 hme_derive_search_range(
8742 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8743 ps_pic_limit,
8744 &as_mv_limit[ref_ctr],
8745 i4_ctb_x << 6,
8746 i4_ctb_y << 6,
8747 64,
8748 64);
8749
8750 SCALE_RANGE_PRMS_POINTERS(
8751 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8752 s_merge_prms_64x64.aps_mv_range[ref_ctr],
8753 2);
8754 }
8755 s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8756 s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8757 s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8758
8759 e_merge_result = hme_try_merge_high_speed(
8760 ps_thrd_ctxt,
8761 ps_ctxt,
8762 ps_cur_ipe_ctb,
8763 &s_subpel_prms,
8764 &s_merge_prms_64x64,
8765 ps_pu_results,
8766 &as_pu_results[0][0][0]);
8767
8768 if((e_merge_result == CU_MERGED) &&
8769 (ME_PRISTINE_QUALITY != e_me_quality_presets))
8770 {
8771 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8772 .is_node_valid = 1;
8773 NULLIFY_THE_CHILDREN_NODES(
8774 ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8775 }
8776 else if(
8777 (e_merge_result == CU_SPLIT) &&
8778 (ME_PRISTINE_QUALITY == e_me_quality_presets))
8779 {
8780 ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8781 .is_node_valid = 0;
8782 }
8783 }
8784
8785 /*****************************************************************/
8786             /* UPDATE OF RESULTS TO EXTERNAL STRUCTURES                      */
8787 /*****************************************************************/
8788 pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8789
8790 {
8791 #ifdef _DEBUG
8792 S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8793 ? 64
8794 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8795 S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8796 ? 64
8797 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8798 ASSERT(
8799 (wd * ht) ==
8800 ihevce_compute_area_of_valid_cus_in_ctb(
8801 &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8802 #endif
8803 }
8804 }
8805
8806 /* set the dependency for the corresponding row in enc loop */
8807 ihevce_dmgr_set_row_row_sync(
8808 pv_dep_mngr_encloop_dep_me,
8809 (i4_ctb_x + 1),
8810 i4_ctb_y,
8811 tile_col_idx /* Col Tile No. */);
8812
8813 left_ctb_in_diff_tile = 0;
8814 }
8815 }
8816 }
8817
8818 /**
8819 ********************************************************************************
8820 * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, refine_prms_t *ps_refine_prms,
8821 *          multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)
8822 *
8823 * @brief Top level entry point for refinement ME
8824 *
8825 * @param[in,out] ps_ctxt: ME Handle
8826 *
8827 * @param[in] ps_refine_prms : refinement layer prms
8828 *
8829 * @return None
8830 ********************************************************************************
8831 */
8832 void hme_refine_no_encode(
8833 coarse_me_ctxt_t *ps_ctxt,
8834 refine_prms_t *ps_refine_prms,
8835 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8836 S32 lyr_job_type,
8837 WORD32 i4_ping_pong,
8838 void **ppv_dep_mngr_hme_sync)
8839 {
8840 BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8841 ME_QUALITY_PRESETS_T e_me_quality_presets =
8842 ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8843
8844 /*************************************************************************/
8845 /* Complexity of search: Low to High */
8846 /*************************************************************************/
8847 SEARCH_COMPLEXITY_T e_search_complexity;
8848
8849 /*************************************************************************/
8850 /* Config parameter structures for various ME submodules */
8851 /*************************************************************************/
8852 hme_search_prms_t s_search_prms_blk;
8853 mvbank_update_prms_t s_mv_update_prms;
8854
8855 /*************************************************************************/
8856 /* All types of search candidates for predictor based search. */
8857 /*************************************************************************/
8858 S32 num_init_candts = 0;
8859 search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8860 search_node_t as_top_neighbours[4], as_left_neighbours[3];
8861 search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8862 search_node_t *ps_candt_l, *ps_candt_t;
8863 search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8864 search_node_t *ps_candt_prj_bl[2];
8865 search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8866 search_node_t *ps_candt_prj_coloc[2];
8867
8868 pf_get_wt_inp fp_get_wt_inp;
8869
8870 search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8871 U32 au4_unique_node_map[MAP_X_MAX * 2];
8872
8873 /*EIID */
8874 WORD32 i4_num_inter_wins = 0; //debug counter: number of inter wins (EIID stats)
8875 WORD32 i4_num_comparisions = 0; //debug counter: number of comparisons (EIID stats)
8876 WORD32 i4_threshold_multiplier;
8877 WORD32 i4_threshold_divider;
8878 WORD32 i4_temporal_layer =
8879 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8880
8881 /*************************************************************************/
8882 /* points to the search results for the blk level search (8x8/16x16) */
8883 /*************************************************************************/
8884 search_results_t *ps_search_results;
8885
8886 /*************************************************************************/
8887 /* Coordinates */
8888 /*************************************************************************/
8889 S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8890 //S32 i4_ctb_y;
8891 S32 pos_x, pos_y;
8892 S32 blk_id_in_full_ctb;
8893 S32 i4_num_srch_cands;
8894
8895 S32 blk_y;
8896
8897 /*************************************************************************/
8898 /* Related to dimensions of block being searched and pic dimensions */
8899 /*************************************************************************/
8900 S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8901 S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8902 S32 num_results_prev_layer;
8903
8904 /*************************************************************************/
8905 /* Size of a basic unit for this layer. For non encode layers, we search */
8906 /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8907 /* basic unit size is the ctb size. */
8908 /*************************************************************************/
8909 S32 unit_size;
8910
8911 /*************************************************************************/
8912 /* Pointers to context in current and coarser layers */
8913 /*************************************************************************/
8914 layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8915
8916 /*************************************************************************/
8917 /* to store mv range per blk, and picture limit, allowed search range */
8918 /* range prms in hpel and qpel units as well */
8919 /*************************************************************************/
8920 range_prms_t s_range_prms_inp, s_range_prms_rec;
8921 range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8922 /*************************************************************************/
8923 /* These variables are used to track number of references at different */
8924 /* stages of ME. */
8925 /*************************************************************************/
8926 S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8927 S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8928 S32 lambda_inp = ps_refine_prms->lambda_inp;
8929
8930 /*************************************************************************/
8931 /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8932 /* Explicit means it searches on all active ref idx. */
8933 /*************************************************************************/
8934 S32 curr_layer_implicit, prev_layer_implicit;
8935
8936 /*************************************************************************/
8937 /* Variables for loop counts */
8938 /*************************************************************************/
8939 S32 id;
8940 S08 i1_ref_idx;
8941
8942 /*************************************************************************/
8943 /* Input pointer and stride */
8944 /*************************************************************************/
8945 U08 *pu1_inp;
8946 S32 i4_inp_stride;
8947
8948 S32 end_of_frame;
8949
8950 S32 num_sync_units_in_row;
8951
8952 PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8953 ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8954
8955 /*************************************************************************/
8956 /* Pointers to current and coarse layer are needed for projection */
8957 /* Pointer to prev layer are needed for other candts like coloc */
8958 /*************************************************************************/
8959 ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8960
8961 ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8962
8963 num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8964
8965 /* Function pointer is selected based on the C vs X86 macro */
8966
8967 fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8968 ->pf_get_wt_inp_8x8;
8969
8970 i4_inp_stride = ps_curr_layer->i4_inp_stride;
8971 i4_pic_wd = ps_curr_layer->i4_wd;
8972 i4_pic_ht = ps_curr_layer->i4_ht;
8973 e_search_complexity = ps_refine_prms->e_search_complexity;
8974
8975 end_of_frame = 0;
8976
8977 /* If the previous layer is non-encode layer, then use dyadic projection */
8978 if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8979 pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8980 else
8981 pf_hme_project_coloc_candt = hme_project_coloc_candt;
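/* Note (explanatory, based on the check above): when the coarser layer is also
   a non-encode layer the two layers differ by a factor of 2 in each dimension,
   so the dyadic variant can project the colocated candidate with simple
   shifts; otherwise the generic scaling projection is used. */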
8982
8983 /* This points to all the initial candts */
8984 ps_search_candts = &as_search_candts[0];
8985
8986 {
8987 e_search_blk_size = BLK_8x8;
8988 blk_wd = blk_ht = 8;
8989 blk_size_shift = 3;
8990 s_mv_update_prms.i4_shift = 0;
8991 /*********************************************************************/
8992 /* In case we do not encode this layer, we search 8x8 with or without*/
8993 /* 4x4 SAD enabled. */
8994 /*********************************************************************/
8995 {
8996 S32 i4_mask = (ENABLE_2Nx2N);
8997
8998 e_result_blk_size = BLK_8x8;
8999 if(ps_refine_prms->i4_enable_4x4_part)
9000 {
9001 i4_mask |= (ENABLE_NxN);
9002 e_result_blk_size = BLK_4x4;
9003 s_mv_update_prms.i4_shift = 1;
9004 }
9005
9006 s_search_prms_blk.i4_part_mask = i4_mask;
9007 }
9008
9009 unit_size = blk_wd;
9010 s_search_prms_blk.i4_inp_stride = unit_size;
9011 }
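/* Illustration of the masks chosen above: with i4_enable_4x4_part set, the
   8x8 search evaluates ENABLE_2Nx2N | ENABLE_NxN and stores results at 4x4
   granularity (e_result_blk_size = BLK_4x4), with s_mv_update_prms.i4_shift
   set to 1, which the MV bank update presumably uses to address 4x4 units;
   otherwise only the single 8x8 (2Nx2N) result per block is kept. */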
9012
9013 /* This is required to properly update the layer mv bank */
9014 s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9015 s_search_prms_blk.e_blk_size = e_search_blk_size;
9016
9017 /*************************************************************************/
9018 /* If current layer is explicit, then the number of ref frames is to */
9019 /* be the same as in the previous layer. Else it will be 2 */
9020 /*************************************************************************/
9021 i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9022 if(ps_refine_prms->explicit_ref)
9023 {
9024 curr_layer_implicit = 0;
9025 i4_num_ref_fpel = i4_num_ref_prev_layer;
9026 /* 100578 : Using same mv cost fun. for all presets. */
9027 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9028 }
9029 else
9030 {
9031 i4_num_ref_fpel = 2;
9032 curr_layer_implicit = 1;
9033 {
9034 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9035 {
9036 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9037 }
9038 else
9039 {
9040 #if USE_MODIFIED == 1
9041 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9042 #else
9043 s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9044 #endif
9045 }
9046 }
9047 }
9048
9049 i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9050 if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9051 IV_IDR_FRAME ||
9052 ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9053 {
9054 i4_num_ref_fpel = 1;
9055 }
9056 if(i4_num_ref_prev_layer <= 2)
9057 {
9058 prev_layer_implicit = 1;
9059 curr_layer_implicit = 1;
9060 i4_num_ref_each_dir = 1;
9061 }
9062 else
9063 {
9064 /* It is assumed that we have an equal number of references in each direction */
9065 //ASSERT(!(i4_num_ref_prev_layer & 1));
9066 prev_layer_implicit = 0;
9067 i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9068 }
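/* Example of the split above: if the coarser layer tracked 4 references, it is
   treated as explicit and each direction is assumed to hold 4 >> 1 = 2 of
   them; with 2 or fewer references, both the coarser and the current layer
   behave implicitly with 1 reference per direction. */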
9069 s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9070 s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9071 s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9072
9073 /* this can be kept to 1 or 2 */
9074 i4_num_ref_before_merge = 2;
9075 i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9076
9077 /* Set up place holders to hold the search nodes of each initial candt */
9078 for(i = 0; i < MAX_INIT_CANDTS; i++)
9079 {
9080 ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9081 INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9082 }
9083
9084 /* redundant, but doing it here since it is used in pred ctxt init */
9085 ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9086 for(i = 0; i < 3; i++)
9087 {
9088 search_node_t *ps_search_node;
9089 ps_search_node = &as_left_neighbours[i];
9090 INIT_SEARCH_NODE(ps_search_node, 0);
9091 ps_search_node = &as_top_neighbours[i];
9092 INIT_SEARCH_NODE(ps_search_node, 0);
9093 }
9094
9095 INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9096 /* bottom left node is never available for the blk being searched */
9097 as_left_neighbours[2].u1_is_avail = 0;
9098 /*************************************************************************/
9099 /* Initialize all the search results structure here. We update all the */
9100 /* search results to default values, and configure things like blk sizes */
9101 /*************************************************************************/
9102 if(ps_refine_prms->i4_encode == 0)
9103 {
9104 S32 pred_lx;
9105 search_results_t *ps_search_results;
9106
9107 ps_search_results = &ps_ctxt->s_search_results_8x8;
9108 hme_init_search_results(
9109 ps_search_results,
9110 i4_num_ref_fpel,
9111 ps_refine_prms->i4_num_fpel_results,
9112 ps_refine_prms->i4_num_results_per_part,
9113 e_search_blk_size,
9114 0,
9115 0,
9116 &ps_ctxt->au1_is_past[0]);
9117 for(pred_lx = 0; pred_lx < 2; pred_lx++)
9118 {
9119 hme_init_pred_ctxt_no_encode(
9120 &ps_search_results->as_pred_ctxt[pred_lx],
9121 ps_search_results,
9122 &as_top_neighbours[0],
9123 &as_left_neighbours[0],
9124 &ps_candt_prj_coloc[0],
9125 ps_candt_zeromv,
9126 ps_candt_zeromv,
9127 pred_lx,
9128 lambda_inp,
9129 ps_refine_prms->lambda_q_shift,
9130 &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9131 &ps_ctxt->ai2_ref_scf[0]);
9132 }
9133 }
9134
9135 /*********************************************************************/
9136 /* Initialize the dyn. search range params. for each reference index */
9137 /* in current layer ctxt */
9138 /*********************************************************************/
9139 /* Only for P pic. For P, both flags are 0; I and B have them mutually exclusive */
9140 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9141 {
9142 WORD32 ref_ctr;
9143
9144 for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9145 {
9146 INIT_DYN_SEARCH_PRMS(
9147 &ps_ctxt->s_coarse_dyn_range_prms
9148 .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9149 ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9150 }
9151 }
9152
9153 /* Next set up initial candidates according to a given set of rules. */
9154 /* The number of initial candidates affects the quality of ME in the */
9155 /* case of motion with multiple degrees of freedom. In case of simple */
9156 /* translational motion, a current and a few causal and non causal */
9157 /* candts would suffice. More candidates help to cover more complex */
9158 /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9159 /* where multiple ref helps etc. */
9160 /* The candidate choice also depends on the following parameters. */
9161 /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */
9162 /* Whether we encode or not, and the type of search across reference */
9163 /* i.e. the previous layer may have been explicit/implicit and curr */
9164 /* layer may be explicit/implicit */
9165
9166 /* 0, 0, L, T, projected coloc best always present by default */
9167 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9168 ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9169 ps_search_candts[id].u1_num_steps_refine = 0;
9170 ps_candt_zeromv->s_mv.i2_mvx = 0;
9171 ps_candt_zeromv->s_mv.i2_mvy = 0;
9172
9173 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9174 ps_candt_l = ps_search_candts[id].ps_search_node;
9175 ps_search_candts[id].u1_num_steps_refine = 0;
9176
9177 /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9178 /* not at the CTB boundary use the causal T and */
9179 /* not the projected T, although the candidate is */
9180 /* still pointed to by ps_candt_prj_t[0] */
9181 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9182 {
9183 /* Using Projected top to eliminate sync */
9184 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9185 PROJECTED_TOP0, e_me_quality_presets);
9186 ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9187 ps_search_candts[id].u1_num_steps_refine = 1;
9188 }
9189 else
9190 {
9191 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9192 SPATIAL_TOP0, e_me_quality_presets);
9193 ps_candt_t = ps_search_candts[id].ps_search_node;
9194 ps_search_candts[id].u1_num_steps_refine = 0;
9195 }
9196
9197 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9198 PROJECTED_COLOC0, e_me_quality_presets);
9199 ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9200 ps_search_candts[id].u1_num_steps_refine = 1;
9201
9202 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9203 PROJECTED_COLOC1, e_me_quality_presets);
9204 ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9205 ps_search_candts[id].u1_num_steps_refine = 1;
9206
9207 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9208 {
9209 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9210 PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9211 ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9212 ps_search_candts[id].u1_num_steps_refine = 1;
9213
9214 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9215 PROJECTED_TOP_LEFT0, e_me_quality_presets);
9216 ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9217 ps_search_candts[id].u1_num_steps_refine = 1;
9218 }
9219 else
9220 {
9221 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222 SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9223 ps_candt_tr = ps_search_candts[id].ps_search_node;
9224 ps_search_candts[id].u1_num_steps_refine = 0;
9225
9226 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227 SPATIAL_TOP_LEFT0, e_me_quality_presets);
9228 ps_candt_tl = ps_search_candts[id].ps_search_node;
9229 ps_search_candts[id].u1_num_steps_refine = 0;
9230 }
9231
9232 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233 PROJECTED_RIGHT0, e_me_quality_presets);
9234 ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9235 ps_search_candts[id].u1_num_steps_refine = 1;
9236
9237 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238 PROJECTED_BOTTOM0, e_me_quality_presets);
9239 ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9240 ps_search_candts[id].u1_num_steps_refine = 1;
9241
9242 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243 PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9244 ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9245 ps_search_candts[id].u1_num_steps_refine = 1;
9246
9247 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248 PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9249 ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9250 ps_search_candts[id].u1_num_steps_refine = 1;
9251
9252 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253 PROJECTED_RIGHT1, e_me_quality_presets);
9254 ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9255 ps_search_candts[id].u1_num_steps_refine = 1;
9256
9257 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9258 PROJECTED_BOTTOM1, e_me_quality_presets);
9259 ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9260 ps_search_candts[id].u1_num_steps_refine = 1;
9261
9262 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9263 PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9264 ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9265 ps_search_candts[id].u1_num_steps_refine = 1;
9266
9267 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9268 PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9269 ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9270 ps_search_candts[id].u1_num_steps_refine = 1;
9271
9272 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9273 ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9274 ps_search_candts[id].u1_num_steps_refine = 1;
9275
9276 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9277 PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9278 ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9279 ps_search_candts[id].u1_num_steps_refine = 1;
9280
9281 id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9282 PROJECTED_TOP_LEFT1, e_me_quality_presets);
9283 ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9284 ps_search_candts[id].u1_num_steps_refine = 1;
9285
9286 /*************************************************************************/
9287 /* Now that the candidates have been ordered, to choose the right number */
9288 /* of initial candidates. */
9289 /*************************************************************************/
9290 if(curr_layer_implicit && !prev_layer_implicit)
9291 {
9292 if(e_search_complexity == SEARCH_CX_LOW)
9293 num_init_candts = 7;
9294 else if(e_search_complexity == SEARCH_CX_MED)
9295 num_init_candts = 13;
9296 else if(e_search_complexity == SEARCH_CX_HIGH)
9297 num_init_candts = 18;
9298 else
9299 ASSERT(0);
9300 }
9301 else
9302 {
9303 if(e_search_complexity == SEARCH_CX_LOW)
9304 num_init_candts = 5;
9305 else if(e_search_complexity == SEARCH_CX_MED)
9306 num_init_candts = 11;
9307 else if(e_search_complexity == SEARCH_CX_HIGH)
9308 num_init_candts = 16;
9309 else
9310 ASSERT(0);
9311 }
9312
9313 if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9314 {
9315 num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9316 }
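/* Summary of the counts chosen above:
   curr implicit / prev explicit : LOW 7, MED 13, HIGH 18 initial candidates
   all other combinations        : LOW 5, MED 11, HIGH 16
   ME_XTREME_SPEED_25 overrides both with
   NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25. */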
9317
9318 /*************************************************************************/
9319 /* The following search parameters are fixed throughout the search across*/
9320 /* all blks. So these are configured outside processing loop */
9321 /*************************************************************************/
9322 s_search_prms_blk.i4_num_init_candts = num_init_candts;
9323 s_search_prms_blk.i4_start_step = 1;
9324 s_search_prms_blk.i4_use_satd = 0;
9325 s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9326 /* we use recon only for encoded layers, otherwise it is not available */
9327 s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9328
9329 s_search_prms_blk.ps_search_candts = ps_search_candts;
9330 /* We use the same mv_range for all ref. pic. So assign to member 0 */
9331 if(s_search_prms_blk.i4_use_rec)
9332 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9333 else
9334 s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9335 /*************************************************************************/
9336 /* Initialize coordinates. Meaning as follows */
9337 /* blk_x : x coordinate of the search blk (8x8 here), in number of blks */
9338 /* blk_y : same as above, y coord. */
9339 /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
9340 /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
9341 /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
9342 /* corner of the picture. Always multiple of 64. */
9343 /* blk_id_in_ctb : encode order id of the blk in the ctb. */
9344 /*************************************************************************/
9345 blk_y = 0;
9346 blk_id_in_ctb = 0;
9347
9348 GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9349
9350 /* Get the number of sync units in a row based on encode/non encode layer */
9351 num_sync_units_in_row = num_blks_in_row;
9352
9353 /*************************************************************************/
9354 /* Picture limit on all 4 sides. This will be used to set mv limits for */
9355 /* every block given its coordinate. Note this assumes that the min amt */
9356 /* of padding to right of pic is equal to the blk size. If we go all the */
9357 /* way upto 64x64, then the min padding on right side of picture should */
9358 /* be 64, and also on bottom side of picture. */
9359 /*************************************************************************/
9360 SET_PIC_LIMIT(
9361 s_pic_limit_inp,
9362 ps_curr_layer->i4_pad_x_inp,
9363 ps_curr_layer->i4_pad_y_inp,
9364 ps_curr_layer->i4_wd,
9365 ps_curr_layer->i4_ht,
9366 s_search_prms_blk.i4_num_steps_post_refine);
9367
9368 SET_PIC_LIMIT(
9369 s_pic_limit_rec,
9370 ps_curr_layer->i4_pad_x_rec,
9371 ps_curr_layer->i4_pad_y_rec,
9372 ps_curr_layer->i4_wd,
9373 ps_curr_layer->i4_ht,
9374 s_search_prms_blk.i4_num_steps_post_refine);
9375
9376 /*************************************************************************/
9377 /* set the MV limit per ref. pic. */
9378 /* - P pic. : Based on the config params. */
9379 /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9380 /*************************************************************************/
9381 {
9382 WORD32 ref_ctr;
9383 /* Only for B/b pic. */
9384 if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9385 {
9386 WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9387 WORD32 cur_poc, ref_poc, abs_poc_diff;
9388
9389 cur_poc = ps_ctxt->i4_curr_poc;
9390
9391 /* Get abs MAX for symmetric search */
9392 i2_mv_y_per_poc = MAX(
9393 ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9394 (ABS(ps_ctxt->s_coarse_dyn_range_prms
9395 .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9396
9397 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9398 {
9399 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9400 abs_poc_diff = ABS((cur_poc - ref_poc));
9401 /* Get the cur. max MV based on POC distance */
9402 i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9403 i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9404
9405 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9406 as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9407 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9408 as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9409 }
9410 }
9411 else
9412 {
9413 /* Set the Config. File Params for P pic. */
9414 for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9415 {
9416 as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9417 as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9418 as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9419 as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9420 }
9421 }
9422 }
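/* Worked example for the B-picture branch above (hypothetical numbers): if
   the dynamic per-POC vertical range recorded for this layer is max +6 /
   min -9, then i2_mv_y_per_poc = MAX(6, ABS(-9)) = 9; for a reference 2 POCs
   away the vertical limit becomes MIN(9 * 2, i2_max_mv_y), applied
   symmetrically as [-18, +18] (subject to the layer clamp), while the
   horizontal limit always stays at [-i2_max_mv_x, +i2_max_mv_x]. */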
9423
9424 /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9425 if(e_me_quality_presets == ME_MEDIUM_SPEED)
9426 {
9427 i4_threshold_multiplier = 1;
9428 i4_threshold_divider = 4;
9429 }
9430 else if(e_me_quality_presets == ME_HIGH_SPEED)
9431 {
9432 i4_threshold_multiplier = 1;
9433 i4_threshold_divider = 2;
9434 }
9435 else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9436 {
9437 #if OLD_XTREME_SPEED
9438 /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9439 i4_temporal_layer = 1;
9440 #endif
9441 if(i4_temporal_layer == 0)
9442 {
9443 i4_threshold_multiplier = 3;
9444 i4_threshold_divider = 4;
9445 }
9446 else if(i4_temporal_layer == 1)
9447 {
9448 i4_threshold_multiplier = 3;
9449 i4_threshold_divider = 4;
9450 }
9451 else if(i4_temporal_layer == 2)
9452 {
9453 i4_threshold_multiplier = 1;
9454 i4_threshold_divider = 1;
9455 }
9456 else
9457 {
9458 i4_threshold_multiplier = 5;
9459 i4_threshold_divider = 4;
9460 }
9461 }
9462 else if(e_me_quality_presets == ME_HIGH_QUALITY)
9463 {
9464 i4_threshold_multiplier = 1;
9465 i4_threshold_divider = 1;
9466 }
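/* Net EIID threshold scale factors selected above (multiplier / divider):
   ME_MEDIUM_SPEED -> 1/4, ME_HIGH_SPEED -> 1/2, ME_HIGH_QUALITY -> 1/1,
   ME_XTREME_SPEED(_25): temporal layers 0 and 1 -> 3/4, layer 2 -> 1/1,
   deeper layers -> 5/4. Presets not handled by the chain above fall through
   with the two variables left unset here. */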
9467
9468 /*************************************************************************/
9469 /*************************************************************************/
9470 /*************************************************************************/
9471 /* START OF THE CORE LOOP */
9472 /* If Encode is 0, then we just loop over each blk */
9473 /*************************************************************************/
9474 /*************************************************************************/
9475 /*************************************************************************/
9476 while(0 == end_of_frame)
9477 {
9478 job_queue_t *ps_job;
9479 ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID
9480 WORD32 i4_ctb_row_ctr; //CTB row counter; equals (blk_y / 4)
9481 WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only
9482 //+3 to get ceil values when divided by 4
9483 WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9484 8 * 8; //assumes a 32x32 CTB at L1, i.e. 64 4x4 blocks; hardcoded for now
9485 //if a variable for the ctb size becomes available, derive this from it
9486 WORD32 offset_val, check_dep_pos, set_dep_pos;
9487 void *pv_hme_dep_mngr;
9488 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9489
9490 /* Get the current layer HME Dep Mngr */
9491 /* Note : Use layer_id - 1 in HME layers */
9492
9493 pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9494
9495 /* Get the current row from the job queue */
9496 ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9497 ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9498
9499 /* If all rows are done, set the end of process flag to 1, */
9500 /* and the current row to -1 */
9501 if(NULL == ps_job)
9502 {
9503 blk_y = -1;
9504 end_of_frame = 1;
9505
9506 continue;
9507 }
9508
9509 if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9510 {
9511 /* set the output dependency of current row */
9512 ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9513 continue;
9514 }
9515
9516 blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9517 blk_x = 0;
9518 i4_ctb_x = 0;
9519
9520 /* wait for Corresponding Pre intra Job to be completed */
9521 if(1 == ps_refine_prms->i4_layer_id)
9522 {
9523 volatile UWORD32 i4_l1_done;
9524 volatile UWORD32 *pi4_l1_done;
9525 pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9526 ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9527 i4_l1_done = *pi4_l1_done;
9528 while(!i4_l1_done)
9529 {
9530 i4_l1_done = *pi4_l1_done;
9531 }
9532 }
9533 /* Set Variables for Dep. Checking and Setting */
9534 set_dep_pos = blk_y + 1;
9535 if(blk_y > 0)
9536 {
9537 offset_val = 2;
9538 check_dep_pos = blk_y - 1;
9539 }
9540 else
9541 {
9542 /* First row should run without waiting */
9543 offset_val = -1;
9544 check_dep_pos = 0;
9545 }
9546
9547 /* EIID: calculate ed_blk_ctxt pointer for current row */
9548 /* valid only for layer-1; not verified or used for other layers */
9549 i4_ctb_row_ctr = blk_y / 4;
9550 ps_ed_blk_ctxt_curr_row =
9551 ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9552 i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only
9553 ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
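/* Example of the indexing above (hypothetical 1920x1080 source): L1 is
   960x540, so num_blks_in_row = 960 / 8 = 120 8x8 blocks and
   i4_num_ctbs_in_row = (120 + 3) / 4 = 30 32x32 CTBs per row; each such CTB
   holds 8 * 8 = 64 4x4 early-decision blocks, which is the stride used to
   step ps_ed_blk / ps_ed_ctb_l1 to the current CTB row. */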
9554
9555 /* for a non-encode layer, i4_ctb_x is the same as blk_x */
9556 /* loop over all the units in a row */
9557 for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9558 {
9559 ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIID
9560 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9561 WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9562
9563 /* Wait till top row block is processed */
9564 /* Currently checking till top right block*/
9565
9566 /* This top-row sync is needed only for presets */
9567 /* slower than ME_MEDIUM_SPEED; for faster presets all */
9568 /* candidates except L and C are projected from the coarser layer */
9569 if((ME_MEDIUM_SPEED > e_me_quality_presets))
9570 {
9571 if(i4_ctb_x < (num_sync_units_in_row - 1))
9572 {
9573 ihevce_dmgr_chk_row_row_sync(
9574 pv_hme_dep_mngr,
9575 i4_ctb_x,
9576 offset_val,
9577 check_dep_pos,
9578 0, /* Col Tile No. : Not supported in PreEnc*/
9579 ps_ctxt->thrd_id);
9580 }
9581 }
9582
9583 {
9584 /* for non encoder layer only one block is processed */
9585 num_blks_in_this_ctb = 1;
9586 }
9587
9588 /* EIID: derive ed_ctxt ptr for current CTB */
9589 ps_ed_blk_ctxt_curr_ctb =
9590 ps_ed_blk_ctxt_curr_row +
9591 (i4_ctb_blk_ctr *
9592 i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only
9593 ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9594
9595 /* loop over all the blocks in CTB will always be 1 */
9596 for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9597 {
9598 {
9599 /* non encode layer */
9600 blk_x = i4_ctb_x;
9601 blk_id_in_full_ctb = 0;
9602 s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9603 }
9604
9605 /* get the current input blk pointer */
9606 pos_x = blk_x << blk_size_shift;
9607 pos_y = blk_y << blk_size_shift;
9608 pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9609
9610 /*********************************************************************/
9611 /* replicate the inp buffer at blk or ctb level for each ref id, */
9612 /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9613 /* thereby avoiding a bloat up of memory. If we did all references */
9614 /* weighted pred, we will end up with a duplicate copy of each ref */
9615 /* at each layer, since we need to preserve the original reference. */
9616 /* ToDo: Need to observe performance with this mechanism and compare */
9617 /* with case where ref is weighted. */
9618 /*********************************************************************/
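/* Sketch of the idea above (illustrative numbers, not encoder state): for a
   fade where the reference is roughly half as bright as the input, wk ~= 2;
   searching the scaled input Ik = I / 2 against the unscaled reference gives
   approximately the same residual as matching I against 2 * ref, so one
   scaled input copy per weight replaces a weighted copy of every reference
   plane. fp_get_wt_inp below fills such scaled copies for the active
   references of this block. */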
9619 if(blk_id_in_ctb == 0)
9620 {
9621 fp_get_wt_inp(
9622 ps_curr_layer,
9623 &ps_ctxt->s_wt_pred,
9624 unit_size,
9625 pos_x,
9626 pos_y,
9627 unit_size,
9628 ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9629 ps_ctxt->i4_wt_pred_enable_flag);
9630 }
9631
9632 s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9633 s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9634 /* Select search results from a suitable search result in the context */
9635 {
9636 ps_search_results = &ps_ctxt->s_search_results_8x8;
9637 }
9638
9639 s_search_prms_blk.ps_search_results = ps_search_results;
9640
9641 /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9642 hme_reset_search_results(
9643 ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9644
9645 /* Loop across different Ref IDx */
9646 for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9647 {
9648 S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9649 S32 prev_blk_offset = 6;
9650 S32 resultid;
9651
9652 /*********************************************************************/
9653 /* For every blk in the picture, the search range needs to be derived*/
9654 /* Any blk can have any mv, but practical search constraints are */
9655 /* imposed by the picture boundary and amt of padding. */
9656 /*********************************************************************/
9657 /* MV limit is different based on ref. PIC */
9658 hme_derive_search_range(
9659 &s_range_prms_inp,
9660 &s_pic_limit_inp,
9661 &as_mv_limit[i1_ref_idx],
9662 pos_x,
9663 pos_y,
9664 blk_wd,
9665 blk_ht);
9666 hme_derive_search_range(
9667 &s_range_prms_rec,
9668 &s_pic_limit_rec,
9669 &as_mv_limit[i1_ref_idx],
9670 pos_x,
9671 pos_y,
9672 blk_wd,
9673 blk_ht);
9674
9675 s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9676 ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9677
9678 i4_num_srch_cands = 1;
9679
9680 if(1 != ps_refine_prms->i4_layer_id)
9681 {
9682 S32 x, y;
9683 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9684 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9685
9686 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9687 {
9688 hme_get_spatial_candt(
9689 ps_curr_layer,
9690 e_search_blk_size,
9691 blk_x,
9692 blk_y,
9693 i1_ref_idx,
9694 &as_top_neighbours[0],
9695 &as_left_neighbours[0],
9696 0,
9697 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9698 0,
9699 ps_refine_prms->i4_encode);
9700
9701 *ps_candt_tr = as_top_neighbours[3];
9702 *ps_candt_t = as_top_neighbours[1];
9703 *ps_candt_tl = as_top_neighbours[0];
9704 i4_num_srch_cands += 3;
9705 }
9706 else
9707 {
9708 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9709 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9710 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9711 search_node_t *ps_search_node;
9712 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9713 hme_mv_t *ps_mv, *ps_mv_base;
9714 S08 *pi1_ref_idx, *pi1_ref_idx_base;
9715 S32 jump = 1, mvs_in_blk, mvs_in_row;
9716 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9717
9718 if(i4_blk_size1 != i4_blk_size2)
9719 {
9720 blk_x_temp <<= 1;
9721 blk_y_temp <<= 1;
9722 jump = 2;
9723 if((i4_blk_size1 << 2) == i4_blk_size2)
9724 {
9725 blk_x_temp <<= 1;
9726 blk_y_temp <<= 1;
9727 jump = 4;
9728 }
9729 }
9730
9731 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9732 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9733
9734 /* Adjust the blk coord to point to top left locn */
9735 blk_x_temp -= 1;
9736 blk_y_temp -= 1;
9737
9738 /* Pick up the mvs from the location */
9739 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9740 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9741
9742 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9743 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9744
9745 ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9746 pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9747
9748 ps_mv_base = ps_mv;
9749 pi1_ref_idx_base = pi1_ref_idx;
9750
9751 ps_search_node = &as_left_neighbours[0];
9752 ps_mv = ps_mv_base + mvs_in_row;
9753 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9754 COPY_MV_TO_SEARCH_NODE(
9755 ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9756
9757 i4_num_srch_cands++;
9758 }
9759 }
9760 else
9761 {
9762 S32 x, y;
9763 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9764 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9765
9766 if(ME_MEDIUM_SPEED > e_me_quality_presets)
9767 {
9768 hme_get_spatial_candt_in_l1_me(
9769 ps_curr_layer,
9770 e_search_blk_size,
9771 blk_x,
9772 blk_y,
9773 i1_ref_idx,
9774 !ps_search_results->pu1_is_past[i1_ref_idx],
9775 &as_top_neighbours[0],
9776 &as_left_neighbours[0],
9777 0,
9778 ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9779 0,
9780 ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9781 ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9782
9783 *ps_candt_tr = as_top_neighbours[3];
9784 *ps_candt_t = as_top_neighbours[1];
9785 *ps_candt_tl = as_top_neighbours[0];
9786
9787 i4_num_srch_cands += 3;
9788 }
9789 else
9790 {
9791 layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9792 S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9793 S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9794 S32 i4_mv_pos_in_implicit_array;
9795 search_node_t *ps_search_node;
9796 S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9797 hme_mv_t *ps_mv, *ps_mv_base;
9798 S08 *pi1_ref_idx, *pi1_ref_idx_base;
9799 S32 jump = 1, mvs_in_blk, mvs_in_row;
9800 S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9801 U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9802 S32 i4_num_results_in_given_dir =
9803 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9804 ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9805 : (ps_layer_mvbank->i4_num_mvs_per_ref *
9806 ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9807
9808 if(i4_blk_size1 != i4_blk_size2)
9809 {
9810 blk_x_temp <<= 1;
9811 blk_y_temp <<= 1;
9812 jump = 2;
9813 if((i4_blk_size1 << 2) == i4_blk_size2)
9814 {
9815 blk_x_temp <<= 1;
9816 blk_y_temp <<= 1;
9817 jump = 4;
9818 }
9819 }
9820
9821 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9822 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9823
9824 /* Adjust the blk coord to point to top left locn */
9825 blk_x_temp -= 1;
9826 blk_y_temp -= 1;
9827
9828 /* Pick up the mvs from the location */
9829 i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9830 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9831
9832 i4_offset +=
9833 ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9834 ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9835 : 0);
9836
9837 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9838 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9839
9840 ps_mv_base = ps_mv;
9841 pi1_ref_idx_base = pi1_ref_idx;
9842
9843 {
9844 /* ps_mv and pi1_ref_idx now point to the top left locn */
9845 ps_search_node = &as_left_neighbours[0];
9846 ps_mv = ps_mv_base + mvs_in_row;
9847 pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9848
9849 i4_mv_pos_in_implicit_array =
9850 hme_find_pos_of_implicitly_stored_ref_id(
9851 pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9852
9853 if(-1 != i4_mv_pos_in_implicit_array)
9854 {
9855 COPY_MV_TO_SEARCH_NODE(
9856 ps_search_node,
9857 &ps_mv[i4_mv_pos_in_implicit_array],
9858 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9859 i1_ref_idx,
9860 shift);
9861 }
9862 else
9863 {
9864 ps_search_node->u1_is_avail = 0;
9865 ps_search_node->s_mv.i2_mvx = 0;
9866 ps_search_node->s_mv.i2_mvy = 0;
9867 ps_search_node->i1_ref_idx = i1_ref_idx;
9868 }
9869
9870 i4_num_srch_cands++;
9871 }
9872 }
9873 }
9874
9875 *ps_candt_l = as_left_neighbours[0];
9876
9877 /* when 16x16 is searched in an encode layer, and the prev layer */
9878 /* stores results for 4x4 blks, we project 5 candts corresponding */
9879 /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2). */
9880 /* However in other cases, only the (2,2) best and 2nd best are reqd */
9881 resultid = 0;
9882 pf_hme_project_coloc_candt(
9883 ps_candt_prj_coloc[0],
9884 ps_curr_layer,
9885 ps_coarse_layer,
9886 pos_x + 2,
9887 pos_y + 2,
9888 i1_ref_idx,
9889 resultid);
9890
9891 i4_num_srch_cands++;
9892
9893 resultid = 1;
9894 if(num_results_prev_layer > 1)
9895 {
9896 pf_hme_project_coloc_candt(
9897 ps_candt_prj_coloc[1],
9898 ps_curr_layer,
9899 ps_coarse_layer,
9900 pos_x + 2,
9901 pos_y + 2,
9902 i1_ref_idx,
9903 resultid);
9904
9905 i4_num_srch_cands++;
9906 }
9907
9908 resultid = 0;
9909
9910 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9911 {
9912 pf_hme_project_coloc_candt(
9913 ps_candt_prj_t[0],
9914 ps_curr_layer,
9915 ps_coarse_layer,
9916 pos_x,
9917 pos_y - prev_blk_offset,
9918 i1_ref_idx,
9919 resultid);
9920
9921 i4_num_srch_cands++;
9922 }
9923
9924 {
9925 pf_hme_project_coloc_candt(
9926 ps_candt_prj_br[0],
9927 ps_curr_layer,
9928 ps_coarse_layer,
9929 pos_x + next_blk_offset,
9930 pos_y + next_blk_offset,
9931 i1_ref_idx,
9932 resultid);
9933 pf_hme_project_coloc_candt(
9934 ps_candt_prj_bl[0],
9935 ps_curr_layer,
9936 ps_coarse_layer,
9937 pos_x - prev_blk_offset,
9938 pos_y + next_blk_offset,
9939 i1_ref_idx,
9940 resultid);
9941 pf_hme_project_coloc_candt(
9942 ps_candt_prj_r[0],
9943 ps_curr_layer,
9944 ps_coarse_layer,
9945 pos_x + next_blk_offset,
9946 pos_y,
9947 i1_ref_idx,
9948 resultid);
9949 pf_hme_project_coloc_candt(
9950 ps_candt_prj_b[0],
9951 ps_curr_layer,
9952 ps_coarse_layer,
9953 pos_x,
9954 pos_y + next_blk_offset,
9955 i1_ref_idx,
9956 resultid);
9957
9958 i4_num_srch_cands += 4;
9959
9960 if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9961 {
9962 pf_hme_project_coloc_candt(
9963 ps_candt_prj_tr[0],
9964 ps_curr_layer,
9965 ps_coarse_layer,
9966 pos_x + next_blk_offset,
9967 pos_y - prev_blk_offset,
9968 i1_ref_idx,
9969 resultid);
9970 pf_hme_project_coloc_candt(
9971 ps_candt_prj_tl[0],
9972 ps_curr_layer,
9973 ps_coarse_layer,
9974 pos_x - prev_blk_offset,
9975 pos_y - prev_blk_offset,
9976 i1_ref_idx,
9977 resultid);
9978
9979 i4_num_srch_cands += 2;
9980 }
9981 }
9982 if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9983 {
9984 resultid = 1;
9985 pf_hme_project_coloc_candt(
9986 ps_candt_prj_br[1],
9987 ps_curr_layer,
9988 ps_coarse_layer,
9989 pos_x + next_blk_offset,
9990 pos_y + next_blk_offset,
9991 i1_ref_idx,
9992 resultid);
9993 pf_hme_project_coloc_candt(
9994 ps_candt_prj_bl[1],
9995 ps_curr_layer,
9996 ps_coarse_layer,
9997 pos_x - prev_blk_offset,
9998 pos_y + next_blk_offset,
9999 i1_ref_idx,
10000 resultid);
10001 pf_hme_project_coloc_candt(
10002 ps_candt_prj_r[1],
10003 ps_curr_layer,
10004 ps_coarse_layer,
10005 pos_x + next_blk_offset,
10006 pos_y,
10007 i1_ref_idx,
10008 resultid);
10009 pf_hme_project_coloc_candt(
10010 ps_candt_prj_b[1],
10011 ps_curr_layer,
10012 ps_coarse_layer,
10013 pos_x,
10014 pos_y + next_blk_offset,
10015 i1_ref_idx,
10016 resultid);
10017
10018 i4_num_srch_cands += 4;
10019
10020 pf_hme_project_coloc_candt(
10021 ps_candt_prj_tr[1],
10022 ps_curr_layer,
10023 ps_coarse_layer,
10024 pos_x + next_blk_offset,
10025 pos_y - prev_blk_offset,
10026 i1_ref_idx,
10027 resultid);
10028 pf_hme_project_coloc_candt(
10029 ps_candt_prj_tl[1],
10030 ps_curr_layer,
10031 ps_coarse_layer,
10032 pos_x - prev_blk_offset,
10033 pos_y - prev_blk_offset,
10034 i1_ref_idx,
10035 resultid);
10036 pf_hme_project_coloc_candt(
10037 ps_candt_prj_t[1],
10038 ps_curr_layer,
10039 ps_coarse_layer,
10040 pos_x,
10041 pos_y - prev_blk_offset,
10042 i1_ref_idx,
10043 resultid);
10044
10045 i4_num_srch_cands += 3;
10046 }
10047
10048 /* Note this block also clips the MV range for all candidates */
10049 #ifdef _DEBUG
10050 {
10051 S32 candt;
10052 range_prms_t *ps_range_prms;
10053
10054 S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10055 for(candt = 0; candt < i4_num_srch_cands; candt++)
10056 {
10057 search_node_t *ps_search_node;
10058
10059 ps_search_node =
10060 s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10061
10062 ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10063
10064 if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10065 (ps_search_node->i1_ref_idx < 0))
10066 {
10067 ASSERT(0);
10068 }
10069 }
10070 }
10071 #endif
10072
10073 {
10074 S32 srch_cand;
10075 S32 num_unique_nodes = 0;
10076 S32 num_nodes_searched = 0;
10077 S32 num_best_cand = 0;
10078 S08 i1_grid_enable = 0;
10079 search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10080 /* holds the list of valid partitions to search, terminated by -1 */
10081 S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10082 S32 center_x;
10083 S32 center_y;
10084
10085 /* indicates if the centre point of grid needs to be explicitly added for search */
10086 S32 add_centre = 0;
10087
10088 memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10089 center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10090 center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
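/* Dedup sketch (presumed behaviour of INSERT_NEW_NODE / au4_unique_node_map):
   each accepted (mvx, mvy) is marked in the bitmap relative to the projected
   colocated centre (center_x, center_y); a candidate that clips onto an
   already-marked position is dropped, so as_unique_search_nodes ends up
   holding only distinct MVs. */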
10091
10092 for(srch_cand = 0;
10093 (srch_cand < i4_num_srch_cands) &&
10094 (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10095 srch_cand++)
10096 {
10097 search_node_t s_search_node_temp =
10098 s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10099
10100 s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX;
10101
10102 /* Clip the motion vectors as well here, since after clipping
10103 two candidates can become the same and will be removed during deduplication */
10104 CLIP_MV_WITHIN_RANGE(
10105 s_search_node_temp.s_mv.i2_mvx,
10106 s_search_node_temp.s_mv.i2_mvy,
10107 s_search_prms_blk.aps_mv_range[0],
10108 ps_refine_prms->i4_num_steps_fpel_refine,
10109 ps_refine_prms->i4_num_steps_hpel_refine,
10110 ps_refine_prms->i4_num_steps_qpel_refine);
10111
10112 /* PT_C */
10113 INSERT_NEW_NODE(
10114 as_unique_search_nodes,
10115 num_unique_nodes,
10116 s_search_node_temp,
10117 0,
10118 au4_unique_node_map,
10119 center_x,
10120 center_y,
10121 1);
10122
10123 num_nodes_searched += 1;
10124 }
10125 num_unique_nodes =
10126 MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10127
10128 /* If the number of candidates projected / to be refined is 2 or more,
10129 then filter them and choose the best two here */
10130 if(num_unique_nodes >= 2)
10131 {
10132 S32 num_results;
10133 S32 cnt;
10134 S32 *pi4_valid_part_ids;
10135 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10136 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10137 pi4_valid_part_ids = &ai4_valid_part_ids[0];
10138
10139 /* pi4_valid_part_ids is updated inside */
10140 hme_pred_search_no_encode(
10141 &s_search_prms_blk,
10142 ps_curr_layer,
10143 &ps_ctxt->s_wt_pred,
10144 pi4_valid_part_ids,
10145 1,
10146 e_me_quality_presets,
10147 i1_grid_enable,
10148 (ihevce_me_optimised_function_list_t *)
10149 ps_ctxt->pv_me_optimised_function_list
10150
10151 );
10152
10153 num_best_cand = 0;
10154 cnt = 0;
10155 num_results = ps_search_results->u1_num_results_per_part;
10156
10157 while((id = pi4_valid_part_ids[cnt++]) >= 0)
10158 {
10159 num_results =
10160 MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10161
10162 for(i = 0; i < num_results; i++)
10163 {
10164 search_node_t s_search_node_temp;
10165 s_search_node_temp =
10166 *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10167 if(s_search_node_temp.i1_ref_idx >= 0)
10168 {
10169 INSERT_NEW_NODE_NOMAP(
10170 as_best_two_proj_node,
10171 num_best_cand,
10172 s_search_node_temp,
10173 0);
10174 }
10175 }
10176 }
10177 }
10178 else
10179 {
10180 add_centre = 1;
10181 num_best_cand = num_unique_nodes;
10182 as_best_two_proj_node[0] = as_unique_search_nodes[0];
10183 }
10184
10185 num_unique_nodes = 0;
10186 num_nodes_searched = 0;
10187
10188 if(1 == num_best_cand)
10189 {
10190 search_node_t s_search_node_temp = as_best_two_proj_node[0];
10191 S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10192 S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10193 S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10194
10195 i1_grid_enable = 1;
10196
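/* With a single surviving candidate, the eight full-pel neighbours at
   offsets (+-1, +-1), (+-1, 0) and (0, +-1) around it are enumerated
   explicitly so the search reduces to one small grid (i1_grid_enable = 1);
   the centre itself is appended only when add_centre is set, i.e. when it
   was not already evaluated during the filtering pass above. */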
10197 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10198 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10199 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200
10201 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10202 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10203 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204
10205 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10206 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10207 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208
10209 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10210 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10211 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212
10213 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10214 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10215 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10216
10217 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10218 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10219 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10220
10221 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10222 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10223 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10224
10225 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10226 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10227 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10228
10229 if(add_centre)
10230 {
10231 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10232 as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10233 as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10234 }
10235 }
10236 else
10237 {
10238 /* For the candidates where refinement was required, choose the best two */
10239 for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10240 {
10241 search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10242 WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10243 WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10244
10245 /* Because there may not be two unique best candidates (due to clipping),
10246 the second best candidate can be uninitialized; ignore it */
10247 if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10248 s_search_node_temp.i1_ref_idx < 0)
10249 {
10250 num_nodes_searched++;
10251 continue;
10252 }
10253
10254 /* PT_C */
10255 /* Since the center point has already been evaluated and best results are persistent,
10256 it will not be evaluated again */
10257 if(add_centre) /* centre point added explicitly again if search results were not updated */
10258 {
10259 INSERT_NEW_NODE(
10260 as_unique_search_nodes,
10261 num_unique_nodes,
10262 s_search_node_temp,
10263 0,
10264 au4_unique_node_map,
10265 center_x,
10266 center_y,
10267 1);
10268 }
10269
10270 /* PT_L */
10271 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10272 s_search_node_temp.s_mv.i2_mvy = mv_y;
10273 INSERT_NEW_NODE(
10274 as_unique_search_nodes,
10275 num_unique_nodes,
10276 s_search_node_temp,
10277 0,
10278 au4_unique_node_map,
10279 center_x,
10280 center_y,
10281 1);
10282
10283 /* PT_T */
10284 s_search_node_temp.s_mv.i2_mvx = mv_x;
10285 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10286 INSERT_NEW_NODE(
10287 as_unique_search_nodes,
10288 num_unique_nodes,
10289 s_search_node_temp,
10290 0,
10291 au4_unique_node_map,
10292 center_x,
10293 center_y,
10294 1);
10295
10296 /* PT_R */
10297 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10298 s_search_node_temp.s_mv.i2_mvy = mv_y;
10299 INSERT_NEW_NODE(
10300 as_unique_search_nodes,
10301 num_unique_nodes,
10302 s_search_node_temp,
10303 0,
10304 au4_unique_node_map,
10305 center_x,
10306 center_y,
10307 1);
10308
10309 /* PT_B */
10310 s_search_node_temp.s_mv.i2_mvx = mv_x;
10311 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10312 INSERT_NEW_NODE(
10313 as_unique_search_nodes,
10314 num_unique_nodes,
10315 s_search_node_temp,
10316 0,
10317 au4_unique_node_map,
10318 center_x,
10319 center_y,
10320 1);
10321
10322 /* PT_TL */
10323 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10324 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10325 INSERT_NEW_NODE(
10326 as_unique_search_nodes,
10327 num_unique_nodes,
10328 s_search_node_temp,
10329 0,
10330 au4_unique_node_map,
10331 center_x,
10332 center_y,
10333 1);
10334
10335 /* PT_TR */
10336 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10337 s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10338 INSERT_NEW_NODE(
10339 as_unique_search_nodes,
10340 num_unique_nodes,
10341 s_search_node_temp,
10342 0,
10343 au4_unique_node_map,
10344 center_x,
10345 center_y,
10346 1);
10347
10348 /* PT_BL */
10349 s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10350 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10351 INSERT_NEW_NODE(
10352 as_unique_search_nodes,
10353 num_unique_nodes,
10354 s_search_node_temp,
10355 0,
10356 au4_unique_node_map,
10357 center_x,
10358 center_y,
10359 1);
10360
10361 /* PT_BR */
10362 s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10363 s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10364 INSERT_NEW_NODE(
10365 as_unique_search_nodes,
10366 num_unique_nodes,
10367 s_search_node_temp,
10368 0,
10369 au4_unique_node_map,
10370 center_x,
10371 center_y,
10372 1);
10373 }
10374 }
10375
10376 s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10377 s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10378
10379 /*****************************************************************/
10380 /* Call the search algorithm, this includes: */
10381 /* Pre-Search-Refinement (for coarse candts) */
10382 /* Search on each candidate */
10383 /* Post Search Refinement on winners/other new candidates */
10384 /*****************************************************************/
10385
10386 hme_pred_search_no_encode(
10387 &s_search_prms_blk,
10388 ps_curr_layer,
10389 &ps_ctxt->s_wt_pred,
10390 ai4_valid_part_ids,
10391 0,
10392 e_me_quality_presets,
10393 i1_grid_enable,
10394 (ihevce_me_optimised_function_list_t *)
10395 ps_ctxt->pv_me_optimised_function_list);
10396
10397 i1_grid_enable = 0;
10398 }
10399 }
10400
10401 /* for non encode layer update MV and end processing for block */
10402 {
10403 WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10404 search_node_t *ps_search_node;
10405 /* now update the reqd results back to the layer mv bank. */
10406 if(1 == ps_refine_prms->i4_layer_id)
10407 {
10408 hme_update_mv_bank_in_l1_me(
10409 ps_search_results,
10410 ps_curr_layer->ps_layer_mvbank,
10411 blk_x,
10412 blk_y,
10413 &s_mv_update_prms);
10414 }
10415 else
10416 {
10417 hme_update_mv_bank_noencode(
10418 ps_search_results,
10419 ps_curr_layer->ps_layer_mvbank,
10420 blk_x,
10421 blk_y,
10422 &s_mv_update_prms);
10423 }
10424
10425 /* UPDATE the MIN and MAX MVs for Dynamic Search Range for each ref. pic. */
10426 /* Only for P pic. For P, both flags are 0; I and B have them mutually exclusive */
10427 if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10428 {
10429 WORD32 i4_j;
10430 layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10431
10432 //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10433 /* Not considering this for Dyn. Search Update */
10434 {
                            for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
                                i4_ref_id++)
                            {
                                ps_search_node =
                                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

                                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
                                {
                                    hme_update_dynamic_search_params(
                                        &ps_ctxt->s_coarse_dyn_range_prms
                                             .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
                                                               [i4_ref_id],
                                        ps_search_node->s_mv.i2_mvy);

                                    ps_search_node++;
                                }
                            }
                        }
                    }

                    if(1 == ps_refine_prms->i4_layer_id)
                    {
                        WORD32 wt_pred_val, log_wt_pred_val;
                        WORD32 ref_id_of_nearest_poc = 0;
                        WORD32 max_val = 0x7fffffff;
                        WORD32 max_l0_val = 0x7fffffff;
                        WORD32 max_l1_val = 0x7fffffff;
                        WORD32 cur_val;
                        WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;

                        WORD32 bestl0_sad = 0x7fffffff;
                        WORD32 bestl1_sad = 0x7fffffff;
                        search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;

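                        /* Scan the best 2Nx2N result of every reference: convert its SAD to the */
                        /* weighted-prediction domain and track the overall minimum cost and SAD. */
                        /* For P pictures, also note the reference whose POC is nearest to the current POC. */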
                        for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
                            i4_ref_id++)
                        {
                            wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
                            log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;

                            ps_search_node =
                                ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

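                            /* Weighted SAD: scale the SAD by this reference's weighted-prediction */
                            /* weight with rounding, i.e. (sad * wt + (1 << log_wdc) / 2) >> log_wdc */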
                            i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
                                                     ((1 << log_wt_pred_val) >> 1)) >>
                                                    log_wt_pred_val;

                            i4_local_cost_weighted_pred =
                                i4_local_weighted_sad +
                                (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
                            //This loop is redundant as the results are already sorted by total cost
                            //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
                            {
                                if(i4_local_cost_weighted_pred < min_cost)
                                {
                                    min_cost = i4_local_cost_weighted_pred;
                                    min_sad = i4_local_weighted_sad;
                                }
                            }

                            /* For P frames, find the reference with the nearest POC (either a P or an I frame) */
                            if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
                            {
                                if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
                                {
                                    cur_val =
                                        ABS(ps_ctxt->i4_curr_poc -
                                            ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
                                    if(cur_val < max_val)
                                    {
                                        max_val = cur_val;
                                        ref_id_of_nearest_poc = i4_ref_id;
                                    }
                                }
                            }
                        }
                        /* Store the ME cost w.r.t. the past frame, only for P frames */
                        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
                        {
                            if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
                            {
                                WORD16 i2_mvx, i2_mvy;

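                                /* Locate this 8x8 block within its CTB (the CTB spans 4 such blocks */
                                /* per side at this layer, hence the * 4) and convert the raster */
                                /* offset into a z-scan index */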
                                WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                                WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                                WORD32 z_scan_idx =
                                    gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
                                WORD32 wt, log_wt;

                                /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
                                    <= (1 + ps_ctxt->num_b_frms));*/

                                /* Obtain mvx and mvy of the best 2Nx2N result for the nearest reference */
                                i2_mvx =
                                    ps_search_results
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                        ->s_mv.i2_mvx;
                                i2_mvy =
                                    ps_search_results
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                        ->s_mv.i2_mvy;

                                /* Fetch the weighted-prediction weight and log weight-denominator */
                                /* for the nearest reference */
                                wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
                                log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;

                                /* Register the weighted min SAD and SAD cost for L1 ME in the block context */
                                ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
                                    ((ps_search_results
                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                          ->i4_sad *
                                      wt) +
                                     ((1 << log_wt) >> 1)) >>
                                    log_wt;
                                ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
                                    ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
                                    (ps_search_results
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                         ->i4_tot_cost -
                                     ps_search_results
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                         ->i4_sad);
                                /* For complexity change detection */
                                ps_ctxt->i4_num_blks++;
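                                /* A block counts as high-SAD when its weighted SAD cost exceeds */
                                /* (1 + num_b_frms) per pixel of the 8x8 block */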
                                if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
                                   (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
                                {
                                    ps_ctxt->i4_num_blks_high_sad++;
                                }
                            }
                        }
                    }

                    /* EIID: Early inter-intra decision */
                    /* Tap the L1-level SAD for the inter vs. intra decision; early decisions */
                    /* are disabled for high-quality presets and for I pictures */
                    if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
                       (!ps_ctxt->s_frm_prms.is_i_pic))
                    {
                        if(1 == ps_refine_prms->i4_layer_id)
                        {
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
                            ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                            WORD32 z_scan_idx =
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
                            ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;

                            /* Register the min cost for L1 ME in the block context */
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
                                i4_min_sad_cost_8x8_block;
                            i4_num_comparisions++;

                            /* Take the early inter-intra decision here */
                            ps_curr_ed_blk_ctxt->intra_or_inter = 3; /* init: evaluate both intra and inter */
#if DISABLE_INTRA_IN_BPICS
                            if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
                               (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
                            {
                                ps_curr_ed_blk_ctxt->intra_or_inter =
                                    2; /* evaluate only inter for B pictures in higher temporal layers */
                                i4_num_inter_wins++;
                            }
                            else
#endif
                            {
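                                /* Inter wins when the L1 ME SAD cost is below the L1 intra (IPE) SAD */
                                /* cost scaled by i4_threshold_multiplier / i4_threshold_divider */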
                                if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
                                   ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
                                     i4_threshold_multiplier) /
                                    i4_threshold_divider))
                                {
                                    ps_curr_ed_blk_ctxt->intra_or_inter =
                                        2; /* evaluate only inter since the inter cost is lower */
                                    i4_num_inter_wins++;
                                }
                            }

                            //{
                            //    DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
                            //    blk_x,blk_y,
                            //    i4_ctb_blk_ctr, i4_ctb_row_ctr,
                            //    ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
                            //    i4_min_sad_cost_8x8_block
                            //    );
                            //}

                        } //end of layer-1
                    } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
                    else
                    {
                        if(1 == ps_refine_prms->i4_layer_id)
                        {
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                            WORD32 z_scan_idx =
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];

                            /* Register the min cost for L1 ME in the block context */
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
                                i4_min_sad_cost_8x8_block;
                        }
                    }
                    if(1 == ps_refine_prms->i4_layer_id)
                    {
                        WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                        WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                        WORD32 z_scan_idx =
                            gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];

                        ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
                            min_sad;

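                        /* Accumulate the frame-level L1 cost and SAD using the cheaper of the */
                        /* ME and intra (IPE) estimates for this 8x8 block, and record that SAD */
                        /* as the block's best L1 SAD */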
                        if(min_cost <
                           ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
                        {
                            ps_ctxt->i4_L1_hme_best_cost += min_cost;
                            ps_ctxt->i4_L1_hme_sad += min_sad;
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
                        }
                        else
                        {
                            ps_ctxt->i4_L1_hme_best_cost +=
                                ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
                            ps_ctxt->i4_L1_hme_sad +=
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
                        }
                    }
                }
            }

            /* Update the dependency manager with the number of blocks processed in the current row */
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
            {
                ihevce_dmgr_set_row_row_sync(
                    pv_hme_dep_mngr,
                    (i4_ctb_x + 1),
                    blk_y,
                    0 /* Col Tile No. : Not supported in PreEnc */);
            }
        }

        /* Set the output dependency after completion of the row */
        ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
    }
}