xref: /aosp_15_r20/external/libavc/encoder/svc/isvce_me.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 /**
22  *******************************************************************************
23  * @file
24  *  isvce_me.c
25  *
26  * @brief
27  *  Contains definition of functions for motion estimation
28  *
29  * @author
30  *  ittiam
31  *
32  * @par List of Functions:
33  *  - isvce_init_mv_bits()
34  *  - isvce_skip_analysis_chroma()
35  *  - isvce_skip_analysis_luma()
36  *  - isvce_analyse_skip()
37  *  - isvce_get_search_candidates()
38  *  - isvce_find_skip_motion_vector()
39  *  - isvce_get_mv_predictor()
40  *  - isvce_mv_pred()
41  *  - isvce_mv_pred_me()
42  *  - isvce_init_me()
43  *  - isvce_compute_me()
44  *  - isvce_compute_me_nmb()
45  *
46  * @remarks
47  *  None
48  *
49  *******************************************************************************
50  */
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System include files */
57 #include <stdio.h>
58 #include <assert.h>
59 #include <limits.h>
60 #include <stdbool.h>
61 
62 /* User include files */
63 #include "ih264_typedefs.h"
64 #include "ih264_macros.h"
65 #include "isvc_macros.h"
66 #include "ih264_platform_macros.h"
67 #include "iv2.h"
68 #include "ive2.h"
69 #include "ithread.h"
70 #include "ih264_platform_macros.h"
71 #include "isvc_defs.h"
72 #include "ime_defs.h"
73 #include "ime_distortion_metrics.h"
74 #include "ime_structs.h"
75 #include "isvc_structs.h"
76 #include "isvc_trans_quant_itrans_iquant.h"
77 #include "isvc_inter_pred_filters.h"
78 #include "isvc_mem_fns.h"
79 #include "ih264_padding.h"
80 #include "ih264_intra_pred_filters.h"
81 #include "ih264_deblk_edge_filters.h"
82 #include "isvc_cabac_tables.h"
83 #include "isvce_defs.h"
84 #include "ih264e_error.h"
85 #include "ih264e_bitstream.h"
86 #include "irc_cntrl_param.h"
87 #include "irc_frame_info_collector.h"
88 #include "isvce_rate_control.h"
89 #include "isvce_cabac_structs.h"
90 #include "isvce_structs.h"
91 #include "isvce_globals.h"
92 #include "isvce_me.h"
93 #include "ime.h"
94 #include "ih264_debug.h"
95 #include "ih264e_intra_modes_eval.h"
96 #include "isvce_core_coding.h"
97 #include "isvce_mc.h"
98 #include "ih264e_debug.h"
99 #include "ih264e_half_pel.h"
100 #include "ime_statistics.h"
101 #include "ih264e_platform_macros.h"
102 #include "isvce_defs.h"
103 #include "isvce_structs.h"
104 #include "isvce_ilp_mv_utils.h"
105 #include "isvce_utils.h"
106 
107 /*****************************************************************************/
108 /* Function Definitions                                                      */
109 /*****************************************************************************/
110 
111 /**
112 *******************************************************************************
113 *
114 * @brief Diamond Search
115 *
116 * @par Description:
117 *  This function computes the sad at vertices of several layers of diamond grid
118 *  at a time. The number of layers of diamond grid that would be evaluated is
119 *  configurable.The function computes the sad at vertices of a diamond grid. If
120 *  the sad at the center of the diamond grid is lesser than the sad at any other
121 *  point of the diamond grid, the function marks the candidate Mb partition as
122 *  mv.
123 *
124 * @param[in] ps_mb_part
125 *  pointer to current mb partition ctxt with respect to ME
126 *
127 * @param[in] ps_me_ctxt
128 *  pointer to me context
129 *
130 * @param[in] u4_lambda_motion
131 *  lambda motion
132 *
133 * @param[in] u4_enable_fast_sad
134 *  enable/disable fast sad computation
135 *
136 * @returns  mv pair & corresponding distortion and cost
137 *
138 * @remarks Diamond Srch, radius is 1
139 *
140 *******************************************************************************
141 */
isvce_diamond_search_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)142 static void isvce_diamond_search_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
143 {
144     /* MB partition info */
145     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
146 
147     /* lagrange parameter */
148     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
149 
150     /* srch range*/
151     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
152     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
153     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
154     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
155 
156     /* pointer to src macro block */
157     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
158     UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
159 
160     /* strides */
161     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
162     WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
163 
164     /* least cost */
165     WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
166 
167     /* least sad */
168     WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
169 
170     /* mv pair */
171     WORD16 i2_mvx, i2_mvy;
172 
173     /* mv bits */
174     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
175 
176     /* temp var */
177     WORD32 i4_cost[4];
178     WORD32 i4_sad[4];
179     UWORD8 *pu1_ref;
180     WORD16 i2_mv_u_x, i2_mv_u_y;
181 
182     /* Diamond search Iteration Max Cnt */
183     WORD64 i8_num_layers = ps_me_ctxt->u4_num_layers;
184 
185     /* mv with best sad during initial evaluation */
186     i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
187     i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
188 
189     i2_mv_u_x = i2_mvx;
190     i2_mv_u_y = i2_mvy;
191 
192     while(i8_num_layers--)
193     {
194         /* FIXME : is this the write way to check for out of bounds ? */
195         if((i2_mvx - 1 < i4_srch_range_w) || (i2_mvx + 1 > i4_srch_range_e) ||
196            (i2_mvy - 1 < i4_srch_range_n) || (i2_mvy + 1 > i4_srch_range_s))
197         {
198             break;
199         }
200 
201         pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
202 
203         ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref, pu1_curr_mb, i4_ref_strd, i4_src_strd,
204                                                 i4_sad);
205 
206         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
207         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
208         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
209         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
210 
211         /* compute cost */
212         i4_cost[0] =
213             i4_sad[0] +
214             u4_lambda_motion * (pu1_mv_bits[((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
215                                 pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
216         i4_cost[1] =
217             i4_sad[1] +
218             u4_lambda_motion * (pu1_mv_bits[((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
219                                 pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
220         i4_cost[2] =
221             i4_sad[2] +
222             u4_lambda_motion * (pu1_mv_bits[(i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
223                                 pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
224         i4_cost[3] =
225             i4_sad[3] +
226             u4_lambda_motion * (pu1_mv_bits[(i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
227                                 pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
228 
229         if(i4_cost_least > i4_cost[0])
230         {
231             i4_cost_least = i4_cost[0];
232             i4_distortion_least = i4_sad[0];
233 
234             i2_mv_u_x = (i2_mvx - 1);
235             i2_mv_u_y = i2_mvy;
236         }
237 
238         if(i4_cost_least > i4_cost[1])
239         {
240             i4_cost_least = i4_cost[1];
241             i4_distortion_least = i4_sad[1];
242 
243             i2_mv_u_x = (i2_mvx + 1);
244             i2_mv_u_y = i2_mvy;
245         }
246 
247         if(i4_cost_least > i4_cost[2])
248         {
249             i4_cost_least = i4_cost[2];
250             i4_distortion_least = i4_sad[2];
251 
252             i2_mv_u_x = i2_mvx;
253             i2_mv_u_y = i2_mvy - 1;
254         }
255 
256         if(i4_cost_least > i4_cost[3])
257         {
258             i4_cost_least = i4_cost[3];
259             i4_distortion_least = i4_sad[3];
260 
261             i2_mv_u_x = i2_mvx;
262             i2_mv_u_y = i2_mvy + 1;
263         }
264 
265         if((i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
266         {
267             ps_mb_part->u4_exit = 1;
268             break;
269         }
270         else
271         {
272             i2_mvx = i2_mv_u_x;
273             i2_mvy = i2_mv_u_y;
274         }
275     }
276 
277     if(i4_cost_least < ps_mb_part->i4_mb_cost)
278     {
279         ps_mb_part->i4_mb_cost = i4_cost_least;
280         ps_mb_part->i4_mb_distortion = i4_distortion_least;
281         ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
282         ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
283     }
284 }
285 
286 /**
287 *******************************************************************************
288 *
289 * @brief This function computes the best motion vector among the tentative mv
290 * candidates chosen.
291 *
292 * @par Description:
293 *  This function determines the position in the search window at which the
294 *motion estimation should begin in order to minimise the number of search
295 *iterations.
296 *
297 * @param[in] ps_mb_part
298 *  pointer to current mb partition ctxt with respect to ME
299 *
300 * @param[in] u4_lambda_motion
301 *  lambda motion
302 *
303 * @param[in] u4_fast_flag
304 *  enable/disable fast sad computation
305 *
306 * @returns  mv pair & corresponding distortion and cost
307 *
308 * @remarks none
309 *
310 *******************************************************************************
311 */
312 
isvce_evaluate_init_srchposn_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)313 static void isvce_evaluate_init_srchposn_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
314 {
315     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
316 
317     /* candidate mv cnt */
318     UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
319 
320     /* list of candidate mvs */
321     ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
322 
323     /* pointer to src macro block */
324     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
325     UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
326 
327     /* strides */
328     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
329     WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
330 
331     /* enabled fast sad computation */
332     UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
333 
334     /* SAD(distortion metric) of an 8x8 block */
335     WORD32 i4_mb_distortion;
336 
337     /* cost = distortion + u4_lambda_motion * rate */
338     WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
339 
340     /* mb partitions info */
341     mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
342 
343     /* mv bits */
344     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
345 
346     /* temp var */
347     UWORD32 i, j;
348     WORD32 i4_srch_pos_idx = 0;
349     UWORD8 *pu1_ref = NULL;
350 
351     /* Carry out a search using each of the motion vector pairs identified above
352      * as predictors. */
353     /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
354     for(i = 0; i < u4_num_candidates; i++)
355     {
356         /* compute sad */
357         WORD32 c_sad = 1;
358 
359         for(j = 0; j < i; j++)
360         {
361             if((ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
362                (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy))
363             {
364                 c_sad = 0;
365                 break;
366             }
367         }
368         if(c_sad)
369         {
370             /* adjust ref pointer */
371             pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
372 
373             /* compute distortion */
374             ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](
375                 pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least,
376                 &i4_mb_distortion);
377 
378             DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
379             /* compute cost */
380             i4_mb_cost =
381                 i4_mb_distortion +
382                 u4_lambda_motion *
383                     (pu1_mv_bits[(ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
384                      pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
385 
386             if(i4_mb_cost < i4_mb_cost_least)
387             {
388                 i4_mb_cost_least = i4_mb_cost;
389 
390                 i4_distortion_least = i4_mb_distortion;
391 
392                 i4_srch_pos_idx = i;
393             }
394         }
395     }
396 
397     if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
398     {
399         ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
400         ps_mb_part->i4_mb_cost = i4_mb_cost_least;
401         ps_mb_part->i4_mb_distortion = i4_distortion_least;
402         ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
403         ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
404     }
405 }
406 
407 /**
408 *******************************************************************************
409 *
410 * @brief Searches for the best matching full pixel predictor within the search
411 * range
412 *
413 * @par Description:
414 *  This function begins by computing the mv predict vector for the current mb.
415 *  This is used for cost computations. Further basing on the algo. chosen, it
416 *  looks through a set of candidate vectors that best represent the mb a least
417 *  cost and returns this information.
418 *
419 * @param[in] ps_proc
420 *  pointer to current proc ctxt
421 *
422 * @param[in] ps_me_ctxt
423 *  pointer to me context
424 *
425 * @returns  mv pair & corresponding distortion and cost
426 *
427 * @remarks none
428 *
429 *******************************************************************************
430 */
isvce_full_pel_motion_estimation_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_ref_list)431 static void isvce_full_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_ref_list)
432 {
433     /* mb part info */
434     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
435 
436     /******************************************************************/
437     /* Modify Search range about initial candidate instead of zero mv */
438     /******************************************************************/
439     /*
440      * FIXME: The motion vectors in a way can become unbounded. It may so happen
441      * that MV might exceed the limit of the profile configured.
442      */
443     ps_me_ctxt->i4_srch_range_w =
444         MAX(ps_me_ctxt->i4_srch_range_w,
445             -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
446     ps_me_ctxt->i4_srch_range_e =
447         MIN(ps_me_ctxt->i4_srch_range_e,
448             ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
449     ps_me_ctxt->i4_srch_range_n =
450         MAX(ps_me_ctxt->i4_srch_range_n,
451             -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
452     ps_me_ctxt->i4_srch_range_s =
453         MIN(ps_me_ctxt->i4_srch_range_s,
454             ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
455 
456     /************************************************************/
457     /* Traverse about best initial candidate for mv             */
458     /************************************************************/
459 
460     switch(ps_me_ctxt->u4_me_speed_preset)
461     {
462         case DMND_SRCH:
463             isvce_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
464             break;
465         default:
466             assert(0);
467             break;
468     }
469 }
470 
471 /**
472 *******************************************************************************
473 *
474 * @brief Searches for the best matching sub pixel predictor within the search
475 * range
476 *
477 * @par Description:
478 *  This function begins by searching across all sub pixel sample points
479 *  around the full pel motion vector. The vector with least cost is chosen as
480 *  the mv for the current mb. If the skip mode is not evaluated while analysing
481 *  the initial search candidates then analyse it here and update the mv.
482 *
483 * @param[in] ps_proc
484 *  pointer to current proc ctxt
485 *
486 * @param[in] ps_me_ctxt
487 *  pointer to me context
488 *
489 * @returns none
490 *
491 * @remarks none
492 *
493 *******************************************************************************
494 */
isvce_sub_pel_motion_estimation_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)495 static void isvce_sub_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
496 {
497     /* pointers to src & ref macro block */
498     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
499 
500     /* pointers to ref. half pel planes */
501     UWORD8 *pu1_ref_mb_half_x;
502     UWORD8 *pu1_ref_mb_half_y;
503     UWORD8 *pu1_ref_mb_half_xy;
504 
505     /* pointers to ref. half pel planes */
506     UWORD8 *pu1_ref_mb_half_x_temp;
507     UWORD8 *pu1_ref_mb_half_y_temp;
508     UWORD8 *pu1_ref_mb_half_xy_temp;
509 
510     /* strides */
511     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
512 
513     WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
514 
515     /* mb partitions info */
516     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
517 
518     /* SAD(distortion metric) of an mb */
519     WORD32 i4_mb_distortion;
520     WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
521 
522     /* cost = distortion + u4_lambda_motion * rate */
523     WORD32 i4_mb_cost;
524     WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
525 
526     /*Best half pel buffer*/
527     UWORD8 *pu1_best_hpel_buf = NULL;
528 
529     /* mv bits */
530     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
531 
532     /* Motion vectors in full-pel units */
533     WORD16 mv_x, mv_y;
534 
535     /* lambda - lagrange constant */
536     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
537 
538     /* Flags to check if half pel points needs to be evaluated */
539     /**************************************/
540     /* 1 bit for each half pel candidate  */
541     /* bit 0 - half x = 1, half y = 0     */
542     /* bit 1 - half x = -1, half y = 0    */
543     /* bit 2 - half x = 0, half y = 1     */
544     /* bit 3 - half x = 0, half y = -1    */
545     /* bit 4 - half x = 1, half y = 1     */
546     /* bit 5 - half x = -1, half y = 1    */
547     /* bit 6 - half x = 1, half y = -1    */
548     /* bit 7 - half x = -1, half y = -1   */
549     /**************************************/
550     /* temp var */
551     WORD16 i2_mv_u_x, i2_mv_u_y;
552     WORD32 i, j;
553     WORD32 ai4_sad[8];
554 
555     WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
556 
557     i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
558     i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
559 
560     /************************************************************/
561     /* Evaluate half pel                                        */
562     /************************************************************/
563     mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
564     mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
565 
566     /**************************************************************/
567     /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
568     /* left side of full pel                                      */
569     /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
570     /* top  side of full pel                                      */
571     /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
572     /* on the top left side of full pel                           */
573     /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
574     /* default postions are                                       */
575     /* ps_me_ctxt->pu1_half_x = right halp_pel                    */
576     /*  ps_me_ctxt->pu1_half_y = bottom halp_pel                  */
577     /*  ps_me_ctxt->pu1_half_xy = bottom right halp_pel           */
578     /* Hence corresponding adjustments made here                  */
579     /**************************************************************/
580 
581     pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
582     pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
583     pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy =
584         ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
585 
586     ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, pu1_ref_mb_half_y,
587                                                  pu1_ref_mb_half_xy, i4_src_strd, i4_ref_strd,
588                                                  ai4_sad);
589 
590     /* Half x plane */
591     for(i = 0; i < 2; i++)
592     {
593         WORD32 mv_x_tmp = (mv_x << 2) + 2;
594         WORD32 mv_y_tmp = (mv_y << 2);
595 
596         mv_x_tmp -= (i * 4);
597 
598         i4_mb_distortion = ai4_sad[i];
599 
600         /* compute cost */
601         i4_mb_cost = i4_mb_distortion +
602                      u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
603                                          pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
604 
605         if(i4_mb_cost < i4_mb_cost_least)
606         {
607             i4_mb_cost_least = i4_mb_cost;
608 
609             i4_distortion_least = i4_mb_distortion;
610 
611             i2_mv_u_x = mv_x_tmp;
612 
613             i2_mv_u_y = mv_y_tmp;
614 
615             ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
616             pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
617 
618             i4_srch_pos_idx = 0;
619         }
620     }
621 
622     /* Half y plane */
623     for(i = 0; i < 2; i++)
624     {
625         WORD32 mv_x_tmp = (mv_x << 2);
626         WORD32 mv_y_tmp = (mv_y << 2) + 2;
627 
628         mv_y_tmp -= (i * 4);
629 
630         i4_mb_distortion = ai4_sad[2 + i];
631 
632         /* compute cost */
633         i4_mb_cost = i4_mb_distortion +
634                      u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
635                                          pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
636 
637         if(i4_mb_cost < i4_mb_cost_least)
638         {
639             i4_mb_cost_least = i4_mb_cost;
640 
641             i4_distortion_least = i4_mb_distortion;
642 
643             i2_mv_u_x = mv_x_tmp;
644 
645             i2_mv_u_y = mv_y_tmp;
646 
647             ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i * (i4_ref_strd);
648             pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i * (i4_ref_strd);
649 
650             i4_srch_pos_idx = 1;
651         }
652     }
653 
654     /* Half xy plane */
655     for(j = 0; j < 2; j++)
656     {
657         for(i = 0; i < 2; i++)
658         {
659             WORD32 mv_x_tmp = (mv_x << 2) + 2;
660             WORD32 mv_y_tmp = (mv_y << 2) + 2;
661 
662             mv_x_tmp -= (i * 4);
663             mv_y_tmp -= (j * 4);
664 
665             i4_mb_distortion = ai4_sad[4 + i + 2 * j];
666 
667             /* compute cost */
668             i4_mb_cost = i4_mb_distortion +
669                          u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
670                                              pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
671 
672             if(i4_mb_cost < i4_mb_cost_least)
673             {
674                 i4_mb_cost_least = i4_mb_cost;
675 
676                 i4_distortion_least = i4_mb_distortion;
677 
678                 i2_mv_u_x = mv_x_tmp;
679 
680                 i2_mv_u_y = mv_y_tmp;
681 
682                 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j * (i4_ref_strd) -i;
683                 pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j * (i4_ref_strd) -i;
684 
685                 i4_srch_pos_idx = 2;
686             }
687         }
688     }
689 
690     if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
691     {
692         ps_mb_part->i4_mb_cost = i4_mb_cost_least;
693         ps_mb_part->i4_mb_distortion = i4_distortion_least;
694         ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
695         ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
696         ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
697         ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
698     }
699 }
700 
701 /**
702 *******************************************************************************
703 *
704 * @brief This function computes cost of skip macroblocks
705 *
706 * @par Description:
707 *
708 * @param[in] ps_me_ctxt
709 *  pointer to me ctxt
710 *
711 *
712 * @returns  none
713 *
714 * @remarks
715 * NOTE: while computing the skip cost, do not enable early exit from compute
716 * sad function because, a negative bias gets added later
717 * Note tha the last ME candidate in me ctxt is taken as skip motion vector
718 *
719 *******************************************************************************
720 */
isvce_compute_skip_cost(isvce_me_ctxt_t * ps_me_ctxt,ime_mv_t * ps_skip_mv,mb_part_ctxt * ps_smb_part_info,UWORD32 u4_use_stat_sad,WORD32 i4_reflist,WORD32 i4_is_slice_type_b)721 static void isvce_compute_skip_cost(isvce_me_ctxt_t *ps_me_ctxt, ime_mv_t *ps_skip_mv,
722                                     mb_part_ctxt *ps_smb_part_info, UWORD32 u4_use_stat_sad,
723                                     WORD32 i4_reflist, WORD32 i4_is_slice_type_b)
724 {
725     /* SAD(distortion metric) of an mb */
726     WORD32 i4_mb_distortion;
727 
728     /* cost = distortion + u4_lambda_motion * rate */
729     WORD32 i4_mb_cost;
730 
731     /* temp var */
732     UWORD8 *pu1_ref = NULL;
733 
734     ime_mv_t s_skip_mv;
735 
736     s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
737     s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy + 2) >> 2;
738 
739     /* Check if the skip mv is out of bounds or subpel */
740     {
741         /* skip mv */
742         ime_mv_t s_clip_skip_mv;
743 
744         s_clip_skip_mv.i2_mvx =
745             CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
746         s_clip_skip_mv.i2_mvy =
747             CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
748 
749         if((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
750            (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || (ps_skip_mv->i2_mvx & 0x3) ||
751            (ps_skip_mv->i2_mvy & 0x3))
752         {
753             return;
754         }
755     }
756 
757     /* adjust ref pointer */
758     pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx +
759               (s_skip_mv.i2_mvy * ps_me_ctxt->ai4_rec_strd[i4_reflist]);
760 
761     if(u4_use_stat_sad == 1)
762     {
763         UWORD32 u4_is_nonzero;
764 
765         ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
766             ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
767             ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,
768             &u4_is_nonzero);
769 
770         if(u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
771         {
772             ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
773             ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
774         }
775     }
776     else
777     {
778         ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
779             ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
780             ps_me_ctxt->ai4_rec_strd[i4_reflist], INT_MAX, &i4_mb_distortion);
781 
782         if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
783         {
784             ps_me_ctxt->i4_min_sad = i4_mb_distortion;
785             ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
786         }
787     }
788 
789     /* for skip mode cost & distortion are identical
790      * But we shall add a bias to favor skip mode.
791      * Doc. JVT B118 Suggests SKIP_BIAS as 16.
792      * TODO : Empirical analysis of SKIP_BIAS is necessary */
793 
794     i4_mb_cost = i4_mb_distortion -
795                  (ps_me_ctxt->u4_lambda_motion *
796                   (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b));
797 
798     if(i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
799     {
800         ps_smb_part_info->i4_mb_cost = i4_mb_cost;
801         ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
802         ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
803         ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
804     }
805 }
806 
807 /**
808 *******************************************************************************
809 *
810 * @brief
811 *  This function populates the length of the codewords for motion vectors in the
812 *  range (-search range, search range) in pixels
813 *
814 * @param[in] ps_me
815 *  Pointer to me ctxt
816 *
817 * @param[out] pu1_mv_bits
818 *  length of the codeword for all mv's
819 *
820 * @remarks The length of the code words are derived from signed exponential
821 * goloumb codes.
822 *
823 *******************************************************************************
824 */
isvce_init_mv_bits(isvce_me_ctxt_t * ps_me_ctxt)825 void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me_ctxt)
826 {
827     /* temp var */
828     WORD32 i, codesize = 3, diff, limit;
829     UWORD32 u4_code_num, u4_range;
830     UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
831 
832     /* max srch range */
833     diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
834     /* sub pel */
835     diff <<= 2;
836     /* delta mv */
837     diff <<= 1;
838 
839     /* codeNum for positive integer     =  2x-1     : Table9-3  */
840     u4_code_num = (diff << 1);
841 
842     /* get range of the bit string and put using put_bits()                 */
843     GETRANGE(u4_range, u4_code_num);
844 
845     limit = 2 * u4_range - 1;
846 
847     /* init mv bits */
848     ps_me_ctxt->pu1_mv_bits[0] = 1;
849 
850     while(codesize < limit)
851     {
852         u4_uev_min = (1 << (codesize >> 1));
853         u4_uev_max = 2 * u4_uev_min - 1;
854 
855         u4_sev_min = u4_uev_min >> 1;
856         u4_sev_max = u4_uev_max >> 1;
857 
858         DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
859 
860         for(i = u4_sev_min; i <= (WORD32) u4_sev_max; i++)
861         {
862             ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
863         }
864 
865         codesize += 2;
866     }
867 }
868 
869 /**
870 *******************************************************************************
871 *
872 * @brief Adds valid MVs as initial search candidates for motion estimation by
873 * cheking if it is distinct or not.
874 *
875 * @param[in] ps_search_cand
876 *  MV to add as search candidate
877 *
878 * @param[in] ps_me_ctxt
879 *  pointer to ME context
880 *
881 * @param[in] u4_num_candidates
882 *  Number of inital search candidates value
883 *
884 *******************************************************************************
885 */
isvce_add_me_init_search_cands(mv_t * ps_search_cand,isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist,UWORD32 * u4_num_candidates,bool b_is_max_mv_diff_lt_4)886 static FORCEINLINE void isvce_add_me_init_search_cands(mv_t *ps_search_cand,
887                                                        isvce_me_ctxt_t *ps_me_ctxt,
888                                                        WORD32 i4_reflist,
889                                                        UWORD32 *u4_num_candidates,
890                                                        bool b_is_max_mv_diff_lt_4)
891 {
892     WORD32 k;
893     WORD32 i4_mv_x, i4_mv_y;
894 
895     bool b_is_mv_identical = false;
896 
897     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
898     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
899     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
900     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
901     UWORD32 u4_num_init_search_cands = u4_num_candidates[0];
902 
903     i4_mv_x = (ps_search_cand->i2_mvx + 2) >> 2;
904     i4_mv_y = (ps_search_cand->i2_mvy + 2) >> 2;
905 
906     i4_mv_x = CLIP3(i4_srch_range_w, i4_srch_range_e, i4_mv_x);
907     i4_mv_y = CLIP3(i4_srch_range_n, i4_srch_range_s, i4_mv_y);
908 
909     if(u4_num_init_search_cands == 0)
910     {
911         b_is_mv_identical = false;
912     }
913     else
914     {
915         for(k = u4_num_init_search_cands - 1; k >= 0; k--)
916         {
917             if((ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvx == i4_mv_x &&
918                 ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvy == i4_mv_y))
919             {
920                 b_is_mv_identical = true;
921             }
922         }
923     }
924 
925     if(!b_is_mv_identical)
926     {
927         if(USE_ILP_MV_IN_ME && ps_me_ctxt->ps_ilp_me_cands)
928         {
929             if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4)
930             {
931                 if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_LT_2)
932                 {
933                     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx =
934                         i4_mv_x;
935                     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy =
936                         i4_mv_y;
937 
938                     u4_num_candidates[0] += 1;
939                 }
940             }
941             else if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4)
942             {
943                 if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_GTEQ_2)
944                 {
945                     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx =
946                         i4_mv_x;
947                     ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy =
948                         i4_mv_y;
949 
950                     u4_num_candidates[0] += 1;
951                 }
952             }
953         }
954         else
955         {
956             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx = i4_mv_x;
957             ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy = i4_mv_y;
958 
959             u4_num_candidates[0] += 1;
960         }
961     }
962 }
963 
964 /**
965 *******************************************************************************
966 *
967 * @brief Determines the valid candidates for which the initial search shall
968 *happen. The best of these candidates is used to center the diamond pixel
969 *search.
970 *
971 * @par Description: The function sends the skip, (0,0), left, top and top-right
972 * neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
973 * these are the same MVs that are used to form the MV predictor. This initial MV
974 * search candidates need not take care of slice boundaries and hence neighbor
975 * availability checks are not made here.
976 *
977 * @param[in] ps_left_mb_pu
978 *  pointer to left mb motion vector info
979 *
980 * @param[in] ps_top_mb_pu
981 *  pointer to top & top right mb motion vector info
982 *
983 * @param[in] ps_top_left_mb_pu
984 *  pointer to top left mb motion vector info
985 *
986 * @param[out] ps_skip_mv
987 *  pointer to skip motion vectors for the curr mb
988 *
989 * @param[in] i4_mb_x
990 *  mb index x
991 *
992 * @param[in] i4_mb_y
993 *  mb index y
994 *
995 * @param[in] i4_wd_mbs
996 *  pic width in mbs
997 *
998 * @param[in] ps_motionEst
999 *  pointer to me context
1000 *
1001 * @returns  The list of MVs to be used of priming the full pel search and the
1002 * number of such MVs
1003 *
1004 * @remarks
1005 *   Assumptions : 1. Assumes Only partition of size 16x16
1006 *
1007 *******************************************************************************
1008 */
isvce_get_search_candidates(isvce_process_ctxt_t * ps_proc,isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)1009 static void isvce_get_search_candidates(isvce_process_ctxt_t *ps_proc, isvce_me_ctxt_t *ps_me_ctxt,
1010                                         WORD32 i4_reflist)
1011 {
1012     mv_t s_zero_mv;
1013     mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
1014 
1015     UWORD32 i;
1016     WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
1017 
1018     isvce_codec_t *ps_codec = ps_proc->ps_codec;
1019     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1020     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
1021     ilp_me_cands_t *ps_ilp_me_cands = ps_me_ctxt->ps_ilp_me_cands;
1022 
1023     bool b_is_max_mv_diff_lt_4 = false;
1024     WORD32 i4_mb_x = ps_proc->i4_mb_x;
1025     WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? L1 : L0;
1026     UWORD32 u4_num_candidates = 0;
1027 
1028     s_zero_mv.i2_mvx = 0;
1029     s_zero_mv.i2_mvy = 0;
1030     ps_left_mv = &ps_proc->s_nbr_info.ps_left_mb_info->as_pu->as_me_info[i4_reflist].s_mv;
1031     ps_top_mv =
1032         &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->as_me_info[i4_reflist].s_mv;
1033     ps_top_left_mv = &ps_proc->s_nbr_info.ps_top_row_mb_info->as_pu->as_me_info[i4_reflist].s_mv;
1034     ps_top_right_mv =
1035         &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->as_me_info[i4_reflist].s_mv;
1036 
1037     i4_left_mode =
1038         ps_ngbr_avbl->u1_mb_a
1039             ? (ps_proc->s_nbr_info.ps_left_mb_info->as_pu->u1_pred_mode != i4_cmpl_predmode)
1040             : 0;
1041     i4_top_mode = ps_ngbr_avbl->u1_mb_b
1042                       ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->u1_pred_mode !=
1043                          i4_cmpl_predmode)
1044                       : 0;
1045     i4_top_right_mode =
1046         ps_ngbr_avbl->u1_mb_c
1047             ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->u1_pred_mode !=
1048                i4_cmpl_predmode)
1049             : 0;
1050     i4_top_left_mode =
1051         ps_ngbr_avbl->u1_mb_d
1052             ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x - 1)->as_pu->u1_pred_mode !=
1053                i4_cmpl_predmode)
1054             : 0;
1055 
1056     if(USE_ILP_MV_IN_ME && ps_ilp_me_cands)
1057     {
1058         if(ps_ilp_me_cands->u4_num_ilp_mvs >= 2)
1059         {
1060             b_is_max_mv_diff_lt_4 = isvce_check_max_mv_diff_lt_4(ps_ilp_me_cands, i4_reflist);
1061         }
1062 
1063         /* Taking ILP MV Predictor as one of the candidates */
1064         if(ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4)
1065         {
1066             for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++)
1067             {
1068                 if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
1069                     ((ps_ilp_me_cands->ae_pred_mode[i] == BI))))
1070                 {
1071                     isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv,
1072                                                    ps_me_ctxt, i4_reflist, &u4_num_candidates,
1073                                                    b_is_max_mv_diff_lt_4);
1074                 }
1075             }
1076         }
1077     }
1078 
1079     /* Taking the Top MV Predictor as one of the candidates     */
1080     if(ps_ngbr_avbl->u1_mb_b && i4_top_mode)
1081     {
1082         isvce_add_me_init_search_cands(ps_top_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1083                                        b_is_max_mv_diff_lt_4);
1084     }
1085 
1086     /* Taking the Left MV Predictor as one of the candidates    */
1087     if(ps_ngbr_avbl->u1_mb_a && i4_left_mode)
1088     {
1089         isvce_add_me_init_search_cands(ps_left_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1090                                        b_is_max_mv_diff_lt_4);
1091     }
1092 
1093     /********************************************************************/
1094     /*                            MV Prediction                         */
1095     /********************************************************************/
1096     isvce_mv_pred_me(ps_proc, i4_reflist);
1097 
1098     ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
1099     ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
1100 
1101     /* Get the skip motion vector                               */
1102     {
1103         ps_me_ctxt->i4_skip_type =
1104             ps_codec->apf_find_skip_params_me[ps_proc->i4_slice_type](ps_proc, i4_reflist);
1105 
1106         /* Taking the Skip motion vector as one of the candidates   */
1107         isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist].s_mv, ps_me_ctxt,
1108                                        i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4);
1109 
1110         if(ps_proc->i4_slice_type == BSLICE)
1111         {
1112             /* Taking the temporal Skip motion vector as one of the candidates   */
1113             isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist + 2].s_mv, ps_me_ctxt,
1114                                            i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4);
1115         }
1116     }
1117 
1118     /* Taking ILP MV Predictor as one of the candidates */
1119     if(USE_ILP_MV_IN_ME && ps_ilp_me_cands &&
1120        (ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4))
1121     {
1122         for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++)
1123         {
1124             if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
1125                 ((ps_ilp_me_cands->ae_pred_mode[i] == BI))))
1126             {
1127                 isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv,
1128                                                ps_me_ctxt, i4_reflist, &u4_num_candidates,
1129                                                b_is_max_mv_diff_lt_4);
1130             }
1131         }
1132     }
1133 
1134     if(ps_ngbr_avbl->u1_mb_b && i4_top_mode)
1135     {
1136         /* Taking the TopRt MV Predictor as one of the candidates   */
1137         if(ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
1138         {
1139             isvce_add_me_init_search_cands(ps_top_right_mv, ps_me_ctxt, i4_reflist,
1140                                            &u4_num_candidates, b_is_max_mv_diff_lt_4);
1141         }
1142 
1143         /* Taking the TopLt MV Predictor as one of the candidates   */
1144         else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
1145         {
1146             isvce_add_me_init_search_cands(ps_top_left_mv, ps_me_ctxt, i4_reflist,
1147                                            &u4_num_candidates, b_is_max_mv_diff_lt_4);
1148         }
1149     }
1150 
1151     /* Taking the Zero motion vector as one of the candidates   */
1152     isvce_add_me_init_search_cands(&s_zero_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1153                                    b_is_max_mv_diff_lt_4);
1154 
1155     ASSERT(u4_num_candidates <= MAX_FPEL_SEARCH_CANDIDATES);
1156 
1157     ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
1158 }
1159 
1160 /**
1161 *******************************************************************************
1162 *
1163 * @brief The function computes parameters for a PSKIP MB
1164 *
1165 * @par Description:
1166 *  The function updates the skip motion vector and checks if the current
1167 *  MB can be a skip PSKIP mB or not
1168 *
1169 * @param[in] ps_proc
1170 *  Pointer to process context
1171 *
1172 * @param[in] u4_for_me
1173 *  Flag to indicate function is called for ME or not
1174 *
1175 * @param[out] i4_ref_list
1176 *  Current active refernce list
1177 *
1178 * @returns Flag indicating if the current MB can be marked as skip
1179 *
1180 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1181 *   specification.
1182 *
1183 *******************************************************************************
1184 */
isvce_find_pskip_params(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)1185 WORD32 isvce_find_pskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
1186 {
1187     /* left mb motion vector */
1188     isvce_enc_pu_t *ps_left_mb_pu;
1189 
1190     /* top mb motion vector */
1191     isvce_enc_pu_t *ps_top_mb_pu;
1192 
1193     /* Skip mv */
1194     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv;
1195 
1196     UNUSED(i4_reflist);
1197 
1198     ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
1199     ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu;
1200 
1201     if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
1202        ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1203         (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1204         (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) ||
1205        ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1206         (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1207         (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)))
1208 
1209     {
1210         ps_skip_mv->i2_mvx = 0;
1211         ps_skip_mv->i2_mvy = 0;
1212     }
1213     else
1214     {
1215         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
1216         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
1217     }
1218 
1219     if((ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx) &&
1220        (ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
1221     {
1222         return 1;
1223     }
1224 
1225     return 0;
1226 }
1227 
1228 /**
1229 *******************************************************************************
1230 *
1231 * @brief The function computes parameters for a PSKIP MB
1232 *
1233 * @par Description:
1234 *  The function updates the skip motion vector and checks if the current
1235 *  MB can be a skip PSKIP mB or not
1236 *
1237 * @param[in] ps_proc
1238 *  Pointer to process context
1239 *
1240 * @param[in] u4_for_me
1241 *  Flag to dincate fucntion is called for ME or not
1242 *
1243 * @param[out] i4_ref_list
1244 *  Current active refernce list
1245 *
1246 * @returns Flag indicating if the current MB can be marked as skip
1247 *
1248 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1249 *   specification.
1250 *
1251 *******************************************************************************
1252 */
isvce_find_pskip_params_me(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)1253 WORD32 isvce_find_pskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
1254 {
1255     /* left mb motion vector */
1256     isvce_enc_pu_t *ps_left_mb_pu;
1257 
1258     /* top mb motion vector */
1259     isvce_enc_pu_t *ps_top_mb_pu;
1260 
1261     /* Skip mv */
1262     mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv;
1263 
1264     UNUSED(i4_reflist);
1265 
1266     ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
1267     ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu;
1268 
1269     if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
1270        ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1271         (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1272         (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) ||
1273        ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1274         (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1275         (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)))
1276 
1277     {
1278         ps_skip_mv->i2_mvx = 0;
1279         ps_skip_mv->i2_mvy = 0;
1280     }
1281     else
1282     {
1283         ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
1284         ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
1285     }
1286 
1287     return L0;
1288 }
1289 
1290 /**
1291 *******************************************************************************
1292 *
1293 * @brief motion vector predictor
1294 *
1295 * @par Description:
1296 *  The routine calculates the motion vector predictor for a given block,
1297 *  given the candidate MV predictors.
1298 *
1299 * @param[in] ps_left_mb_pu
1300 *  pointer to left mb motion vector info
1301 *
1302 * @param[in] ps_top_row_pu
1303 *  pointer to top & top right mb motion vector info
1304 *
1305 * @param[out] ps_pred_mv
1306 *  pointer to candidate predictors for the current block
1307 *
1308 * @returns  The x & y components of the MV predictor.
1309 *
1310 * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
1311 *   specification.
1312 *   Assumptions : 1. Assumes Single reference frame
1313 *                 2. Assumes Only partition of size 16x16
1314 *
1315 *******************************************************************************
1316 */
isvce_get_mv_predictor(isvce_enc_pu_mv_t * ps_pred_mv,isvce_enc_pu_mv_t * ps_neig_mv,WORD32 pred_algo)1317 void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv,
1318                             WORD32 pred_algo)
1319 {
1320     switch(pred_algo)
1321     {
1322         case 0:
1323             /* left */
1324             ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[0].s_mv.i2_mvx;
1325             ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[0].s_mv.i2_mvy;
1326             break;
1327         case 1:
1328             /* top */
1329             ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[1].s_mv.i2_mvx;
1330             ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[1].s_mv.i2_mvy;
1331             break;
1332         case 2:
1333             /* top right */
1334             ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[2].s_mv.i2_mvx;
1335             ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[2].s_mv.i2_mvy;
1336             break;
1337         case 3:
1338             /* median */
1339             MEDIAN(ps_neig_mv[0].s_mv.i2_mvx, ps_neig_mv[1].s_mv.i2_mvx, ps_neig_mv[2].s_mv.i2_mvx,
1340                    ps_pred_mv->s_mv.i2_mvx);
1341             MEDIAN(ps_neig_mv[0].s_mv.i2_mvy, ps_neig_mv[1].s_mv.i2_mvy, ps_neig_mv[2].s_mv.i2_mvy,
1342                    ps_pred_mv->s_mv.i2_mvy);
1343 
1344             break;
1345         default:
1346             break;
1347     }
1348 }
1349 
1350 /**
1351 *******************************************************************************
1352 *
1353 * @brief This function performs MV prediction
1354 *
1355 * @par Description:
1356 *
1357 * @param[in] ps_proc
1358 *  Process context corresponding to the job
1359 *
1360 * @returns  none
1361 *
1362 * @remarks none
1363 *  This function will update the MB availability since intra inter decision
1364 *  should be done before the call
1365 *
1366 *******************************************************************************
1367 */
isvce_mv_pred(isvce_process_ctxt_t * ps_proc,WORD32 i4_slice_type)1368 void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_slice_type)
1369 {
1370     isvce_enc_pu_mv_t as_pu_mv[3];
1371 
1372     UWORD8 u1_reflist, u1_cmpl_predmode;
1373     WORD32 i;
1374 
1375     isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
1376     isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1};
1377     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1378     isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1379     isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1380     isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1381     isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info;
1382 
1383     UWORD8 u1_left_is_intra = ps_left_mb->u1_is_intra;
1384     UWORD8 u1_num_ref_lists = (i4_slice_type == PSLICE) ? 1 : 2;
1385 
1386     for(u1_reflist = 0; u1_reflist < u1_num_ref_lists; u1_reflist++)
1387     {
1388         WORD8 i1_cur_ref_idx = 0;
1389 
1390         WORD32 pred_algo = 3, a, b, c;
1391 
1392         for(i = 0; i < 3; i++)
1393         {
1394             as_pu_mv[i] = s_default_mv_info;
1395         }
1396 
1397         u1_cmpl_predmode = (u1_reflist == 0) ? L1 : L0;
1398 
1399         /* Before performing mv prediction prepare the ngbr information and
1400          * reset motion vectors basing on their availability */
1401         if(ps_ngbr_avbl->u1_mb_a && (u1_left_is_intra != 1) &&
1402            (ps_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode))
1403         {
1404             /* left mv */
1405             as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1406             as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1407 
1408             /* Only left available */
1409             if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d)
1410             {
1411                 as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1412                 as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1413 
1414                 as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1415                 as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1416             }
1417         }
1418         if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra &&
1419            (ps_top_mb->as_pu[0].u1_pred_mode != u1_cmpl_predmode))
1420         {
1421             /* top mv */
1422             as_pu_mv[1].s_mv = ps_top_mb->as_pu[0].as_me_info[u1_reflist].s_mv;
1423             as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx;
1424         }
1425 
1426         if(!ps_ngbr_avbl->u1_mb_c)
1427         {
1428             /* top right mv - When top right partition is not available for
1429              * prediction if top left is available use it for prediction else
1430              * set the mv information to -1 and (0, 0)
1431              * */
1432             if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra &&
1433                (ps_top_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode))
1434             {
1435                 as_pu_mv[2].s_mv = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].s_mv;
1436                 as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx;
1437             }
1438         }
1439         else if(ps_top_right_mb->as_pu->u1_pred_mode != u1_cmpl_predmode &&
1440                 !ps_top_right_mb->u1_is_intra)
1441         {
1442             as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[u1_reflist].s_mv;
1443             as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1444         }
1445 
1446         /* If only one of the candidate blocks has a reference frame equal to
1447          * the current block then use the same block as the final predictor */
1448         a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1449         b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1450         c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1451         if(a == 0 && b == -1 && c == -1)
1452             pred_algo = 0; /* LEFT */
1453         else if(a == -1 && b == 0 && c == -1)
1454             pred_algo = 1; /* TOP */
1455         else if(a == -1 && b == -1 && c == 0)
1456             pred_algo = 2;
1457 
1458         isvce_get_mv_predictor(&ps_pred_mv[u1_reflist], &as_pu_mv[0], pred_algo);
1459 
1460         ps_pred_mv[u1_reflist].i1_ref_idx = i1_cur_ref_idx;
1461     }
1462 }
1463 
1464 /**
1465 *******************************************************************************
1466 *
1467 * @brief This function approximates Pred. MV
1468 *
1469 * @par Description:
1470 *
1471 * @param[in] ps_proc
1472 *  Process context corresponding to the job
1473 *
1474 * @returns  none
1475 *
1476 * @remarks none
1477 *  Motion estimation happens at nmb level. For cost calculations, mv is appro
1478 *  ximated using this function
1479 *
1480 *******************************************************************************
1481 */
isvce_mv_pred_me(isvce_process_ctxt_t * ps_proc,WORD32 i4_ref_list)1482 void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list)
1483 {
1484     isvce_enc_pu_mv_t as_pu_mv[3];
1485 
1486     WORD32 i, a, b, c;
1487 
1488     isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
1489     isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1};
1490     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1491     isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1492     isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1493     isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1494     isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info;
1495 
1496     WORD8 i1_cur_ref_idx = 0;
1497     WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? L1 : L0;
1498     WORD32 pred_algo = 3;
1499 
1500     for(i = 0; i < 3; i++)
1501     {
1502         as_pu_mv[i] = s_default_mv_info;
1503     }
1504 
1505     if(ps_ngbr_avbl->u1_mb_a && !ps_left_mb->u1_is_intra &&
1506        (ps_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1507     {
1508         /* left mv */
1509         as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1510         as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1511 
1512         /* Only left available */
1513         if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d)
1514         {
1515             as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1516             as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1517 
1518             as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1519             as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1520         }
1521     }
1522     if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra &&
1523        (ps_top_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1524     {
1525         /* top mv */
1526         as_pu_mv[1].s_mv = ps_top_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1527         as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1528     }
1529     if(!ps_ngbr_avbl->u1_mb_c)
1530     {
1531         /* top right mv - When top right partition is not available for
1532          * prediction if top left is available use it for prediction else
1533          * set the mv information to -1 and (0, 0)
1534          * */
1535         if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra &&
1536            (ps_top_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1537         {
1538             as_pu_mv[2].s_mv = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1539             as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1540         }
1541     }
1542     else if(ps_top_right_mb->as_pu->u1_pred_mode != i4_cmpl_predmode &&
1543             !ps_top_right_mb->u1_is_intra)
1544     {
1545         as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1546         as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1547     }
1548 
1549     /* If only one of the candidate blocks has a reference frame equal to
1550      * the current block then use the same block as the final predictor */
1551     a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1552     b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1553     c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1554 
1555     if(a == 0 && b == -1 && c == -1)
1556         pred_algo = 0; /* LEFT */
1557     else if(a == -1 && b == 0 && c == -1)
1558         pred_algo = 1; /* TOP */
1559     else if(a == -1 && b == -1 && c == 0)
1560         pred_algo = 2;
1561 
1562     isvce_get_mv_predictor(&ps_pred_mv[i4_ref_list], &as_pu_mv[0], pred_algo);
1563 }
1564 
1565 /**
1566 *******************************************************************************
1567 *
1568 * @brief This function initializes me ctxt
1569 *
1570 * @par Description:
1571 *  Before dispatching the current job to me thread, the me context associated
1572 *  with the job is initialized.
1573 *
1574 * @param[in] ps_proc
1575 *  Process context corresponding to the job
1576 *
1577 * @returns  none
1578 *
1579 * @remarks none
1580 *
1581 *******************************************************************************
1582 */
isvce_init_me(isvce_process_ctxt_t * ps_proc)1583 void isvce_init_me(isvce_process_ctxt_t *ps_proc)
1584 {
1585     isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1586     isvce_codec_t *ps_codec = ps_proc->ps_codec;
1587 
1588     ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
1589 
1590     if(ps_codec->s_cfg.u4_num_bframes == 0)
1591     {
1592         ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
1593     }
1594     else
1595     {
1596         ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P;
1597     }
1598 
1599     ps_me_ctxt->pu1_src_buf_luma = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
1600     ps_me_ctxt->i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
1601 
1602     ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
1603     ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data;
1604 
1605     ps_me_ctxt->ai4_rec_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;
1606     ps_me_ctxt->ai4_rec_strd[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].i4_data_stride;
1607 
1608     ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
1609 }
1610 
1611 /**
1612 *******************************************************************************
1613 *
1614 * @brief This function performs motion estimation for the current mb using
1615 *   single reference list
1616 *
1617 * @par Description:
1618 *  The current mb is compared with a list of mb's in the reference frame for
1619 *  least cost. The mb that offers least cost is chosen as predicted mb and the
1620 *  displacement of the predicted mb from index location of the current mb is
1621 *  signaled as mv. The list of the mb's that are chosen in the reference frame
1622 *  are dependent on the speed of the ME configured.
1623 *
1624 * @param[in] ps_proc
1625 *  Process context corresponding to the job
1626 *
1627 * @returns  motion vector of the pred mb, sad, cost.
1628 *
1629 * @remarks none
1630 *
1631 *******************************************************************************
1632 */
isvce_compute_me_single_reflist(isvce_process_ctxt_t * ps_proc)1633 void isvce_compute_me_single_reflist(isvce_process_ctxt_t *ps_proc)
1634 {
1635     mb_part_ctxt s_skip_mbpart;
1636 
1637     /* source buffer for halp pel generation functions */
1638     UWORD8 *pu1_hpel_src;
1639 
1640     isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1641     isvce_codec_t *ps_codec = ps_proc->ps_codec;
1642     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1643     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1644     inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
1645 
1646     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1647 
1648     ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
1649 
1650     {
1651         WORD32 rows_above, rows_below, columns_left, columns_right;
1652 
1653         /* During evaluation for motion vectors do not search through padded regions
1654          */
1655         /* Obtain number of rows and columns that are effective for computing for me
1656          * evaluation */
1657         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1658         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1659         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1660         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1661 
1662         /* init srch range */
1663         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X /
1664          * 2 on all sides.
1665          */
1666         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1667         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1668         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1669         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1670 
1671         /* this is to facilitate fast sub pel computation with minimal loads */
1672         ps_me_ctxt->i4_srch_range_w += 1;
1673         ps_me_ctxt->i4_srch_range_e -= 1;
1674         ps_me_ctxt->i4_srch_range_n += 1;
1675         ps_me_ctxt->i4_srch_range_s -= 1;
1676     }
1677 
1678     /***********************************************************************
1679      * Compute ME for list L0
1680      ***********************************************************************/
1681 
1682     /* Init SATQD for the current list */
1683     ps_me_ctxt->u4_min_sad_reached = 0;
1684     ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1685 
1686     /* Get the seed motion vector candidates                    */
1687     isvce_get_search_candidates(ps_proc, ps_me_ctxt, L0);
1688 
1689     /* ****************************************************************
1690      *Evaluate the SKIP for current list
1691      * ****************************************************************/
1692     s_skip_mbpart.s_mv_curr.i2_mvx = 0;
1693     s_skip_mbpart.s_mv_curr.i2_mvy = 0;
1694     s_skip_mbpart.i4_mb_cost = INT_MAX;
1695     s_skip_mbpart.i4_mb_distortion = INT_MAX;
1696 
1697     isvce_compute_skip_cost(ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[L0].s_mv),
1698                             &s_skip_mbpart, ps_codec->s_cfg.u4_enable_satqd, PRED_L0,
1699                             0 /* Not a Bslice */);
1700 
1701     s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
1702     s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
1703 
1704     /******************************************************************
1705      * Evaluate ME For current list
1706      *****************************************************************/
1707     ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx = 0;
1708     ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy = 0;
1709     ps_me_ctxt->as_mb_part[L0].i4_mb_cost = INT_MAX;
1710     ps_me_ctxt->as_mb_part[L0].i4_mb_distortion = INT_MAX;
1711 
1712     /* Init Hpel */
1713     ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf = NULL;
1714 
1715     /* In case we found out the minimum SAD, exit the ME eval */
1716     if(!ps_me_ctxt->u4_min_sad_reached)
1717     {
1718         /* Evaluate search candidates for initial mv pt */
1719         isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, L0);
1720 
1721         /********************************************************************/
1722         /*                  full pel motion estimation                      */
1723         /********************************************************************/
1724         isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, L0);
1725 
1726         /* Scale the MV to qpel resolution */
1727         ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx <<= 2;
1728         ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy <<= 2;
1729 
1730         if(ps_me_ctxt->u4_enable_hpel)
1731         {
1732             /* moving src pointer to the converged motion vector location*/
1733             pu1_hpel_src =
1734                 ps_me_ctxt->apu1_ref_buf_luma[L0] +
1735                 (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx >> 2) +
1736                 (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy >> 2) * ps_me_ctxt->ai4_rec_strd[L0];
1737 
1738             ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1739             ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1740             ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1741 
1742             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1743 
1744             /* half  pel search is done for both sides of full pel,
1745              * hence half_x of width x height = 17x16 is created
1746              * starting from left half_x of converged full pel */
1747             pu1_hpel_src -= 1;
1748 
1749             /* computing half_x */
1750             ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
1751                                                   ps_me_ctxt->ai4_rec_strd[L0],
1752                                                   ps_me_ctxt->u4_subpel_buf_strd);
1753 
1754             /*
1755              * Halfpel search is done for both sides of full pel,
1756              * hence half_y of width x height = 16x17 is created
1757              * starting from top half_y of converged full pel
1758              * for half_xy top_left is required
1759              * hence it starts from pu1_hpel_src = full_pel_converged_point -
1760              * i4_rec_strd - 1
1761              */
1762             pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[L0];
1763 
1764             /* computing half_y , and half_xy*/
1765             ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1766                 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1], ps_me_ctxt->apu1_subpel_buffs[2],
1767                 ps_me_ctxt->ai4_rec_strd[L0], ps_me_ctxt->u4_subpel_buf_strd,
1768                 ps_proc->ai16_pred1 + 3, ps_me_ctxt->u4_subpel_buf_strd);
1769 
1770             isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, L0);
1771         }
1772     }
1773 
1774     /***********************************************************************
1775      * If a particular skiip Mv is giving better sad, copy to the corresponding
1776      * MBPART
1777      * In B slices this loop should go only to PREDL1: If we found min sad
1778      * we will go to the skip ref list only
1779      * Have to find a way to make it without too much change or new vars
1780      **********************************************************************/
1781     if(s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[L0].i4_mb_cost)
1782     {
1783         ps_me_ctxt->as_mb_part[L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1784         ps_me_ctxt->as_mb_part[L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1785         ps_me_ctxt->as_mb_part[L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1786     }
1787     else if(ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf)
1788     {
1789         /* Now we have to copy the buffers */
1790         ps_inter_pred_fxns->pf_inter_pred_luma_copy(
1791             ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf, ps_proc->pu1_best_subpel_buf,
1792             ps_me_ctxt->u4_subpel_buf_strd, ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL,
1793             0);
1794     }
1795 
1796     /**********************************************************************
1797      * Now get the minimum of MB part sads by searching over all ref lists
1798      **********************************************************************/
1799     ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx =
1800         ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx;
1801     ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy =
1802         ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy;
1803     ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[L0].i4_mb_cost;
1804     ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[L0].i4_mb_distortion;
1805     ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1806     ps_proc->ps_mb_info->as_pu->u1_pred_mode = L0;
1807 
1808     /* Mark the reflists */
1809     ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx = 0;
1810     ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx = -1;
1811 
1812     /* number of partitions */
1813     ps_proc->u4_num_sub_partitions = 1;
1814     *(ps_proc->pu4_mb_pu_cnt) = 1;
1815 
1816     /* position in-terms of PU */
1817     ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0;
1818     ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0;
1819 
1820     /* PU size */
1821     ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3;
1822     ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3;
1823 
1824     /* Update min sad conditions */
1825     if(ps_me_ctxt->u4_min_sad_reached == 1)
1826     {
1827         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1828         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1829     }
1830 }
1831 
1832 /**
1833 *******************************************************************************
1834 *
1835 * @brief This function performs motion estimation for the current NMB
1836 *
1837 * @par Description:
1838 * Intializes input and output pointers required by the function
1839 *isvce_compute_me and calls the function isvce_compute_me in a loop to process
1840 *NMBs.
1841 *
1842 * @param[in] ps_proc
1843 *  Process context corresponding to the job
1844 *
1845 * @returns
1846 *
1847 * @remarks none
1848 *
1849 *******************************************************************************
1850 */
isvce_compute_me_nmb(isvce_process_ctxt_t * ps_proc,UWORD32 u4_nmb_count)1851 void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1852 {
1853     UWORD32 u4_i;
1854 
1855     isvce_codec_t *ps_codec = ps_proc->ps_codec;
1856     isvce_mb_info_t *ps_mb_begin = ps_proc->ps_mb_info;
1857 
1858     UWORD32 *pu4_mb_pu_cnt_begin = ps_proc->pu4_mb_pu_cnt;
1859     UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1860 
1861     /* Spatial dependencies for skip are not met if nmb > 1 */
1862     ASSERT(1 == u4_nmb_count);
1863 
1864     if(ps_proc->i4_mb_x)
1865     {
1866         ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra;
1867         ps_proc->s_me_ctxt.u4_left_is_skip =
1868             (ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == PSKIP);
1869     }
1870 
1871     for(u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1872     {
1873         /* Wait for ME map */
1874         if(ps_proc->i4_mb_y > 0)
1875         {
1876             /* Wait for top right ME to be done */
1877             UWORD8 *pu1_me_map_tp_rw =
1878                 ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1879 
1880             while(1)
1881             {
1882                 volatile UWORD8 *pu1_buf;
1883                 WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1884 
1885                 idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1886                 pu1_buf = pu1_me_map_tp_rw + idx;
1887                 if(*pu1_buf) break;
1888                 ithread_yield();
1889             }
1890         }
1891 
1892         ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1893         ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1894         ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1895         ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1896 
1897         ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1898         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1899 
1900         ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1901         ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1902 
1903         /* Set the best subpel buf to the correct mb so that the buffer can be
1904          * copied */
1905         ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1906         ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1907 
1908         /* Set the min sad conditions */
1909         ps_proc->ps_cur_mb->u4_min_sad = ps_codec->u4_min_sad;
1910         ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1911 
1912         isvce_derive_nghbr_avbl_of_mbs(ps_proc);
1913 
1914         isvce_init_me(ps_proc);
1915 
1916         /* Compute ME according to slice type */
1917         ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1918 
1919         /* update top and left structs */
1920         if(u4_nmb_count > 1)
1921         {
1922             isvce_mb_info_t *ps_left_syn = ps_proc->s_nbr_info.ps_left_mb_info;
1923 
1924             ps_left_syn[0] = ps_proc->ps_mb_info[0];
1925             ps_left_syn[0].u1_is_intra = 0;
1926             ps_left_syn[0].u2_mb_type = ps_proc->ps_cur_mb->u4_mb_type;
1927         }
1928 
1929         /* Copy the min sad reached info */
1930         ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1931         ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1932 
1933         /*
1934          * To make sure that the MV map is properly sync to the
1935          * cache we need to do a DDB
1936          */
1937         {
1938             DATA_SYNC();
1939 
1940             pu1_me_map[ps_proc->i4_mb_x] = 1;
1941         }
1942         ps_proc->i4_mb_x++;
1943 
1944         ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1945         ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
1946 
1947         /* update buffers pointers */
1948         ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
1949             ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
1950         ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
1951             ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
1952         ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
1953             ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) + MB_SIZE;
1954         ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
1955             ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) + MB_SIZE;
1956 
1957         /*
1958          * Note: Although chroma mb size is 8, as the chroma buffers are
1959          * interleaved, the stride per MB is MB_SIZE
1960          */
1961         ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
1962             ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
1963         ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
1964             ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
1965         ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
1966             ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) + MB_SIZE;
1967         ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
1968             ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) + MB_SIZE;
1969 
1970         ps_proc->pu4_mb_pu_cnt++;
1971         ps_proc->ps_mb_info++;
1972     }
1973 
1974     ps_proc->ps_mb_info = ps_mb_begin;
1975     ps_proc->pu4_mb_pu_cnt = pu4_mb_pu_cnt_begin;
1976     ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1977 
1978     /* update buffers pointers */
1979     ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
1980         ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count;
1981     ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
1982         ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count;
1983     ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
1984         ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) -
1985         MB_SIZE * u4_nmb_count;
1986     ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
1987         ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) -
1988         MB_SIZE * u4_nmb_count;
1989 
1990     /*
1991      * Note: Although chroma mb size is 8, as the chroma buffers are
1992      * interleaved, the stride per MB is MB_SIZE
1993      */
1994     ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
1995         ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count;
1996     ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
1997         ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count;
1998     ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
1999         ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) -
2000         MB_SIZE * u4_nmb_count;
2001     ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
2002         ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) -
2003         MB_SIZE * u4_nmb_count;
2004 }
2005 
2006 /**
2007 *******************************************************************************
2008 *
2009 * @brief The function computes parameters for a BSKIP MB
2010 *
2011 * @par Description:
2012 *  The function updates the skip motion vector for B Mb, check if the Mb can be
2013 *  marked as skip and returns it
2014 *
2015 * @param[in] ps_proc
2016 *  Pointer to process context
2017 *
2018 * @param[in] u4_for_me
2019 *  Dummy
2020 *
2021 * @param[in] i4_reflist
2022 *  Dummy
2023 *
2024 * @returns Flag indicating if the current Mb can be skip or not
2025 *
2026 * @remarks
2027 *   The code implements the logic as described in sec 8.4.1.2.2
2028 *   It also computes co-located MB parmas according to sec 8.4.1.2.1
2029 *
2030 *   Need to add condition for this fucntion to be used in ME
2031 *
2032 *******************************************************************************/
isvce_find_bskip_params_me(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)2033 WORD32 isvce_find_bskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
2034 {
2035     /* Colzero for co-located MB */
2036     WORD32 i4_colzeroflag;
2037 
2038     /* motion vectors for neighbouring MBs */
2039     isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
2040 
2041     /* Variables to check if a particular mB is available */
2042     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
2043 
2044     /* Mode availability, init to no modes available     */
2045     WORD32 i4_mode_avail;
2046 
2047     /*  mb neighbor availability */
2048     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
2049 
2050     /* Temp var */
2051     WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
2052 
2053     /*
2054      * Colocated motion vector
2055      */
2056     mv_t s_mvcol;
2057 
2058     /*
2059      * Colocated picture idx
2060      */
2061     WORD32 i4_refidxcol;
2062 
2063     isvce_codec_t *ps_codec = ps_proc->ps_codec;
2064 
2065     UNUSED(i4_reflist);
2066 
2067     /**************************************************************************
2068      *Find co-located MB parameters
2069      *      See sec 8.4.1.2.1  for reference
2070      **************************************************************************/
2071     {
2072         /*
2073          * Find the co-located Mb and update the skip and pred appropriately
2074          * 1) Default colpic is forward ref : Table 8-6
2075          * 2) Default mb col is current MB : Table 8-8
2076          */
2077 
2078         if(ps_proc->ps_col_mb->u1_is_intra)
2079         {
2080             s_mvcol.i2_mvx = 0;
2081             s_mvcol.i2_mvy = 0;
2082             i4_refidxcol = -1;
2083         }
2084         else
2085         {
2086             if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1)
2087             {
2088                 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv;
2089                 i4_refidxcol = 0;
2090             }
2091             else
2092             {
2093                 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv;
2094                 i4_refidxcol = 0;
2095             }
2096         }
2097 
2098         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as
2099          * default */
2100         i4_colzeroflag =
2101             (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1));
2102     }
2103 
2104     /***************************************************************************
2105      * Evaluating skip params : Spatial Skip
2106      **************************************************************************/
2107     {
2108         /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
2109         ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
2110         ps_b_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x].as_pu;
2111 
2112         i4_c_avail = 0;
2113         if(ps_ngbr_avbl->u1_mb_c)
2114         {
2115             ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x + 1].as_pu;
2116             i4_c_avail = 1;
2117         }
2118         else
2119         {
2120             ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x - 1].as_pu;
2121             i4_c_avail = ps_ngbr_avbl->u1_mb_d;
2122         }
2123 
2124         i4_a = ps_ngbr_avbl->u1_mb_a;
2125         i4_b = ps_ngbr_avbl->u1_mb_b;
2126         i4_c = i4_c_avail;
2127 
2128         /* Init to no mode avail */
2129         i4_mode_avail = 0;
2130         for(i = 0; i < 2; i++)
2131         {
2132             i4_cmpl_mode = (i == 0) ? L1 : L0;
2133 
2134             i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) &&
2135                               (ps_a_pu->as_me_info[i].i1_ref_idx == 0))
2136                              << i;
2137             i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) &&
2138                               (ps_b_pu->as_me_info[i].i1_ref_idx == 0))
2139                              << i;
2140             i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) &&
2141                               (ps_c_pu->as_me_info[i].i1_ref_idx == 0))
2142                              << i;
2143         }
2144 
2145         if(i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
2146         {
2147             i4_skip_type = BI;
2148         }
2149         else if(i4_mode_avail == 0x1)
2150         {
2151             i4_skip_type = L0;
2152         }
2153         else if(i4_mode_avail == 0x2)
2154         {
2155             i4_skip_type = L1;
2156         }
2157 
2158         /* Update skip MV for L0 */
2159         if((i4_mode_avail & 0x1) && (!i4_colzeroflag))
2160         {
2161             ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
2162             ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
2163         }
2164         else
2165         {
2166             ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
2167             ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
2168         }
2169 
2170         /* Update skip MV for L1 */
2171         if((i4_mode_avail & 0x2) && (!i4_colzeroflag))
2172         {
2173             ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
2174             ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
2175         }
2176         else
2177         {
2178             ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
2179             ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
2180         }
2181     }
2182 
2183     /***************************************************************************
2184      * Evaluating skip params : Temporal skip
2185      **************************************************************************/
2186     {
2187         svc_au_buf_t *ps_ref_pic[MAX_REF_PIC_CNT];
2188         WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
2189         isvce_enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
2190 
2191         ps_ref_pic[L0] = ps_proc->aps_ref_pic[L0];
2192         ps_ref_pic[L1] = ps_proc->aps_ref_pic[L1];
2193 
2194         i4_tb = ps_codec->i4_poc - ps_ref_pic[L0]->i4_abs_poc;
2195         i4_td = ps_ref_pic[L1]->i4_abs_poc - ps_ref_pic[L0]->i4_abs_poc;
2196 
2197         i4_tb = CLIP3(-128, 127, i4_tb);
2198         i4_td = CLIP3(-128, 127, i4_td);
2199 
2200         i4_tx = (16384 + ABS(i4_td / 2)) / i4_td;
2201         i4_dist_scale_factor = CLIP3(-1024, 1023, (i4_tb * i4_tx + 32) >> 6);
2202 
2203         /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc)
2204          * operation */
2205         ps_skip_mv[L0].s_mv.i2_mvx = ((i4_dist_scale_factor * s_mvcol.i2_mvx + 128) >> 8) & 0xfffc;
2206         ps_skip_mv[L0].s_mv.i2_mvy = ((i4_dist_scale_factor * s_mvcol.i2_mvy + 128) >> 8) & 0xfffc;
2207 
2208         ps_skip_mv[L1].s_mv.i2_mvx = (ps_skip_mv[L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
2209         ps_skip_mv[L1].s_mv.i2_mvy = (ps_skip_mv[L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
2210     }
2211 
2212     return i4_skip_type;
2213 }
2214 
2215 /**
2216 *******************************************************************************
2217 *
2218 * @brief The function computes the skip motion vector for B mb
2219 *
2220 * @par Description:
2221 *  The function gives the skip motion vector for B Mb, check if the Mb can be
2222 *  marked as skip
2223 *
2224 * @param[in] ps_proc
2225 *  Pointer to process context
2226 *
2227 * @param[in] i4_reflist
2228 *  Dummy (unused)
2232 *
2233 * @returns Flag indicating if the current Mb can be skip or not
2234 *
2235 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
2236 *   specification. It also computes co-located MB parmas according to
2237 *sec 8.4.1.2.1
2238 *
2239 *******************************************************************************/
isvce_find_bskip_params(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)2240 WORD32 isvce_find_bskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
2241 {
2242     WORD32 i4_colzeroflag;
2243 
2244     /* motion vectors */
2245     isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
2246 
2247     /* Syntax elem */
2248     isvce_mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
2249 
2250     /* Variables to check if a particular mB is available */
2251     WORD32 i4_a, i4_b, i4_c, i4_c_avail;
2252 
2253     /* Mode availability, init to no modes available     */
2254     WORD32 i4_mode_avail;
2255 
2256     /*  mb neighbor availability */
2257     block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
2258 
2259     /* Temp var */
2260     WORD32 i, i4_cmpl_mode;
2261 
2262     UNUSED(i4_reflist);
2263 
2264     /**************************************************************************
2265      *Find co-locates parameters
2266      *      See sec 8.4.1.2.1  for reference
2267      **************************************************************************/
2268     {
2269         /*
2270          * Find the co-located Mb and update the skip and pred appropriately
2271          * 1) Default colpic is forward ref : Table 8-6
2272          * 2) Default mb col is current MB : Table 8-8
2273          */
2274 
2275         mv_t s_mvcol;
2276         WORD32 i4_refidxcol;
2277 
2278         if(ps_proc->ps_col_mb->u1_is_intra)
2279         {
2280             s_mvcol.i2_mvx = 0;
2281             s_mvcol.i2_mvy = 0;
2282             i4_refidxcol = -1;
2283         }
2284         else
2285         {
2286             if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1)
2287             {
2288                 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv;
2289                 i4_refidxcol = 0;
2290             }
2291             else
2292             {
2293                 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv;
2294                 i4_refidxcol = 0;
2295             }
2296         }
2297 
2298         /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as
2299          * default */
2300         i4_colzeroflag =
2301             (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1));
2302     }
2303 
2304     /***************************************************************************
2305      * Evaluating skip params
2306      **************************************************************************/
2307     /* Section 8.4.1.2.2 */
2308     ps_a_syn = ps_proc->s_nbr_info.ps_left_mb_info;
2309     ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
2310 
2311     ps_b_syn = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
2312     ps_b_pu = ps_b_syn->as_pu;
2313 
2314     i4_c_avail = 0;
2315     if(ps_ngbr_avbl->u1_mb_c)
2316     {
2317         ps_c_syn = ps_b_syn + 1;
2318         ps_c_pu = ps_c_syn->as_pu;
2319         i4_c_avail = 1;
2320     }
2321     else
2322     {
2323         ps_c_syn = ps_b_syn - 1;
2324         ps_c_pu = ps_c_syn->as_pu;
2325         i4_c_avail = ps_ngbr_avbl->u1_mb_d;
2326     }
2327 
2328     i4_a = ps_ngbr_avbl->u1_mb_a;
2329     i4_a &= !ps_a_syn->u1_is_intra;
2330 
2331     i4_b = ps_ngbr_avbl->u1_mb_b;
2332     i4_b &= !ps_b_syn->u1_is_intra;
2333 
2334     i4_c = i4_c_avail;
2335     i4_c &= !ps_c_syn->u1_is_intra;
2336 
2337     /* Init to no mode avail */
2338     i4_mode_avail = 0;
2339     for(i = 0; i < 2; i++)
2340     {
2341         i4_cmpl_mode = (i == 0) ? L1 : L0;
2342 
2343         i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) &&
2344                           (ps_a_pu->as_me_info[i].i1_ref_idx == 0))
2345                          << i;
2346         i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) &&
2347                           (ps_b_pu->as_me_info[i].i1_ref_idx == 0))
2348                          << i;
2349         i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) &&
2350                           (ps_c_pu->as_me_info[i].i1_ref_idx == 0))
2351                          << i;
2352     }
2353 
2354     /* Update skip MV for L0 */
2355     if((i4_mode_avail & 0x1) && (!i4_colzeroflag))
2356     {
2357         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
2358         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
2359     }
2360     else
2361     {
2362         ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
2363         ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
2364     }
2365 
2366     /* Update skip MV for L1 */
2367     if((i4_mode_avail & 0x2) && (!i4_colzeroflag))
2368     {
2369         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
2370         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
2371     }
2372     else
2373     {
2374         ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
2375         ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
2376     }
2377 
2378     /* Now see if the ME information matches the SKIP information */
2379     switch(ps_proc->ps_mb_info->as_pu->u1_pred_mode)
2380     {
2381         case PRED_BI:
2382             if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx ==
2383                 ps_proc->ps_skip_mv[0].s_mv.i2_mvx) &&
2384                (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy ==
2385                 ps_proc->ps_skip_mv[0].s_mv.i2_mvy) &&
2386                (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx ==
2387                 ps_proc->ps_skip_mv[1].s_mv.i2_mvx) &&
2388                (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy ==
2389                 ps_proc->ps_skip_mv[1].s_mv.i2_mvy) &&
2390                (i4_mode_avail == 0x3 || i4_mode_avail == 0x0))
2391             {
2392                 return 1;
2393             }
2394             break;
2395 
2396         case PRED_L0:
2397             if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx ==
2398                 ps_proc->ps_skip_mv[0].s_mv.i2_mvx) &&
2399                (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy ==
2400                 ps_proc->ps_skip_mv[0].s_mv.i2_mvy) &&
2401                (i4_mode_avail == 0x1))
2402             {
2403                 return 1;
2404             }
2405             break;
2406 
2407         case PRED_L1:
2408             if((ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx ==
2409                 ps_proc->ps_skip_mv[1].s_mv.i2_mvx) &&
2410                (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy ==
2411                 ps_proc->ps_skip_mv[1].s_mv.i2_mvy) &&
2412                (i4_mode_avail == 0x2))
2413             {
2414                 return 1;
2415             }
2416             break;
2417     }
2418 
2419     return 0;
2420 }
2421 
2422 /**
2423 *******************************************************************************
2424 *
2425 * @brief This function computes the best motion vector among the tentative mv
2426 * candidates chosen.
2427 *
2428 * @par Description:
2429 *  This function determines the position in the search window at which the
2430 *motion estimation should begin in order to minimise the number of search
2431 *iterations.
2432 *
2433 * @param[in] ps_mb_part
2434 *  pointer to current mb partition ctxt with respect to ME
2435 *
2436 * @param[in] u4_lambda_motion
2437 *  lambda motion
2438 *
2439 * @param[in] u4_fast_flag
2440 *  enable/disable fast sad computation
2441 *
2442 * @returns  mv pair & corresponding distortion and cost
2443 *
2444 * @remarks Currently onyl 4 search candiates are supported
2445 *
2446 *******************************************************************************
2447 */
isvce_evaluate_bipred(isvce_me_ctxt_t * ps_me_ctxt,isvce_process_ctxt_t * ps_proc,mb_part_ctxt * ps_mb_ctxt_bi)2448 void isvce_evaluate_bipred(isvce_me_ctxt_t *ps_me_ctxt, isvce_process_ctxt_t *ps_proc,
2449                            mb_part_ctxt *ps_mb_ctxt_bi)
2450 {
2451     UWORD32 i, u4_fast_sad;
2452 
2453     WORD32 i4_dest_buff;
2454 
2455     mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
2456 
2457     UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
2458 
2459     UWORD8 *pu1_dst_buf;
2460 
2461     WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
2462 
2463     WORD32 i4_mb_distortion, i4_mb_cost;
2464 
2465     isvce_codec_t *ps_codec = ps_proc->ps_codec;
2466     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2467     inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
2468 
2469     u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
2470 
2471     i4_dest_buff = 0;
2472     for(i = 0; i < ps_me_ctxt->u4_num_candidates[BI]; i += 2)
2473     {
2474         pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
2475 
2476         s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx >> 2;
2477         s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy >> 2;
2478         s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx >> 2;
2479         s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy >> 2;
2480 
2481         ps_l0_pred_mv = &ps_proc->ps_pred_mv[L0].s_mv;
2482         ps_l1_pred_mv = &ps_proc->ps_pred_mv[L1].s_mv;
2483 
2484         if((ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx & 0x3) ||
2485            (ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy & 0x3))
2486         {
2487             pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf;
2488             i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
2489         }
2490         else
2491         {
2492             pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[L0] + (s_l0_mv.i2_mvx) +
2493                             ((s_l0_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L0]);
2494             i4_ref_l0_stride = ps_me_ctxt->ai4_rec_strd[L0];
2495         }
2496 
2497         if((ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx & 0x3) ||
2498            (ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy & 0x3))
2499         {
2500             pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[L1].pu1_best_hpel_buf;
2501             i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
2502         }
2503         else
2504         {
2505             pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[L1] + (s_l1_mv.i2_mvx) +
2506                             ((s_l1_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L1]);
2507             i4_ref_l1_stride = ps_me_ctxt->ai4_rec_strd[L1];
2508         }
2509 
2510         ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(
2511             pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf, i4_ref_l0_stride, i4_ref_l1_stride,
2512             ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
2513 
2514         ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
2515             ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf, ps_me_ctxt->i4_src_strd,
2516             ps_me_ctxt->u4_subpel_buf_strd, INT_MAX, &i4_mb_distortion);
2517 
2518         /* compute cost */
2519         i4_mb_cost =
2520             ps_me_ctxt
2521                 ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
2522         i4_mb_cost +=
2523             ps_me_ctxt
2524                 ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
2525         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx -
2526                                               ps_l1_pred_mv->i2_mvx];
2527         i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy -
2528                                               ps_l1_pred_mv->i2_mvy];
2529 
2530         i4_mb_cost -=
2531             (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == BI) * (i == 0);
2532 
2533         i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
2534         i4_mb_cost += i4_mb_distortion;
2535 
2536         if(i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
2537         {
2538             ps_mb_ctxt_bi->i4_srch_pos_idx = (i >> 1);
2539             ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
2540             ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
2541             ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
2542             i4_dest_buff = (i4_dest_buff + 1) % 2;
2543         }
2544     }
2545 }
2546 
2547 /**
2548 *******************************************************************************
2549 *
2550 * @brief This function performs motion estimation for the current mb
2551 *
2552 * @par Description:
2553 *  The current mb is compared with a list of mb's in the reference frame for
2554 *  least cost. The mb that offers least cost is chosen as predicted mb and the
2555 *  displacement of the predicted mb from index location of the current mb is
2556 *  signaled as mv. The list of the mb's that are chosen in the reference frame
2557 *  are dependent on the speed of the ME configured.
2558 *
2559 * @param[in] ps_proc
2560 *  Process context corresponding to the job
2561 *
2562 * @returns  motion vector of the pred mb, sad, cost.
2563 *
2564 * @remarks none
2565 *
2566 *******************************************************************************
2567 */
isvce_compute_me_multi_reflist(isvce_process_ctxt_t * ps_proc)2568 void isvce_compute_me_multi_reflist(isvce_process_ctxt_t *ps_proc)
2569 {
2570     /* me ctxt */
2571     isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
2572 
2573     /* codec context */
2574     isvce_codec_t *ps_codec = ps_proc->ps_codec;
2575     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2576     inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
2577 
2578     /* Temp variables for looping over ref lists */
2579     WORD32 i4_reflist, i4_max_reflist;
2580 
2581     /* source buffer for halp pel generation functions */
2582     UWORD8 *pu1_hpel_src;
2583 
2584     /* quantization parameters */
2585     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2586 
2587     /* Mb part ctxts for SKIP */
2588     mb_part_ctxt as_skip_mbpart[2];
2589 
2590     ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
2591 
2592     /* Sad therholds */
2593     ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
2594 
2595     {
2596         WORD32 rows_above, rows_below, columns_left, columns_right;
2597 
2598         /* During evaluation for motion vectors do not search through padded regions
2599          */
2600         /* Obtain number of rows and columns that are effective for computing for me
2601          * evaluation */
2602         rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
2603         rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
2604         columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
2605         columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
2606 
2607         /* init srch range */
2608         /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X /
2609          * 2 on all sides.
2610          */
2611         ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
2612         ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
2613         ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
2614         ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
2615 
2616         /* this is to facilitate fast sub pel computation with minimal loads */
2617         if(ps_me_ctxt->u4_enable_hpel)
2618         {
2619             ps_me_ctxt->i4_srch_range_w += 1;
2620             ps_me_ctxt->i4_srch_range_e -= 1;
2621             ps_me_ctxt->i4_srch_range_n += 1;
2622             ps_me_ctxt->i4_srch_range_s -= 1;
2623         }
2624     }
2625 
2626     /* Compute ME and store the MVs */
2627     {
2628         /***********************************************************************
2629          * Compute ME for lists L0 and L1
2630          *  For L0 -> L0 skip + L0
2631          *  for L1 -> L0 skip + L0 + L1 skip + L1
2632          ***********************************************************************/
2633         i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? L0 : L1;
2634 
2635         /* Init SATQD for the current list */
2636         ps_me_ctxt->u4_min_sad_reached = 0;
2637         ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
2638 
2639         for(i4_reflist = L0; i4_reflist <= i4_max_reflist; i4_reflist++)
2640         {
2641             /* Get the seed motion vector candidates                    */
2642             isvce_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
2643 
2644             /* ****************************************************************
2645              *Evaluate the SKIP for current list
2646              * ****************************************************************/
2647             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
2648             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
2649             as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
2650             as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
2651 
2652             if(ps_me_ctxt->i4_skip_type == i4_reflist)
2653             {
2654                 isvce_compute_skip_cost(
2655                     ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[i4_reflist].s_mv),
2656                     &as_skip_mbpart[i4_reflist], ps_codec->s_cfg.u4_enable_satqd, i4_reflist,
2657                     (ps_proc->i4_slice_type == BSLICE));
2658             }
2659 
2660             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
2661             as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
2662 
2663             /******************************************************************
2664              * Evaluate ME For current list
2665              *****************************************************************/
2666             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
2667             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
2668             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
2669             ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
2670 
2671             /* Init Hpel */
2672             ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
2673 
2674             /* In case we found out the minimum SAD, exit the ME eval */
2675             if(ps_me_ctxt->u4_min_sad_reached)
2676             {
2677                 i4_max_reflist = i4_reflist;
2678                 break;
2679             }
2680 
2681             /* Evaluate search candidates for initial mv pt */
2682             isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
2683 
2684             /********************************************************************/
2685             /*                  full pel motion estimation                      */
2686             /********************************************************************/
2687             isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
2688 
2689             DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
2690                                    (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
2691 
2692             DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
2693 
2694             /* Scale the MV to qpel resolution */
2695             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
2696             ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
2697 
2698             if(ps_me_ctxt->u4_enable_hpel)
2699             {
2700                 /* moving src pointer to the converged motion vector location */
2701                 pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] +
2702                                (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2) +
2703                                ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2) *
2704                                 ps_me_ctxt->ai4_rec_strd[i4_reflist]);
2705 
2706                 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
2707                 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
2708                 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
2709 
2710                 /* Init the search position to an invalid number */
2711                 ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
2712 
2713                 /* Incase a buffer is still in use by L0, replace it with spare buff */
2714                 ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx] =
2715                     ps_proc->apu1_subpel_buffs[3];
2716 
2717                 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2718 
2719                 /* half  pel search is done for both sides of full pel,
2720                  * hence half_x of width x height = 17x16 is created
2721                  * starting from left half_x of converged full pel */
2722                 pu1_hpel_src -= 1;
2723 
2724                 /* computing half_x */
2725                 ps_codec->pf_ih264e_sixtapfilter_horz(
2726                     pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
2727                     ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->u4_subpel_buf_strd);
2728 
2729                 /*
2730                  * Halfpel search is done for both sides of full pel,
2731                  * hence half_y of width x height = 16x17 is created
2732                  * starting from top half_y of converged full pel
2733                  * for half_xy top_left is required
2734                  * hence it starts from pu1_hpel_src = full_pel_converged_point -
2735                  * i4_rec_strd - 1
2736                  */
2737                 pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[i4_reflist];
2738 
2739                 /* computing half_y and half_xy */
2740                 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
2741                     pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
2742                     ps_me_ctxt->apu1_subpel_buffs[2], ps_me_ctxt->ai4_rec_strd[i4_reflist],
2743                     ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
2744                     ps_me_ctxt->u4_subpel_buf_strd);
2745 
2746                 isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
2747             }
2748         }
2749 
2750         /***********************************************************************
2751          * If a particular skiip Mv is giving better sad, copy to the corresponding
2752          * MBPART
2753          * In B slices this loop should go only to PREDL1: If we found min sad
2754          * we will go to the skip ref list only
2755          * Have to find a way to make it without too much change or new vars
2756          **********************************************************************/
2757         for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2758         {
2759             if(as_skip_mbpart[i4_reflist].i4_mb_cost <
2760                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
2761             {
2762                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost =
2763                     as_skip_mbpart[i4_reflist].i4_mb_cost;
2764                 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion =
2765                     as_skip_mbpart[i4_reflist].i4_mb_distortion;
2766                 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
2767             }
2768         }
2769 
2770         /***********************************************************************
2771          * Compute ME for BI
2772          *  In case of BI we do ME for two candidates
2773          *   1) The best L0 and L1 Mvs
2774          *   2) Skip L0 and L1 MVs
2775          *
2776          *   TODO
2777          *   one of the search candidates is skip. Hence it may be duplicated
2778          ***********************************************************************/
2779         if(i4_max_reflist == L1 && ps_me_ctxt->u4_min_sad_reached == 0)
2780         {
2781             WORD32 i, j = 0;
2782             WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
2783             WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
2784 
2785             /* Get the free buffers */
2786             l0_srch_pos_idx = ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx;
2787             l1_srch_pos_idx = ps_me_ctxt->as_mb_part[L1].i4_srch_pos_idx;
2788 
2789             /* Search for the two free buffers in subpel list */
2790             for(i = 0; i < SUBPEL_BUFF_CNT; i++)
2791             {
2792                 if(i != l0_srch_pos_idx && i != l1_srch_pos_idx)
2793                 {
2794                     ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
2795                     j++;
2796                 }
2797             }
2798             ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2799 
2800             /* Copy the statial SKIP MV of each list */
2801             i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L0] - 2;
2802             i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L1] - 2;
2803             ps_me_ctxt->as_mv_init_search[BI][0].i2_mvx =
2804                 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2805             ps_me_ctxt->as_mv_init_search[BI][0].i2_mvy =
2806                 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2807             ps_me_ctxt->as_mv_init_search[BI][1].i2_mvx =
2808                 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2809             ps_me_ctxt->as_mv_init_search[BI][1].i2_mvy =
2810                 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2811 
2812             /* Copy the SKIP MV temporal of each list */
2813             i4_l0_skip_mv_idx++;
2814             i4_l1_skip_mv_idx++;
2815             ps_me_ctxt->as_mv_init_search[BI][2].i2_mvx =
2816                 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2817             ps_me_ctxt->as_mv_init_search[BI][2].i2_mvy =
2818                 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2819             ps_me_ctxt->as_mv_init_search[BI][3].i2_mvx =
2820                 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2821             ps_me_ctxt->as_mv_init_search[BI][3].i2_mvy =
2822                 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2823 
2824             /* Copy the best MV after ME */
2825             ps_me_ctxt->as_mv_init_search[BI][4] = ps_me_ctxt->as_mb_part[L0].s_mv_curr;
2826             ps_me_ctxt->as_mv_init_search[BI][5] = ps_me_ctxt->as_mb_part[L1].s_mv_curr;
2827 
2828             ps_me_ctxt->u4_num_candidates[BI] = 6;
2829 
2830             ps_me_ctxt->as_mb_part[BI].i4_mb_cost = INT_MAX;
2831             ps_me_ctxt->as_mb_part[BI].i4_mb_distortion = INT_MAX;
2832 
2833             isvce_evaluate_bipred(ps_me_ctxt, ps_proc, &ps_me_ctxt->as_mb_part[BI]);
2834 
2835             i4_max_reflist = BI;
2836         }
2837 
2838         /**********************************************************************
2839          * Now get the minimum of MB part sads by searching over all ref lists
2840          **********************************************************************/
2841         ps_proc->ps_mb_info->as_pu->u1_pred_mode = 0x3;
2842 
2843         for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2844         {
2845             if(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2846             {
2847                 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2848                 ps_proc->ps_cur_mb->i4_mb_distortion =
2849                     ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2850                 ps_proc->ps_cur_mb->u4_mb_type =
2851                     (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2852                 ps_proc->ps_mb_info->as_pu->u1_pred_mode = i4_reflist;
2853             }
2854         }
2855 
2856         /**********************************************************************
2857          * In case we have a BI MB, we have to copy the buffers and set proer MV's
2858          *  1)In case its BI, we need to get the best MVs given by BI and update
2859          *    to their corresponding MB part
2860          *  2)We also need to copy the buffer in which bipred buff is populated
2861          *
2862          *  Not that if we have
2863          **********************************************************************/
2864         if(ps_proc->ps_mb_info->as_pu->u1_pred_mode == BI)
2865         {
2866             WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[BI].i4_srch_pos_idx;
2867             UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[BI].pu1_best_hpel_buf;
2868 
2869             ps_me_ctxt->as_mb_part[L0].s_mv_curr =
2870                 ps_me_ctxt->as_mv_init_search[BI][i4_srch_pos << 1];
2871             ps_me_ctxt->as_mb_part[L1].s_mv_curr =
2872                 ps_me_ctxt->as_mv_init_search[BI][(i4_srch_pos << 1) + 1];
2873 
2874             /* Now we have to copy the buffers */
2875             ps_inter_pred_fxns->pf_inter_pred_luma_copy(
2876                 pu1_bi_buf, ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd,
2877                 ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0);
2878         }
2879         else if(ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf)
2880         {
2881             /* Now we have to copy the buffers */
2882             ps_inter_pred_fxns->pf_inter_pred_luma_copy(
2883                 ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf,
2884                 ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd,
2885                 ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0);
2886         }
2887     }
2888 
2889     /**************************************************************************
2890      *Now copy the MVs to the current PU with qpel scaling
2891      ***************************************************************************/
2892     ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx =
2893         (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx);
2894     ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy =
2895         (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy);
2896     ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvx =
2897         (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvx);
2898     ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvy =
2899         (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvy);
2900 
2901     ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx =
2902         (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L1) ? 0 : -1;
2903     ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx =
2904         (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L0) ? 0 : -1;
2905 
2906     /* number of partitions */
2907     ps_proc->u4_num_sub_partitions = 1;
2908     *(ps_proc->pu4_mb_pu_cnt) = 1;
2909 
2910     /* position in-terms of PU */
2911     ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0;
2912     ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0;
2913 
2914     /* PU size */
2915     ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3;
2916     ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3;
2917 
2918     /* Update min sad conditions */
2919     if(ps_me_ctxt->u4_min_sad_reached == 1)
2920     {
2921         ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2922         ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2923     }
2924 }
2925