xref: /aosp_15_r20/external/libavc/encoder/ime.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21  *******************************************************************************
22  * @file
23  *  ime.c
24  *
25  * @brief
26  *  This file contains functions needed for computing motion vectors of a
27  *  16x16 block
28  *
29  * @author
30  *  Ittiam
31  *
32  * @par List of Functions:
33  *  - ime_diamond_search_16x16
34  *  - ime_evaluate_init_srchposn_16x16
35  *  - ime_full_pel_motion_estimation_16x16
36  *  - ime_sub_pel_motion_estimation_16x16
37  *  - ime_compute_skip_cost
38  *
39  * @remarks
40  *  None
41  *
42  *******************************************************************************
43  */
44 
45 /*****************************************************************************/
46 /* File Includes                                                             */
47 /*****************************************************************************/
48 
49 /* System include files */
50 #include <stdio.h>
51 #include <assert.h>
52 #include <limits.h>
53 #include <string.h>
54 
55 /* User include files */
56 #include "ime_typedefs.h"
57 #include "ime_distortion_metrics.h"
58 #include "ime_defs.h"
59 #include "ime_structs.h"
60 #include "ime.h"
61 #include "ime_macros.h"
62 #include "ime_statistics.h"
63 
64 /**
65 *******************************************************************************
66 *
67 * @brief Diamond Search
68 *
69 * @par Description:
70 *  This function computes the sad at vertices of several layers of diamond grid
71 *  at a time. The number of layers of diamond grid that would be evaluated is
72 *  configurable.The function computes the sad at vertices of a diamond grid. If
73 *  the sad at the center of the diamond grid is lesser than the sad at any other
74 *  point of the diamond grid, the function marks the candidate Mb partition as
75 *  mv.
76 *
77 * @param[in] ps_me_ctxt
78 *  pointer to me context
79 *
80 * @param[in] i4_reflist
81 *  ref list
82 *
83 * @returns  mv pair & corresponding distortion and cost
84 *
85 * @remarks Diamond Srch, radius is 1
86 *
87 *******************************************************************************
88 */
ime_diamond_search_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)89 void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
90 {
91     /* MB partition info */
92     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
93 
94     /* lagrange parameter */
95     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
96 
97     /* srch range*/
98     WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
99     WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
100     WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
101     WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
102 
103     /* enabled fast sad computation */
104 //    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
105 
106     /* pointer to src macro block */
107     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
108     UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
109 
110     /* strides */
111     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
112     WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
113 
114     /* least cost */
115     WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
116 
117     /* least sad */
118     WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
119 
120     /* mv pair */
121     WORD16 i2_mvx, i2_mvy;
122 
123     /* mv bits */
124     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
125 
126     /* temp var */
127     WORD32 i4_cost[4];
128     WORD32 i4_sad[4];
129     UWORD8 *pu1_ref;
130     WORD16 i2_mv_u_x, i2_mv_u_y;
131 
132     /* Diamond search Iteration Max Cnt */
133     UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
134 
135     /* temp var */
136 //    UWORD8 u1_prev_jump = NONE;
137 //    UWORD8 u1_curr_jump = NONE;
138 //    UWORD8 u1_next_jump;
139 //    WORD32 mask_arr[5] = {15, 13, 14, 7, 11};
140 //    WORD32 mask;
141 //    UWORD8 *apu1_ref[4];
142 //    WORD32 i, cnt;
143 //    WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
144 
145     /* mv with best sad during initial evaluation */
146     i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
147     i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
148 
149     i2_mv_u_x = i2_mvx;
150     i2_mv_u_y = i2_mvy;
151 
152     while (u4_num_layers)
153     {
154         /* FIXME : is this the write way to check for out of bounds ? */
155         if ( (i2_mvx - 1 < i4_srch_range_w) ||
156                         (i2_mvx + 1 > i4_srch_range_e) ||
157                         (i2_mvy - 1 < i4_srch_range_n) ||
158                         (i2_mvy + 1 > i4_srch_range_s) )
159         {
160             break;
161         }
162 
163         pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
164 
165         ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
166                                                 pu1_curr_mb,
167                                                 i4_ref_strd,
168                                                 i4_src_strd,
169                                                 i4_sad);
170 
171         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
172         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
173         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
174         DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
175 
176         /* compute cost */
177         i4_cost[0] = i4_sad[0] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
178                                                                    + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
179         i4_cost[1] = i4_sad[1] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
180                                                                    + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
181         i4_cost[2] = i4_sad[2] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
182                                                                    + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
183         i4_cost[3] = i4_sad[3] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
184                                                                    + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
185 
186 
187         if (i4_cost_least > i4_cost[0])
188         {
189             i4_cost_least = i4_cost[0];
190             i4_distortion_least = i4_sad[0];
191 
192             i2_mv_u_x = (i2_mvx - 1);
193             i2_mv_u_y = i2_mvy;
194         }
195 
196         if (i4_cost_least > i4_cost[1])
197         {
198             i4_cost_least = i4_cost[1];
199             i4_distortion_least = i4_sad[1];
200 
201             i2_mv_u_x = (i2_mvx + 1);
202             i2_mv_u_y = i2_mvy;
203         }
204 
205         if (i4_cost_least > i4_cost[2])
206         {
207             i4_cost_least = i4_cost[2];
208             i4_distortion_least = i4_sad[2];
209 
210             i2_mv_u_x = i2_mvx;
211             i2_mv_u_y = i2_mvy - 1;
212         }
213 
214         if (i4_cost_least > i4_cost[3])
215         {
216             i4_cost_least = i4_cost[3];
217             i4_distortion_least = i4_sad[3];
218 
219             i2_mv_u_x = i2_mvx;
220             i2_mv_u_y = i2_mvy + 1;
221         }
222 
223         if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
224         {
225             ps_mb_part->u4_exit = 1;
226             break;
227         }
228         else
229         {
230             i2_mvx = i2_mv_u_x;
231             i2_mvy = i2_mv_u_y;
232         }
233         u4_num_layers--;
234     }
235 
236     if (i4_cost_least < ps_mb_part->i4_mb_cost)
237     {
238         ps_mb_part->i4_mb_cost = i4_cost_least;
239         ps_mb_part->i4_mb_distortion = i4_distortion_least;
240         ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
241         ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
242     }
243 
244 }
245 
246 
247 /**
248 *******************************************************************************
249 *
250 * @brief This function computes the best motion vector among the tentative mv
251 * candidates chosen.
252 *
253 * @par Description:
254 *  This function determines the position in the search window at which the motion
255 *  estimation should begin in order to minimise the number of search iterations.
256 *
257 * @param[in] ps_me_ctxt
258 *  pointer to me context
259 *
260 * @param[in] i4_reflist
261 *  ref list
262 *
263 * @returns  mv pair & corresponding distortion and cost
264 *
265 * @remarks none
266 *
267 *******************************************************************************
268 */
269 
ime_evaluate_init_srchposn_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)270 void ime_evaluate_init_srchposn_16x16
271         (
272             me_ctxt_t *ps_me_ctxt,
273             WORD32 i4_reflist
274         )
275 {
276     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
277 
278     /* candidate mv cnt */
279     UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
280 
281     /* list of candidate mvs */
282     ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
283 
284     /* pointer to src macro block */
285     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
286     UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
287 
288     /* strides */
289     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
290     WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
291 
292     /* enabled fast sad computation */
293     UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
294 
295     /* SAD(distortion metric) of an 8x8 block */
296     WORD32 i4_mb_distortion;
297 
298     /* cost = distortion + u4_lambda_motion * rate */
299     WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
300 
301     /* mb partitions info */
302     mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
303 
304     /* mv bits */
305     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
306 
307     /* temp var */
308     UWORD32  i, j;
309     WORD32 i4_srch_pos_idx = 0;
310     UWORD8 *pu1_ref = NULL;
311 
312     /* Carry out a search using each of the motion vector pairs identified above as predictors. */
313     /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
314     for(i = 0; i < u4_num_candidates; i++)
315     {
316         /* compute sad */
317         WORD32 c_sad = 1;
318 
319         for(j = 0; j < i; j++ )
320         {
321             if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
322                             (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
323             {
324                 c_sad = 0;
325                 break;
326             }
327         }
328         if(c_sad)
329         {
330             /* adjust ref pointer */
331             pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
332 
333             /* compute distortion */
334             ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
335 
336             DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
337 
338             /* compute cost */
339             i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
340                             + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
341 
342             if (i4_mb_cost < i4_mb_cost_least)
343             {
344                 i4_mb_cost_least = i4_mb_cost;
345 
346                 i4_distortion_least = i4_mb_distortion;
347 
348                 i4_srch_pos_idx = i;
349             }
350         }
351     }
352 
353     if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
354     {
355         ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
356         ps_mb_part->i4_mb_cost = i4_mb_cost_least;
357         ps_mb_part->i4_mb_distortion = i4_distortion_least;
358         ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
359         ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
360     }
361 }
362 
363 /**
364 *******************************************************************************
365 *
366 * @brief Searches for the best matching full pixel predictor within the search
367 * range
368 *
369 * @par Description:
370 *  For a given algorithm (diamond, Hex, nStep, ...) chosen, it searches for the
371 *  best matching full pixel predictor within the search range
372 *
373 * @param[in] ps_me_ctxt
374 *  pointer to me context
375 *
376 * @param[in] i4_reflist
377 *  ref list
378 *
379 * @returns  mv pair & corresponding distortion and cost
380 *
381 * @remarks none
382 *
383 *******************************************************************************
384 */
ime_full_pel_motion_estimation_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_ref_list)385 void ime_full_pel_motion_estimation_16x16
386     (
387         me_ctxt_t *ps_me_ctxt,
388         WORD32 i4_ref_list
389     )
390 {
391     /* mb part info */
392     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
393 
394     /******************************************************************/
395     /* Modify Search range about initial candidate instead of zero mv */
396     /******************************************************************/
397     /*
398      * FIXME: The motion vectors in a way can become unbounded. It may so happen that
399      * MV might exceed the limit of the profile configured.
400      */
401     ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
402                                       -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
403     ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
404                                        ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
405     ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
406                                       -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
407     ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
408                                        ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
409 
410     /************************************************************/
411     /* Traverse about best initial candidate for mv             */
412     /************************************************************/
413 
414     switch (ps_me_ctxt->u4_me_speed_preset)
415     {
416         case DMND_SRCH:
417             ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
418             break;
419         default:
420             assert(0);
421             break;
422     }
423 }
424 
425 /**
426 *******************************************************************************
427 *
428 * @brief Searches for the best matching sub pixel predictor within the search
429 * range
430 *
431 * @par Description:
432 *  This function begins by searching across all sub pixel sample points
433 *  around the full pel motion vector. The vector with least cost is chosen as
434 *  the mv for the current mb.
435 *
436 * @param[in] ps_me_ctxt
437 *  pointer to me context
438 *
439 * @param[in] i4_reflist
440 *  ref list
441 *
442 * @returns mv pair & corresponding distortion and cost
443 *
444 * @remarks none
445 *
446 *******************************************************************************
447 */
ime_sub_pel_motion_estimation_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)448 void ime_sub_pel_motion_estimation_16x16
449     (
450         me_ctxt_t *ps_me_ctxt,
451         WORD32 i4_reflist
452     )
453 {
454     /* pointers to src & ref macro block */
455     UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
456 
457     /* pointers to ref. half pel planes */
458     UWORD8 *pu1_ref_mb_half_x;
459     UWORD8 *pu1_ref_mb_half_y;
460     UWORD8 *pu1_ref_mb_half_xy;
461 
462     /* pointers to ref. half pel planes */
463     UWORD8 *pu1_ref_mb_half_x_temp;
464     UWORD8 *pu1_ref_mb_half_y_temp;
465     UWORD8 *pu1_ref_mb_half_xy_temp;
466 
467     /* strides */
468     WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
469 
470     WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
471 
472     /* mb partitions info */
473     mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
474 
475     /* SAD(distortion metric) of an mb */
476     WORD32 i4_mb_distortion;
477     WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
478 
479     /* cost = distortion + u4_lambda_motion * rate */
480     WORD32 i4_mb_cost;
481     WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
482 
483     /*Best half pel buffer*/
484     UWORD8 *pu1_best_hpel_buf = NULL;
485 
486     /* mv bits */
487     UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
488 
489     /* Motion vectors in full-pel units */
490     WORD16 mv_x, mv_y;
491 
492     /* lambda - lagrange constant */
493     UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
494 
495     /* Flags to check if half pel points needs to be evaluated */
496     /**************************************/
497     /* 1 bit for each half pel candidate  */
498     /* bit 0 - half x = 1, half y = 0     */
499     /* bit 1 - half x = -1, half y = 0    */
500     /* bit 2 - half x = 0, half y = 1     */
501     /* bit 3 - half x = 0, half y = -1    */
502     /* bit 4 - half x = 1, half y = 1     */
503     /* bit 5 - half x = -1, half y = 1    */
504     /* bit 6 - half x = 1, half y = -1    */
505     /* bit 7 - half x = -1, half y = -1   */
506     /**************************************/
507     /* temp var */
508     WORD16 i2_mv_u_x, i2_mv_u_y;
509     WORD32 i, j;
510     WORD32 ai4_sad[8];
511 
512     WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
513 
514     i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
515     i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
516 
517     /************************************************************/
518     /* Evaluate half pel                                        */
519     /************************************************************/
520     mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
521     mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
522 
523 
524     /**************************************************************/
525     /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
526     /* left side of full pel                                      */
527     /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
528     /* top  side of full pel                                      */
529     /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
530     /* on the top left side of full pel                           */
531     /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
532     /* default postions are                                       */
533     /* ps_me_ctxt->pu1_half_x = right halp_pel                    */
534     /*  ps_me_ctxt->pu1_half_y = bottom halp_pel                  */
535     /*  ps_me_ctxt->pu1_half_xy = bottom right halp_pel           */
536     /* Hence corresponding adjustments made here                  */
537     /**************************************************************/
538 
539     pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
540     pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
541     pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
542 
543     ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
544                                                  pu1_ref_mb_half_y,
545                                                  pu1_ref_mb_half_xy,
546                                                  i4_src_strd, i4_ref_strd,
547                                                  ai4_sad);
548 
549     /* Half x plane */
550     for(i = 0; i < 2; i++)
551     {
552         WORD32 mv_x_tmp = (mv_x << 2) + 2;
553         WORD32 mv_y_tmp = (mv_y << 2);
554 
555         mv_x_tmp -= (i * 4);
556 
557         i4_mb_distortion = ai4_sad[i];
558 
559         /* compute cost */
560         i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
561                         + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
562 
563         if (i4_mb_cost < i4_mb_cost_least)
564         {
565             i4_mb_cost_least = i4_mb_cost;
566 
567             i4_distortion_least = i4_mb_distortion;
568 
569             i2_mv_u_x = mv_x_tmp;
570 
571             i2_mv_u_y = mv_y_tmp;
572 
573 #ifndef HP_PL /*choosing whether left or right half_x*/
574             ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
575             pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
576 
577             i4_srch_pos_idx = 0;
578 #endif
579         }
580 
581     }
582 
583     /* Half y plane */
584     for(i = 0; i < 2; i++)
585     {
586         WORD32 mv_x_tmp = (mv_x << 2);
587         WORD32 mv_y_tmp = (mv_y << 2) + 2;
588 
589         mv_y_tmp -= (i * 4);
590 
591         i4_mb_distortion = ai4_sad[2 + i];
592 
593         /* compute cost */
594         i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
595                         + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
596 
597         if (i4_mb_cost < i4_mb_cost_least)
598         {
599             i4_mb_cost_least = i4_mb_cost;
600 
601             i4_distortion_least = i4_mb_distortion;
602 
603             i2_mv_u_x = mv_x_tmp;
604 
605             i2_mv_u_y = mv_y_tmp;
606 
607 #ifndef HP_PL/*choosing whether top or bottom half_y*/
608             ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
609             pu1_best_hpel_buf = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
610 
611             i4_srch_pos_idx = 1;
612 #endif
613         }
614 
615     }
616 
617     /* Half xy plane */
618     for(j = 0; j < 2; j++)
619     {
620         for(i = 0; i < 2; i++)
621         {
622             WORD32 mv_x_tmp = (mv_x << 2) + 2;
623             WORD32 mv_y_tmp = (mv_y << 2) + 2;
624 
625             mv_x_tmp -= (i * 4);
626             mv_y_tmp -= (j * 4);
627 
628             i4_mb_distortion = ai4_sad[4 + i + 2 * j];
629 
630             /* compute cost */
631             i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
632                             + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
633 
634             if (i4_mb_cost < i4_mb_cost_least)
635             {
636                 i4_mb_cost_least = i4_mb_cost;
637 
638                 i4_distortion_least = i4_mb_distortion;
639 
640                 i2_mv_u_x = mv_x_tmp;
641 
642                 i2_mv_u_y = mv_y_tmp;
643 
644 #ifndef HP_PL /*choosing between four half_xy */
645                 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
646                 pu1_best_hpel_buf =  pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
647 
648                 i4_srch_pos_idx = 2;
649 #endif
650             }
651 
652         }
653     }
654 
655     if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
656     {
657         ps_mb_part->i4_mb_cost = i4_mb_cost_least;
658         ps_mb_part->i4_mb_distortion = i4_distortion_least;
659         ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
660         ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
661         ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
662         ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
663     }
664 }
665 
666 /**
667 *******************************************************************************
668 *
669 * @brief This function computes cost of skip macroblocks
670 *
671 * @par Description:
672 *
673 * @param[in] ps_me_ctxt
674 *  pointer to me ctxt
675 *
676 *
677 * @returns  none
678 *
679 * @remarks
680 * NOTE: while computing the skip cost, do not enable early exit from compute
681 * sad function because, a negative bias gets added later
682 * Note that the last ME candidate in me ctxt is taken as skip motion vector
683 *
684 *******************************************************************************
685 */
ime_compute_skip_cost(me_ctxt_t * ps_me_ctxt,ime_mv_t * ps_skip_mv,mb_part_ctxt * ps_smb_part_info,UWORD32 u4_use_stat_sad,WORD32 i4_reflist,WORD32 i4_is_slice_type_b)686 void ime_compute_skip_cost
687     (
688          me_ctxt_t *ps_me_ctxt,
689          ime_mv_t *ps_skip_mv,
690          mb_part_ctxt *ps_smb_part_info,
691          UWORD32 u4_use_stat_sad,
692          WORD32 i4_reflist,
693          WORD32 i4_is_slice_type_b
694     )
695 {
696 
697     /* SAD(distortion metric) of an mb */
698     WORD32 i4_mb_distortion;
699 
700     /* cost = distortion + u4_lambda_motion * rate */
701     WORD32 i4_mb_cost;
702 
703     /* temp var */
704     UWORD8 *pu1_ref = NULL;
705 
706     ime_mv_t s_skip_mv;
707 
708     s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
709     s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
710 
711     /* Check if the skip mv is out of bounds or subpel */
712     {
713         /* skip mv */
714         ime_mv_t s_clip_skip_mv;
715 
716         s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
717         s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
718 
719         if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
720            (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
721            (ps_skip_mv->i2_mvx & 0x3) ||
722            (ps_skip_mv->i2_mvy & 0x3))
723         {
724             return ;
725         }
726     }
727 
728 
729     /* adjust ref pointer */
730     pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
731                     + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
732 
733     if(u4_use_stat_sad == 1)
734     {
735         UWORD32 u4_is_nonzero;
736 
737         ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
738                         ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
739                         ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
740                         &i4_mb_distortion, &u4_is_nonzero);
741 
742         if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
743         {
744             ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
745             ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
746         }
747     }
748     else
749     {
750         ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
751                         ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
752                         ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
753 
754         if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
755         {
756             ps_me_ctxt->i4_min_sad = i4_mb_distortion;
757             ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
758         }
759     }
760 
761 
762     /* for skip mode cost & distortion are identical
763      * But we shall add a bias to favor skip mode.
764      * Doc. JVT B118 Suggests SKIP_BIAS as 16.
765      * TODO : Empirical analysis of SKIP_BIAS is necessary */
766 
767     i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1]  * i4_is_slice_type_b));
768 
769     if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
770     {
771         ps_smb_part_info->i4_mb_cost = i4_mb_cost;
772         ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
773         ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
774         ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
775     }
776 }
777 
778