1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ime.c
24 *
25 * @brief
26 * This file contains functions needed for computing motion vectors of a
27 * 16x16 block
28 *
29 * @author
30 * Ittiam
31 *
32 * @par List of Functions:
33 * - ime_diamond_search_16x16
34 * - ime_evaluate_init_srchposn_16x16
35 * - ime_full_pel_motion_estimation_16x16
36 * - ime_sub_pel_motion_estimation_16x16
37 * - ime_compute_skip_cost
38 *
39 * @remarks
40 * None
41 *
42 *******************************************************************************
43 */
44
45 /*****************************************************************************/
46 /* File Includes */
47 /*****************************************************************************/
48
49 /* System include files */
50 #include <stdio.h>
51 #include <assert.h>
52 #include <limits.h>
53 #include <string.h>
54
55 /* User include files */
56 #include "ime_typedefs.h"
57 #include "ime_distortion_metrics.h"
58 #include "ime_defs.h"
59 #include "ime_structs.h"
60 #include "ime.h"
61 #include "ime_macros.h"
62 #include "ime_statistics.h"
63
64 /**
65 *******************************************************************************
66 *
67 * @brief Diamond Search
68 *
69 * @par Description:
70 * This function computes the sad at vertices of several layers of diamond grid
71 * at a time. The number of layers of diamond grid that would be evaluated is
72 * configurable.The function computes the sad at vertices of a diamond grid. If
73 * the sad at the center of the diamond grid is lesser than the sad at any other
74 * point of the diamond grid, the function marks the candidate Mb partition as
75 * mv.
76 *
77 * @param[in] ps_me_ctxt
78 * pointer to me context
79 *
80 * @param[in] i4_reflist
81 * ref list
82 *
83 * @returns mv pair & corresponding distortion and cost
84 *
85 * @remarks Diamond Srch, radius is 1
86 *
87 *******************************************************************************
88 */
ime_diamond_search_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)89 void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
90 {
91 /* MB partition info */
92 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
93
94 /* lagrange parameter */
95 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
96
97 /* srch range*/
98 WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
99 WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
100 WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
101 WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
102
103 /* enabled fast sad computation */
104 // UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
105
106 /* pointer to src macro block */
107 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
108 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
109
110 /* strides */
111 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
112 WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
113
114 /* least cost */
115 WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
116
117 /* least sad */
118 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
119
120 /* mv pair */
121 WORD16 i2_mvx, i2_mvy;
122
123 /* mv bits */
124 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
125
126 /* temp var */
127 WORD32 i4_cost[4];
128 WORD32 i4_sad[4];
129 UWORD8 *pu1_ref;
130 WORD16 i2_mv_u_x, i2_mv_u_y;
131
132 /* Diamond search Iteration Max Cnt */
133 UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
134
135 /* temp var */
136 // UWORD8 u1_prev_jump = NONE;
137 // UWORD8 u1_curr_jump = NONE;
138 // UWORD8 u1_next_jump;
139 // WORD32 mask_arr[5] = {15, 13, 14, 7, 11};
140 // WORD32 mask;
141 // UWORD8 *apu1_ref[4];
142 // WORD32 i, cnt;
143 // WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
144
145 /* mv with best sad during initial evaluation */
146 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
147 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
148
149 i2_mv_u_x = i2_mvx;
150 i2_mv_u_y = i2_mvy;
151
152 while (u4_num_layers)
153 {
154 /* FIXME : is this the write way to check for out of bounds ? */
155 if ( (i2_mvx - 1 < i4_srch_range_w) ||
156 (i2_mvx + 1 > i4_srch_range_e) ||
157 (i2_mvy - 1 < i4_srch_range_n) ||
158 (i2_mvy + 1 > i4_srch_range_s) )
159 {
160 break;
161 }
162
163 pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
164
165 ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
166 pu1_curr_mb,
167 i4_ref_strd,
168 i4_src_strd,
169 i4_sad);
170
171 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
172 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
173 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
174 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
175
176 /* compute cost */
177 i4_cost[0] = i4_sad[0] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
178 + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
179 i4_cost[1] = i4_sad[1] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
180 + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
181 i4_cost[2] = i4_sad[2] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
182 + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
183 i4_cost[3] = i4_sad[3] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
184 + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
185
186
187 if (i4_cost_least > i4_cost[0])
188 {
189 i4_cost_least = i4_cost[0];
190 i4_distortion_least = i4_sad[0];
191
192 i2_mv_u_x = (i2_mvx - 1);
193 i2_mv_u_y = i2_mvy;
194 }
195
196 if (i4_cost_least > i4_cost[1])
197 {
198 i4_cost_least = i4_cost[1];
199 i4_distortion_least = i4_sad[1];
200
201 i2_mv_u_x = (i2_mvx + 1);
202 i2_mv_u_y = i2_mvy;
203 }
204
205 if (i4_cost_least > i4_cost[2])
206 {
207 i4_cost_least = i4_cost[2];
208 i4_distortion_least = i4_sad[2];
209
210 i2_mv_u_x = i2_mvx;
211 i2_mv_u_y = i2_mvy - 1;
212 }
213
214 if (i4_cost_least > i4_cost[3])
215 {
216 i4_cost_least = i4_cost[3];
217 i4_distortion_least = i4_sad[3];
218
219 i2_mv_u_x = i2_mvx;
220 i2_mv_u_y = i2_mvy + 1;
221 }
222
223 if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
224 {
225 ps_mb_part->u4_exit = 1;
226 break;
227 }
228 else
229 {
230 i2_mvx = i2_mv_u_x;
231 i2_mvy = i2_mv_u_y;
232 }
233 u4_num_layers--;
234 }
235
236 if (i4_cost_least < ps_mb_part->i4_mb_cost)
237 {
238 ps_mb_part->i4_mb_cost = i4_cost_least;
239 ps_mb_part->i4_mb_distortion = i4_distortion_least;
240 ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
241 ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
242 }
243
244 }
245
246
247 /**
248 *******************************************************************************
249 *
250 * @brief This function computes the best motion vector among the tentative mv
251 * candidates chosen.
252 *
253 * @par Description:
254 * This function determines the position in the search window at which the motion
255 * estimation should begin in order to minimise the number of search iterations.
256 *
257 * @param[in] ps_me_ctxt
258 * pointer to me context
259 *
260 * @param[in] i4_reflist
261 * ref list
262 *
263 * @returns mv pair & corresponding distortion and cost
264 *
265 * @remarks none
266 *
267 *******************************************************************************
268 */
269
ime_evaluate_init_srchposn_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)270 void ime_evaluate_init_srchposn_16x16
271 (
272 me_ctxt_t *ps_me_ctxt,
273 WORD32 i4_reflist
274 )
275 {
276 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
277
278 /* candidate mv cnt */
279 UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
280
281 /* list of candidate mvs */
282 ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
283
284 /* pointer to src macro block */
285 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
286 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
287
288 /* strides */
289 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
290 WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
291
292 /* enabled fast sad computation */
293 UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
294
295 /* SAD(distortion metric) of an 8x8 block */
296 WORD32 i4_mb_distortion;
297
298 /* cost = distortion + u4_lambda_motion * rate */
299 WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
300
301 /* mb partitions info */
302 mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
303
304 /* mv bits */
305 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
306
307 /* temp var */
308 UWORD32 i, j;
309 WORD32 i4_srch_pos_idx = 0;
310 UWORD8 *pu1_ref = NULL;
311
312 /* Carry out a search using each of the motion vector pairs identified above as predictors. */
313 /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
314 for(i = 0; i < u4_num_candidates; i++)
315 {
316 /* compute sad */
317 WORD32 c_sad = 1;
318
319 for(j = 0; j < i; j++ )
320 {
321 if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
322 (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
323 {
324 c_sad = 0;
325 break;
326 }
327 }
328 if(c_sad)
329 {
330 /* adjust ref pointer */
331 pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
332
333 /* compute distortion */
334 ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
335
336 DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
337
338 /* compute cost */
339 i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
340 + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
341
342 if (i4_mb_cost < i4_mb_cost_least)
343 {
344 i4_mb_cost_least = i4_mb_cost;
345
346 i4_distortion_least = i4_mb_distortion;
347
348 i4_srch_pos_idx = i;
349 }
350 }
351 }
352
353 if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
354 {
355 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
356 ps_mb_part->i4_mb_cost = i4_mb_cost_least;
357 ps_mb_part->i4_mb_distortion = i4_distortion_least;
358 ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
359 ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
360 }
361 }
362
363 /**
364 *******************************************************************************
365 *
366 * @brief Searches for the best matching full pixel predictor within the search
367 * range
368 *
369 * @par Description:
370 * For a given algorithm (diamond, Hex, nStep, ...) chosen, it searches for the
371 * best matching full pixel predictor within the search range
372 *
373 * @param[in] ps_me_ctxt
374 * pointer to me context
375 *
376 * @param[in] i4_reflist
377 * ref list
378 *
379 * @returns mv pair & corresponding distortion and cost
380 *
381 * @remarks none
382 *
383 *******************************************************************************
384 */
ime_full_pel_motion_estimation_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_ref_list)385 void ime_full_pel_motion_estimation_16x16
386 (
387 me_ctxt_t *ps_me_ctxt,
388 WORD32 i4_ref_list
389 )
390 {
391 /* mb part info */
392 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
393
394 /******************************************************************/
395 /* Modify Search range about initial candidate instead of zero mv */
396 /******************************************************************/
397 /*
398 * FIXME: The motion vectors in a way can become unbounded. It may so happen that
399 * MV might exceed the limit of the profile configured.
400 */
401 ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
402 -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
403 ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
404 ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
405 ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
406 -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
407 ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
408 ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
409
410 /************************************************************/
411 /* Traverse about best initial candidate for mv */
412 /************************************************************/
413
414 switch (ps_me_ctxt->u4_me_speed_preset)
415 {
416 case DMND_SRCH:
417 ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
418 break;
419 default:
420 assert(0);
421 break;
422 }
423 }
424
425 /**
426 *******************************************************************************
427 *
428 * @brief Searches for the best matching sub pixel predictor within the search
429 * range
430 *
431 * @par Description:
432 * This function begins by searching across all sub pixel sample points
433 * around the full pel motion vector. The vector with least cost is chosen as
434 * the mv for the current mb.
435 *
436 * @param[in] ps_me_ctxt
437 * pointer to me context
438 *
439 * @param[in] i4_reflist
440 * ref list
441 *
442 * @returns mv pair & corresponding distortion and cost
443 *
444 * @remarks none
445 *
446 *******************************************************************************
447 */
ime_sub_pel_motion_estimation_16x16(me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)448 void ime_sub_pel_motion_estimation_16x16
449 (
450 me_ctxt_t *ps_me_ctxt,
451 WORD32 i4_reflist
452 )
453 {
454 /* pointers to src & ref macro block */
455 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
456
457 /* pointers to ref. half pel planes */
458 UWORD8 *pu1_ref_mb_half_x;
459 UWORD8 *pu1_ref_mb_half_y;
460 UWORD8 *pu1_ref_mb_half_xy;
461
462 /* pointers to ref. half pel planes */
463 UWORD8 *pu1_ref_mb_half_x_temp;
464 UWORD8 *pu1_ref_mb_half_y_temp;
465 UWORD8 *pu1_ref_mb_half_xy_temp;
466
467 /* strides */
468 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
469
470 WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
471
472 /* mb partitions info */
473 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
474
475 /* SAD(distortion metric) of an mb */
476 WORD32 i4_mb_distortion;
477 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
478
479 /* cost = distortion + u4_lambda_motion * rate */
480 WORD32 i4_mb_cost;
481 WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
482
483 /*Best half pel buffer*/
484 UWORD8 *pu1_best_hpel_buf = NULL;
485
486 /* mv bits */
487 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
488
489 /* Motion vectors in full-pel units */
490 WORD16 mv_x, mv_y;
491
492 /* lambda - lagrange constant */
493 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
494
495 /* Flags to check if half pel points needs to be evaluated */
496 /**************************************/
497 /* 1 bit for each half pel candidate */
498 /* bit 0 - half x = 1, half y = 0 */
499 /* bit 1 - half x = -1, half y = 0 */
500 /* bit 2 - half x = 0, half y = 1 */
501 /* bit 3 - half x = 0, half y = -1 */
502 /* bit 4 - half x = 1, half y = 1 */
503 /* bit 5 - half x = -1, half y = 1 */
504 /* bit 6 - half x = 1, half y = -1 */
505 /* bit 7 - half x = -1, half y = -1 */
506 /**************************************/
507 /* temp var */
508 WORD16 i2_mv_u_x, i2_mv_u_y;
509 WORD32 i, j;
510 WORD32 ai4_sad[8];
511
512 WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
513
514 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
515 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
516
517 /************************************************************/
518 /* Evaluate half pel */
519 /************************************************************/
520 mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
521 mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
522
523
524 /**************************************************************/
525 /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
526 /* left side of full pel */
527 /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
528 /* top side of full pel */
529 /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */
530 /* on the top left side of full pel */
531 /* for the function pf_ime_sub_pel_compute_sad_16x16 the */
532 /* default postions are */
533 /* ps_me_ctxt->pu1_half_x = right halp_pel */
534 /* ps_me_ctxt->pu1_half_y = bottom halp_pel */
535 /* ps_me_ctxt->pu1_half_xy = bottom right halp_pel */
536 /* Hence corresponding adjustments made here */
537 /**************************************************************/
538
539 pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
540 pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
541 pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
542
543 ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
544 pu1_ref_mb_half_y,
545 pu1_ref_mb_half_xy,
546 i4_src_strd, i4_ref_strd,
547 ai4_sad);
548
549 /* Half x plane */
550 for(i = 0; i < 2; i++)
551 {
552 WORD32 mv_x_tmp = (mv_x << 2) + 2;
553 WORD32 mv_y_tmp = (mv_y << 2);
554
555 mv_x_tmp -= (i * 4);
556
557 i4_mb_distortion = ai4_sad[i];
558
559 /* compute cost */
560 i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
561 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
562
563 if (i4_mb_cost < i4_mb_cost_least)
564 {
565 i4_mb_cost_least = i4_mb_cost;
566
567 i4_distortion_least = i4_mb_distortion;
568
569 i2_mv_u_x = mv_x_tmp;
570
571 i2_mv_u_y = mv_y_tmp;
572
573 #ifndef HP_PL /*choosing whether left or right half_x*/
574 ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
575 pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
576
577 i4_srch_pos_idx = 0;
578 #endif
579 }
580
581 }
582
583 /* Half y plane */
584 for(i = 0; i < 2; i++)
585 {
586 WORD32 mv_x_tmp = (mv_x << 2);
587 WORD32 mv_y_tmp = (mv_y << 2) + 2;
588
589 mv_y_tmp -= (i * 4);
590
591 i4_mb_distortion = ai4_sad[2 + i];
592
593 /* compute cost */
594 i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
595 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
596
597 if (i4_mb_cost < i4_mb_cost_least)
598 {
599 i4_mb_cost_least = i4_mb_cost;
600
601 i4_distortion_least = i4_mb_distortion;
602
603 i2_mv_u_x = mv_x_tmp;
604
605 i2_mv_u_y = mv_y_tmp;
606
607 #ifndef HP_PL/*choosing whether top or bottom half_y*/
608 ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
609 pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
610
611 i4_srch_pos_idx = 1;
612 #endif
613 }
614
615 }
616
617 /* Half xy plane */
618 for(j = 0; j < 2; j++)
619 {
620 for(i = 0; i < 2; i++)
621 {
622 WORD32 mv_x_tmp = (mv_x << 2) + 2;
623 WORD32 mv_y_tmp = (mv_y << 2) + 2;
624
625 mv_x_tmp -= (i * 4);
626 mv_y_tmp -= (j * 4);
627
628 i4_mb_distortion = ai4_sad[4 + i + 2 * j];
629
630 /* compute cost */
631 i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
632 + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
633
634 if (i4_mb_cost < i4_mb_cost_least)
635 {
636 i4_mb_cost_least = i4_mb_cost;
637
638 i4_distortion_least = i4_mb_distortion;
639
640 i2_mv_u_x = mv_x_tmp;
641
642 i2_mv_u_y = mv_y_tmp;
643
644 #ifndef HP_PL /*choosing between four half_xy */
645 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
646 pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
647
648 i4_srch_pos_idx = 2;
649 #endif
650 }
651
652 }
653 }
654
655 if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
656 {
657 ps_mb_part->i4_mb_cost = i4_mb_cost_least;
658 ps_mb_part->i4_mb_distortion = i4_distortion_least;
659 ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
660 ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
661 ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
662 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
663 }
664 }
665
666 /**
667 *******************************************************************************
668 *
669 * @brief This function computes cost of skip macroblocks
670 *
671 * @par Description:
672 *
673 * @param[in] ps_me_ctxt
674 * pointer to me ctxt
675 *
676 *
677 * @returns none
678 *
679 * @remarks
680 * NOTE: while computing the skip cost, do not enable early exit from compute
681 * sad function because, a negative bias gets added later
682 * Note that the last ME candidate in me ctxt is taken as skip motion vector
683 *
684 *******************************************************************************
685 */
ime_compute_skip_cost(me_ctxt_t * ps_me_ctxt,ime_mv_t * ps_skip_mv,mb_part_ctxt * ps_smb_part_info,UWORD32 u4_use_stat_sad,WORD32 i4_reflist,WORD32 i4_is_slice_type_b)686 void ime_compute_skip_cost
687 (
688 me_ctxt_t *ps_me_ctxt,
689 ime_mv_t *ps_skip_mv,
690 mb_part_ctxt *ps_smb_part_info,
691 UWORD32 u4_use_stat_sad,
692 WORD32 i4_reflist,
693 WORD32 i4_is_slice_type_b
694 )
695 {
696
697 /* SAD(distortion metric) of an mb */
698 WORD32 i4_mb_distortion;
699
700 /* cost = distortion + u4_lambda_motion * rate */
701 WORD32 i4_mb_cost;
702
703 /* temp var */
704 UWORD8 *pu1_ref = NULL;
705
706 ime_mv_t s_skip_mv;
707
708 s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
709 s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
710
711 /* Check if the skip mv is out of bounds or subpel */
712 {
713 /* skip mv */
714 ime_mv_t s_clip_skip_mv;
715
716 s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
717 s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
718
719 if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
720 (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
721 (ps_skip_mv->i2_mvx & 0x3) ||
722 (ps_skip_mv->i2_mvy & 0x3))
723 {
724 return ;
725 }
726 }
727
728
729 /* adjust ref pointer */
730 pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
731 + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
732
733 if(u4_use_stat_sad == 1)
734 {
735 UWORD32 u4_is_nonzero;
736
737 ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
738 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
739 ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
740 &i4_mb_distortion, &u4_is_nonzero);
741
742 if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
743 {
744 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
745 ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
746 }
747 }
748 else
749 {
750 ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
751 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
752 ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
753
754 if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
755 {
756 ps_me_ctxt->i4_min_sad = i4_mb_distortion;
757 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
758 }
759 }
760
761
762 /* for skip mode cost & distortion are identical
763 * But we shall add a bias to favor skip mode.
764 * Doc. JVT B118 Suggests SKIP_BIAS as 16.
765 * TODO : Empirical analysis of SKIP_BIAS is necessary */
766
767 i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b));
768
769 if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
770 {
771 ps_smb_part_info->i4_mb_cost = i4_mb_cost;
772 ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
773 ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
774 ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
775 }
776 }
777
778