1 /******************************************************************************
2 *
3 * Copyright (C) 2022 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * isvce_me.c
25 *
26 * @brief
27 * Contains definition of functions for motion estimation
28 *
29 * @author
30 * ittiam
31 *
32 * @par List of Functions:
33 * - isvce_init_mv_bits()
34 * - isvce_skip_analysis_chroma()
35 * - isvce_skip_analysis_luma()
36 * - isvce_analyse_skip()
37 * - isvce_get_search_candidates()
38 * - isvce_find_skip_motion_vector()
39 * - isvce_get_mv_predictor()
40 * - isvce_mv_pred()
41 * - isvce_mv_pred_me()
42 * - isvce_init_me()
43 * - isvce_compute_me()
44 * - isvce_compute_me_nmb()
45 *
46 * @remarks
47 * None
48 *
49 *******************************************************************************
50 */
51
52 /*****************************************************************************/
53 /* File Includes */
54 /*****************************************************************************/
55
56 /* System include files */
57 #include <stdio.h>
58 #include <assert.h>
59 #include <limits.h>
60 #include <stdbool.h>
61
62 /* User include files */
63 #include "ih264_typedefs.h"
64 #include "ih264_macros.h"
65 #include "isvc_macros.h"
66 #include "ih264_platform_macros.h"
67 #include "iv2.h"
68 #include "ive2.h"
69 #include "ithread.h"
70 #include "ih264_platform_macros.h"
71 #include "isvc_defs.h"
72 #include "ime_defs.h"
73 #include "ime_distortion_metrics.h"
74 #include "ime_structs.h"
75 #include "isvc_structs.h"
76 #include "isvc_trans_quant_itrans_iquant.h"
77 #include "isvc_inter_pred_filters.h"
78 #include "isvc_mem_fns.h"
79 #include "ih264_padding.h"
80 #include "ih264_intra_pred_filters.h"
81 #include "ih264_deblk_edge_filters.h"
82 #include "isvc_cabac_tables.h"
83 #include "isvce_defs.h"
84 #include "ih264e_error.h"
85 #include "ih264e_bitstream.h"
86 #include "irc_cntrl_param.h"
87 #include "irc_frame_info_collector.h"
88 #include "isvce_rate_control.h"
89 #include "isvce_cabac_structs.h"
90 #include "isvce_structs.h"
91 #include "isvce_globals.h"
92 #include "isvce_me.h"
93 #include "ime.h"
94 #include "ih264_debug.h"
95 #include "ih264e_intra_modes_eval.h"
96 #include "isvce_core_coding.h"
97 #include "isvce_mc.h"
98 #include "ih264e_debug.h"
99 #include "ih264e_half_pel.h"
100 #include "ime_statistics.h"
101 #include "ih264e_platform_macros.h"
102 #include "isvce_defs.h"
103 #include "isvce_structs.h"
104 #include "isvce_ilp_mv_utils.h"
105 #include "isvce_utils.h"
106
107 /*****************************************************************************/
108 /* Function Definitions */
109 /*****************************************************************************/
110
111 /**
112 *******************************************************************************
113 *
114 * @brief Diamond Search
115 *
116 * @par Description:
117 * This function computes the sad at vertices of several layers of diamond grid
118 * at a time. The number of layers of diamond grid that would be evaluated is
119 * configurable.The function computes the sad at vertices of a diamond grid. If
120 * the sad at the center of the diamond grid is lesser than the sad at any other
121 * point of the diamond grid, the function marks the candidate Mb partition as
122 * mv.
123 *
124 * @param[in] ps_mb_part
125 * pointer to current mb partition ctxt with respect to ME
126 *
127 * @param[in] ps_me_ctxt
128 * pointer to me context
129 *
130 * @param[in] u4_lambda_motion
131 * lambda motion
132 *
133 * @param[in] u4_enable_fast_sad
134 * enable/disable fast sad computation
135 *
136 * @returns mv pair & corresponding distortion and cost
137 *
138 * @remarks Diamond Srch, radius is 1
139 *
140 *******************************************************************************
141 */
isvce_diamond_search_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)142 static void isvce_diamond_search_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
143 {
144 /* MB partition info */
145 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
146
147 /* lagrange parameter */
148 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
149
150 /* srch range*/
151 WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
152 WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
153 WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
154 WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
155
156 /* pointer to src macro block */
157 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
158 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
159
160 /* strides */
161 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
162 WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
163
164 /* least cost */
165 WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
166
167 /* least sad */
168 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
169
170 /* mv pair */
171 WORD16 i2_mvx, i2_mvy;
172
173 /* mv bits */
174 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
175
176 /* temp var */
177 WORD32 i4_cost[4];
178 WORD32 i4_sad[4];
179 UWORD8 *pu1_ref;
180 WORD16 i2_mv_u_x, i2_mv_u_y;
181
182 /* Diamond search Iteration Max Cnt */
183 WORD64 i8_num_layers = ps_me_ctxt->u4_num_layers;
184
185 /* mv with best sad during initial evaluation */
186 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
187 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
188
189 i2_mv_u_x = i2_mvx;
190 i2_mv_u_y = i2_mvy;
191
192 while(i8_num_layers--)
193 {
194 /* FIXME : is this the write way to check for out of bounds ? */
195 if((i2_mvx - 1 < i4_srch_range_w) || (i2_mvx + 1 > i4_srch_range_e) ||
196 (i2_mvy - 1 < i4_srch_range_n) || (i2_mvy + 1 > i4_srch_range_s))
197 {
198 break;
199 }
200
201 pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
202
203 ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref, pu1_curr_mb, i4_ref_strd, i4_src_strd,
204 i4_sad);
205
206 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
207 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
208 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
209 DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
210
211 /* compute cost */
212 i4_cost[0] =
213 i4_sad[0] +
214 u4_lambda_motion * (pu1_mv_bits[((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
215 pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
216 i4_cost[1] =
217 i4_sad[1] +
218 u4_lambda_motion * (pu1_mv_bits[((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
219 pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
220 i4_cost[2] =
221 i4_sad[2] +
222 u4_lambda_motion * (pu1_mv_bits[(i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
223 pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
224 i4_cost[3] =
225 i4_sad[3] +
226 u4_lambda_motion * (pu1_mv_bits[(i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
227 pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
228
229 if(i4_cost_least > i4_cost[0])
230 {
231 i4_cost_least = i4_cost[0];
232 i4_distortion_least = i4_sad[0];
233
234 i2_mv_u_x = (i2_mvx - 1);
235 i2_mv_u_y = i2_mvy;
236 }
237
238 if(i4_cost_least > i4_cost[1])
239 {
240 i4_cost_least = i4_cost[1];
241 i4_distortion_least = i4_sad[1];
242
243 i2_mv_u_x = (i2_mvx + 1);
244 i2_mv_u_y = i2_mvy;
245 }
246
247 if(i4_cost_least > i4_cost[2])
248 {
249 i4_cost_least = i4_cost[2];
250 i4_distortion_least = i4_sad[2];
251
252 i2_mv_u_x = i2_mvx;
253 i2_mv_u_y = i2_mvy - 1;
254 }
255
256 if(i4_cost_least > i4_cost[3])
257 {
258 i4_cost_least = i4_cost[3];
259 i4_distortion_least = i4_sad[3];
260
261 i2_mv_u_x = i2_mvx;
262 i2_mv_u_y = i2_mvy + 1;
263 }
264
265 if((i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
266 {
267 ps_mb_part->u4_exit = 1;
268 break;
269 }
270 else
271 {
272 i2_mvx = i2_mv_u_x;
273 i2_mvy = i2_mv_u_y;
274 }
275 }
276
277 if(i4_cost_least < ps_mb_part->i4_mb_cost)
278 {
279 ps_mb_part->i4_mb_cost = i4_cost_least;
280 ps_mb_part->i4_mb_distortion = i4_distortion_least;
281 ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
282 ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
283 }
284 }
285
286 /**
287 *******************************************************************************
288 *
289 * @brief This function computes the best motion vector among the tentative mv
290 * candidates chosen.
291 *
292 * @par Description:
293 * This function determines the position in the search window at which the
294 *motion estimation should begin in order to minimise the number of search
295 *iterations.
296 *
297 * @param[in] ps_mb_part
298 * pointer to current mb partition ctxt with respect to ME
299 *
300 * @param[in] u4_lambda_motion
301 * lambda motion
302 *
303 * @param[in] u4_fast_flag
304 * enable/disable fast sad computation
305 *
306 * @returns mv pair & corresponding distortion and cost
307 *
308 * @remarks none
309 *
310 *******************************************************************************
311 */
312
isvce_evaluate_init_srchposn_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)313 static void isvce_evaluate_init_srchposn_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
314 {
315 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
316
317 /* candidate mv cnt */
318 UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
319
320 /* list of candidate mvs */
321 ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
322
323 /* pointer to src macro block */
324 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
325 UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
326
327 /* strides */
328 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
329 WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
330
331 /* enabled fast sad computation */
332 UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
333
334 /* SAD(distortion metric) of an 8x8 block */
335 WORD32 i4_mb_distortion;
336
337 /* cost = distortion + u4_lambda_motion * rate */
338 WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
339
340 /* mb partitions info */
341 mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
342
343 /* mv bits */
344 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
345
346 /* temp var */
347 UWORD32 i, j;
348 WORD32 i4_srch_pos_idx = 0;
349 UWORD8 *pu1_ref = NULL;
350
351 /* Carry out a search using each of the motion vector pairs identified above
352 * as predictors. */
353 /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
354 for(i = 0; i < u4_num_candidates; i++)
355 {
356 /* compute sad */
357 WORD32 c_sad = 1;
358
359 for(j = 0; j < i; j++)
360 {
361 if((ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
362 (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy))
363 {
364 c_sad = 0;
365 break;
366 }
367 }
368 if(c_sad)
369 {
370 /* adjust ref pointer */
371 pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
372
373 /* compute distortion */
374 ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](
375 pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least,
376 &i4_mb_distortion);
377
378 DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
379 /* compute cost */
380 i4_mb_cost =
381 i4_mb_distortion +
382 u4_lambda_motion *
383 (pu1_mv_bits[(ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
384 pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
385
386 if(i4_mb_cost < i4_mb_cost_least)
387 {
388 i4_mb_cost_least = i4_mb_cost;
389
390 i4_distortion_least = i4_mb_distortion;
391
392 i4_srch_pos_idx = i;
393 }
394 }
395 }
396
397 if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
398 {
399 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
400 ps_mb_part->i4_mb_cost = i4_mb_cost_least;
401 ps_mb_part->i4_mb_distortion = i4_distortion_least;
402 ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
403 ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
404 }
405 }
406
407 /**
408 *******************************************************************************
409 *
410 * @brief Searches for the best matching full pixel predictor within the search
411 * range
412 *
413 * @par Description:
414 * This function begins by computing the mv predict vector for the current mb.
415 * This is used for cost computations. Further basing on the algo. chosen, it
416 * looks through a set of candidate vectors that best represent the mb a least
417 * cost and returns this information.
418 *
419 * @param[in] ps_proc
420 * pointer to current proc ctxt
421 *
422 * @param[in] ps_me_ctxt
423 * pointer to me context
424 *
425 * @returns mv pair & corresponding distortion and cost
426 *
427 * @remarks none
428 *
429 *******************************************************************************
430 */
isvce_full_pel_motion_estimation_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_ref_list)431 static void isvce_full_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_ref_list)
432 {
433 /* mb part info */
434 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
435
436 /******************************************************************/
437 /* Modify Search range about initial candidate instead of zero mv */
438 /******************************************************************/
439 /*
440 * FIXME: The motion vectors in a way can become unbounded. It may so happen
441 * that MV might exceed the limit of the profile configured.
442 */
443 ps_me_ctxt->i4_srch_range_w =
444 MAX(ps_me_ctxt->i4_srch_range_w,
445 -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
446 ps_me_ctxt->i4_srch_range_e =
447 MIN(ps_me_ctxt->i4_srch_range_e,
448 ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
449 ps_me_ctxt->i4_srch_range_n =
450 MAX(ps_me_ctxt->i4_srch_range_n,
451 -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
452 ps_me_ctxt->i4_srch_range_s =
453 MIN(ps_me_ctxt->i4_srch_range_s,
454 ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
455
456 /************************************************************/
457 /* Traverse about best initial candidate for mv */
458 /************************************************************/
459
460 switch(ps_me_ctxt->u4_me_speed_preset)
461 {
462 case DMND_SRCH:
463 isvce_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
464 break;
465 default:
466 assert(0);
467 break;
468 }
469 }
470
471 /**
472 *******************************************************************************
473 *
474 * @brief Searches for the best matching sub pixel predictor within the search
475 * range
476 *
477 * @par Description:
478 * This function begins by searching across all sub pixel sample points
479 * around the full pel motion vector. The vector with least cost is chosen as
480 * the mv for the current mb. If the skip mode is not evaluated while analysing
481 * the initial search candidates then analyse it here and update the mv.
482 *
483 * @param[in] ps_proc
484 * pointer to current proc ctxt
485 *
486 * @param[in] ps_me_ctxt
487 * pointer to me context
488 *
489 * @returns none
490 *
491 * @remarks none
492 *
493 *******************************************************************************
494 */
isvce_sub_pel_motion_estimation_16x16(isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)495 static void isvce_sub_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
496 {
497 /* pointers to src & ref macro block */
498 UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
499
500 /* pointers to ref. half pel planes */
501 UWORD8 *pu1_ref_mb_half_x;
502 UWORD8 *pu1_ref_mb_half_y;
503 UWORD8 *pu1_ref_mb_half_xy;
504
505 /* pointers to ref. half pel planes */
506 UWORD8 *pu1_ref_mb_half_x_temp;
507 UWORD8 *pu1_ref_mb_half_y_temp;
508 UWORD8 *pu1_ref_mb_half_xy_temp;
509
510 /* strides */
511 WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
512
513 WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
514
515 /* mb partitions info */
516 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
517
518 /* SAD(distortion metric) of an mb */
519 WORD32 i4_mb_distortion;
520 WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
521
522 /* cost = distortion + u4_lambda_motion * rate */
523 WORD32 i4_mb_cost;
524 WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
525
526 /*Best half pel buffer*/
527 UWORD8 *pu1_best_hpel_buf = NULL;
528
529 /* mv bits */
530 UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
531
532 /* Motion vectors in full-pel units */
533 WORD16 mv_x, mv_y;
534
535 /* lambda - lagrange constant */
536 UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
537
538 /* Flags to check if half pel points needs to be evaluated */
539 /**************************************/
540 /* 1 bit for each half pel candidate */
541 /* bit 0 - half x = 1, half y = 0 */
542 /* bit 1 - half x = -1, half y = 0 */
543 /* bit 2 - half x = 0, half y = 1 */
544 /* bit 3 - half x = 0, half y = -1 */
545 /* bit 4 - half x = 1, half y = 1 */
546 /* bit 5 - half x = -1, half y = 1 */
547 /* bit 6 - half x = 1, half y = -1 */
548 /* bit 7 - half x = -1, half y = -1 */
549 /**************************************/
550 /* temp var */
551 WORD16 i2_mv_u_x, i2_mv_u_y;
552 WORD32 i, j;
553 WORD32 ai4_sad[8];
554
555 WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
556
557 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
558 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
559
560 /************************************************************/
561 /* Evaluate half pel */
562 /************************************************************/
563 mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
564 mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
565
566 /**************************************************************/
567 /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
568 /* left side of full pel */
569 /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
570 /* top side of full pel */
571 /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */
572 /* on the top left side of full pel */
573 /* for the function pf_ime_sub_pel_compute_sad_16x16 the */
574 /* default postions are */
575 /* ps_me_ctxt->pu1_half_x = right halp_pel */
576 /* ps_me_ctxt->pu1_half_y = bottom halp_pel */
577 /* ps_me_ctxt->pu1_half_xy = bottom right halp_pel */
578 /* Hence corresponding adjustments made here */
579 /**************************************************************/
580
581 pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
582 pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
583 pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy =
584 ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
585
586 ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, pu1_ref_mb_half_y,
587 pu1_ref_mb_half_xy, i4_src_strd, i4_ref_strd,
588 ai4_sad);
589
590 /* Half x plane */
591 for(i = 0; i < 2; i++)
592 {
593 WORD32 mv_x_tmp = (mv_x << 2) + 2;
594 WORD32 mv_y_tmp = (mv_y << 2);
595
596 mv_x_tmp -= (i * 4);
597
598 i4_mb_distortion = ai4_sad[i];
599
600 /* compute cost */
601 i4_mb_cost = i4_mb_distortion +
602 u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
603 pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
604
605 if(i4_mb_cost < i4_mb_cost_least)
606 {
607 i4_mb_cost_least = i4_mb_cost;
608
609 i4_distortion_least = i4_mb_distortion;
610
611 i2_mv_u_x = mv_x_tmp;
612
613 i2_mv_u_y = mv_y_tmp;
614
615 ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
616 pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
617
618 i4_srch_pos_idx = 0;
619 }
620 }
621
622 /* Half y plane */
623 for(i = 0; i < 2; i++)
624 {
625 WORD32 mv_x_tmp = (mv_x << 2);
626 WORD32 mv_y_tmp = (mv_y << 2) + 2;
627
628 mv_y_tmp -= (i * 4);
629
630 i4_mb_distortion = ai4_sad[2 + i];
631
632 /* compute cost */
633 i4_mb_cost = i4_mb_distortion +
634 u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
635 pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
636
637 if(i4_mb_cost < i4_mb_cost_least)
638 {
639 i4_mb_cost_least = i4_mb_cost;
640
641 i4_distortion_least = i4_mb_distortion;
642
643 i2_mv_u_x = mv_x_tmp;
644
645 i2_mv_u_y = mv_y_tmp;
646
647 ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i * (i4_ref_strd);
648 pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i * (i4_ref_strd);
649
650 i4_srch_pos_idx = 1;
651 }
652 }
653
654 /* Half xy plane */
655 for(j = 0; j < 2; j++)
656 {
657 for(i = 0; i < 2; i++)
658 {
659 WORD32 mv_x_tmp = (mv_x << 2) + 2;
660 WORD32 mv_y_tmp = (mv_y << 2) + 2;
661
662 mv_x_tmp -= (i * 4);
663 mv_y_tmp -= (j * 4);
664
665 i4_mb_distortion = ai4_sad[4 + i + 2 * j];
666
667 /* compute cost */
668 i4_mb_cost = i4_mb_distortion +
669 u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
670 pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
671
672 if(i4_mb_cost < i4_mb_cost_least)
673 {
674 i4_mb_cost_least = i4_mb_cost;
675
676 i4_distortion_least = i4_mb_distortion;
677
678 i2_mv_u_x = mv_x_tmp;
679
680 i2_mv_u_y = mv_y_tmp;
681
682 ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j * (i4_ref_strd) -i;
683 pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j * (i4_ref_strd) -i;
684
685 i4_srch_pos_idx = 2;
686 }
687 }
688 }
689
690 if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
691 {
692 ps_mb_part->i4_mb_cost = i4_mb_cost_least;
693 ps_mb_part->i4_mb_distortion = i4_distortion_least;
694 ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
695 ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
696 ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
697 ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
698 }
699 }
700
701 /**
702 *******************************************************************************
703 *
704 * @brief This function computes cost of skip macroblocks
705 *
706 * @par Description:
707 *
708 * @param[in] ps_me_ctxt
709 * pointer to me ctxt
710 *
711 *
712 * @returns none
713 *
714 * @remarks
715 * NOTE: while computing the skip cost, do not enable early exit from compute
716 * sad function because, a negative bias gets added later
717 * Note tha the last ME candidate in me ctxt is taken as skip motion vector
718 *
719 *******************************************************************************
720 */
isvce_compute_skip_cost(isvce_me_ctxt_t * ps_me_ctxt,ime_mv_t * ps_skip_mv,mb_part_ctxt * ps_smb_part_info,UWORD32 u4_use_stat_sad,WORD32 i4_reflist,WORD32 i4_is_slice_type_b)721 static void isvce_compute_skip_cost(isvce_me_ctxt_t *ps_me_ctxt, ime_mv_t *ps_skip_mv,
722 mb_part_ctxt *ps_smb_part_info, UWORD32 u4_use_stat_sad,
723 WORD32 i4_reflist, WORD32 i4_is_slice_type_b)
724 {
725 /* SAD(distortion metric) of an mb */
726 WORD32 i4_mb_distortion;
727
728 /* cost = distortion + u4_lambda_motion * rate */
729 WORD32 i4_mb_cost;
730
731 /* temp var */
732 UWORD8 *pu1_ref = NULL;
733
734 ime_mv_t s_skip_mv;
735
736 s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
737 s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy + 2) >> 2;
738
739 /* Check if the skip mv is out of bounds or subpel */
740 {
741 /* skip mv */
742 ime_mv_t s_clip_skip_mv;
743
744 s_clip_skip_mv.i2_mvx =
745 CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
746 s_clip_skip_mv.i2_mvy =
747 CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
748
749 if((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
750 (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || (ps_skip_mv->i2_mvx & 0x3) ||
751 (ps_skip_mv->i2_mvy & 0x3))
752 {
753 return;
754 }
755 }
756
757 /* adjust ref pointer */
758 pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx +
759 (s_skip_mv.i2_mvy * ps_me_ctxt->ai4_rec_strd[i4_reflist]);
760
761 if(u4_use_stat_sad == 1)
762 {
763 UWORD32 u4_is_nonzero;
764
765 ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
766 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
767 ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,
768 &u4_is_nonzero);
769
770 if(u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
771 {
772 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
773 ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
774 }
775 }
776 else
777 {
778 ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
779 ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
780 ps_me_ctxt->ai4_rec_strd[i4_reflist], INT_MAX, &i4_mb_distortion);
781
782 if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
783 {
784 ps_me_ctxt->i4_min_sad = i4_mb_distortion;
785 ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
786 }
787 }
788
789 /* for skip mode cost & distortion are identical
790 * But we shall add a bias to favor skip mode.
791 * Doc. JVT B118 Suggests SKIP_BIAS as 16.
792 * TODO : Empirical analysis of SKIP_BIAS is necessary */
793
794 i4_mb_cost = i4_mb_distortion -
795 (ps_me_ctxt->u4_lambda_motion *
796 (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b));
797
798 if(i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
799 {
800 ps_smb_part_info->i4_mb_cost = i4_mb_cost;
801 ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
802 ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
803 ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
804 }
805 }
806
807 /**
808 *******************************************************************************
809 *
810 * @brief
811 * This function populates the length of the codewords for motion vectors in the
812 * range (-search range, search range) in pixels
813 *
814 * @param[in] ps_me
815 * Pointer to me ctxt
816 *
817 * @param[out] pu1_mv_bits
818 * length of the codeword for all mv's
819 *
820 * @remarks The length of the code words are derived from signed exponential
821 * goloumb codes.
822 *
823 *******************************************************************************
824 */
isvce_init_mv_bits(isvce_me_ctxt_t * ps_me_ctxt)825 void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me_ctxt)
826 {
827 /* temp var */
828 WORD32 i, codesize = 3, diff, limit;
829 UWORD32 u4_code_num, u4_range;
830 UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
831
832 /* max srch range */
833 diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
834 /* sub pel */
835 diff <<= 2;
836 /* delta mv */
837 diff <<= 1;
838
839 /* codeNum for positive integer = 2x-1 : Table9-3 */
840 u4_code_num = (diff << 1);
841
842 /* get range of the bit string and put using put_bits() */
843 GETRANGE(u4_range, u4_code_num);
844
845 limit = 2 * u4_range - 1;
846
847 /* init mv bits */
848 ps_me_ctxt->pu1_mv_bits[0] = 1;
849
850 while(codesize < limit)
851 {
852 u4_uev_min = (1 << (codesize >> 1));
853 u4_uev_max = 2 * u4_uev_min - 1;
854
855 u4_sev_min = u4_uev_min >> 1;
856 u4_sev_max = u4_uev_max >> 1;
857
858 DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
859
860 for(i = u4_sev_min; i <= (WORD32) u4_sev_max; i++)
861 {
862 ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
863 }
864
865 codesize += 2;
866 }
867 }
868
869 /**
870 *******************************************************************************
871 *
872 * @brief Adds valid MVs as initial search candidates for motion estimation by
873 * cheking if it is distinct or not.
874 *
875 * @param[in] ps_search_cand
876 * MV to add as search candidate
877 *
878 * @param[in] ps_me_ctxt
879 * pointer to ME context
880 *
881 * @param[in] u4_num_candidates
882 * Number of inital search candidates value
883 *
884 *******************************************************************************
885 */
isvce_add_me_init_search_cands(mv_t * ps_search_cand,isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist,UWORD32 * u4_num_candidates,bool b_is_max_mv_diff_lt_4)886 static FORCEINLINE void isvce_add_me_init_search_cands(mv_t *ps_search_cand,
887 isvce_me_ctxt_t *ps_me_ctxt,
888 WORD32 i4_reflist,
889 UWORD32 *u4_num_candidates,
890 bool b_is_max_mv_diff_lt_4)
891 {
892 WORD32 k;
893 WORD32 i4_mv_x, i4_mv_y;
894
895 bool b_is_mv_identical = false;
896
897 WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
898 WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
899 WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
900 WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
901 UWORD32 u4_num_init_search_cands = u4_num_candidates[0];
902
903 i4_mv_x = (ps_search_cand->i2_mvx + 2) >> 2;
904 i4_mv_y = (ps_search_cand->i2_mvy + 2) >> 2;
905
906 i4_mv_x = CLIP3(i4_srch_range_w, i4_srch_range_e, i4_mv_x);
907 i4_mv_y = CLIP3(i4_srch_range_n, i4_srch_range_s, i4_mv_y);
908
909 if(u4_num_init_search_cands == 0)
910 {
911 b_is_mv_identical = false;
912 }
913 else
914 {
915 for(k = u4_num_init_search_cands - 1; k >= 0; k--)
916 {
917 if((ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvx == i4_mv_x &&
918 ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvy == i4_mv_y))
919 {
920 b_is_mv_identical = true;
921 }
922 }
923 }
924
925 if(!b_is_mv_identical)
926 {
927 if(USE_ILP_MV_IN_ME && ps_me_ctxt->ps_ilp_me_cands)
928 {
929 if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4)
930 {
931 if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_LT_2)
932 {
933 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx =
934 i4_mv_x;
935 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy =
936 i4_mv_y;
937
938 u4_num_candidates[0] += 1;
939 }
940 }
941 else if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4)
942 {
943 if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_GTEQ_2)
944 {
945 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx =
946 i4_mv_x;
947 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy =
948 i4_mv_y;
949
950 u4_num_candidates[0] += 1;
951 }
952 }
953 }
954 else
955 {
956 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx = i4_mv_x;
957 ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy = i4_mv_y;
958
959 u4_num_candidates[0] += 1;
960 }
961 }
962 }
963
964 /**
965 *******************************************************************************
966 *
967 * @brief Determines the valid candidates for which the initial search shall
968 * happen. The best of these candidates is used to center the diamond pixel
969 * search.
970 *
971 * @par Description: The function sends the skip, (0,0), left, top and top-right
972 * neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
973 * these are the same MVs that are used to form the MV predictor. These initial MV
974 * search candidates need not take care of slice boundaries and hence neighbor
975 * availability checks are not made here.
976 *
977 * @param[in] ps_left_mb_pu
978 * pointer to left mb motion vector info
979 *
980 * @param[in] ps_top_mb_pu
981 * pointer to top & top right mb motion vector info
982 *
983 * @param[in] ps_top_left_mb_pu
984 * pointer to top left mb motion vector info
985 *
986 * @param[out] ps_skip_mv
987 * pointer to skip motion vectors for the curr mb
988 *
989 * @param[in] i4_mb_x
990 * mb index x
991 *
992 * @param[in] i4_mb_y
993 * mb index y
994 *
995 * @param[in] i4_wd_mbs
996 * pic width in mbs
997 *
998 * @param[in] ps_motionEst
999 * pointer to me context
1000 *
1001 * @returns The list of MVs to be used of priming the full pel search and the
1002 * number of such MVs
1003 *
1004 * @remarks
1005 * Assumptions : 1. Assumes Only partition of size 16x16
1006 *
1007 *******************************************************************************
1008 */
isvce_get_search_candidates(isvce_process_ctxt_t * ps_proc,isvce_me_ctxt_t * ps_me_ctxt,WORD32 i4_reflist)1009 static void isvce_get_search_candidates(isvce_process_ctxt_t *ps_proc, isvce_me_ctxt_t *ps_me_ctxt,
1010 WORD32 i4_reflist)
1011 {
1012 mv_t s_zero_mv;
1013 mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
1014
1015 UWORD32 i;
1016 WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
1017
1018 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1019 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1020 mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
1021 ilp_me_cands_t *ps_ilp_me_cands = ps_me_ctxt->ps_ilp_me_cands;
1022
1023 bool b_is_max_mv_diff_lt_4 = false;
1024 WORD32 i4_mb_x = ps_proc->i4_mb_x;
1025 WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? L1 : L0;
1026 UWORD32 u4_num_candidates = 0;
1027
1028 s_zero_mv.i2_mvx = 0;
1029 s_zero_mv.i2_mvy = 0;
1030 ps_left_mv = &ps_proc->s_nbr_info.ps_left_mb_info->as_pu->as_me_info[i4_reflist].s_mv;
1031 ps_top_mv =
1032 &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->as_me_info[i4_reflist].s_mv;
1033 ps_top_left_mv = &ps_proc->s_nbr_info.ps_top_row_mb_info->as_pu->as_me_info[i4_reflist].s_mv;
1034 ps_top_right_mv =
1035 &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->as_me_info[i4_reflist].s_mv;
1036
1037 i4_left_mode =
1038 ps_ngbr_avbl->u1_mb_a
1039 ? (ps_proc->s_nbr_info.ps_left_mb_info->as_pu->u1_pred_mode != i4_cmpl_predmode)
1040 : 0;
1041 i4_top_mode = ps_ngbr_avbl->u1_mb_b
1042 ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->u1_pred_mode !=
1043 i4_cmpl_predmode)
1044 : 0;
1045 i4_top_right_mode =
1046 ps_ngbr_avbl->u1_mb_c
1047 ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->u1_pred_mode !=
1048 i4_cmpl_predmode)
1049 : 0;
1050 i4_top_left_mode =
1051 ps_ngbr_avbl->u1_mb_d
1052 ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x - 1)->as_pu->u1_pred_mode !=
1053 i4_cmpl_predmode)
1054 : 0;
1055
1056 if(USE_ILP_MV_IN_ME && ps_ilp_me_cands)
1057 {
1058 if(ps_ilp_me_cands->u4_num_ilp_mvs >= 2)
1059 {
1060 b_is_max_mv_diff_lt_4 = isvce_check_max_mv_diff_lt_4(ps_ilp_me_cands, i4_reflist);
1061 }
1062
1063 /* Taking ILP MV Predictor as one of the candidates */
1064 if(ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4)
1065 {
1066 for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++)
1067 {
1068 if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
1069 ((ps_ilp_me_cands->ae_pred_mode[i] == BI))))
1070 {
1071 isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv,
1072 ps_me_ctxt, i4_reflist, &u4_num_candidates,
1073 b_is_max_mv_diff_lt_4);
1074 }
1075 }
1076 }
1077 }
1078
1079 /* Taking the Top MV Predictor as one of the candidates */
1080 if(ps_ngbr_avbl->u1_mb_b && i4_top_mode)
1081 {
1082 isvce_add_me_init_search_cands(ps_top_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1083 b_is_max_mv_diff_lt_4);
1084 }
1085
1086 /* Taking the Left MV Predictor as one of the candidates */
1087 if(ps_ngbr_avbl->u1_mb_a && i4_left_mode)
1088 {
1089 isvce_add_me_init_search_cands(ps_left_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1090 b_is_max_mv_diff_lt_4);
1091 }
1092
1093 /********************************************************************/
1094 /* MV Prediction */
1095 /********************************************************************/
1096 isvce_mv_pred_me(ps_proc, i4_reflist);
1097
1098 ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
1099 ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
1100
1101 /* Get the skip motion vector */
1102 {
1103 ps_me_ctxt->i4_skip_type =
1104 ps_codec->apf_find_skip_params_me[ps_proc->i4_slice_type](ps_proc, i4_reflist);
1105
1106 /* Taking the Skip motion vector as one of the candidates */
1107 isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist].s_mv, ps_me_ctxt,
1108 i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4);
1109
1110 if(ps_proc->i4_slice_type == BSLICE)
1111 {
1112 /* Taking the temporal Skip motion vector as one of the candidates */
1113 isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist + 2].s_mv, ps_me_ctxt,
1114 i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4);
1115 }
1116 }
1117
1118 /* Taking ILP MV Predictor as one of the candidates */
1119 if(USE_ILP_MV_IN_ME && ps_ilp_me_cands &&
1120 (ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4))
1121 {
1122 for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++)
1123 {
1124 if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
1125 ((ps_ilp_me_cands->ae_pred_mode[i] == BI))))
1126 {
1127 isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv,
1128 ps_me_ctxt, i4_reflist, &u4_num_candidates,
1129 b_is_max_mv_diff_lt_4);
1130 }
1131 }
1132 }
1133
1134 if(ps_ngbr_avbl->u1_mb_b && i4_top_mode)
1135 {
1136 /* Taking the TopRt MV Predictor as one of the candidates */
1137 if(ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
1138 {
1139 isvce_add_me_init_search_cands(ps_top_right_mv, ps_me_ctxt, i4_reflist,
1140 &u4_num_candidates, b_is_max_mv_diff_lt_4);
1141 }
1142
1143 /* Taking the TopLt MV Predictor as one of the candidates */
1144 else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
1145 {
1146 isvce_add_me_init_search_cands(ps_top_left_mv, ps_me_ctxt, i4_reflist,
1147 &u4_num_candidates, b_is_max_mv_diff_lt_4);
1148 }
1149 }
1150
1151 /* Taking the Zero motion vector as one of the candidates */
1152 isvce_add_me_init_search_cands(&s_zero_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1153 b_is_max_mv_diff_lt_4);
1154
1155 ASSERT(u4_num_candidates <= MAX_FPEL_SEARCH_CANDIDATES);
1156
1157 ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
1158 }
1159
1160 /**
1161 *******************************************************************************
1162 *
1163 * @brief The function computes parameters for a PSKIP MB
1164 *
1165 * @par Description:
1166 * The function updates the skip motion vector and checks if the current
1167 * MB can be a skip PSKIP mB or not
1168 *
1169 * @param[in] ps_proc
1170 * Pointer to process context
1171 *
1172 * @param[in] u4_for_me
1173 * Flag to indicate function is called for ME or not
1174 *
1175 * @param[out] i4_ref_list
1176 * Current active refernce list
1177 *
1178 * @returns Flag indicating if the current MB can be marked as skip
1179 *
1180 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1181 * specification.
1182 *
1183 *******************************************************************************
1184 */
isvce_find_pskip_params(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)1185 WORD32 isvce_find_pskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
1186 {
1187 /* left mb motion vector */
1188 isvce_enc_pu_t *ps_left_mb_pu;
1189
1190 /* top mb motion vector */
1191 isvce_enc_pu_t *ps_top_mb_pu;
1192
1193 /* Skip mv */
1194 mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv;
1195
1196 UNUSED(i4_reflist);
1197
1198 ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
1199 ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu;
1200
1201 if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
1202 ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1203 (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1204 (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) ||
1205 ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1206 (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1207 (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)))
1208
1209 {
1210 ps_skip_mv->i2_mvx = 0;
1211 ps_skip_mv->i2_mvy = 0;
1212 }
1213 else
1214 {
1215 ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
1216 ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
1217 }
1218
1219 if((ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx) &&
1220 (ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
1221 {
1222 return 1;
1223 }
1224
1225 return 0;
1226 }
1227
1228 /**
1229 *******************************************************************************
1230 *
1231 * @brief The function computes parameters for a PSKIP MB
1232 *
1233 * @par Description:
1234 * The function updates the skip motion vector and checks if the current
1235 * MB can be a skip PSKIP mB or not
1236 *
1237 * @param[in] ps_proc
1238 * Pointer to process context
1239 *
1240 * @param[in] u4_for_me
1241 * Flag to dincate fucntion is called for ME or not
1242 *
1243 * @param[out] i4_ref_list
1244 * Current active refernce list
1245 *
1246 * @returns Flag indicating if the current MB can be marked as skip
1247 *
1248 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1249 * specification.
1250 *
1251 *******************************************************************************
1252 */
isvce_find_pskip_params_me(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)1253 WORD32 isvce_find_pskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
1254 {
1255 /* left mb motion vector */
1256 isvce_enc_pu_t *ps_left_mb_pu;
1257
1258 /* top mb motion vector */
1259 isvce_enc_pu_t *ps_top_mb_pu;
1260
1261 /* Skip mv */
1262 mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv;
1263
1264 UNUSED(i4_reflist);
1265
1266 ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
1267 ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu;
1268
1269 if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
1270 ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1271 (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1272 (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) ||
1273 ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1274 (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1275 (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)))
1276
1277 {
1278 ps_skip_mv->i2_mvx = 0;
1279 ps_skip_mv->i2_mvy = 0;
1280 }
1281 else
1282 {
1283 ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
1284 ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
1285 }
1286
1287 return L0;
1288 }
1289
1290 /**
1291 *******************************************************************************
1292 *
1293 * @brief motion vector predictor
1294 *
1295 * @par Description:
1296 * The routine calculates the motion vector predictor for a given block,
1297 * given the candidate MV predictors.
1298 *
1299 * @param[in] ps_left_mb_pu
1300 * pointer to left mb motion vector info
1301 *
1302 * @param[in] ps_top_row_pu
1303 * pointer to top & top right mb motion vector info
1304 *
1305 * @param[out] ps_pred_mv
1306 * pointer to candidate predictors for the current block
1307 *
1308 * @returns The x & y components of the MV predictor.
1309 *
1310 * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
1311 * specification.
1312 * Assumptions : 1. Assumes Single reference frame
1313 * 2. Assumes Only partition of size 16x16
1314 *
1315 *******************************************************************************
1316 */
isvce_get_mv_predictor(isvce_enc_pu_mv_t * ps_pred_mv,isvce_enc_pu_mv_t * ps_neig_mv,WORD32 pred_algo)1317 void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv,
1318 WORD32 pred_algo)
1319 {
1320 switch(pred_algo)
1321 {
1322 case 0:
1323 /* left */
1324 ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[0].s_mv.i2_mvx;
1325 ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[0].s_mv.i2_mvy;
1326 break;
1327 case 1:
1328 /* top */
1329 ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[1].s_mv.i2_mvx;
1330 ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[1].s_mv.i2_mvy;
1331 break;
1332 case 2:
1333 /* top right */
1334 ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[2].s_mv.i2_mvx;
1335 ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[2].s_mv.i2_mvy;
1336 break;
1337 case 3:
1338 /* median */
1339 MEDIAN(ps_neig_mv[0].s_mv.i2_mvx, ps_neig_mv[1].s_mv.i2_mvx, ps_neig_mv[2].s_mv.i2_mvx,
1340 ps_pred_mv->s_mv.i2_mvx);
1341 MEDIAN(ps_neig_mv[0].s_mv.i2_mvy, ps_neig_mv[1].s_mv.i2_mvy, ps_neig_mv[2].s_mv.i2_mvy,
1342 ps_pred_mv->s_mv.i2_mvy);
1343
1344 break;
1345 default:
1346 break;
1347 }
1348 }
1349
1350 /**
1351 *******************************************************************************
1352 *
1353 * @brief This function performs MV prediction
1354 *
1355 * @par Description:
1356 *
1357 * @param[in] ps_proc
1358 * Process context corresponding to the job
1359 *
1360 * @returns none
1361 *
1362 * @remarks none
1363 * This function will update the MB availability since intra inter decision
1364 * should be done before the call
1365 *
1366 *******************************************************************************
1367 */
isvce_mv_pred(isvce_process_ctxt_t * ps_proc,WORD32 i4_slice_type)1368 void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_slice_type)
1369 {
1370 isvce_enc_pu_mv_t as_pu_mv[3];
1371
1372 UWORD8 u1_reflist, u1_cmpl_predmode;
1373 WORD32 i;
1374
1375 isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
1376 isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1};
1377 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1378 isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1379 isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1380 isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1381 isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info;
1382
1383 UWORD8 u1_left_is_intra = ps_left_mb->u1_is_intra;
1384 UWORD8 u1_num_ref_lists = (i4_slice_type == PSLICE) ? 1 : 2;
1385
1386 for(u1_reflist = 0; u1_reflist < u1_num_ref_lists; u1_reflist++)
1387 {
1388 WORD8 i1_cur_ref_idx = 0;
1389
1390 WORD32 pred_algo = 3, a, b, c;
1391
1392 for(i = 0; i < 3; i++)
1393 {
1394 as_pu_mv[i] = s_default_mv_info;
1395 }
1396
1397 u1_cmpl_predmode = (u1_reflist == 0) ? L1 : L0;
1398
1399 /* Before performing mv prediction prepare the ngbr information and
1400 * reset motion vectors basing on their availability */
1401 if(ps_ngbr_avbl->u1_mb_a && (u1_left_is_intra != 1) &&
1402 (ps_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode))
1403 {
1404 /* left mv */
1405 as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1406 as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1407
1408 /* Only left available */
1409 if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d)
1410 {
1411 as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1412 as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1413
1414 as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1415 as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1416 }
1417 }
1418 if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra &&
1419 (ps_top_mb->as_pu[0].u1_pred_mode != u1_cmpl_predmode))
1420 {
1421 /* top mv */
1422 as_pu_mv[1].s_mv = ps_top_mb->as_pu[0].as_me_info[u1_reflist].s_mv;
1423 as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx;
1424 }
1425
1426 if(!ps_ngbr_avbl->u1_mb_c)
1427 {
1428 /* top right mv - When top right partition is not available for
1429 * prediction if top left is available use it for prediction else
1430 * set the mv information to -1 and (0, 0)
1431 * */
1432 if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra &&
1433 (ps_top_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode))
1434 {
1435 as_pu_mv[2].s_mv = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].s_mv;
1436 as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx;
1437 }
1438 }
1439 else if(ps_top_right_mb->as_pu->u1_pred_mode != u1_cmpl_predmode &&
1440 !ps_top_right_mb->u1_is_intra)
1441 {
1442 as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[u1_reflist].s_mv;
1443 as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1444 }
1445
1446 /* If only one of the candidate blocks has a reference frame equal to
1447 * the current block then use the same block as the final predictor */
1448 a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1449 b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1450 c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1451 if(a == 0 && b == -1 && c == -1)
1452 pred_algo = 0; /* LEFT */
1453 else if(a == -1 && b == 0 && c == -1)
1454 pred_algo = 1; /* TOP */
1455 else if(a == -1 && b == -1 && c == 0)
1456 pred_algo = 2;
1457
1458 isvce_get_mv_predictor(&ps_pred_mv[u1_reflist], &as_pu_mv[0], pred_algo);
1459
1460 ps_pred_mv[u1_reflist].i1_ref_idx = i1_cur_ref_idx;
1461 }
1462 }
1463
1464 /**
1465 *******************************************************************************
1466 *
1467 * @brief This function approximates Pred. MV
1468 *
1469 * @par Description:
1470 *
1471 * @param[in] ps_proc
1472 * Process context corresponding to the job
1473 *
1474 * @returns none
1475 *
1476 * @remarks none
1477 * Motion estimation happens at nmb level. For cost calculations, mv is appro
1478 * ximated using this function
1479 *
1480 *******************************************************************************
1481 */
isvce_mv_pred_me(isvce_process_ctxt_t * ps_proc,WORD32 i4_ref_list)1482 void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list)
1483 {
1484 isvce_enc_pu_mv_t as_pu_mv[3];
1485
1486 WORD32 i, a, b, c;
1487
1488 isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
1489 isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1};
1490 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1491 isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1492 isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1493 isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1494 isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info;
1495
1496 WORD8 i1_cur_ref_idx = 0;
1497 WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? L1 : L0;
1498 WORD32 pred_algo = 3;
1499
1500 for(i = 0; i < 3; i++)
1501 {
1502 as_pu_mv[i] = s_default_mv_info;
1503 }
1504
1505 if(ps_ngbr_avbl->u1_mb_a && !ps_left_mb->u1_is_intra &&
1506 (ps_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1507 {
1508 /* left mv */
1509 as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1510 as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1511
1512 /* Only left available */
1513 if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d)
1514 {
1515 as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1516 as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1517
1518 as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1519 as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1520 }
1521 }
1522 if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra &&
1523 (ps_top_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1524 {
1525 /* top mv */
1526 as_pu_mv[1].s_mv = ps_top_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1527 as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1528 }
1529 if(!ps_ngbr_avbl->u1_mb_c)
1530 {
1531 /* top right mv - When top right partition is not available for
1532 * prediction if top left is available use it for prediction else
1533 * set the mv information to -1 and (0, 0)
1534 * */
1535 if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra &&
1536 (ps_top_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1537 {
1538 as_pu_mv[2].s_mv = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1539 as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1540 }
1541 }
1542 else if(ps_top_right_mb->as_pu->u1_pred_mode != i4_cmpl_predmode &&
1543 !ps_top_right_mb->u1_is_intra)
1544 {
1545 as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1546 as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1547 }
1548
1549 /* If only one of the candidate blocks has a reference frame equal to
1550 * the current block then use the same block as the final predictor */
1551 a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1552 b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1553 c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1554
1555 if(a == 0 && b == -1 && c == -1)
1556 pred_algo = 0; /* LEFT */
1557 else if(a == -1 && b == 0 && c == -1)
1558 pred_algo = 1; /* TOP */
1559 else if(a == -1 && b == -1 && c == 0)
1560 pred_algo = 2;
1561
1562 isvce_get_mv_predictor(&ps_pred_mv[i4_ref_list], &as_pu_mv[0], pred_algo);
1563 }
1564
1565 /**
1566 *******************************************************************************
1567 *
1568 * @brief This function initializes me ctxt
1569 *
1570 * @par Description:
1571 * Before dispatching the current job to me thread, the me context associated
1572 * with the job is initialized.
1573 *
1574 * @param[in] ps_proc
1575 * Process context corresponding to the job
1576 *
1577 * @returns none
1578 *
1579 * @remarks none
1580 *
1581 *******************************************************************************
1582 */
isvce_init_me(isvce_process_ctxt_t * ps_proc)1583 void isvce_init_me(isvce_process_ctxt_t *ps_proc)
1584 {
1585 isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1586 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1587
1588 ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
1589
1590 if(ps_codec->s_cfg.u4_num_bframes == 0)
1591 {
1592 ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
1593 }
1594 else
1595 {
1596 ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P;
1597 }
1598
1599 ps_me_ctxt->pu1_src_buf_luma = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
1600 ps_me_ctxt->i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
1601
1602 ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
1603 ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data;
1604
1605 ps_me_ctxt->ai4_rec_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;
1606 ps_me_ctxt->ai4_rec_strd[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].i4_data_stride;
1607
1608 ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
1609 }
1610
1611 /**
1612 *******************************************************************************
1613 *
1614 * @brief This function performs motion estimation for the current mb using
1615 * single reference list
1616 *
1617 * @par Description:
1618 * The current mb is compared with a list of mb's in the reference frame for
1619 * least cost. The mb that offers least cost is chosen as predicted mb and the
1620 * displacement of the predicted mb from index location of the current mb is
1621 * signaled as mv. The list of the mb's that are chosen in the reference frame
1622 * are dependent on the speed of the ME configured.
1623 *
1624 * @param[in] ps_proc
1625 * Process context corresponding to the job
1626 *
1627 * @returns motion vector of the pred mb, sad, cost.
1628 *
1629 * @remarks none
1630 *
1631 *******************************************************************************
1632 */
isvce_compute_me_single_reflist(isvce_process_ctxt_t * ps_proc)1633 void isvce_compute_me_single_reflist(isvce_process_ctxt_t *ps_proc)
1634 {
1635 mb_part_ctxt s_skip_mbpart;
1636
1637 /* source buffer for halp pel generation functions */
1638 UWORD8 *pu1_hpel_src;
1639
1640 isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1641 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1642 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1643 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1644 inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
1645
1646 ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1647
1648 ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
1649
1650 {
1651 WORD32 rows_above, rows_below, columns_left, columns_right;
1652
1653 /* During evaluation for motion vectors do not search through padded regions
1654 */
1655 /* Obtain number of rows and columns that are effective for computing for me
1656 * evaluation */
1657 rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1658 rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1659 columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1660 columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1661
1662 /* init srch range */
1663 /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X /
1664 * 2 on all sides.
1665 */
1666 ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1667 ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1668 ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1669 ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1670
1671 /* this is to facilitate fast sub pel computation with minimal loads */
1672 ps_me_ctxt->i4_srch_range_w += 1;
1673 ps_me_ctxt->i4_srch_range_e -= 1;
1674 ps_me_ctxt->i4_srch_range_n += 1;
1675 ps_me_ctxt->i4_srch_range_s -= 1;
1676 }
1677
1678 /***********************************************************************
1679 * Compute ME for list L0
1680 ***********************************************************************/
1681
1682 /* Init SATQD for the current list */
1683 ps_me_ctxt->u4_min_sad_reached = 0;
1684 ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1685
1686 /* Get the seed motion vector candidates */
1687 isvce_get_search_candidates(ps_proc, ps_me_ctxt, L0);
1688
1689 /* ****************************************************************
1690 *Evaluate the SKIP for current list
1691 * ****************************************************************/
1692 s_skip_mbpart.s_mv_curr.i2_mvx = 0;
1693 s_skip_mbpart.s_mv_curr.i2_mvy = 0;
1694 s_skip_mbpart.i4_mb_cost = INT_MAX;
1695 s_skip_mbpart.i4_mb_distortion = INT_MAX;
1696
1697 isvce_compute_skip_cost(ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[L0].s_mv),
1698 &s_skip_mbpart, ps_codec->s_cfg.u4_enable_satqd, PRED_L0,
1699 0 /* Not a Bslice */);
1700
1701 s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
1702 s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
1703
1704 /******************************************************************
1705 * Evaluate ME For current list
1706 *****************************************************************/
1707 ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx = 0;
1708 ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy = 0;
1709 ps_me_ctxt->as_mb_part[L0].i4_mb_cost = INT_MAX;
1710 ps_me_ctxt->as_mb_part[L0].i4_mb_distortion = INT_MAX;
1711
1712 /* Init Hpel */
1713 ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf = NULL;
1714
1715 /* In case we found out the minimum SAD, exit the ME eval */
1716 if(!ps_me_ctxt->u4_min_sad_reached)
1717 {
1718 /* Evaluate search candidates for initial mv pt */
1719 isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, L0);
1720
1721 /********************************************************************/
1722 /* full pel motion estimation */
1723 /********************************************************************/
1724 isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, L0);
1725
1726 /* Scale the MV to qpel resolution */
1727 ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx <<= 2;
1728 ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy <<= 2;
1729
1730 if(ps_me_ctxt->u4_enable_hpel)
1731 {
1732 /* moving src pointer to the converged motion vector location*/
1733 pu1_hpel_src =
1734 ps_me_ctxt->apu1_ref_buf_luma[L0] +
1735 (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx >> 2) +
1736 (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy >> 2) * ps_me_ctxt->ai4_rec_strd[L0];
1737
1738 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1739 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1740 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1741
1742 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1743
1744 /* half pel search is done for both sides of full pel,
1745 * hence half_x of width x height = 17x16 is created
1746 * starting from left half_x of converged full pel */
1747 pu1_hpel_src -= 1;
1748
1749 /* computing half_x */
1750 ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
1751 ps_me_ctxt->ai4_rec_strd[L0],
1752 ps_me_ctxt->u4_subpel_buf_strd);
1753
1754 /*
1755 * Halfpel search is done for both sides of full pel,
1756 * hence half_y of width x height = 16x17 is created
1757 * starting from top half_y of converged full pel
1758 * for half_xy top_left is required
1759 * hence it starts from pu1_hpel_src = full_pel_converged_point -
1760 * i4_rec_strd - 1
1761 */
1762 pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[L0];
1763
1764 /* computing half_y , and half_xy*/
1765 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1766 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1], ps_me_ctxt->apu1_subpel_buffs[2],
1767 ps_me_ctxt->ai4_rec_strd[L0], ps_me_ctxt->u4_subpel_buf_strd,
1768 ps_proc->ai16_pred1 + 3, ps_me_ctxt->u4_subpel_buf_strd);
1769
1770 isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, L0);
1771 }
1772 }
1773
1774 /***********************************************************************
1775 * If a particular skiip Mv is giving better sad, copy to the corresponding
1776 * MBPART
1777 * In B slices this loop should go only to PREDL1: If we found min sad
1778 * we will go to the skip ref list only
1779 * Have to find a way to make it without too much change or new vars
1780 **********************************************************************/
1781 if(s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[L0].i4_mb_cost)
1782 {
1783 ps_me_ctxt->as_mb_part[L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1784 ps_me_ctxt->as_mb_part[L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1785 ps_me_ctxt->as_mb_part[L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1786 }
1787 else if(ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf)
1788 {
1789 /* Now we have to copy the buffers */
1790 ps_inter_pred_fxns->pf_inter_pred_luma_copy(
1791 ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf, ps_proc->pu1_best_subpel_buf,
1792 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL,
1793 0);
1794 }
1795
1796 /**********************************************************************
1797 * Now get the minimum of MB part sads by searching over all ref lists
1798 **********************************************************************/
1799 ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx =
1800 ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx;
1801 ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy =
1802 ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy;
1803 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[L0].i4_mb_cost;
1804 ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[L0].i4_mb_distortion;
1805 ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1806 ps_proc->ps_mb_info->as_pu->u1_pred_mode = L0;
1807
1808 /* Mark the reflists */
1809 ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx = 0;
1810 ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx = -1;
1811
1812 /* number of partitions */
1813 ps_proc->u4_num_sub_partitions = 1;
1814 *(ps_proc->pu4_mb_pu_cnt) = 1;
1815
1816 /* position in-terms of PU */
1817 ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0;
1818 ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0;
1819
1820 /* PU size */
1821 ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3;
1822 ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3;
1823
1824 /* Update min sad conditions */
1825 if(ps_me_ctxt->u4_min_sad_reached == 1)
1826 {
1827 ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1828 ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1829 }
1830 }
1831
1832 /**
1833 *******************************************************************************
1834 *
1835 * @brief This function performs motion estimation for the current NMB
1836 *
1837 * @par Description:
1838 * Intializes input and output pointers required by the function
1839 *isvce_compute_me and calls the function isvce_compute_me in a loop to process
1840 *NMBs.
1841 *
1842 * @param[in] ps_proc
1843 * Process context corresponding to the job
1844 *
1845 * @returns
1846 *
1847 * @remarks none
1848 *
1849 *******************************************************************************
1850 */
isvce_compute_me_nmb(isvce_process_ctxt_t * ps_proc,UWORD32 u4_nmb_count)1851 void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1852 {
1853 UWORD32 u4_i;
1854
1855 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1856 isvce_mb_info_t *ps_mb_begin = ps_proc->ps_mb_info;
1857
1858 UWORD32 *pu4_mb_pu_cnt_begin = ps_proc->pu4_mb_pu_cnt;
1859 UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1860
1861 /* Spatial dependencies for skip are not met if nmb > 1 */
1862 ASSERT(1 == u4_nmb_count);
1863
1864 if(ps_proc->i4_mb_x)
1865 {
1866 ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra;
1867 ps_proc->s_me_ctxt.u4_left_is_skip =
1868 (ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == PSKIP);
1869 }
1870
1871 for(u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1872 {
1873 /* Wait for ME map */
1874 if(ps_proc->i4_mb_y > 0)
1875 {
1876 /* Wait for top right ME to be done */
1877 UWORD8 *pu1_me_map_tp_rw =
1878 ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1879
1880 while(1)
1881 {
1882 volatile UWORD8 *pu1_buf;
1883 WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1884
1885 idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1886 pu1_buf = pu1_me_map_tp_rw + idx;
1887 if(*pu1_buf) break;
1888 ithread_yield();
1889 }
1890 }
1891
1892 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1893 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1894 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1895 ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1896
1897 ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1898 ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1899
1900 ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1901 ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1902
1903 /* Set the best subpel buf to the correct mb so that the buffer can be
1904 * copied */
1905 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1906 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1907
1908 /* Set the min sad conditions */
1909 ps_proc->ps_cur_mb->u4_min_sad = ps_codec->u4_min_sad;
1910 ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1911
1912 isvce_derive_nghbr_avbl_of_mbs(ps_proc);
1913
1914 isvce_init_me(ps_proc);
1915
1916 /* Compute ME according to slice type */
1917 ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1918
1919 /* update top and left structs */
1920 if(u4_nmb_count > 1)
1921 {
1922 isvce_mb_info_t *ps_left_syn = ps_proc->s_nbr_info.ps_left_mb_info;
1923
1924 ps_left_syn[0] = ps_proc->ps_mb_info[0];
1925 ps_left_syn[0].u1_is_intra = 0;
1926 ps_left_syn[0].u2_mb_type = ps_proc->ps_cur_mb->u4_mb_type;
1927 }
1928
1929 /* Copy the min sad reached info */
1930 ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1931 ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1932
1933 /*
1934 * To make sure that the MV map is properly sync to the
1935 * cache we need to do a DDB
1936 */
1937 {
1938 DATA_SYNC();
1939
1940 pu1_me_map[ps_proc->i4_mb_x] = 1;
1941 }
1942 ps_proc->i4_mb_x++;
1943
1944 ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1945 ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
1946
1947 /* update buffers pointers */
1948 ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
1949 ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
1950 ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
1951 ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
1952 ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
1953 ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) + MB_SIZE;
1954 ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
1955 ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) + MB_SIZE;
1956
1957 /*
1958 * Note: Although chroma mb size is 8, as the chroma buffers are
1959 * interleaved, the stride per MB is MB_SIZE
1960 */
1961 ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
1962 ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
1963 ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
1964 ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
1965 ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
1966 ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) + MB_SIZE;
1967 ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
1968 ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) + MB_SIZE;
1969
1970 ps_proc->pu4_mb_pu_cnt++;
1971 ps_proc->ps_mb_info++;
1972 }
1973
1974 ps_proc->ps_mb_info = ps_mb_begin;
1975 ps_proc->pu4_mb_pu_cnt = pu4_mb_pu_cnt_begin;
1976 ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1977
1978 /* update buffers pointers */
1979 ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
1980 ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count;
1981 ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
1982 ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count;
1983 ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
1984 ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) -
1985 MB_SIZE * u4_nmb_count;
1986 ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
1987 ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) -
1988 MB_SIZE * u4_nmb_count;
1989
1990 /*
1991 * Note: Although chroma mb size is 8, as the chroma buffers are
1992 * interleaved, the stride per MB is MB_SIZE
1993 */
1994 ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
1995 ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count;
1996 ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
1997 ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count;
1998 ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
1999 ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) -
2000 MB_SIZE * u4_nmb_count;
2001 ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
2002 ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) -
2003 MB_SIZE * u4_nmb_count;
2004 }
2005
2006 /**
2007 *******************************************************************************
2008 *
2009 * @brief The function computes parameters for a BSKIP MB
2010 *
2011 * @par Description:
2012 * The function updates the skip motion vector for B Mb, check if the Mb can be
2013 * marked as skip and returns it
2014 *
2015 * @param[in] ps_proc
2016 * Pointer to process context
2017 *
2018 * @param[in] u4_for_me
2019 * Dummy
2020 *
2021 * @param[in] i4_reflist
2022 * Dummy
2023 *
2024 * @returns Flag indicating if the current Mb can be skip or not
2025 *
2026 * @remarks
2027 * The code implements the logic as described in sec 8.4.1.2.2
2028 * It also computes co-located MB parmas according to sec 8.4.1.2.1
2029 *
2030 * Need to add condition for this fucntion to be used in ME
2031 *
2032 *******************************************************************************/
isvce_find_bskip_params_me(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)2033 WORD32 isvce_find_bskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
2034 {
2035 /* Colzero for co-located MB */
2036 WORD32 i4_colzeroflag;
2037
2038 /* motion vectors for neighbouring MBs */
2039 isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
2040
2041 /* Variables to check if a particular mB is available */
2042 WORD32 i4_a, i4_b, i4_c, i4_c_avail;
2043
2044 /* Mode availability, init to no modes available */
2045 WORD32 i4_mode_avail;
2046
2047 /* mb neighbor availability */
2048 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
2049
2050 /* Temp var */
2051 WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
2052
2053 /*
2054 * Colocated motion vector
2055 */
2056 mv_t s_mvcol;
2057
2058 /*
2059 * Colocated picture idx
2060 */
2061 WORD32 i4_refidxcol;
2062
2063 isvce_codec_t *ps_codec = ps_proc->ps_codec;
2064
2065 UNUSED(i4_reflist);
2066
2067 /**************************************************************************
2068 *Find co-located MB parameters
2069 * See sec 8.4.1.2.1 for reference
2070 **************************************************************************/
2071 {
2072 /*
2073 * Find the co-located Mb and update the skip and pred appropriately
2074 * 1) Default colpic is forward ref : Table 8-6
2075 * 2) Default mb col is current MB : Table 8-8
2076 */
2077
2078 if(ps_proc->ps_col_mb->u1_is_intra)
2079 {
2080 s_mvcol.i2_mvx = 0;
2081 s_mvcol.i2_mvy = 0;
2082 i4_refidxcol = -1;
2083 }
2084 else
2085 {
2086 if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1)
2087 {
2088 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv;
2089 i4_refidxcol = 0;
2090 }
2091 else
2092 {
2093 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv;
2094 i4_refidxcol = 0;
2095 }
2096 }
2097
2098 /* RefPicList1[ 0 ] is marked as "used for short-term reference", as
2099 * default */
2100 i4_colzeroflag =
2101 (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1));
2102 }
2103
2104 /***************************************************************************
2105 * Evaluating skip params : Spatial Skip
2106 **************************************************************************/
2107 {
2108 /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
2109 ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
2110 ps_b_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x].as_pu;
2111
2112 i4_c_avail = 0;
2113 if(ps_ngbr_avbl->u1_mb_c)
2114 {
2115 ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x + 1].as_pu;
2116 i4_c_avail = 1;
2117 }
2118 else
2119 {
2120 ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x - 1].as_pu;
2121 i4_c_avail = ps_ngbr_avbl->u1_mb_d;
2122 }
2123
2124 i4_a = ps_ngbr_avbl->u1_mb_a;
2125 i4_b = ps_ngbr_avbl->u1_mb_b;
2126 i4_c = i4_c_avail;
2127
2128 /* Init to no mode avail */
2129 i4_mode_avail = 0;
2130 for(i = 0; i < 2; i++)
2131 {
2132 i4_cmpl_mode = (i == 0) ? L1 : L0;
2133
2134 i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) &&
2135 (ps_a_pu->as_me_info[i].i1_ref_idx == 0))
2136 << i;
2137 i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) &&
2138 (ps_b_pu->as_me_info[i].i1_ref_idx == 0))
2139 << i;
2140 i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) &&
2141 (ps_c_pu->as_me_info[i].i1_ref_idx == 0))
2142 << i;
2143 }
2144
2145 if(i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
2146 {
2147 i4_skip_type = BI;
2148 }
2149 else if(i4_mode_avail == 0x1)
2150 {
2151 i4_skip_type = L0;
2152 }
2153 else if(i4_mode_avail == 0x2)
2154 {
2155 i4_skip_type = L1;
2156 }
2157
2158 /* Update skip MV for L0 */
2159 if((i4_mode_avail & 0x1) && (!i4_colzeroflag))
2160 {
2161 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
2162 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
2163 }
2164 else
2165 {
2166 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
2167 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
2168 }
2169
2170 /* Update skip MV for L1 */
2171 if((i4_mode_avail & 0x2) && (!i4_colzeroflag))
2172 {
2173 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
2174 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
2175 }
2176 else
2177 {
2178 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
2179 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
2180 }
2181 }
2182
2183 /***************************************************************************
2184 * Evaluating skip params : Temporal skip
2185 **************************************************************************/
2186 {
2187 svc_au_buf_t *ps_ref_pic[MAX_REF_PIC_CNT];
2188 WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
2189 isvce_enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
2190
2191 ps_ref_pic[L0] = ps_proc->aps_ref_pic[L0];
2192 ps_ref_pic[L1] = ps_proc->aps_ref_pic[L1];
2193
2194 i4_tb = ps_codec->i4_poc - ps_ref_pic[L0]->i4_abs_poc;
2195 i4_td = ps_ref_pic[L1]->i4_abs_poc - ps_ref_pic[L0]->i4_abs_poc;
2196
2197 i4_tb = CLIP3(-128, 127, i4_tb);
2198 i4_td = CLIP3(-128, 127, i4_td);
2199
2200 i4_tx = (16384 + ABS(i4_td / 2)) / i4_td;
2201 i4_dist_scale_factor = CLIP3(-1024, 1023, (i4_tb * i4_tx + 32) >> 6);
2202
2203 /* Motion vectors taken in full pel resolution , hence -> (& 0xfffc)
2204 * operation */
2205 ps_skip_mv[L0].s_mv.i2_mvx = ((i4_dist_scale_factor * s_mvcol.i2_mvx + 128) >> 8) & 0xfffc;
2206 ps_skip_mv[L0].s_mv.i2_mvy = ((i4_dist_scale_factor * s_mvcol.i2_mvy + 128) >> 8) & 0xfffc;
2207
2208 ps_skip_mv[L1].s_mv.i2_mvx = (ps_skip_mv[L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
2209 ps_skip_mv[L1].s_mv.i2_mvy = (ps_skip_mv[L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
2210 }
2211
2212 return i4_skip_type;
2213 }
2214
2215 /**
2216 *******************************************************************************
2217 *
2218 * @brief The function computes the skip motion vectoe for B mb
2219 *
2220 * @par Description:
2221 * The function gives the skip motion vector for B Mb, check if the Mb can be
2222 * marked as skip
2223 *
2224 * @param[in] ps_proc
2225 * Pointer to process context
2226 *
2227 * @param[in] u4_for_me
2228 * Dummy
2229 *
2230 * @param[in] u4_for_me
2231 * Dummy
2232 *
2233 * @returns Flag indicating if the current Mb can be skip or not
2234 *
2235 * @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
2236 * specification. It also computes co-located MB parmas according to
2237 *sec 8.4.1.2.1
2238 *
2239 *******************************************************************************/
isvce_find_bskip_params(isvce_process_ctxt_t * ps_proc,WORD32 i4_reflist)2240 WORD32 isvce_find_bskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
2241 {
2242 WORD32 i4_colzeroflag;
2243
2244 /* motion vectors */
2245 isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
2246
2247 /* Syntax elem */
2248 isvce_mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
2249
2250 /* Variables to check if a particular mB is available */
2251 WORD32 i4_a, i4_b, i4_c, i4_c_avail;
2252
2253 /* Mode availability, init to no modes available */
2254 WORD32 i4_mode_avail;
2255
2256 /* mb neighbor availability */
2257 block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
2258
2259 /* Temp var */
2260 WORD32 i, i4_cmpl_mode;
2261
2262 UNUSED(i4_reflist);
2263
2264 /**************************************************************************
2265 *Find co-locates parameters
2266 * See sec 8.4.1.2.1 for reference
2267 **************************************************************************/
2268 {
2269 /*
2270 * Find the co-located Mb and update the skip and pred appropriately
2271 * 1) Default colpic is forward ref : Table 8-6
2272 * 2) Default mb col is current MB : Table 8-8
2273 */
2274
2275 mv_t s_mvcol;
2276 WORD32 i4_refidxcol;
2277
2278 if(ps_proc->ps_col_mb->u1_is_intra)
2279 {
2280 s_mvcol.i2_mvx = 0;
2281 s_mvcol.i2_mvy = 0;
2282 i4_refidxcol = -1;
2283 }
2284 else
2285 {
2286 if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1)
2287 {
2288 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv;
2289 i4_refidxcol = 0;
2290 }
2291 else
2292 {
2293 s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv;
2294 i4_refidxcol = 0;
2295 }
2296 }
2297
2298 /* RefPicList1[ 0 ] is marked as "used for short-term reference", as
2299 * default */
2300 i4_colzeroflag =
2301 (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1));
2302 }
2303
2304 /***************************************************************************
2305 * Evaluating skip params
2306 **************************************************************************/
2307 /* Section 8.4.1.2.2 */
2308 ps_a_syn = ps_proc->s_nbr_info.ps_left_mb_info;
2309 ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
2310
2311 ps_b_syn = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
2312 ps_b_pu = ps_b_syn->as_pu;
2313
2314 i4_c_avail = 0;
2315 if(ps_ngbr_avbl->u1_mb_c)
2316 {
2317 ps_c_syn = ps_b_syn + 1;
2318 ps_c_pu = ps_c_syn->as_pu;
2319 i4_c_avail = 1;
2320 }
2321 else
2322 {
2323 ps_c_syn = ps_b_syn - 1;
2324 ps_c_pu = ps_c_syn->as_pu;
2325 i4_c_avail = ps_ngbr_avbl->u1_mb_d;
2326 }
2327
2328 i4_a = ps_ngbr_avbl->u1_mb_a;
2329 i4_a &= !ps_a_syn->u1_is_intra;
2330
2331 i4_b = ps_ngbr_avbl->u1_mb_b;
2332 i4_b &= !ps_b_syn->u1_is_intra;
2333
2334 i4_c = i4_c_avail;
2335 i4_c &= !ps_c_syn->u1_is_intra;
2336
2337 /* Init to no mode avail */
2338 i4_mode_avail = 0;
2339 for(i = 0; i < 2; i++)
2340 {
2341 i4_cmpl_mode = (i == 0) ? L1 : L0;
2342
2343 i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) &&
2344 (ps_a_pu->as_me_info[i].i1_ref_idx == 0))
2345 << i;
2346 i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) &&
2347 (ps_b_pu->as_me_info[i].i1_ref_idx == 0))
2348 << i;
2349 i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) &&
2350 (ps_c_pu->as_me_info[i].i1_ref_idx == 0))
2351 << i;
2352 }
2353
2354 /* Update skip MV for L0 */
2355 if((i4_mode_avail & 0x1) && (!i4_colzeroflag))
2356 {
2357 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
2358 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
2359 }
2360 else
2361 {
2362 ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
2363 ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
2364 }
2365
2366 /* Update skip MV for L1 */
2367 if((i4_mode_avail & 0x2) && (!i4_colzeroflag))
2368 {
2369 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
2370 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
2371 }
2372 else
2373 {
2374 ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
2375 ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
2376 }
2377
2378 /* Now see if the ME information matches the SKIP information */
2379 switch(ps_proc->ps_mb_info->as_pu->u1_pred_mode)
2380 {
2381 case PRED_BI:
2382 if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx ==
2383 ps_proc->ps_skip_mv[0].s_mv.i2_mvx) &&
2384 (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy ==
2385 ps_proc->ps_skip_mv[0].s_mv.i2_mvy) &&
2386 (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx ==
2387 ps_proc->ps_skip_mv[1].s_mv.i2_mvx) &&
2388 (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy ==
2389 ps_proc->ps_skip_mv[1].s_mv.i2_mvy) &&
2390 (i4_mode_avail == 0x3 || i4_mode_avail == 0x0))
2391 {
2392 return 1;
2393 }
2394 break;
2395
2396 case PRED_L0:
2397 if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx ==
2398 ps_proc->ps_skip_mv[0].s_mv.i2_mvx) &&
2399 (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy ==
2400 ps_proc->ps_skip_mv[0].s_mv.i2_mvy) &&
2401 (i4_mode_avail == 0x1))
2402 {
2403 return 1;
2404 }
2405 break;
2406
2407 case PRED_L1:
2408 if((ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx ==
2409 ps_proc->ps_skip_mv[1].s_mv.i2_mvx) &&
2410 (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy ==
2411 ps_proc->ps_skip_mv[1].s_mv.i2_mvy) &&
2412 (i4_mode_avail == 0x2))
2413 {
2414 return 1;
2415 }
2416 break;
2417 }
2418
2419 return 0;
2420 }
2421
2422 /**
2423 *******************************************************************************
2424 *
2425 * @brief This function computes the best motion vector among the tentative mv
2426 * candidates chosen.
2427 *
2428 * @par Description:
2429 * This function determines the position in the search window at which the
2430 *motion estimation should begin in order to minimise the number of search
2431 *iterations.
2432 *
2433 * @param[in] ps_mb_part
2434 * pointer to current mb partition ctxt with respect to ME
2435 *
2436 * @param[in] u4_lambda_motion
2437 * lambda motion
2438 *
2439 * @param[in] u4_fast_flag
2440 * enable/disable fast sad computation
2441 *
2442 * @returns mv pair & corresponding distortion and cost
2443 *
2444 * @remarks Currently onyl 4 search candiates are supported
2445 *
2446 *******************************************************************************
2447 */
isvce_evaluate_bipred(isvce_me_ctxt_t * ps_me_ctxt,isvce_process_ctxt_t * ps_proc,mb_part_ctxt * ps_mb_ctxt_bi)2448 void isvce_evaluate_bipred(isvce_me_ctxt_t *ps_me_ctxt, isvce_process_ctxt_t *ps_proc,
2449 mb_part_ctxt *ps_mb_ctxt_bi)
2450 {
2451 UWORD32 i, u4_fast_sad;
2452
2453 WORD32 i4_dest_buff;
2454
2455 mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
2456
2457 UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
2458
2459 UWORD8 *pu1_dst_buf;
2460
2461 WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
2462
2463 WORD32 i4_mb_distortion, i4_mb_cost;
2464
2465 isvce_codec_t *ps_codec = ps_proc->ps_codec;
2466 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2467 inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
2468
2469 u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
2470
2471 i4_dest_buff = 0;
2472 for(i = 0; i < ps_me_ctxt->u4_num_candidates[BI]; i += 2)
2473 {
2474 pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
2475
2476 s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx >> 2;
2477 s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy >> 2;
2478 s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx >> 2;
2479 s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy >> 2;
2480
2481 ps_l0_pred_mv = &ps_proc->ps_pred_mv[L0].s_mv;
2482 ps_l1_pred_mv = &ps_proc->ps_pred_mv[L1].s_mv;
2483
2484 if((ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx & 0x3) ||
2485 (ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy & 0x3))
2486 {
2487 pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf;
2488 i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
2489 }
2490 else
2491 {
2492 pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[L0] + (s_l0_mv.i2_mvx) +
2493 ((s_l0_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L0]);
2494 i4_ref_l0_stride = ps_me_ctxt->ai4_rec_strd[L0];
2495 }
2496
2497 if((ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx & 0x3) ||
2498 (ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy & 0x3))
2499 {
2500 pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[L1].pu1_best_hpel_buf;
2501 i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
2502 }
2503 else
2504 {
2505 pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[L1] + (s_l1_mv.i2_mvx) +
2506 ((s_l1_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L1]);
2507 i4_ref_l1_stride = ps_me_ctxt->ai4_rec_strd[L1];
2508 }
2509
2510 ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(
2511 pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf, i4_ref_l0_stride, i4_ref_l1_stride,
2512 ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
2513
2514 ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
2515 ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf, ps_me_ctxt->i4_src_strd,
2516 ps_me_ctxt->u4_subpel_buf_strd, INT_MAX, &i4_mb_distortion);
2517
2518 /* compute cost */
2519 i4_mb_cost =
2520 ps_me_ctxt
2521 ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
2522 i4_mb_cost +=
2523 ps_me_ctxt
2524 ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
2525 i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx -
2526 ps_l1_pred_mv->i2_mvx];
2527 i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy -
2528 ps_l1_pred_mv->i2_mvy];
2529
2530 i4_mb_cost -=
2531 (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == BI) * (i == 0);
2532
2533 i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
2534 i4_mb_cost += i4_mb_distortion;
2535
2536 if(i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
2537 {
2538 ps_mb_ctxt_bi->i4_srch_pos_idx = (i >> 1);
2539 ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
2540 ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
2541 ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
2542 i4_dest_buff = (i4_dest_buff + 1) % 2;
2543 }
2544 }
2545 }
2546
2547 /**
2548 *******************************************************************************
2549 *
2550 * @brief This function performs motion estimation for the current mb
2551 *
2552 * @par Description:
2553 * The current mb is compared with a list of mb's in the reference frame for
2554 * least cost. The mb that offers least cost is chosen as predicted mb and the
2555 * displacement of the predicted mb from index location of the current mb is
2556 * signaled as mv. The list of the mb's that are chosen in the reference frame
2557 * are dependent on the speed of the ME configured.
2558 *
2559 * @param[in] ps_proc
2560 * Process context corresponding to the job
2561 *
2562 * @returns motion vector of the pred mb, sad, cost.
2563 *
2564 * @remarks none
2565 *
2566 *******************************************************************************
2567 */
isvce_compute_me_multi_reflist(isvce_process_ctxt_t * ps_proc)2568 void isvce_compute_me_multi_reflist(isvce_process_ctxt_t *ps_proc)
2569 {
2570 /* me ctxt */
2571 isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
2572
2573 /* codec context */
2574 isvce_codec_t *ps_codec = ps_proc->ps_codec;
2575 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2576 inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
2577
2578 /* Temp variables for looping over ref lists */
2579 WORD32 i4_reflist, i4_max_reflist;
2580
2581 /* source buffer for halp pel generation functions */
2582 UWORD8 *pu1_hpel_src;
2583
2584 /* quantization parameters */
2585 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2586
2587 /* Mb part ctxts for SKIP */
2588 mb_part_ctxt as_skip_mbpart[2];
2589
2590 ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
2591
2592 /* Sad therholds */
2593 ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
2594
2595 {
2596 WORD32 rows_above, rows_below, columns_left, columns_right;
2597
2598 /* During evaluation for motion vectors do not search through padded regions
2599 */
2600 /* Obtain number of rows and columns that are effective for computing for me
2601 * evaluation */
2602 rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
2603 rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
2604 columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
2605 columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
2606
2607 /* init srch range */
2608 /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X /
2609 * 2 on all sides.
2610 */
2611 ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
2612 ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
2613 ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
2614 ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
2615
2616 /* this is to facilitate fast sub pel computation with minimal loads */
2617 if(ps_me_ctxt->u4_enable_hpel)
2618 {
2619 ps_me_ctxt->i4_srch_range_w += 1;
2620 ps_me_ctxt->i4_srch_range_e -= 1;
2621 ps_me_ctxt->i4_srch_range_n += 1;
2622 ps_me_ctxt->i4_srch_range_s -= 1;
2623 }
2624 }
2625
2626 /* Compute ME and store the MVs */
2627 {
2628 /***********************************************************************
2629 * Compute ME for lists L0 and L1
2630 * For L0 -> L0 skip + L0
2631 * for L1 -> L0 skip + L0 + L1 skip + L1
2632 ***********************************************************************/
2633 i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? L0 : L1;
2634
2635 /* Init SATQD for the current list */
2636 ps_me_ctxt->u4_min_sad_reached = 0;
2637 ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
2638
2639 for(i4_reflist = L0; i4_reflist <= i4_max_reflist; i4_reflist++)
2640 {
2641 /* Get the seed motion vector candidates */
2642 isvce_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
2643
2644 /* ****************************************************************
2645 *Evaluate the SKIP for current list
2646 * ****************************************************************/
2647 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
2648 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
2649 as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
2650 as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
2651
2652 if(ps_me_ctxt->i4_skip_type == i4_reflist)
2653 {
2654 isvce_compute_skip_cost(
2655 ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[i4_reflist].s_mv),
2656 &as_skip_mbpart[i4_reflist], ps_codec->s_cfg.u4_enable_satqd, i4_reflist,
2657 (ps_proc->i4_slice_type == BSLICE));
2658 }
2659
2660 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
2661 as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
2662
2663 /******************************************************************
2664 * Evaluate ME For current list
2665 *****************************************************************/
2666 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
2667 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
2668 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
2669 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
2670
2671 /* Init Hpel */
2672 ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
2673
2674 /* In case we found out the minimum SAD, exit the ME eval */
2675 if(ps_me_ctxt->u4_min_sad_reached)
2676 {
2677 i4_max_reflist = i4_reflist;
2678 break;
2679 }
2680
2681 /* Evaluate search candidates for initial mv pt */
2682 isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
2683
2684 /********************************************************************/
2685 /* full pel motion estimation */
2686 /********************************************************************/
2687 isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
2688
2689 DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
2690 (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
2691
2692 DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
2693
2694 /* Scale the MV to qpel resolution */
2695 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
2696 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
2697
2698 if(ps_me_ctxt->u4_enable_hpel)
2699 {
2700 /* moving src pointer to the converged motion vector location */
2701 pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] +
2702 (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2) +
2703 ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2) *
2704 ps_me_ctxt->ai4_rec_strd[i4_reflist]);
2705
2706 ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
2707 ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
2708 ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
2709
2710 /* Init the search position to an invalid number */
2711 ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
2712
2713 /* Incase a buffer is still in use by L0, replace it with spare buff */
2714 ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx] =
2715 ps_proc->apu1_subpel_buffs[3];
2716
2717 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2718
2719 /* half pel search is done for both sides of full pel,
2720 * hence half_x of width x height = 17x16 is created
2721 * starting from left half_x of converged full pel */
2722 pu1_hpel_src -= 1;
2723
2724 /* computing half_x */
2725 ps_codec->pf_ih264e_sixtapfilter_horz(
2726 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
2727 ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->u4_subpel_buf_strd);
2728
2729 /*
2730 * Halfpel search is done for both sides of full pel,
2731 * hence half_y of width x height = 16x17 is created
2732 * starting from top half_y of converged full pel
2733 * for half_xy top_left is required
2734 * hence it starts from pu1_hpel_src = full_pel_converged_point -
2735 * i4_rec_strd - 1
2736 */
2737 pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[i4_reflist];
2738
2739 /* computing half_y and half_xy */
2740 ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
2741 pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
2742 ps_me_ctxt->apu1_subpel_buffs[2], ps_me_ctxt->ai4_rec_strd[i4_reflist],
2743 ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
2744 ps_me_ctxt->u4_subpel_buf_strd);
2745
2746 isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
2747 }
2748 }
2749
2750 /***********************************************************************
2751 * If a particular skip MV gives a better SAD, copy it to the corresponding
2752 * MBPART
2753 * In B slices this loop should go only to PREDL1: If we found min sad
2754 * we will go to the skip ref list only
2755 * Have to find a way to make it without too much change or new vars
2756 **********************************************************************/
2757 for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2758 {
2759 if(as_skip_mbpart[i4_reflist].i4_mb_cost <
2760 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
2761 {
2762 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost =
2763 as_skip_mbpart[i4_reflist].i4_mb_cost;
2764 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion =
2765 as_skip_mbpart[i4_reflist].i4_mb_distortion;
2766 ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
2767 }
2768 }
2769
2770 /***********************************************************************
2771 * Compute ME for BI
2772 * In case of BI we do ME for two candidates
2773 * 1) The best L0 and L1 Mvs
2774 * 2) Skip L0 and L1 MVs
2775 *
2776 * TODO
2777 * one of the search candidates is skip. Hence it may be duplicated
2778 ***********************************************************************/
2779 if(i4_max_reflist == L1 && ps_me_ctxt->u4_min_sad_reached == 0)
2780 {
2781 WORD32 i, j = 0;
2782 WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
2783 WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
2784
2785 /* Get the free buffers */
2786 l0_srch_pos_idx = ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx;
2787 l1_srch_pos_idx = ps_me_ctxt->as_mb_part[L1].i4_srch_pos_idx;
2788
2789 /* Search for the two free buffers in subpel list */
2790 for(i = 0; i < SUBPEL_BUFF_CNT; i++)
2791 {
2792 if(i != l0_srch_pos_idx && i != l1_srch_pos_idx)
2793 {
2794 ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
2795 j++;
2796 }
2797 }
2798 ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2799
2800 /* Copy the spatial SKIP MV of each list */
2801 i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L0] - 2;
2802 i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L1] - 2;
2803 ps_me_ctxt->as_mv_init_search[BI][0].i2_mvx =
2804 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2805 ps_me_ctxt->as_mv_init_search[BI][0].i2_mvy =
2806 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2807 ps_me_ctxt->as_mv_init_search[BI][1].i2_mvx =
2808 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2809 ps_me_ctxt->as_mv_init_search[BI][1].i2_mvy =
2810 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2811
2812 /* Copy the SKIP MV temporal of each list */
2813 i4_l0_skip_mv_idx++;
2814 i4_l1_skip_mv_idx++;
2815 ps_me_ctxt->as_mv_init_search[BI][2].i2_mvx =
2816 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2817 ps_me_ctxt->as_mv_init_search[BI][2].i2_mvy =
2818 ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2819 ps_me_ctxt->as_mv_init_search[BI][3].i2_mvx =
2820 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2821 ps_me_ctxt->as_mv_init_search[BI][3].i2_mvy =
2822 ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2823
2824 /* Copy the best MV after ME */
2825 ps_me_ctxt->as_mv_init_search[BI][4] = ps_me_ctxt->as_mb_part[L0].s_mv_curr;
2826 ps_me_ctxt->as_mv_init_search[BI][5] = ps_me_ctxt->as_mb_part[L1].s_mv_curr;
2827
2828 ps_me_ctxt->u4_num_candidates[BI] = 6;
2829
2830 ps_me_ctxt->as_mb_part[BI].i4_mb_cost = INT_MAX;
2831 ps_me_ctxt->as_mb_part[BI].i4_mb_distortion = INT_MAX;
2832
2833 isvce_evaluate_bipred(ps_me_ctxt, ps_proc, &ps_me_ctxt->as_mb_part[BI]);
2834
2835 i4_max_reflist = BI;
2836 }
2837
2838 /**********************************************************************
2839 * Now get the minimum of MB part sads by searching over all ref lists
2840 **********************************************************************/
2841 ps_proc->ps_mb_info->as_pu->u1_pred_mode = 0x3;
2842
2843 for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2844 {
2845 if(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2846 {
2847 ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2848 ps_proc->ps_cur_mb->i4_mb_distortion =
2849 ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2850 ps_proc->ps_cur_mb->u4_mb_type =
2851 (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2852 ps_proc->ps_mb_info->as_pu->u1_pred_mode = i4_reflist;
2853 }
2854 }
2855
2856 /**********************************************************************
2857 * In case we have a BI MB, we have to copy the buffers and set proper MVs
2858 * 1)In case its BI, we need to get the best MVs given by BI and update
2859 * to their corresponding MB part
2860 * 2)We also need to copy the buffer in which bipred buff is populated
2861 *
2862 * Not that if we have
2863 **********************************************************************/
2864 if(ps_proc->ps_mb_info->as_pu->u1_pred_mode == BI)
2865 {
2866 WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[BI].i4_srch_pos_idx;
2867 UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[BI].pu1_best_hpel_buf;
2868
2869 ps_me_ctxt->as_mb_part[L0].s_mv_curr =
2870 ps_me_ctxt->as_mv_init_search[BI][i4_srch_pos << 1];
2871 ps_me_ctxt->as_mb_part[L1].s_mv_curr =
2872 ps_me_ctxt->as_mv_init_search[BI][(i4_srch_pos << 1) + 1];
2873
2874 /* Now we have to copy the buffers */
2875 ps_inter_pred_fxns->pf_inter_pred_luma_copy(
2876 pu1_bi_buf, ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd,
2877 ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0);
2878 }
2879 else if(ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf)
2880 {
2881 /* Now we have to copy the buffers */
2882 ps_inter_pred_fxns->pf_inter_pred_luma_copy(
2883 ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf,
2884 ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd,
2885 ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0);
2886 }
2887 }
2888
2889 /**************************************************************************
2890 *Now copy the MVs to the current PU with qpel scaling
2891 ***************************************************************************/
2892 ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx =
2893 (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx);
2894 ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy =
2895 (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy);
2896 ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvx =
2897 (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvx);
2898 ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvy =
2899 (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvy);
2900
2901 ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx =
2902 (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L1) ? 0 : -1;
2903 ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx =
2904 (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L0) ? 0 : -1;
2905
2906 /* number of partitions */
2907 ps_proc->u4_num_sub_partitions = 1;
2908 *(ps_proc->pu4_mb_pu_cnt) = 1;
2909
2910 /* position in-terms of PU */
2911 ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0;
2912 ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0;
2913
2914 /* PU size */
2915 ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3;
2916 ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3;
2917
2918 /* Update min sad conditions */
2919 if(ps_me_ctxt->u4_min_sad_reached == 1)
2920 {
2921 ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2922 ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2923 }
2924 }
2925