1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_intra_modes_eval.c
25 *
26 * @brief
27 * This file contains definitions of routines that perform rate distortion
28 * analysis on a macroblock if they are to be coded as intra.
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
* - ih264e_derive_nghbr_avbl_of_mbs
35 * - ih264e_derive_ngbr_avbl_of_mb_partitions
36 * - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff
37 * - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff
38 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff
39 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton
40 * - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff
41 * - ih264e_evaluate_intra16x16_modes
42 * - ih264e_evaluate_intra4x4_modes
43 * - ih264e_evaluate_intra_chroma_modes
44 *
45 * @remarks
46 * none
47 *
48 *******************************************************************************
49 */
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* System Include Files */
56 #include <stdio.h>
57 #include <string.h>
58 #include <limits.h>
59 #include <assert.h>
60
61 /* User Include Files */
62 #include "ih264e_config.h"
63 #include "ih264_typedefs.h"
64 #include "iv2.h"
65 #include "ive2.h"
66
67 #include "ih264_debug.h"
68 #include "ih264_macros.h"
69 #include "ih264_defs.h"
70 #include "ih264_mem_fns.h"
71 #include "ih264_padding.h"
72 #include "ih264_structs.h"
73 #include "ih264_trans_quant_itrans_iquant.h"
74 #include "ih264_inter_pred_filters.h"
75 #include "ih264_intra_pred_filters.h"
76 #include "ih264_deblk_edge_filters.h"
77 #include "ih264_common_tables.h"
78 #include "ih264_cabac_tables.h"
79
80 #include "ime_defs.h"
81 #include "ime_distortion_metrics.h"
82 #include "ime_structs.h"
83 #include "ime_platform_macros.h"
84
85 #include "irc_cntrl_param.h"
86 #include "irc_frame_info_collector.h"
87
88 #include "ih264e_error.h"
89 #include "ih264e_defs.h"
90 #include "ih264e_globals.h"
91 #include "ih264e_rate_control.h"
92 #include "ih264e_bitstream.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_intra_modes_eval.h"
96
97
98 /*****************************************************************************/
99 /* Function Definitions */
100 /*****************************************************************************/
101
102 /**
103 ******************************************************************************
104 *
105 * @brief
106 * derivation process for macroblock availability
107 *
108 * @par Description
109 * Calculates the availability of the left, top, topright and topleft macroblocks.
110 *
111 * @param[in] ps_proc_ctxt
112 * pointer to proc context (handle)
113 *
114 * @remarks Based on section 6.4.5 in H264 spec
115 *
116 * @return none
117 *
118 ******************************************************************************
119 */
ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t * ps_proc)120 void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
121 {
122 UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx;
123 UWORD8 *pu1_slice_idx_b;
124 UWORD8 *pu1_slice_idx_a;
125 UWORD8 *pu1_slice_idx_c;
126 UWORD8 *pu1_slice_idx_d;
127 block_neighbors_t *ps_ngbr_avbl;
128 WORD32 i4_mb_x, i4_mb_y;
129 WORD32 i4_wd_mbs;
130
131 i4_mb_x = ps_proc->i4_mb_x;
132 i4_mb_y = ps_proc->i4_mb_y;
133
134 i4_wd_mbs = ps_proc->i4_wd_mbs;
135
136 pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x;
137 pu1_slice_idx_a = pu1_slice_idx_curr - 1;
138 pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs;
139 pu1_slice_idx_c = pu1_slice_idx_b + 1;
140 pu1_slice_idx_d = pu1_slice_idx_b - 1;
141 ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
142
143 /**********************************************************************/
144 /* The macroblock is marked as available, unless one of the following */
145 /* conditions is true in which case the macroblock shall be marked as */
146 /* not available. */
147 /* 1. mbAddr < 0 */
148 /* 2 mbAddr > CurrMbAddr */
149 /* 3. the macroblock with address mbAddr belongs to a different slice */
150 /* than the macroblock with address CurrMbAddr */
151 /**********************************************************************/
152
153 /* left macroblock availability */
154 if (i4_mb_x == 0)
155 { /* macroblocks along first column */
156 ps_ngbr_avbl->u1_mb_a = 0;
157 }
158 else
159 { /* macroblocks belong to same slice? */
160 if (*pu1_slice_idx_a != *pu1_slice_idx_curr)
161 ps_ngbr_avbl->u1_mb_a = 0;
162 else
163 ps_ngbr_avbl->u1_mb_a = 1;
164 }
165
166 /* top macroblock availability */
167 if (i4_mb_y == 0)
168 { /* macroblocks along first row */
169 ps_ngbr_avbl->u1_mb_b = 0;
170 }
171 else
172 { /* macroblocks belong to same slice? */
173 if (*pu1_slice_idx_b != *pu1_slice_idx_curr)
174 ps_ngbr_avbl->u1_mb_b = 0;
175 else
176 ps_ngbr_avbl->u1_mb_b = 1;
177 }
178
179 /* top right macroblock availability */
180 if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0)
181 { /* macroblocks along last column */
182 ps_ngbr_avbl->u1_mb_c = 0;
183 }
184 else
185 { /* macroblocks belong to same slice? */
186 if (*pu1_slice_idx_c != *pu1_slice_idx_curr)
187 ps_ngbr_avbl->u1_mb_c = 0;
188 else
189 ps_ngbr_avbl->u1_mb_c = 1;
190 }
191
192 /* top left macroblock availability */
193 if (i4_mb_x == 0 || i4_mb_y == 0)
194 { /* macroblocks along first column */
195 ps_ngbr_avbl->u1_mb_d = 0;
196 }
197 else
198 { /* macroblocks belong to same slice? */
199 if (*pu1_slice_idx_d != *pu1_slice_idx_curr)
200 ps_ngbr_avbl->u1_mb_d = 0;
201 else
202 ps_ngbr_avbl->u1_mb_d = 1;
203 }
204 }
205
206 /**
207 ******************************************************************************
208 *
209 * @brief
210 * derivation process for subblock/partition availability
211 *
212 * @par Description
213 * Calculates the availability of the left, top, topright and topleft subblock
214 * or partitions.
215 *
216 * @param[in] ps_proc_ctxt
217 * pointer to macroblock context (handle)
218 *
219 * @param[in] i1_pel_pos_x
220 * column position of the pel wrt the current block
221 *
222 * @param[in] i1_pel_pos_y
223 * row position of the pel in wrt current block
224 *
225 * @remarks Assumptions: before calling this function it is assumed that
226 * the neighbor availability of the current macroblock is already derived.
227 * Based on table 6-3 of H264 specification
228 *
229 * @return availability status (yes or no)
230 *
231 ******************************************************************************
232 */
ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t * ps_ngbr_avbl,WORD8 i1_pel_pos_x,WORD8 i1_pel_pos_y)233 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
234 WORD8 i1_pel_pos_x,
235 WORD8 i1_pel_pos_y)
236 {
237 UWORD8 u1_neighbor_avail=0;
238
239 /**********************************************************************/
240 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */
241 /* various columns of a macroblock */
242 /* */
243 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */
244 /* various rows of a macroblock */
245 /* */
246 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */
247 /* outside the bound of an mb ie., represents its neighbors. */
248 /**********************************************************************/
249 if (i1_pel_pos_x < 0)
250 { /* column(-1) */
251 if (i1_pel_pos_y < 0)
252 { /* row(-1) */
253 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
254 }
255 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
256 { /* all rows of a macroblock */
257 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
258 }
259 else /* if (i1_pel_pos_y >= 16) */
260 { /* rows(+16) */
261 u1_neighbor_avail = 0; /* current mb bottom left availability */
262 }
263 }
264 else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
265 { /* all columns of a macroblock */
266 if (i1_pel_pos_y < 0)
267 { /* row(-1) */
268 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
269 }
270 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
271 { /* all rows of a macroblock */
272 u1_neighbor_avail = 1; /* current mb availability */
273 /* availability of the partition is dependent on the position of the partition inside the mb */
274 /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */
275 }
276 else /* if (i1_pel_pos_y >= 16) */
277 { /* rows(+16) */
278 u1_neighbor_avail = 0; /* current mb bottom availability */
279 }
280 }
281 else if (i1_pel_pos_x >= 16)
282 { /* column(+16) */
283 if (i1_pel_pos_y < 0)
284 { /* row(-1) */
285 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
286 }
287 else /* if (i1_pel_pos_y >= 0) */
288 { /* all other rows */
289 u1_neighbor_avail = 0; /* current mb right & bottom right availability */
290 }
291 }
292
293 return u1_neighbor_avail;
294 }
295
296 /**
297 ******************************************************************************
298 *
299 * @brief
300 * evaluate best intra 16x16 mode (rate distortion opt off)
301 *
302 * @par Description
303 * This function evaluates all the possible intra 16x16 modes and finds the mode
304 * that best represents the macro-block (least distortion) and occupies fewer
305 * bits in the bit-stream.
306 *
307 * @param[in] ps_proc_ctxt
308 * pointer to process context (handle)
309 *
310 * @remarks
311 * Ideally the cost of encoding a macroblock is calculated as
312 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
313 * input block and the reconstructed block and rate is the number of bits taken
314 * to place the macroblock in the bit-stream. In this routine the rate does not
315 * exactly point to the total number of bits it takes, rather it points to header
316 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
317 * and residual bits fall in to texture bits the number of bits taken to encoding
318 * mbtype is considered as rate, we compute cost. Further we will approximate
319 * the distortion as the deviation b/w input and the predicted block as opposed
320 * to input and reconstructed block.
321 *
322 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
323 * the SAD and cost are one and the same.
324 *
325 * @return none
326 *
327 ******************************************************************************
328 */
ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)329 void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
330 {
331 /* Codec Context */
332 codec_t *ps_codec = ps_proc->ps_codec;
333
334 /* SAD(distortion metric) of an 8x8 block */
335 WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
336
337 /* lambda */
338 UWORD32 u4_lambda = ps_proc->u4_lambda;
339
340 /* cost = distortion + lambda*rate */
341 WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
342
343 /* intra mode */
344 UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
345
346 /* neighbor pels for intra prediction */
347 UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
348
349 /* neighbor availability */
350 WORD32 i4_ngbr_avbl;
351
352 /* pointer to src macro block */
353 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
354 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
355
356 /* pointer to prediction macro block */
357 UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
358 UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
359
360 /* strides */
361 WORD32 i4_src_strd = ps_proc->i4_src_strd;
362 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
363 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
364
365 /* pointer to neighbors left, top, topleft */
366 UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
367 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
368 UWORD8 *pu1_mb_d = pu1_mb_b - 1;
369 UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
370
371 /* valid intra modes map */
372 UWORD32 u4_valid_intra_modes;
373
374 /* lut for valid intra modes */
375 const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};
376
377 /* temp var */
378 UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
379 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
380 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
381
382 /* init temp var */
383 if (ps_proc->i4_slice_type != ISLICE)
384 {
385 /* Offset for MBtype */
386 offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
387 u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
388 }
389
390 /* locating neighbors that are available for prediction */
391
392 /* gather prediction pels from the neighbors, if particular set is not available
393 * it is set to zero*/
394 /* left pels */
395 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
396 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
397 if (u1_mb_a)
398 {
399 for(i = 0; i < 16; i++)
400 pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
401 }
402 else
403 {
404 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
405 }
406 /* top pels */
407 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
408 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
409 if (u1_mb_b)
410 {
411 ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
412 }
413 else
414 {
415 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
416 }
417 /* topleft pels */
418 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
419 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
420 if (u1_mb_d)
421 {
422 pu1_ngbr_pels_i16[16] = *pu1_mb_d;
423 }
424 else
425 {
426 pu1_ngbr_pels_i16[16] = 0;
427 }
428
429 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
430 ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
431
432 /* set valid intra modes for evaluation */
433 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
434
435 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
436 ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
437 u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
438
439 /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
440 ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
441 i4_src_strd, i4_pred_strd,
442 i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
443 u4_valid_intra_modes);
444
445 /* cost = distortion + lambda*rate */
446 i4_mb_cost_least = i4_mb_distortion_least;
447
448 if (((u4_valid_intra_modes >> 3) & 1) != 0)
449 {
450 /* intra prediction for PLANE mode*/
451 (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
452
453 /* evaluate distortion between the actual blk and the estimated blk for the given mode */
454 ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
455
456 /* cost = distortion + lambda*rate */
457 i4_mb_cost = i4_mb_distortion;
458
459 /* update the least cost information if necessary */
460 if(i4_mb_cost < i4_mb_distortion_least)
461 {
462 u4_intra_mode = PLANE_I16x16;
463
464 i4_mb_cost_least = i4_mb_cost;
465 i4_mb_distortion_least = i4_mb_distortion;
466 }
467 }
468
469 u4_best_intra_16x16_mode = u4_intra_mode;
470
471 DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
472
473 ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
474
475 /* cost = distortion + lambda*rate */
476 i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
477
478
479 /* update the type of the mb if necessary */
480 if (i4_mb_cost_least < ps_proc->i4_mb_cost)
481 {
482 ps_proc->i4_mb_cost = i4_mb_cost_least;
483 ps_proc->i4_mb_distortion = i4_mb_distortion_least;
484 ps_proc->u4_mb_type = I16x16;
485 }
486 if (i4_mb_cost_least < ps_proc->i4_mb_intra_cost)
487 {
488 ps_proc->i4_mb_intra_cost = i4_mb_cost_least;
489 }
490
491 return ;
492 }
493
494
495 /**
496 ******************************************************************************
497 *
498 * @brief
499 * evaluate best intra 8x8 mode (rate distortion opt on)
500 *
501 * @par Description
502 * This function evaluates all the possible intra 8x8 modes and finds the mode
503 * that best represents the macro-block (least distortion) and occupies fewer
504 * bits in the bit-stream.
505 *
506 * @param[in] ps_proc_ctxt
507 * pointer to proc ctxt
508 *
509 * @remarks Ideally the cost of encoding a macroblock is calculated as
510 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
511 * input block and the reconstructed block and rate is the number of bits taken
512 * to place the macroblock in the bit-stream. In this routine the rate does not
513 * exactly point to the total number of bits it takes, rather it points to header
514 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
515 * and residual bits fall in to texture bits the number of bits taken to encoding
516 * mbtype is considered as rate, we compute cost. Further we will approximate
517 * the distortion as the deviation b/w input and the predicted block as opposed
518 * to input and reconstructed block.
519 *
520 * NOTE: TODO: This function needs to be tested
521 *
522 * @return none
523 *
524 ******************************************************************************
525 */
ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)526 void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
527 {
528 /* Codec Context */
529 codec_t *ps_codec = ps_proc->ps_codec;
530
531 /* SAD(distortion metric) of an 4x4 block */
532 WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
533
534 /* lambda */
535 UWORD32 u4_lambda = ps_proc->u4_lambda;
536
537 /* cost = distortion + lambda*rate */
538 WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
539
540 /* cost due to mbtype */
541 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
542
543 /* intra mode */
544 UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
545
546 /* neighbor pels for intra prediction */
547 UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
548
549 /* pointer to curr partition */
550 UWORD8 *pu1_mb_curr;
551
552 /* pointer to prediction macro block */
553 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
554
555 /* strides */
556 WORD32 i4_src_strd = ps_proc->i4_src_strd;
557 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
558
559 /* neighbors left, top, top right, top left */
560 UWORD8 *pu1_mb_a;
561 UWORD8 *pu1_mb_b;
562 UWORD8 *pu1_mb_d;
563
564 /* neighbor availability */
565 WORD32 i4_ngbr_avbl;
566 block_neighbors_t s_ngbr_avbl;
567
568 /* temp vars */
569 UWORD32 b8, u4_pix_x, u4_pix_y;
570 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
571 block_neighbors_t s_ngbr_avbl_MB;
572
573 /* ngbr mb syntax information */
574 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
575 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
576 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
577
578 /* valid intra modes map */
579 UWORD32 u4_valid_intra_modes;
580
581 if (ps_proc->ps_ngbr_avbl->u1_mb_c)
582 {
583 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1);
584 }
585 /* left pels */
586 s_ngbr_avbl_MB.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
587 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
588
589 /* top pels */
590 s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
591 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
592
593 /* topleft pels */
594 s_ngbr_avbl_MB.u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
595 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
596
597 /* top right */
598 s_ngbr_avbl_MB.u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
599 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
600
601
602 for (b8 = 0; b8 < 4; b8++)
603 {
604 u4_pix_x = (b8 & 0x01) << 3;
605 u4_pix_y = (b8 >> 1) << 3;
606
607 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
608 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
609 /* as opposed to using the recon pels. (open loop intra prediction) */
610 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
611 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
612 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
613
614 /* locating neighbors that are available for prediction */
615 /* TODO : update the neighbor availability information basing on constrained intra pred information */
616 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
617 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
618 s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
619 s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
620 s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
621 s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
622
623 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
624 i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
625 (s_ngbr_avbl.u1_mb_a << 4);
626 /* if top partition is available and top right is not available for intra prediction, then */
627 /* padd top right samples using top sample and make top right also available */
628 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
629 ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
630
631
632 ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
633 i4_src_strd, i4_ngbr_avbl);
634
635 i4_partition_cost_least = INT_MAX;
636 /* set valid intra modes for evaluation */
637 u4_valid_intra_modes = 0x1ff;
638
639 if (!s_ngbr_avbl.u1_mb_b)
640 {
641 u4_valid_intra_modes &= ~(1 << VERT_I4x4);
642 u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
643 u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
644 }
645 if (!s_ngbr_avbl.u1_mb_a)
646 {
647 u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
648 u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
649 }
650 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
651 {
652 u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
653 u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
654 u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
655 }
656
657 /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
658 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
659 {
660 u4_estimated_intra_8x8_mode = DC_I8x8;
661 }
662 else
663 {
664 UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
665 UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
666
667 if (u4_pix_x == 0)
668 {
669 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
670 {
671 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
672 }
673 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
674 {
675 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
676 }
677 }
678 else
679 {
680 u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
681 }
682
683 if (u4_pix_y == 0)
684 {
685 if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
686 {
687 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
688 }
689 else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
690 {
691 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
692 }
693 }
694 else
695 {
696 u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
697 }
698
699 u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
700 }
701
702 /* perform intra mode 8x8 evaluation */
703 for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
704 {
705 if ( (u4_valid_intra_modes & 1) == 0)
706 continue;
707
708 /* intra prediction */
709 (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
710
711 /* evaluate distortion between the actual blk and the estimated blk for the given mode */
712 ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
713
714 i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
715
716 /* update the least cost information if necessary */
717 if (i4_partition_cost < i4_partition_cost_least)
718 {
719 i4_partition_cost_least = i4_partition_cost;
720 i4_partition_distortion_least = i4_partition_distortion;
721 u4_best_intra_8x8_mode = u4_intra_mode;
722 }
723 }
724 /* macroblock distortion */
725 i4_total_cost += i4_partition_cost_least;
726 i4_total_distortion += i4_partition_distortion_least;
727 /* mb partition mode */
728 ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
729
730 }
731
732 /* update the type of the mb if necessary */
733 if (i4_total_cost < ps_proc->i4_mb_cost)
734 {
735 ps_proc->i4_mb_cost = i4_total_cost;
736 ps_proc->i4_mb_distortion = i4_total_distortion;
737 ps_proc->u4_mb_type = I8x8;
738 }
739 if (i4_total_cost < ps_proc->i4_mb_intra_cost)
740 {
741 ps_proc->i4_mb_intra_cost = i4_total_cost;
742 }
743
744 return ;
745 }
746
747
748 /**
749 ******************************************************************************
750 *
751 * @brief
752 * evaluate best intra 4x4 mode (rate distortion opt off)
753 *
754 * @par Description
755 * This function evaluates all the possible intra 4x4 modes and finds the mode
756 * that best represents the macro-block (least distortion) and occupies fewer
757 * bits in the bit-stream.
758 *
759 * @param[in] ps_proc_ctxt
760 * pointer to proc ctxt
761 *
762 * @remarks
763 * Ideally the cost of encoding a macroblock is calculated as
764 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
765 * input block and the reconstructed block and rate is the number of bits taken
766 * to place the macroblock in the bit-stream. In this routine the rate does not
767 * exactly point to the total number of bits it takes, rather it points to header
768 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
769 * and residual bits fall in to texture bits the number of bits taken to encoding
770 * mbtype is considered as rate, we compute cost. Further we will approximate
771 * the distortion as the deviation b/w input and the predicted block as opposed
772 * to input and reconstructed block.
773 *
774 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
775 * 24*lambda is added to the SAD before comparison with the best SAD for
776 * inter prediction. This is an empirical value to prevent using too many intra
777 * blocks.
778 *
779 * @return none
780 *
781 ******************************************************************************
782 */
ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)783 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
784 {
785 /* Codec Context */
786 codec_t *ps_codec = ps_proc->ps_codec;
787
788 /* SAD(distortion metric) of an 4x4 block */
789 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
790
791 /* lambda */
792 UWORD32 u4_lambda = ps_proc->u4_lambda;
793
794 /* cost = distortion + lambda*rate */
795 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
796
797 /* cost due to mbtype */
798 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
799
800 /* intra mode */
801 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
802
803 /* neighbor pels for intra prediction */
804 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
805
806 /* pointer to curr partition */
807 UWORD8 *pu1_mb_curr;
808
809 /* pointer to prediction macro block */
810 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
811
812 /* strides */
813 WORD32 i4_src_strd = ps_proc->i4_src_strd;
814 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
815
816 /* neighbors left, top, top right, top left */
817 UWORD8 *pu1_mb_a;
818 UWORD8 *pu1_mb_b;
819 UWORD8 *pu1_mb_c;
820 UWORD8 *pu1_mb_d;
821
822 /* neighbor availability */
823 WORD32 i4_ngbr_avbl;
824 block_neighbors_t s_ngbr_avbl;
825
826 /* temp vars */
827 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
828
829 /* scan order inside 4x4 block */
830 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
831
832 /* ngbr sub mb modes */
833 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
834 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
835 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
836
837 /* valid intra modes map */
838 UWORD32 u4_valid_intra_modes;
839 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
840
841 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
842 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
843
844 if (ps_proc->ps_ngbr_avbl->u1_mb_c)
845 {
846 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1;
847 }
848 /* left pels */
849 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
850 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
851
852 /* top pels */
853 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
854 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
855
856 /* topleft pels */
857 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
858 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
859
860 /* top right */
861 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
862 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
863
864 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
865 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
866
867 for (b8 = 0; b8 < 4; b8++)
868 {
869 u4_blk_x = (b8 & 0x01) << 3;
870 u4_blk_y = (b8 >> 1) << 3;
871 for (b4 = 0; b4 < 4; b4++)
872 {
873 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
874 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
875
876 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
877 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
878 /* as opposed to using the recon pels. (open loop intra prediction) */
879 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
880 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
881 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
882 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
883
884 /* locating neighbors that are available for prediction */
885 /* TODO : update the neighbor availability information basing on constrained intra pred information */
886 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
887 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
888
889 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
890 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
891 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
892 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
893 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
894 /* set valid intra modes for evaluation */
895 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
896
897 /* if top partition is available and top right is not available for intra prediction, then */
898 /* padd top right samples using top sample and make top right also available */
899 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
900
901 /* gather prediction pels from the neighbors */
902 if (s_ngbr_avbl.u1_mb_a)
903 {
904 for(i = 0; i < 4; i++)
905 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
906 }
907 else
908 {
909 memset(pu1_ngbr_pels_i4, 0, 4);
910 }
911
912 if (s_ngbr_avbl.u1_mb_b)
913 {
914 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
915 }
916 else
917 {
918 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
919 }
920
921 if (s_ngbr_avbl.u1_mb_d)
922 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
923 else
924 pu1_ngbr_pels_i4[4] = 0;
925
926 if (s_ngbr_avbl.u1_mb_c)
927 {
928 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
929 }
930 else if (s_ngbr_avbl.u1_mb_b)
931 {
932 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
933 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
934 }
935
936 i4_partition_cost_least = INT_MAX;
937
938 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
939 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
940 {
941 u4_estimated_intra_4x4_mode = DC_I4x4;
942 }
943 else
944 {
945 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
946 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
947
948 if (u4_pix_x == 0)
949 {
950 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
951 {
952 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
953 }
954 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
955 {
956 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
957 }
958 }
959 else
960 {
961 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
962 }
963
964 if (u4_pix_y == 0)
965 {
966 if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
967 {
968 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
969 }
970 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
971 {
972 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
973 }
974 }
975 else
976 {
977 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
978 }
979
980 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
981 }
982
983 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
984
985 /* mode evaluation and prediction */
986 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
987 pu1_ngbr_pels_i4,
988 pu1_pred_mb, i4_src_strd,
989 i4_pred_strd, i4_ngbr_avbl,
990 &u4_best_intra_4x4_mode,
991 &i4_partition_cost_least,
992 u4_valid_intra_modes,
993 u4_lambda,
994 u4_estimated_intra_4x4_mode);
995
996
997 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
998
999 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
1000 /* macroblock distortion */
1001 i4_total_distortion += i4_partition_distortion_least;
1002 i4_total_cost += i4_partition_cost_least;
1003 /* mb partition mode */
1004 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1005 }
1006 }
1007
1008 /* update the type of the mb if necessary */
1009 if (i4_total_cost < ps_proc->i4_mb_cost)
1010 {
1011 ps_proc->i4_mb_cost = i4_total_cost;
1012 ps_proc->i4_mb_distortion = i4_total_distortion;
1013 ps_proc->u4_mb_type = I4x4;
1014 }
1015 if (i4_total_cost < ps_proc->i4_mb_intra_cost)
1016 {
1017 ps_proc->i4_mb_intra_cost = i4_total_cost;
1018 }
1019
1020 return ;
1021 }
1022
1023 /**
1024 ******************************************************************************
1025 *
1026 * @brief evaluate best intra 4x4 mode (rate distortion opt on)
1027 *
1028 * @par Description
1029 * This function evaluates all the possible intra 4x4 modes and finds the mode
1030 * that best represents the macro-block (least distortion) and occupies fewer
1031 * bits in the bit-stream.
1032 *
* @param[in] ps_proc
* pointer to the process context
1035 *
1036 * @remarks
1037 * Ideally the cost of encoding a macroblock is calculated as
1038 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
1039 * input block and the reconstructed block and rate is the number of bits taken
1040 * to place the macroblock in the bit-stream. In this routine the rate does not
1041 * exactly point to the total number of bits it takes, rather it points to header
1042 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
1043 * and residual bits fall in to texture bits the number of bits taken to encoding
1044 * mbtype is considered as rate, we compute cost. Further we will approximate
1045 * the distortion as the deviation b/w input and the predicted block as opposed
1046 * to input and reconstructed block.
1047 *
1048 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
1049 * 24*lambda is added to the SAD before comparison with the best SAD for
1050 * inter prediction. This is an empirical value to prevent using too many intra
1051 * blocks.
1052 *
1053 * @return none
1054 *
1055 ******************************************************************************
1056 */
void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
{
    /*
     * Raster position of a 4x4 block, (x >> 2) + y with y in {0, 4, 8, 12},
     * mapped to the index of that block in the per-block mode arrays (those
     * arrays are written in (b8 << 2) + b4 order).
     */
    const UWORD8 au1_raster_to_blk[16] =
                    {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};

    /*
     * Bitmask of the modes handed to the evaluator, looked up with
     * (left) | (top left << 1) | (top << 2) of the current 4x4 block.
     */
    const UWORD16 au2_mode_mask[8] =
                    {0x004, 0x106, 0x004, 0x106, 0x08D, 0x18F, 0x08D, 0x1FF};

    /* contexts */
    codec_t *ps_codec = ps_proc->ps_codec;
    quant_params_t *ps_qp = ps_proc->ps_qp_params[0];

    /* rate terms: a block costs lambda when the best mode equals the */
    /* estimated mode and 4 * lambda otherwise                        */
    UWORD32 u4_lambda = ps_proc->u4_lambda;
    UWORD32 u4_mode_bits_cost;

    /* macroblock accumulators; the cost starts from a fixed (24 + 1) * lambda */
    WORD32 i4_cost_sum = (24 + 1) * u4_lambda;
    WORD32 i4_distortion_sum = 0;

    /* per block results; the best mode is initialised once and is not */
    /* reset between blocks                                            */
    WORD32 i4_blk_cost, i4_blk_distortion;
    UWORD32 u4_best_mode = DC_I4x4;
    UWORD32 u4_est_mode;
    UWORD32 u4_allowed_modes;

    /* working buffers */
    UWORD8 *pu1_ngbr = ps_proc->au1_ngbr_pels;
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
    WORD16 *pi2_res_base = ps_proc->pi2_res_buf_intra_4x4;
    UWORD8 *pu1_nnz_base = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* information of the macroblock on top */
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
    UWORD8 *pu1_top_mb_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);

    /* macroblock level neighbor availability */
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok, u1_top_right_ok;
    WORD32 i4_mb_avbl;

    /* dc output of the 4x4 transform, not used here */
    WORD16 i2_dc_unused;

    UWORD32 u4_blk;

    /*
     * A neighboring macroblock is usable when it exists and, with
     * constrained intra prediction enabled, is itself intra coded.
     */
    u1_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_a)
                    && (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra);

    u1_top_ok = (ps_proc->ps_ngbr_avbl->u1_mb_b)
                    && (!u4_cip || ps_top_mb->u2_is_intra);

    u1_top_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_d)
                    && (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra);

    /* the entry right of the top mb is only read when top right exists */
    u1_top_right_ok = (ps_proc->ps_ngbr_avbl->u1_mb_c)
                    && (!u4_cip || ps_top_mb[1].u2_is_intra);

    /* expand the macroblock level flags to one entry per 4x4 block */
    i4_mb_avbl = (u1_left_ok) + (u1_top_left_ok << 1) + (u1_top_ok << 2)
                    + (u1_top_right_ok << 3);
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_mb_avbl], 16);

    /* blocks are visited in (b8 << 2) + b4 order */
    for (u4_blk = 0; u4_blk < 16; u4_blk++)
    {
        /* position of the block inside the macroblock */
        const UWORD32 u4_b8 = u4_blk >> 2;
        const UWORD32 u4_b4 = u4_blk & 0x03;
        const UWORD32 u4_x = ((u4_b8 & 0x01) << 3) + ((u4_b4 & 0x01) << 2);
        const UWORD32 u4_y = ((u4_b8 >> 1) << 3) + ((u4_b4 >> 1) << 2);
        const UWORD32 u4_raster = (u4_x >> 2) + u4_y;

        /* source block, its reconstruction slot, residue and nnz entry */
        UWORD8 *pu1_src_blk = ps_proc->pu1_src_buf_luma + u4_x + (u4_y * i4_src_strd);
        UWORD8 *pu1_recon_blk = ps_proc->pu1_ref_mb_intra_4x4 + u4_x + (u4_y * i4_pred_strd);
        WORD16 *pi2_res_blk = pi2_res_base + (u4_blk * MB_SIZE);
        UWORD8 *pu1_nnz_blk = pu1_nnz_base + u4_blk;

        /* reference used on the left / top side and neighbor pointers */
        UWORD8 *pu1_left_ref, *pu1_top_ref;
        UWORD8 *pu1_a, *pu1_b, *pu1_c, *pu1_d;
        WORD32 i4_left_strd, i4_top_strd;

        /* block level availability */
        WORD32 i4_blk_avbl;
        UWORD8 u1_has_left, u1_has_top_left, u1_has_top, u1_has_top_right;

        UWORD32 u4_row;

        /*
         * Blocks on the left / top edge of the macroblock take their
         * neighbors from the frame recon buffer; the others take them from
         * the 4x4 blocks of this macroblock reconstructed below.
         */
        if (0 == u4_x)
        {
            i4_left_strd = ps_proc->i4_rec_strd;
            pu1_left_ref = ps_proc->pu1_rec_buf_luma + u4_x + (u4_y * i4_left_strd);
        }
        else
        {
            i4_left_strd = i4_pred_strd;
            pu1_left_ref = pu1_recon_blk;
        }

        if (0 == u4_y)
        {
            i4_top_strd = ps_proc->i4_rec_strd;
            pu1_top_ref = ps_proc->pu1_rec_buf_luma + u4_x + (u4_y * i4_top_strd);
        }
        else
        {
            i4_top_strd = i4_pred_strd;
            pu1_top_ref = pu1_recon_blk;
        }

        pu1_a = pu1_left_ref - 1;               /* left      */
        pu1_b = pu1_top_ref - i4_top_strd;      /* top       */
        pu1_c = pu1_b + 4;                      /* top right */
        pu1_d = (0 == u4_y) ? (pu1_b - 1) : (pu1_a - i4_left_strd); /* top left */

        /* unpack the availability of this block */
        i4_blk_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[u4_blk];
        u1_has_left = i4_blk_avbl & 0x1;
        u1_has_top_left = (i4_blk_avbl >> 1) & 0x1;
        u1_has_top = (i4_blk_avbl >> 2) & 0x1;
        u1_has_top_right = (i4_blk_avbl >> 3) & 0x1;

        /* modes that may be evaluated with these neighbors */
        u4_allowed_modes = au2_mode_mask[i4_blk_avbl & 0x7];

        /*
         * Neighbor pel layout: [0..3] left column stored bottom to top,
         * [4] top left, [5..8] top row, [9..12] top right row.
         */
        if (u1_has_left)
        {
            for (u4_row = 0; u4_row < 4; u4_row++)
            {
                pu1_ngbr[3 - u4_row] = pu1_a[u4_row * i4_left_strd];
            }
        }
        else
        {
            memset(pu1_ngbr, 0, 4);
        }

        if (u1_has_top)
        {
            memcpy(pu1_ngbr + 5, pu1_b, 4);
        }
        else
        {
            memset(pu1_ngbr + 5, 0, 4);
        }

        pu1_ngbr[4] = u1_has_top_left ? *pu1_d : 0;

        if (u1_has_top_right)
        {
            memcpy(pu1_ngbr + 9, pu1_c, 4);
        }
        else if (u1_has_top)
        {
            /* top right missing: replicate the last top pel. The     */
            /* availability word given to the evaluator is left as is */
            memset(pu1_ngbr + 9, pu1_ngbr[8], 4);
        }

        /* estimate the mode of this block from its left and top neighbors; */
        /* a neighbor that is neither I4x4 nor I8x8 contributes DC          */
        u4_est_mode = DC_I4x4;
        if (u1_has_left && u1_has_top)
        {
            UWORD32 u4_left_mode = DC_I4x4;
            UWORD32 u4_top_mode = DC_I4x4;

            if (0 != u4_x)
            {
                /* left block belongs to this macroblock */
                u4_left_mode = ps_proc->au1_intra_luma_mb_4x4_modes[au1_raster_to_blk[u4_raster - 1]];
            }
            else if (I4x4 == ps_proc->s_left_mb_syntax_ele.u2_mb_type)
            {
                u4_left_mode = ps_proc->au1_left_mb_intra_modes[au1_raster_to_blk[3 + u4_y]];
            }
            else if (I8x8 == ps_proc->s_left_mb_syntax_ele.u2_mb_type)
            {
                u4_left_mode = ps_proc->au1_left_mb_intra_modes[u4_b8 + 1];
            }

            if (0 != u4_y)
            {
                /* top block belongs to this macroblock */
                u4_top_mode = ps_proc->au1_intra_luma_mb_4x4_modes[au1_raster_to_blk[u4_raster - 4]];
            }
            else if (I4x4 == ps_top_mb->u2_mb_type)
            {
                u4_top_mode = pu1_top_mb_modes[au1_raster_to_blk[12 + (u4_x >> 2)]];
            }
            else if (I8x8 == ps_top_mb->u2_mb_type)
            {
                u4_top_mode = pu1_top_mb_modes[u4_b8 + 2];
            }

            u4_est_mode = MIN(u4_left_mode, u4_top_mode);
        }
        ps_proc->au1_predicted_intra_luma_mb_4x4_modes[u4_blk] = u4_est_mode;

        /* evaluate the allowed modes and build the prediction */
        i4_blk_cost = INT_MAX;
        ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_src_blk, pu1_ngbr,
                                                     pu1_pred_mb, i4_src_strd,
                                                     i4_pred_strd, i4_blk_avbl,
                                                     &u4_best_mode,
                                                     &i4_blk_cost,
                                                     u4_allowed_modes,
                                                     u4_lambda,
                                                     u4_est_mode);

        /* distortion = cost minus the mode signalling term */
        u4_mode_bits_cost = (u4_est_mode == u4_best_mode) ? u4_lambda : (4 * u4_lambda);
        i4_blk_distortion = i4_blk_cost - u4_mode_bits_cost;

        DEBUG("%d partition cost, %d intra mode\n", i4_blk_cost, u4_best_mode);

        i4_distortion_sum += i4_blk_distortion;
        i4_cost_sum += i4_blk_cost;

        ps_proc->au1_intra_luma_mb_4x4_modes[u4_blk] = u4_best_mode;

        /* residue, forward transform and quantization of the block; */
        /* the coefficients land in a 1x16 buffer                    */
        ps_codec->pf_resi_trans_quant_4x4(pu1_src_blk, pu1_pred_mb,
                                          pi2_res_blk, i4_src_strd,
                                          i4_pred_strd,
                                          ps_qp->pu2_scale_mat,
                                          ps_qp->pu2_thres_mat,
                                          ps_qp->u1_qbits,
                                          ps_qp->u4_dead_zone,
                                          pu1_nnz_blk, &i2_dc_unused);

        /* inverse quantization, inverse transform and reconstruction; */
        /* the result is the reference for the blocks that follow      */
        ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_blk, pu1_pred_mb,
                                             pu1_recon_blk,
                                             i4_pred_strd, i4_pred_strd,
                                             ps_qp->pu2_iscale_mat,
                                             ps_qp->pu2_weigh_mat,
                                             ps_qp->u1_qp_div,
                                             ps_proc->pv_scratch_buff, 0,
                                             NULL);
    }

    /* switch the macroblock to I4x4 when it beats the best cost so far */
    if (i4_cost_sum < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_cost_sum;
        ps_proc->i4_mb_distortion = i4_distortion_sum;
        ps_proc->u4_mb_type = I4x4;
    }

    /* track the best intra cost separately */
    if (i4_cost_sum < ps_proc->i4_mb_intra_cost)
    {
        ps_proc->i4_mb_intra_cost = i4_cost_sum;
    }
}
1364
1365 /**
1366 ******************************************************************************
1367 *
1368 * @brief
1369 * evaluate best chroma intra 8x8 mode (rate distortion opt off)
1370 *
1371 * @par Description
1372 * This function evaluates all the possible chroma intra 8x8 modes and finds
1373 * the mode that best represents the macroblock (least distortion) and occupies
1374 * fewer bits in the bitstream.
1375 *
* @param[in] ps_proc
* pointer to the process context (handle)
1378 *
1379 * @remarks
1380 * For chroma best intra pred mode is calculated based only on SAD
1381 *
1382 * @returns none
1383 *
1384 ******************************************************************************
1385 */
void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
{
    /* bitmask of candidate modes, looked up with                  */
    /* (left) | (top left << 1) | (top << 2) of the macroblock     */
    const UWORD8 au1_mode_mask[8] = {1, 3, 1, 3, 5, 7, 5, 15};

    /* contexts */
    codec_t *ps_codec = ps_proc->ps_codec;
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;

    /* source, recon and prediction buffers */
    UWORD8 *pu1_src = ps_proc->pu1_src_buf_chroma;
    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_chroma;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_chroma;
    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
    UWORD8 *pu1_ngbr = ps_proc->au1_ngbr_pels;

    /* strides */
    WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* neighbors in the recon buffer; two bytes are consumed per pel   */
    /* position (presumably interleaved chroma - confirm with caller)  */
    UWORD8 *pu1_left = pu1_rec - 2;
    UWORD8 *pu1_top = pu1_rec - i4_rec_strd;
    UWORD8 *pu1_top_left = pu1_top - 2;

    /* availability of the neighbors */
    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok;
    WORD32 i4_avbl;

    /* mode selection */
    UWORD32 u4_allowed_modes;
    UWORD32 u4_best_mode = DC_CH_I8x8;
    WORD32 i4_best_sad, i4_plane_sad;

    WORD32 i4_row;

    /*
     * A neighbor is usable when it exists and, with constrained intra
     * prediction enabled, is itself intra coded.
     */

    /* left column: rows go to [0..15] bottom to top, two bytes per row */
    u1_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_a)
                    && (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra);
    if (!u1_left_ok)
    {
        ps_codec->pf_mem_set_mul8(pu1_ngbr, 0, MB_SIZE);
    }
    else
    {
        for (i4_row = 0; i4_row < 8; i4_row++)
        {
            pu1_ngbr[14 - (2 * i4_row)] = pu1_left[i4_row * i4_rec_strd];
            pu1_ngbr[15 - (2 * i4_row)] = pu1_left[i4_row * i4_rec_strd + 1];
        }
    }

    /* top row: 16 bytes to [18..33] */
    u1_top_ok = (ps_proc->ps_ngbr_avbl->u1_mb_b)
                    && (!u4_cip || ps_top_mb->u2_is_intra);
    if (!u1_top_ok)
    {
        ps_codec->pf_mem_set_mul8((pu1_ngbr + 18), 0, MB_SIZE);
    }
    else
    {
        ps_codec->pf_mem_cpy_mul8(&pu1_ngbr[18], pu1_top, 16);
    }

    /* top left: two bytes to [16..17]; left untouched when unavailable */
    u1_top_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_d)
                    && (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra);
    if (u1_top_left_ok)
    {
        pu1_ngbr[16] = pu1_top_left[0];
        pu1_ngbr[17] = pu1_top_left[1];
    }

    /* publish the availability word and derive the candidate modes */
    i4_avbl = (u1_left_ok) + (u1_top_ok << 2) + (u1_top_left_ok << 1);
    ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_avbl;

    u4_allowed_modes = au1_mode_mask[i4_avbl];

    /* the fast presets never try the plane mode */
    if ((IVE_FAST == ps_codec->s_cfg.u4_enc_speed_preset)
                    || (IVE_FASTEST == ps_codec->s_cfg.u4_enc_speed_preset))
    {
        u4_allowed_modes &= ~(1 << PLANE_CH_I8x8);
    }

    /* evaluate the modes handled by the common evaluator */
    i4_best_sad = INT_MAX;
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_src,
                                                    pu1_ngbr,
                                                    pu1_pred,
                                                    i4_src_strd,
                                                    i4_pred_strd,
                                                    i4_avbl,
                                                    &u4_best_mode,
                                                    &i4_best_sad,
                                                    u4_allowed_modes);

    /* plane mode is predicted into its own buffer and compared by sad */
    if (u4_allowed_modes & 8)
    {
        (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr, pu1_pred_plane, 0, i4_pred_strd, i4_avbl);

        ps_codec->pf_compute_sad_16x8(pu1_src, pu1_pred_plane, i4_src_strd, i4_pred_strd, i4_best_sad, &i4_plane_sad);

        if (i4_plane_sad < i4_best_sad)
        {
            i4_best_sad = i4_plane_sad;
            u4_best_mode = PLANE_CH_I8x8;
        }
    }

    DEBUG("%d partition cost, %d intra mode\n", i4_best_sad, u4_best_mode);

    ps_proc->u1_c_i8_mode = u4_best_mode;
}
1513
1514
1515 /**
1516 ******************************************************************************
1517 *
1518 * @brief
1519 * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1520 * prediction.
1521 *
1522 * @par Description
1523 * This function evaluates first three 16x16 modes and compute corresponding sad
1524 * and return the buffer predicted with best mode.
1525 *
1526 * @param[in] pu1_src
1527 * UWORD8 pointer to the source
1528 *
1529 * @param[in] pu1_ngbr_pels_i16
1530 * UWORD8 pointer to neighbouring pels
1531 *
1532 * @param[out] pu1_dst
1533 * UWORD8 pointer to the destination
1534 *
1535 * @param[in] src_strd
1536 * integer source stride
1537 *
1538 * @param[in] dst_strd
1539 * integer destination stride
1540 *
1541 * @param[in] u4_n_avblty
1542 * availability of neighbouring pixels
1543 *
* @param[out] u4_intra_mode
* Pointer to the variable in which best mode is returned
*
* @param[in,out] pu4_sadmin
* Pointer to the minimum sad so far; the best mode, the prediction and this value are updated only when a lower sad is found
1549 *
1550 * @param[in] u4_valid_intra_modes
1551 * Says what all modes are valid
1552 *
1553 * @returns none
1554 *
1555 ******************************************************************************
1556 */
void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
                                      UWORD8 *pu1_ngbr_pels_i16,
                                      UWORD8 *pu1_dst,
                                      UWORD32 src_strd,
                                      UWORD32 dst_strd,
                                      WORD32 u4_n_avblty,
                                      UWORD32 *u4_intra_mode,
                                      WORD32 *pu4_sadmin,
                                      UWORD32 u4_valid_intra_modes)
{
    /* neighbor layout used here: [0..15] left column stored bottom to */
    /* top, [17..32] top row                                           */
    UWORD8 *pu1_top_row = pu1_ngbr_pels_i16 + 17;

    /* availability of the left and top neighbors */
    UWORD8 u1_left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
    UWORD8 u1_top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;

    /* sad of each candidate; INT_MAX marks a mode that is out of the race */
    WORD32 i4_sad_h = INT_MAX;
    WORD32 i4_sad_v = INT_MAX;
    WORD32 i4_sad_dc = 0;
    WORD32 i4_best_sad;

    /* dc predictor, accumulated while the neighbors are scanned */
    WORD32 i4_dc = 0;

    UWORD8 *pu1_row;
    UWORD8 u1_pel;
    WORD32 row, col;

    /* horizontal: every row is predicted from its left pel */
    if (u1_left)
    {
        i4_sad_h = 0;

        for (row = 0, pu1_row = pu1_src; row < 16; row++, pu1_row += src_strd)
        {
            u1_pel = pu1_ngbr_pels_i16[15 - row];
            i4_dc += u1_pel;

            for (col = 0; col < 16; col++)
            {
                i4_sad_h += ABS(u1_pel - pu1_row[col]);
            }
        }
        i4_dc += 8;
    }

    /* vertical: every column is predicted from its top pel */
    if (u1_top)
    {
        i4_sad_v = 0;

        for (row = 0, pu1_row = pu1_src; row < 16; row++, pu1_row += src_strd)
        {
            i4_dc += pu1_top_row[row];

            for (col = 0; col < 16; col++)
            {
                i4_sad_v += ABS(pu1_top_row[col] - pu1_row[col]);
            }
        }
        i4_dc += 8;
    }

    /* rounded mean of the available neighbors, 128 when there are none */
    i4_dc >>= (3 + u1_left + u1_top);
    if ((0 == u1_left) && (0 == u1_top))
    {
        i4_dc += 128;
    }

    /* dc: the whole block is predicted from one value */
    for (row = 0, pu1_row = pu1_src; row < 16; row++, pu1_row += src_strd)
    {
        for (col = 0; col < 16; col++)
        {
            i4_sad_dc += ABS(i4_dc - pu1_row[col]);
        }
    }

    /* drop the modes the caller did not ask for */
    /* (bit 0 : VERT, bit 1 : HORZ, bit 2 : DC)  */
    if (0 == (u4_valid_intra_modes & 01))
    {
        i4_sad_v = INT_MAX;
    }
    if (0 == (u4_valid_intra_modes & 02))
    {
        i4_sad_h = INT_MAX;
    }
    if (0 == (u4_valid_intra_modes & 04))
    {
        i4_sad_dc = INT_MAX;
    }

    /* least of the three sads */
    i4_best_sad = i4_sad_h;
    if (i4_sad_dc < i4_best_sad)
    {
        i4_best_sad = i4_sad_dc;
    }
    if (i4_sad_v < i4_best_sad)
    {
        i4_best_sad = i4_sad_v;
    }

    /* nothing is written unless the caller's sad is improved upon */
    if (i4_best_sad >= *pu4_sadmin)
    {
        return;
    }
    *pu4_sadmin = i4_best_sad;

    /* build the prediction of the winner; ties resolve as VERT, HORZ, DC */
    if (i4_best_sad == i4_sad_v)
    {
        *u4_intra_mode = VERT_I16x16;
        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
        {
            memcpy(pu1_dst, pu1_top_row, MB_SIZE);
        }
    }
    else if (i4_best_sad == i4_sad_h)
    {
        *u4_intra_mode = HORZ_I16x16;
        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
        {
            u1_pel = pu1_ngbr_pels_i16[15 - row];
            memset(pu1_dst, u1_pel, MB_SIZE);
        }
    }
    else
    {
        *u4_intra_mode = DC_I16x16;
        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
        {
            memset(pu1_dst, i4_dc, MB_SIZE);
        }
    }
}
1685
1686 /**
1687 ******************************************************************************
1688 *
1689 * @brief
1690 * Evaluate best intra 4x4 mode and perform prediction.
1691 *
1692 * @par Description
1693 * This function evaluates 4x4 modes and compute corresponding sad
1694 * and return the buffer predicted with best mode.
1695 *
1696 * @param[in] pu1_src
1697 * UWORD8 pointer to the source
1698 *
1699 * @param[in] pu1_ngbr_pels
1700 * UWORD8 pointer to neighbouring pels
1701 *
1702 * @param[out] pu1_dst
1703 * UWORD8 pointer to the destination
1704 *
1705 * @param[in] src_strd
1706 * integer source stride
1707 *
1708 * @param[in] dst_strd
1709 * integer destination stride
1710 *
1711 * @param[in] u4_n_avblty
1712 * availability of neighbouring pixels
1713 *
1714 * @param[in] u4_intra_mode
1715 * Pointer to the variable in which best mode is returned
1716 *
1717 * @param[in] pu4_sadmin
1718 * Pointer to the variable in which minimum cost is returned
1719 *
1720 * @param[in] u4_valid_intra_modes
1721 * Says what all modes are valid
1722 *
* @param[in] u4_lambda
* Lambda value for computing cost from SAD
1725 *
1726 * @param[in] u4_predictd_mode
1727 * Predicted mode for cost computation
1728 *
1729 * @returns none
1730 *
1731 ******************************************************************************
1732 */
ih264e_evaluate_intra_4x4_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes,UWORD32 u4_lambda,UWORD32 u4_predictd_mode)1733 void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
1734 UWORD8 *pu1_ngbr_pels,
1735 UWORD8 *pu1_dst,
1736 UWORD32 src_strd,
1737 UWORD32 dst_strd,
1738 WORD32 u4_n_avblty,
1739 UWORD32 *u4_intra_mode,
1740 WORD32 *pu4_sadmin,
1741 UWORD32 u4_valid_intra_modes,
1742 UWORD32 u4_lambda,
1743 UWORD32 u4_predictd_mode)
1744 {
1745 UWORD8 *pu1_src_temp = pu1_src;
1746 UWORD8 *pu1_pred = pu1_ngbr_pels;
1747 UWORD8 left = 0, top = 0;
1748 UWORD8 u1_pred_val = 0;
1749 UWORD8 u1_pred_vals[4] = {0};
1750 UWORD8 *pu1_pred_val = NULL;
1751 /* To store FILT121 operated values*/
1752 UWORD8 u1_pred_vals_diag_121[15] = {0};
1753 /* To store FILT11 operated values*/
1754 UWORD8 u1_pred_vals_diag_11[15] = {0};
1755 UWORD8 u1_pred_vals_vert_r[8] = {0};
1756 UWORD8 u1_pred_vals_horz_d[10] = {0};
1757 UWORD8 u1_pred_vals_horz_u[10] = {0};
1758 WORD32 u4_dcval = 0;
1759 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1760 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1761
1762 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1763 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1764 WORD32 i, i4_min_cost = INT_MAX;
1765
1766 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1767 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1768
1769 /* Computing SAD */
1770
1771 /* VERT mode valid */
1772 if (u4_valid_intra_modes & 1)
1773 {
1774 pu1_pred = pu1_ngbr_pels + 5;
1775 i4_sad[VERT_I4x4] = 0;
1776 i4_cost[VERT_I4x4] = 0;
1777
1778 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1779 pu1_src_temp += src_strd;
1780 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1781 pu1_src_temp += src_strd;
1782 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1783 pu1_src_temp += src_strd;
1784 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1785
1786 i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
1787 u4_lambda : 4 * u4_lambda);
1788 }
1789
1790 /* HORZ mode valid */
1791 if (u4_valid_intra_modes & 2)
1792 {
1793 i4_sad[HORZ_I4x4] = 0;
1794 i4_cost[HORZ_I4x4] =0;
1795 pu1_src_temp = pu1_src;
1796
1797 u1_pred_val = pu1_ngbr_pels[3];
1798
1799 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1800 + ABS(pu1_src_temp[1] - u1_pred_val)
1801 + ABS(pu1_src_temp[2] - u1_pred_val)
1802 + ABS(pu1_src_temp[3] - u1_pred_val);
1803 pu1_src_temp += src_strd;
1804
1805 u1_pred_val = pu1_ngbr_pels[2];
1806
1807 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1808 + ABS(pu1_src_temp[1] - u1_pred_val)
1809 + ABS(pu1_src_temp[2] - u1_pred_val)
1810 + ABS(pu1_src_temp[3] - u1_pred_val);
1811 pu1_src_temp += src_strd;
1812
1813 u1_pred_val = pu1_ngbr_pels[1];
1814
1815 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1816 + ABS(pu1_src_temp[1] - u1_pred_val)
1817 + ABS(pu1_src_temp[2] - u1_pred_val)
1818 + ABS(pu1_src_temp[3] - u1_pred_val);
1819 pu1_src_temp += src_strd;
1820
1821 u1_pred_val = pu1_ngbr_pels[0];
1822
1823 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1824 + ABS(pu1_src_temp[1] - u1_pred_val)
1825 + ABS(pu1_src_temp[2] - u1_pred_val)
1826 + ABS(pu1_src_temp[3] - u1_pred_val);
1827
1828 i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
1829 u4_lambda : 4 * u4_lambda);
1830 }
1831
1832 /* DC mode valid */
1833 if (u4_valid_intra_modes & 4)
1834 {
1835 i4_sad[DC_I4x4] = 0;
1836 i4_cost[DC_I4x4] = 0;
1837 pu1_src_temp = pu1_src;
1838
1839 if (left)
1840 u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
1841 + pu1_ngbr_pels[3] + 2;
1842 if (top)
1843 u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
1844 + pu1_ngbr_pels[8] + 2;
1845
1846 u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1847
1848 /* none available */
1849 memset(u1_pred_vals, u4_dcval, 4);
1850 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1851 pu1_src_temp += src_strd;
1852 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1853 pu1_src_temp += src_strd;
1854 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1855 pu1_src_temp += src_strd;
1856 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1857 pu1_src_temp += src_strd;
1858
1859 i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
1860 u4_lambda : 4 * u4_lambda);
1861 }
1862
1863 /* if modes other than VERT, HORZ and DC are valid */
1864 if (u4_valid_intra_modes > 7)
1865 {
1866 pu1_pred = pu1_ngbr_pels;
1867 pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1868
1869 /* Performing FILT121 and FILT11 operation for all neighbour values*/
1870 for (i = 0; i < 13; i++)
1871 {
1872 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1873 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1874
1875 pu1_pred++;
1876 }
1877
1878 if (u4_valid_intra_modes & 8)/* DIAG_DL */
1879 {
1880 i4_sad[DIAG_DL_I4x4] = 0;
1881 i4_cost[DIAG_DL_I4x4] = 0;
1882 pu1_src_temp = pu1_src;
1883 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1884
1885 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1886 pu1_src_temp += src_strd;
1887 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1888 pu1_src_temp += src_strd;
1889 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1890 pu1_src_temp += src_strd;
1891 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1892 pu1_src_temp += src_strd;
1893 i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
1894 u4_lambda : 4 * u4_lambda);
1895 }
1896
1897 if (u4_valid_intra_modes & 16)/* DIAG_DR */
1898 {
1899 i4_sad[DIAG_DR_I4x4] = 0;
1900 i4_cost[DIAG_DR_I4x4] = 0;
1901 pu1_src_temp = pu1_src;
1902 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1903
1904 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1905 pu1_src_temp += src_strd;
1906 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1907 pu1_src_temp += src_strd;
1908 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1909 pu1_src_temp += src_strd;
1910 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1911 pu1_src_temp += src_strd;
1912 i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
1913 u4_lambda : 4 * u4_lambda);
1914
1915 }
1916
1917 if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
1918 {
1919 i4_sad[VERT_R_I4x4] = 0;
1920
1921 pu1_src_temp = pu1_src;
1922 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1923 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1924 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1925 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1926
1927 pu1_pred_val = u1_pred_vals_diag_11 + 4;
1928 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1929 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1930 pu1_src_temp += src_strd;
1931 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1932 pu1_src_temp += src_strd;
1933 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1934 pu1_src_temp += src_strd;
1935 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
1936 i4_sad[VERT_R_I4x4]);
1937
1938 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
1939 u4_lambda : 4 * u4_lambda);
1940 }
1941
1942 if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
1943 {
1944 i4_sad[HORZ_D_I4x4] = 0;
1945
1946 pu1_src_temp = pu1_src;
1947 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1948 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1949 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1950 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1951 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1952 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1953 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1954 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1955
1956 pu1_pred_val = u1_pred_vals_horz_d;
1957 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1958 pu1_src_temp += src_strd;
1959 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1960 pu1_src_temp += src_strd;
1961 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1962 pu1_src_temp += src_strd;
1963 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1964
1965 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
1966 u4_lambda : 4 * u4_lambda);
1967 }
1968
1969 if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
1970 {
1971 i4_sad[VERT_L_I4x4] = 0;
1972 pu1_src_temp = pu1_src;
1973 pu1_pred_val = u1_pred_vals_diag_11 + 5;
1974 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1975 pu1_src_temp += src_strd;
1976 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1977 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1978 pu1_src_temp += src_strd;
1979 pu1_pred_val = u1_pred_vals_diag_11 + 6;
1980 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1981 pu1_src_temp += src_strd;
1982 pu1_pred_val = u1_pred_vals_diag_121 + 6;
1983 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1984
1985 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
1986 u4_lambda : 4 * u4_lambda);
1987 }
1988
1989 if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
1990 {
1991 i4_sad[HORZ_U_I4x4] = 0;
1992 pu1_src_temp = pu1_src;
1993 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1994 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1995 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1996 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1997 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1998 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1999
2000 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
2001
2002 pu1_pred_val = u1_pred_vals_horz_u;
2003 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
2004 pu1_src_temp += src_strd;
2005 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
2006 pu1_src_temp += src_strd;
2007 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
2008 pu1_src_temp += src_strd;
2009 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
2010
2011 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
2012 u4_lambda : 4 * u4_lambda);
2013 }
2014
2015 i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
2016 MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
2017 MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
2018
2019 }
2020 else
2021 {
2022 /* Only first three modes valid */
2023 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
2024 }
2025
2026 *pu4_sadmin = i4_min_cost;
2027
2028 if (i4_min_cost == i4_cost[0])
2029 {
2030 *u4_intra_mode = VERT_I4x4;
2031 pu1_pred_val = pu1_ngbr_pels + 5;
2032 memcpy(pu1_dst, (pu1_pred_val), 4);
2033 pu1_dst += dst_strd;
2034 memcpy(pu1_dst, (pu1_pred_val), 4);
2035 pu1_dst += dst_strd;
2036 memcpy(pu1_dst, (pu1_pred_val), 4);
2037 pu1_dst += dst_strd;
2038 memcpy(pu1_dst, (pu1_pred_val), 4);
2039 }
2040 else if (i4_min_cost == i4_cost[1])
2041 {
2042 *u4_intra_mode = HORZ_I4x4;
2043 memset(pu1_dst, pu1_ngbr_pels[3], 4);
2044 pu1_dst += dst_strd;
2045 memset(pu1_dst, pu1_ngbr_pels[2], 4);
2046 pu1_dst += dst_strd;
2047 memset(pu1_dst, pu1_ngbr_pels[1], 4);
2048 pu1_dst += dst_strd;
2049 memset(pu1_dst, pu1_ngbr_pels[0], 4);
2050 }
2051 else if (i4_min_cost == i4_cost[2])
2052 {
2053 *u4_intra_mode = DC_I4x4;
2054 memset(pu1_dst, u4_dcval, 4);
2055 pu1_dst += dst_strd;
2056 memset(pu1_dst, u4_dcval, 4);
2057 pu1_dst += dst_strd;
2058 memset(pu1_dst, u4_dcval, 4);
2059 pu1_dst += dst_strd;
2060 memset(pu1_dst, u4_dcval, 4);
2061 }
2062 else if (i4_min_cost == i4_cost[3])
2063 {
2064 *u4_intra_mode = DIAG_DL_I4x4;
2065 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2066 memcpy(pu1_dst, (pu1_pred_val), 4);
2067 pu1_dst += dst_strd;
2068 memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2069 pu1_dst += dst_strd;
2070 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2071 pu1_dst += dst_strd;
2072 memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2073 }
2074 else if (i4_min_cost == i4_cost[4])
2075 {
2076 *u4_intra_mode = DIAG_DR_I4x4;
2077 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2078
2079 memcpy(pu1_dst, (pu1_pred_val), 4);
2080 pu1_dst += dst_strd;
2081 memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2082 pu1_dst += dst_strd;
2083 memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2084 pu1_dst += dst_strd;
2085 memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2086 }
2087 else if (i4_min_cost == i4_cost[5])
2088 {
2089 *u4_intra_mode = VERT_R_I4x4;
2090 pu1_pred_val = u1_pred_vals_diag_11 + 4;
2091 memcpy(pu1_dst, (pu1_pred_val), 4);
2092 pu1_dst += dst_strd;
2093 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2094 memcpy(pu1_dst, (pu1_pred_val), 4);
2095 pu1_dst += dst_strd;
2096 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2097 pu1_dst += dst_strd;
2098 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2099 }
2100 else if (i4_min_cost == i4_cost[6])
2101 {
2102 *u4_intra_mode = HORZ_D_I4x4;
2103 pu1_pred_val = u1_pred_vals_horz_d;
2104 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2105 pu1_dst += dst_strd;
2106 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2107 pu1_dst += dst_strd;
2108 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2109 pu1_dst += dst_strd;
2110 memcpy(pu1_dst, (pu1_pred_val), 4);
2111 pu1_dst += dst_strd;
2112 }
2113 else if (i4_min_cost == i4_cost[7])
2114 {
2115 *u4_intra_mode = VERT_L_I4x4;
2116 pu1_pred_val = u1_pred_vals_diag_11 + 5;
2117 memcpy(pu1_dst, (pu1_pred_val), 4);
2118 pu1_dst += dst_strd;
2119 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2120 memcpy(pu1_dst, (pu1_pred_val), 4);
2121 pu1_dst += dst_strd;
2122 pu1_pred_val = u1_pred_vals_diag_11 + 6;
2123 memcpy(pu1_dst, (pu1_pred_val), 4);
2124 pu1_dst += dst_strd;
2125 pu1_pred_val = u1_pred_vals_diag_121 + 6;
2126 memcpy(pu1_dst, (pu1_pred_val), 4);
2127 }
2128 else if (i4_min_cost == i4_cost[8])
2129 {
2130 *u4_intra_mode = HORZ_U_I4x4;
2131 pu1_pred_val = u1_pred_vals_horz_u;
2132 memcpy(pu1_dst, (pu1_pred_val), 4);
2133 pu1_dst += dst_strd;
2134 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2135 pu1_dst += dst_strd;
2136 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2137 pu1_dst += dst_strd;
2138 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2139 pu1_dst += dst_strd;
2140 }
2141
2142 return;
2143 }
2144
2145 /**
2146 ******************************************************************************
2147 *
2148 * @brief:
2149 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the prediction.
2150 *
2151 * @par Description
2152 * This function evaluates first three intra chroma modes and compute corresponding sad
2153 * and return the buffer predicted with best mode.
2154 *
2155 * @param[in] pu1_src
2156 * UWORD8 pointer to the source
2157 *
2158 * @param[in] pu1_ngbr_pels
2159 * UWORD8 pointer to neighbouring pels
2160 *
2161 * @param[out] pu1_dst
2162 * UWORD8 pointer to the destination
2163 *
2164 * @param[in] src_strd
2165 * integer source stride
2166 *
2167 * @param[in] dst_strd
2168 * integer destination stride
2169 *
2170 * @param[in] u4_n_avblty
2171 * availability of neighbouring pixels
2172 *
2173 * @param[in] u4_intra_mode
2174 * Pointer to the variable in which best mode is returned
2175 *
2176 * @param[in] pu4_sadmin
2177 * Pointer to the variable in which minimum sad is returned
2178 *
2179 * @param[in] u4_valid_intra_modes
2180 * Says what all modes are valid
2181 *
2182 * @return none
2183 *
2184 ******************************************************************************
2185 */
ih264e_evaluate_intra_chroma_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)2186 void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
2187 UWORD8 *pu1_ngbr_pels,
2188 UWORD8 *pu1_dst,
2189 UWORD32 src_strd,
2190 UWORD32 dst_strd,
2191 WORD32 u4_n_avblty,
2192 UWORD32 *u4_intra_mode,
2193 WORD32 *pu4_sadmin,
2194 UWORD32 u4_valid_intra_modes)
2195 {
2196 UWORD8 *pu1_neighbour;
2197 UWORD8 *pu1_src_temp = pu1_src;
2198 UWORD8 left = 0, top = 0;
2199 WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */
2200 u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/
2201
2202 WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/
2203 u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/
2204
2205 WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
2206 i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
2207 UWORD8 val_u, val_v;
2208
2209 WORD32 u4_dc_val[2][2][2];/* -----------
2210 | | | Chroma can have four
2211 | 00 | 01 | separate dc value...
2212 ----------- u4_dc_val corresponds to this dc values
2213 | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V]
2214 | 10 | 11 |
2215 ----------- */
2216 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2217 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2218
2219 /*Evaluating HORZ*/
2220 if (left)/* Ifleft available*/
2221 {
2222 i4_sad_horz = 0;
2223
2224 for (i = 0; i < 8; i++)
2225 {
2226 val_v = pu1_ngbr_pels[15 - 2 * i];
2227 val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2228 row = i / 4;
2229 u4_dcval_u_l[row] += val_u;
2230 u4_dcval_v_l[row] += val_v;
2231 for (j = 0; j < 8; j++)
2232 {
2233 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/
2234 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2235 }
2236
2237 pu1_src_temp += src_strd;
2238 }
2239 u4_dcval_u_l[0] += 2;
2240 u4_dcval_u_l[1] += 2;
2241 u4_dcval_v_l[0] += 2;
2242 u4_dcval_v_l[1] += 2;
2243 }
2244
2245 /*Evaluating VERT**/
2246 pu1_src_temp = pu1_src;
2247 if (top) /* top available*/
2248 {
2249 i4_sad_vert = 0;
2250
2251 for (i = 0; i < 8; i++)
2252 {
2253 col = i / 4;
2254
2255 val_u = pu1_ngbr_pels[18 + i * 2];
2256 val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2257 u4_dcval_u_t[col] += val_u;
2258 u4_dcval_v_t[col] += val_v;
2259
2260 for (j = 0; j < 16; j++)
2261 {
2262 i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/
2263 }
2264 pu1_src_temp += src_strd;
2265
2266 }
2267 u4_dcval_u_t[0] += 2;
2268 u4_dcval_u_t[1] += 2;
2269 u4_dcval_v_t[0] += 2;
2270 u4_dcval_v_t[1] += 2;
2271 }
2272
2273 /* computing DC value*/
2274 /* Equation 8-128 in spec*/
2275 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2276 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2277 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2278 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2279
2280 if (top)
2281 {
2282 /* Equation 8-132 in spec*/
2283 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2284 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2285 }
2286 else
2287 {
2288 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2289 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2290 }
2291
2292 if (left)
2293 {
2294 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2295 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2296 }
2297 else
2298 {
2299 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2300 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2301 }
2302
2303 if (!(left || top))
2304 {
2305 /*none available*/
2306 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
2307 u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2308 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
2309 u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2310 }
2311
2312 /* Evaluating DC */
2313 pu1_src_temp = pu1_src;
2314 i4_sad_dc = 0;
2315 for (i = 0; i < 8; i++)
2316 {
2317 for (j = 0; j < 8; j++)
2318 {
2319 col = j / 4;
2320 row = i / 4;
2321 val_u = u4_dc_val[row][col][0];
2322 val_v = u4_dc_val[row][col][1];
2323
2324 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/
2325 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2326 }
2327 pu1_src_temp += src_strd;
2328 }
2329
2330 if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/
2331 i4_sad_dc = INT_MAX;
2332 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/
2333 i4_sad_horz = INT_MAX;
2334 if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/
2335 i4_sad_vert = INT_MAX;
2336
2337 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2338
2339 /* Finding Minimum sad and doing corresponding prediction*/
2340 if (i4_min_sad < *pu4_sadmin)
2341 {
2342 *pu4_sadmin = i4_min_sad;
2343
2344 if (i4_min_sad == i4_sad_dc)
2345 {
2346 *u4_intra_mode = DC_CH_I8x8;
2347 for (i = 0; i < 8; i++)
2348 {
2349 for (j = 0; j < 8; j++)
2350 {
2351 col = j / 4;
2352 row = i / 4;
2353
2354 pu1_dst[2 * j] = u4_dc_val[row][col][0];
2355 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2356 }
2357 pu1_dst += dst_strd;
2358 }
2359 }
2360 else if (i4_min_sad == i4_sad_horz)
2361 {
2362 *u4_intra_mode = HORZ_CH_I8x8;
2363 for (j = 0; j < 8; j++)
2364 {
2365 val_v = pu1_ngbr_pels[15 - 2 * j];
2366 val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2367
2368 for (i = 0; i < 8; i++)
2369 {
2370 pu1_dst[2 * i] = val_u;
2371 pu1_dst[2 * i + 1] = val_v;
2372
2373 }
2374 pu1_dst += dst_strd;
2375 }
2376 }
2377 else
2378 {
2379 *u4_intra_mode = VERT_CH_I8x8;
2380 pu1_neighbour = pu1_ngbr_pels + 18;
2381 for (j = 0; j < 8; j++)
2382 {
2383 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2384 pu1_dst += dst_strd;
2385 }
2386 }
2387 }
2388
2389 return;
2390 }
2391