1 /******************************************************************************
2 *
3 * Copyright (C) 2022 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * isvce_intra_modes_eval.c
25 *
26 * @brief
27 * This file contains definitions of routines that perform rate distortion
28 * analysis on a macroblock if they are to be coded as intra.
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - isvce_derive_neighbor_availability_of_mbs()
35 * - isvce_derive_ngbr_avbl_of_mb_partitions()
36 * - isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
37 * - isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
38 * - isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
39 * - isvce_evaluate_intra4x4_modes_for_least_cost_rdopton()
40 * - isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
41 * - isvce_evaluate_intra16x16_modes()
42 * - isvce_evaluate_intra4x4_modes()
43 * - isvce_evaluate_intra_chroma_modes()
44 *
45 * @remarks
46 * None
47 *
48 *******************************************************************************
49 */
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* System include files */
56 #include <stdio.h>
57 #include <string.h>
58 #include <limits.h>
59 #include <assert.h>
60
61 /* User include files */
62 #include "ih264e_config.h"
63 #include "ih264_typedefs.h"
64 #include "iv2.h"
65 #include "ive2.h"
66 #include "ih264_debug.h"
67 #include "isvc_defs.h"
68 #include "isvc_macros.h"
69 #include "ih264_intra_pred_filters.h"
70 #include "isvc_structs.h"
71 #include "isvc_common_tables.h"
72 #include "isvc_trans_quant_itrans_iquant.h"
73 #include "isvc_inter_pred_filters.h"
74 #include "isvc_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_size_defs.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "isvc_cabac_tables.h"
79 #include "isvce_defs.h"
80 #include "ime_distortion_metrics.h"
81 #include "ih264e_error.h"
82 #include "ih264e_bitstream.h"
83 #include "ime_defs.h"
84 #include "ime_structs.h"
85 #include "irc_cntrl_param.h"
86 #include "irc_frame_info_collector.h"
87 #include "isvce_rate_control.h"
88 #include "isvce_cabac_structs.h"
89 #include "isvce_structs.h"
90 #include "ih264e_intra_modes_eval.h"
91 #include "isvce_globals.h"
92 #include "ime_platform_macros.h"
93
94 /*****************************************************************************/
95 /* Function Definitions */
96 /*****************************************************************************/
97
98 /**
99 ******************************************************************************
100 *
101 * @brief
102 * derivation process for subblock/partition availability
103 *
104 * @par Description
105 * Calculates the availability of the left, top, topright and topleft subblock
106 * or partitions.
107 *
* @param[in] ps_ngbr_avbl
*  neighbor availability (left, top, top right, top left) of the current
*  macroblock
*
* @param[in] i1_pel_pos_x
*  column position of the pel with respect to the current macroblock
*
* @param[in] i1_pel_pos_y
*  row position of the pel with respect to the current macroblock
116 *
117 * @remarks Assumptions: before calling this function it is assumed that
118 * the neighbor availability of the current macroblock is already derived.
119 * Based on table 6-3 of H264 specification
120 *
121 * @return availability status (yes or no)
122 *
123 ******************************************************************************
124 */
isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t * ps_ngbr_avbl,WORD8 i1_pel_pos_x,WORD8 i1_pel_pos_y)125 UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, WORD8 i1_pel_pos_x,
126 WORD8 i1_pel_pos_y)
127 {
128 UWORD8 u1_neighbor_avail = 0;
129
130 /**********************************************************************/
131 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */
132 /* various columns of a macroblock */
133 /* */
134 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */
135 /* various rows of a macroblock */
136 /* */
137 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */
138 /* outside the bound of an mb ie., represents its neighbors. */
139 /**********************************************************************/
140 if(i1_pel_pos_x < 0)
141 { /* column(-1) */
142 if(i1_pel_pos_y < 0)
143 { /* row(-1) */
144 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
145 }
146 else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
147 { /* all rows of a macroblock */
148 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
149 }
150 else /* if (i1_pel_pos_y >= 16) */
151 { /* rows(+16) */
152 u1_neighbor_avail = 0; /* current mb bottom left availability */
153 }
154 }
155 else if(i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
156 { /* all columns of a macroblock */
157 if(i1_pel_pos_y < 0)
158 { /* row(-1) */
159 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
160 }
161 else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
162 { /* all rows of a macroblock */
163 u1_neighbor_avail = 1; /* current mb availability */
164 /* availability of the partition is dependent on the position of the
165 * partition inside the mb */
166 /* although the availability is declared as 1 in all cases these needs to
167 * be corrected somewhere else and this is not done in here */
168 }
169 else /* if (i1_pel_pos_y >= 16) */
170 { /* rows(+16) */
171 u1_neighbor_avail = 0; /* current mb bottom availability */
172 }
173 }
174 else if(i1_pel_pos_x >= 16)
175 { /* column(+16) */
176 if(i1_pel_pos_y < 0)
177 { /* row(-1) */
178 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
179 }
180 else /* if (i1_pel_pos_y >= 0) */
181 { /* all other rows */
182 u1_neighbor_avail = 0; /* current mb right & bottom right availability */
183 }
184 }
185
186 return u1_neighbor_avail;
187 }
188
189 /**
190 ******************************************************************************
191 *
192 * @brief
193 * evaluate best intra 16x16 mode (rate distortion opt off)
194 *
195 * @par Description
196 * This function evaluates all the possible intra 16x16 modes and finds the mode
197 * that best represents the macro-block (least distortion) and occupies fewer
198 * bits in the bit-stream.
199 *
* @param[in] ps_proc
*  pointer to process context (handle)
202 *
* @remarks
*  Ideally the cost of encoding a macroblock is calculated as
*  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
*  input block and the reconstructed block, and rate is the number of bits
*  taken to place the macroblock in the bit-stream. In this routine the rate
*  does not represent the total number of bits; it represents only the header
*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
*  and residual bits fall under texture bits, the number of bits taken to
*  encode the mbtype is considered as the rate when computing the cost.
*  Further, the distortion is approximated as the deviation between the input
*  and the predicted block, as opposed to the input and the reconstructed
*  block.
214 *
215 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
216 * the SAD and cost are one and the same.
217 *
218 * @return none
219 *
220 ******************************************************************************
221 */
222
void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
{
    /* Bitmask of intra 16x16 modes to evaluate, looked up with the 3-bit */
    /* neighbour availability word built below (bit0 = left, bit1 = top left, */
    /* bit2 = top) */
    const UWORD8 au1_candidate_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};

    /* Codec context and the memory helpers it provides */
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
    mem_fxns_t *ps_mem_fxns = &ps_codec->s_isa_dependent_fxns.s_mem_fxns;

    /* constrained intra prediction setting of the current spatial layer */
    const UWORD32 u4_cip = ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];

    /* lambda */
    const UWORD32 u4_lambda = ps_proc->u4_lambda;

    /* component 0 of the source and of the reconstruction buffers */
    UWORD8 *pu1_src = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
    UWORD8 *pu1_rec = ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data;

    /* strides */
    const WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
    const WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
    const WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* left, top and top left neighbours, taken from the reconstruction buffer */
    UWORD8 *pu1_left = pu1_rec - 1;
    UWORD8 *pu1_top = pu1_rec - i4_rec_strd;
    UWORD8 *pu1_top_left = pu1_top - 1;

    /* scratch array for the neighbour pels and the prediction buffers */
    UWORD8 *pu1_nbr_pels = ps_proc->au1_ngbr_pels;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_16x16;
    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;

    /* syntax info of the mb on top of the current one */
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;

    /* neighbour availability */
    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok;
    WORD32 i4_avail;

    /* mode search state */
    UWORD32 u4_mode_mask;
    UWORD32 u4_mode;
    UWORD32 u4_best_mode = DC_I16x16;
    WORD32 i4_best_sad = INT_MAX;
    WORD32 i4_best_cost = INT_MAX;

    /* offset applied to the mode when indexing the uev code length table, */
    /* and selector of the SAD routine; both stay 0 for I slices */
    UWORD32 u4_mbtype_offset = 0;
    UWORD32 u4_fast_sad = 0;

    UWORD32 u4_row;

    if(ps_proc->i4_slice_type != ISLICE)
    {
        if(ps_proc->i4_slice_type == PSLICE)
        {
            u4_mbtype_offset = 5;
        }
        else
        {
            u4_mbtype_offset = 23;
        }

        u4_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
    }

    /* A neighbour is usable when it is available and, with constrained intra */
    /* prediction enabled, it is additionally an intra mb that does not have */
    /* the base mode flag set. Unusable neighbour pels are zero filled. */

    /* left pels, stored bottom-up in entries 0..15 */
    u1_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_a) &&
                 (!u4_cip || (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
                              !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag));
    if(!u1_left_ok)
    {
        ps_mem_fxns->pf_mem_set_mul8(pu1_nbr_pels, 0, MB_SIZE);
    }
    else
    {
        for(u4_row = 0; u4_row < 16; u4_row++)
        {
            pu1_nbr_pels[15 - u4_row] = pu1_left[u4_row * i4_rec_strd];
        }
    }

    /* top pels, stored in entries 17..32 */
    u1_top_ok = (ps_proc->ps_ngbr_avbl->u1_mb_b) &&
                (!u4_cip || (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag));
    if(!u1_top_ok)
    {
        ps_mem_fxns->pf_mem_set_mul8(pu1_nbr_pels + 17, 0, MB_SIZE);
    }
    else
    {
        ps_mem_fxns->pf_mem_cpy_mul8(pu1_nbr_pels + 17, pu1_top, 16);
    }

    /* top left pel, stored in entry 16 */
    u1_top_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_d) &&
                     (!u4_cip || (ps_top_mb[-1].u1_is_intra && !ps_top_mb[-1].u1_base_mode_flag));
    pu1_nbr_pels[16] = u1_top_left_ok ? *pu1_top_left : 0;

    /* pack the availability word and keep a copy in the process context */
    i4_avail = (u1_left_ok) + (u1_top_left_ok << 1) + (u1_top_ok << 2);
    ps_proc->i4_ngbr_avbl_16x16_mb = i4_avail;

    /* modes worth evaluating; the fast presets never try the plane mode */
    u4_mode_mask = au1_candidate_modes[i4_avail];
    if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
       ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
    {
        u4_mode_mask &= ~(1 << PLANE_I16x16);
    }

    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
    ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_src, pu1_nbr_pels, pu1_pred, i4_src_strd,
                                                  i4_pred_strd, i4_avail, &u4_mode, &i4_best_sad,
                                                  u4_mode_mask);

    /* no rate term yet: the running cost is the distortion alone */
    i4_best_cost = i4_best_sad;

    /* the plane mode (bit 3 of the mask) is evaluated separately, into its */
    /* own prediction buffer */
    if(u4_mode_mask & (1u << 3))
    {
        WORD32 i4_plane_sad = INT_MAX;

        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_nbr_pels, pu1_pred_plane, 0,
                                                      i4_pred_strd, i4_avail);

        /* distortion between the source mb and the plane prediction; the */
        /* best cost so far is handed to the SAD routine */
        ps_codec->apf_compute_sad_16x16[u4_fast_sad](pu1_src, pu1_pred_plane, i4_src_strd,
                                                     i4_pred_strd, i4_best_cost, &i4_plane_sad);

        /* take the plane mode only if it is strictly better */
        if(i4_plane_sad < i4_best_sad)
        {
            u4_mode = PLANE_I16x16;
            i4_best_cost = i4_plane_sad;
            i4_best_sad = i4_plane_sad;
        }
    }

    u4_best_mode = u4_mode;

    DEBUG("%d partition cost, %d intra mode\n", i4_best_cost * 32, u4_best_mode);

    ps_proc->u1_l_i16_mode = u4_best_mode;

    /* cost = distortion + lambda*rate */
    i4_best_cost = i4_best_sad + u4_lambda * u1_uev_codelength[u4_mbtype_offset + u4_best_mode];

    /* switch the mb to I16x16 if that beats the best cost recorded so far */
    if(i4_best_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_best_cost;
        ps_proc->i4_mb_distortion = i4_best_sad;
        ps_proc->ps_mb_info->u2_mb_type = I16x16;
    }
}
390
391 /**
392 ******************************************************************************
393 *
394 * @brief
*  evaluate best intra 8x8 mode (rate distortion opt off)
396 *
397 * @par Description
398 * This function evaluates all the possible intra 8x8 modes and finds the mode
399 * that best represents the macro-block (least distortion) and occupies fewer
400 * bits in the bit-stream.
401 *
* @param[in] ps_proc
*  pointer to process context (handle)
404 *
* @remarks
*  Ideally the cost of encoding a macroblock is calculated as
*  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
*  input block and the reconstructed block, and rate is the number of bits
*  taken to place the macroblock in the bit-stream. In this routine the rate
*  does not represent the total number of bits; it represents only the header
*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
*  and residual bits fall under texture bits, the number of bits taken to
*  encode the mbtype is considered as the rate when computing the cost.
*  Further, the distortion is approximated as the deviation between the input
*  and the predicted block, as opposed to the input and the reconstructed
*  block.
415 *
416 * NOTE: TODO: This function needs to be tested
417 *
418 * @return none
419 *
420 ******************************************************************************
421 */
void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
{
    /* Codec context */
    isvce_codec_t *ps_codec = ps_proc->ps_codec;

    /* constrained intra prediction setting of the current spatial layer */
    const UWORD32 u4_cip = ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];

    /* rate terms: lambda when a mode equals the estimated mode, */
    /* 4 * lambda for any other mode */
    const UWORD32 u4_lambda = ps_proc->u4_lambda;
    const UWORD32 u4_rate_estimated_mode = u4_lambda;
    const UWORD32 u4_rate_other_mode = 4 * u4_lambda;

    /* component 0 of the source buffer and the strides */
    UWORD8 *pu1_src_mb = (UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
    const WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
    const WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* scratch array for the neighbour pels and the prediction buffer */
    UWORD8 *pu1_nbr_pels = ps_proc->au1_ngbr_pels;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb;

    /* syntax info and intra modes of the mbs in the row above. The top right */
    /* entry is only dereferenced when the top right mb is flagged available. */
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
    isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
    UWORD8 *pu1_top_mb_modes =
        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;

    /* neighbour availability: of the whole mb, and of the current 8x8 block */
    block_neighbors_t s_mb_nbrs;
    block_neighbors_t s_blk_nbrs;

    /* distortion of the mode under test, of the best mode of a block, and */
    /* the sum over the mb */
    WORD32 i4_sad;
    WORD32 i4_best_sad = INT_MAX;
    WORD32 i4_i8x8_sad = 0;

    /* cost of the whole mb as I8x8; starts at lambda */
    WORD32 i4_i8x8_cost = u4_lambda;

    /* best mode of the block most recently evaluated */
    UWORD32 u4_best_mode = DC_I8x8;

    UWORD32 u4_blk;

    /* A neighbouring mb is usable when it is available and, with constrained */
    /* intra prediction enabled, it is additionally an intra mb that does not */
    /* have the base mode flag set. */

    /* left */
    s_mb_nbrs.u1_mb_a = (ps_proc->ps_ngbr_avbl->u1_mb_a) &&
                        (!u4_cip || (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
                                     !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag));

    /* top */
    s_mb_nbrs.u1_mb_b = (ps_proc->ps_ngbr_avbl->u1_mb_b) &&
                        (!u4_cip || (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag));

    /* top left */
    s_mb_nbrs.u1_mb_d =
        (ps_proc->ps_ngbr_avbl->u1_mb_d) &&
        (!u4_cip || (ps_top_mb[-1].u1_is_intra && !ps_top_mb[-1].u1_base_mode_flag));

    /* top right */
    s_mb_nbrs.u1_mb_c =
        (ps_proc->ps_ngbr_avbl->u1_mb_c) &&
        (!u4_cip || (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag));

    for(u4_blk = 0; u4_blk < 4; u4_blk++)
    {
        /* pel offset of this 8x8 block inside the mb (raster order) */
        const UWORD32 u4_x = (u4_blk & 0x01) << 3;
        const UWORD32 u4_y = (u4_blk >> 1) << 3;

        /* when rdopt is off, the input is used as reference for constructing */
        /* the prediction, as opposed to the recon pels (open loop intra */
        /* prediction) */
        UWORD8 *pu1_blk = pu1_src_mb + u4_x + (u4_y * i4_src_strd);
        UWORD8 *pu1_left = pu1_blk - 1;
        UWORD8 *pu1_top = pu1_blk - i4_src_strd;
        UWORD8 *pu1_top_left = pu1_top - 1;

        WORD32 i4_avail;
        WORD32 i4_best_cost = INT_MAX;
        UWORD32 u4_mode_mask = 0x1ff;
        UWORD32 u4_estimated_mode = DC_I8x8;
        UWORD32 u4_mode;

        /* availability of the block's neighbours: */
        /* a (-1, 0), b (0, -1), c (+8, -1), d (-1, -1) */
        s_blk_nbrs.u1_mb_a = isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x - 1, u4_y);
        s_blk_nbrs.u1_mb_b = isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x, u4_y - 1);
        s_blk_nbrs.u1_mb_c =
            isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x + 8, u4_y - 1);
        s_blk_nbrs.u1_mb_d =
            isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x - 1, u4_y - 1);

        /* bit0 = a, bit1 = d, bit2 = b, bit3 = c; a is repeated in bit4 */
        i4_avail = (s_blk_nbrs.u1_mb_a) + (s_blk_nbrs.u1_mb_d << 1) + (s_blk_nbrs.u1_mb_b << 2) +
                   (s_blk_nbrs.u1_mb_c << 3) + (s_blk_nbrs.u1_mb_a << 4);
        ps_proc->ai4_neighbor_avail_8x8_subblks[u4_blk] = i4_avail;

        /* build the neighbour pel array used by the 8x8 predictors */
        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_left, pu1_top, pu1_top_left, pu1_nbr_pels,
                                                     i4_src_strd, i4_avail);

        /* start from all nine modes and drop those whose neighbours are missing */
        if(!s_blk_nbrs.u1_mb_b)
        {
            u4_mode_mask &= ~((1 << VERT_I4x4) | (1 << DIAG_DL_I4x4) | (1 << VERT_L_I4x4));
        }
        if(!s_blk_nbrs.u1_mb_a)
        {
            u4_mode_mask &= ~((1 << HORZ_I4x4) | (1 << HORZ_U_I4x4));
        }
        if(!(s_blk_nbrs.u1_mb_a && s_blk_nbrs.u1_mb_b && s_blk_nbrs.u1_mb_d))
        {
            u4_mode_mask &= ~((1 << DIAG_DR_I4x4) | (1 << VERT_R_I4x4) | (1 << HORZ_D_I4x4));
        }

        /* estimate the mode of this block (for evaluating cost): DC unless */
        /* both left and top are available, else the smaller of the two */
        /* neighbouring modes */
        if(s_blk_nbrs.u1_mb_a && s_blk_nbrs.u1_mb_b)
        {
            UWORD32 u4_left_mode = DC_I8x8;
            UWORD32 u4_top_mode = DC_I8x8;

            if(u4_x != 0)
            {
                /* left block belongs to the current mb */
                u4_left_mode = ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk - 1];
            }
            else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
            {
                u4_left_mode =
                    ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[u4_blk + 1];
            }
            else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
            {
                u4_left_mode = ps_proc->s_nbr_info.ps_left_mb_intra_modes
                                   ->au1_intra_modes[(u4_blk + 1) * 4 + 2];
            }

            if(u4_y != 0)
            {
                /* top block belongs to the current mb */
                u4_top_mode = ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk - 2];
            }
            else if(ps_top_mb->u2_mb_type == I8x8)
            {
                u4_top_mode = pu1_top_mb_modes[u4_blk + 2];
            }
            else if(ps_top_mb->u2_mb_type == I4x4)
            {
                u4_top_mode = pu1_top_mb_modes[(u4_blk + 2) * 4 + 2];
            }

            u4_estimated_mode = MIN(u4_left_mode, u4_top_mode);
        }

        /* walk the mask one bit per mode, starting at VERT_I8x8 */
        u4_mode = VERT_I8x8;
        while(u4_mode_mask != 0)
        {
            if(u4_mode_mask & 1)
            {
                WORD32 i4_cost;

                /* intra prediction */
                (ps_codec->apf_intra_pred_8_l)[u4_mode](pu1_nbr_pels, pu1_pred, 0, i4_pred_strd,
                                                        i4_avail);

                /* distortion between the source block and the prediction; */
                /* the best cost so far is handed to the SAD routine */
                ime_compute_sad_8x8(pu1_blk, pu1_pred, i4_src_strd, i4_pred_strd, i4_best_cost,
                                    &i4_sad);

                /* cost = distortion + lambda*rate */
                i4_cost = i4_sad + ((u4_mode == u4_estimated_mode) ? u4_rate_estimated_mode
                                                                   : u4_rate_other_mode);

                /* keep the cheapest mode */
                if(i4_cost < i4_best_cost)
                {
                    i4_best_cost = i4_cost;
                    i4_best_sad = i4_sad;
                    u4_best_mode = u4_mode;
                }
            }

            u4_mode++;
            u4_mode_mask >>= 1;
        }

        /* accumulate into the mb totals and record the block's mode */
        i4_i8x8_cost += i4_best_cost;
        i4_i8x8_sad += i4_best_sad;
        ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk] = u4_best_mode;
    }

    /* switch the mb to I8x8 if that beats the best cost recorded so far */
    if(i4_i8x8_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_i8x8_cost;
        ps_proc->i4_mb_distortion = i4_i8x8_sad;
        ps_proc->ps_mb_info->u2_mb_type = I8x8;
    }
}
671
672 /**
673 ******************************************************************************
674 *
675 * @brief
676 * evaluate best intra 4x4 mode (rate distortion opt off)
677 *
678 * @par Description
679 * This function evaluates all the possible intra 4x4 modes and finds the mode
680 * that best represents the macro-block (least distortion) and occupies fewer
681 * bits in the bit-stream.
682 *
683 * @param[in] ps_proc_ctxt
684 * pointer to proc ctxt
685 *
686 * @remarks
687 * Ideally the cost of encoding a macroblock is calculated as
688 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
689 * input block and the reconstructed block and rate is the number of bits taken
690 * to place the macroblock in the bit-stream. In this routine the rate does not
691 * exactly point to the total number of bits it takes, rather it points to
692 *header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
693 *bits and residual bits fall in to texture bits the number of bits taken to
694 *encoding mbtype is considered as rate, we compute cost. Further we will
695 *approximate the distortion as the deviation b/w input and the predicted block
696 *as opposed to input and reconstructed block.
697 *
698 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
699 * 24*lambda is added to the SAD before comparison with the best SAD for
700 * inter prediction. This is an empirical value to prevent using too many intra
701 * blocks.
702 *
703 * @return none
704 *
705 ******************************************************************************
706 */
isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t * ps_proc)707 void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
708 {
709 /* Codec Context */
710 isvce_codec_t *ps_codec = ps_proc->ps_codec;
711
712 /* SAD(distortion metric) of an 4x4 block */
713 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
714
715 /* lambda */
716 UWORD32 u4_lambda = ps_proc->u4_lambda;
717
718 /* cost = distortion + lambda*rate */
719 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
720
721 /* cost due to mbtype */
722 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
723
724 /* intra mode */
725 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
726
727 /* neighbor pels for intra prediction */
728 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
729
730 /* pointer to curr partition */
731 UWORD8 *pu1_mb_curr;
732
733 /* pointer to prediction macro block */
734 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
735
736 /* strides */
737 WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
738 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
739
740 /* neighbors left, top, top right, top left */
741 UWORD8 *pu1_mb_a;
742 UWORD8 *pu1_mb_b;
743 UWORD8 *pu1_mb_c;
744 UWORD8 *pu1_mb_d;
745
746 /* neighbor availability */
747 WORD32 i4_ngbr_avbl;
748 block_neighbors_t s_ngbr_avbl;
749
750 /* temp vars */
751 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
752
753 /* ngbr sub mb modes */
754 UWORD8 *pu1_top_mb_intra_modes =
755 ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
756 isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
757 isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
758
759 /* valid intra modes map */
760 UWORD32 u4_valid_intra_modes;
761 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
762
763 UWORD32 u4_constrained_intra_pred =
764 ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
765 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
766 if(ps_proc->ps_ngbr_avbl->u1_mb_c)
767 {
768 ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;
769 }
770 /* left pels */
771 u1_mb_a =
772 ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
773 (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
774 !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
775 : 1));
776
777 /* top pels */
778 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
779 (u4_constrained_intra_pred
780 ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
781 : 1));
782
783 /* topleft pels */
784 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
785 (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
786 !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
787 : 1));
788
789 /* top right */
790 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
791 (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra &&
792 !ps_top_right_mb_syn_ele->u1_base_mode_flag)
793 : 1));
794
795 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
796 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
797
798 for(b8 = 0; b8 < 4; b8++)
799 {
800 u4_blk_x = (b8 & 0x01) << 3;
801 u4_blk_y = (b8 >> 1) << 3;
802 for(b4 = 0; b4 < 4; b4++)
803 {
804 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
805 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
806
807 pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
808 u4_pix_x + (u4_pix_y * i4_src_strd);
809 /* when rdopt is off, we use the input as reference for constructing
810 * prediction buffer */
811 /* as opposed to using the recon pels. (open loop intra prediction) */
812 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
813 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
814 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
815 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
816
817 /* locating neighbors that are available for prediction */
818 /* TODO : update the neighbor availability information basing on
819 * constrained intra pred information */
820 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
821 * split in to distinct routines */
822 /* basing on neighbors available and hence evade the computation of
823 * neighbor availability totally. */
824
825 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
826 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
827 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
828 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
829 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
830 /* set valid intra modes for evaluation */
831 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
832
833 /* if top partition is available and top right is not available for intra
834 * prediction, then */
835 /* padd top right samples using top sample and make top right also
836 * available */
837 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
838 * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
839 * s_ngbr_avbl.u1_mb_c) << 3); */
840
841 /* gather prediction pels from the neighbors */
842 if(s_ngbr_avbl.u1_mb_a)
843 {
844 for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_src_strd];
845 }
846 else
847 {
848 memset(pu1_ngbr_pels_i4, 0, 4);
849 }
850
851 if(s_ngbr_avbl.u1_mb_b)
852 {
853 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
854 }
855 else
856 {
857 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
858 }
859
860 if(s_ngbr_avbl.u1_mb_d)
861 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
862 else
863 pu1_ngbr_pels_i4[4] = 0;
864
865 if(s_ngbr_avbl.u1_mb_c)
866 {
867 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
868 }
869 else if(s_ngbr_avbl.u1_mb_b)
870 {
871 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
872 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
873 }
874
875 i4_partition_cost_least = INT_MAX;
876
877 /* predict the intra 4x4 mode for the current partition (for evaluating
878 * cost) */
879 if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
880 {
881 u4_estimated_intra_4x4_mode = DC_I4x4;
882 }
883 else
884 {
885 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
886 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
887
888 if(u4_pix_x == 0)
889 {
890 if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
891 {
892 u4_left_intra_4x4_mode =
893 ps_proc->s_nbr_info.ps_left_mb_intra_modes
894 ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
895 }
896 else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
897 {
898 u4_left_intra_4x4_mode =
899 ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
900 }
901 }
902 else
903 {
904 u4_left_intra_4x4_mode =
905 ps_proc->au1_intra_luma_mb_4x4_modes
906 [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
907 }
908
909 if(u4_pix_y == 0)
910 {
911 if(ps_top_mb_syn_ele->u2_mb_type == I4x4)
912 {
913 u4_top_intra_4x4_mode =
914 pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
915 }
916 else if(ps_top_mb_syn_ele->u2_mb_type == I8x8)
917 {
918 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
919 }
920 }
921 else
922 {
923 u4_top_intra_4x4_mode =
924 ps_proc->au1_intra_luma_mb_4x4_modes
925 [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
926 }
927
928 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
929 }
930
931 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
932 u4_estimated_intra_4x4_mode;
933
934 /* mode evaluation and prediction */
935 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
936 pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
937 &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
938 u4_estimated_intra_4x4_mode);
939
940 i4_partition_distortion_least =
941 i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
942 ? u4_cost_one_bit
943 : u4_cost_four_bits);
944
945 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
946 u4_best_intra_4x4_mode);
947 /* macroblock distortion */
948 i4_total_distortion += i4_partition_distortion_least;
949 i4_total_cost += i4_partition_cost_least;
950 /* mb partition mode */
951 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
952 }
953 }
954
955 /* update the type of the mb if necessary */
956 if(i4_total_cost < ps_proc->i4_mb_cost)
957 {
958 ps_proc->i4_mb_cost = i4_total_cost;
959 ps_proc->i4_mb_distortion = i4_total_distortion;
960 ps_proc->ps_mb_info->u2_mb_type = I4x4;
961 }
962 }
963
964 /**
965 ******************************************************************************
966 *
967 * @brief evaluate best intra 4x4 mode (rate distortion opt on)
968 *
969 * @par Description
970 * This function evaluates all the possible intra 4x4 modes and finds the mode
971 * that best represents the macro-block (least distortion) and occupies fewer
972 * bits in the bit-stream.
973 *
974 * @param[in] ps_proc_ctxt
975 * pointer to proc ctxt
976 *
977 * @remarks
978 * Ideally the cost of encoding a macroblock is calculated as
979 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
980 * input block and the reconstructed block and rate is the number of bits taken
981 * to place the macroblock in the bit-stream. In this routine the rate does not
982 * exactly point to the total number of bits it takes, rather it points to
983 *header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
984 *bits and residual bits fall in to texture bits the number of bits taken to
985 *encoding mbtype is considered as rate, we compute cost. Further we will
986 *approximate the distortion as the deviation b/w input and the predicted block
987 *as opposed to input and reconstructed block.
988 *
989 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
990 * 24*lambda is added to the SAD before comparison with the best SAD for
991 * inter prediction. This is an empirical value to prevent using too many intra
992 * blocks.
993 *
994 * @return none
995 *
996 ******************************************************************************
997 */
isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t * ps_proc)998 void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc)
999 {
1000 block_neighbors_t s_ngbr_avbl;
1001 buffer_container_t s_src;
1002 buffer_container_t s_pred;
1003 buffer_container_t s_recon;
1004 buffer_container_t s_quant_coeffs;
1005 buffer_container_t s_res_pred;
1006
1007 /* neighbors left, top, top right, top left */
1008 UWORD8 *pu1_mb_a;
1009 UWORD8 *pu1_mb_b;
1010 UWORD8 *pu1_mb_c;
1011 UWORD8 *pu1_mb_d;
1012 UWORD8 *pu1_mb_curr;
1013 UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
1014 UWORD8 *pu1_ref_mb_intra_4x4;
1015 WORD32 i4_ref_strd_left, i4_ref_strd_top;
1016 WORD32 i4_ngbr_avbl;
1017 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
1018 /* valid intra modes map */
1019 UWORD32 u4_valid_intra_modes;
1020 /* Dummy variable for 4x4 trans function */
1021 WORD16 i2_dc_dummy;
1022 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
1023
1024 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1025 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1026 isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1027 isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1028 isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1029 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1030 enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
1031 resi_trans_quant_constants_t s_resi_trans_quant_constants = {
1032 .pu2_scale_matrix = ps_qp_params->pu2_scale_mat,
1033 .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat,
1034 .u4_qbits = ps_qp_params->u1_qbits,
1035 .u4_round_factor = ps_qp_params->u4_dead_zone};
1036 iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
1037 .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
1038 .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
1039 .u4_qp_div_6 = ps_qp_params->u1_qp_div};
1040
1041 const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
1042 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
1043 UWORD32 u4_lambda = ps_proc->u4_lambda;
1044 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
1045 /* cost due to mbtype */
1046 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
1047 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
1048 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1049 WORD16 *pi2_quant_coeffs = ps_proc->pi2_res_buf_intra_4x4;
1050 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1051 WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
1052 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1053 UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4;
1054 UWORD8 *pu1_top_mb_intra_modes =
1055 ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
1056 UWORD32 u4_constrained_intra_pred =
1057 ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
1058 UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0);
1059 UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0);
1060
1061 s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
1062
1063 /* compute ngbr availability for sub blks */
1064 if(ps_proc->ps_ngbr_avbl->u1_mb_c)
1065 {
1066 ps_top_right_mb = ps_top_mb + 1;
1067 }
1068
1069 /* left pels */
1070 u1_mb_a =
1071 ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
1072 (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
1073 !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
1074 : 1));
1075
1076 /* top pels */
1077 u1_mb_b =
1078 ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
1079 (u4_constrained_intra_pred ? (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag)
1080 : 1));
1081
1082 /* topleft pels */
1083 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
1084 (u4_constrained_intra_pred
1085 ? (ps_top_left_mb->u1_is_intra && !ps_top_left_mb->u1_base_mode_flag)
1086 : 1));
1087
1088 /* top right pels */
1089 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
1090 (u4_constrained_intra_pred
1091 ? (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag)
1092 : 1));
1093
1094 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
1095 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
1096
1097 for(b8 = 0; b8 < 4; b8++)
1098 {
1099 u4_blk_x = (b8 & 0x01) << 3;
1100 u4_blk_y = (b8 >> 1) << 3;
1101 for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_quant_coeffs += MB_SIZE)
1102 {
1103 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
1104 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
1105
1106 pu1_ref_mb_intra_4x4 =
1107 ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
1108 pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
1109 u4_pix_x + (u4_pix_y * i4_src_strd);
1110 pu1_pred_mb = ps_proc->pu1_pred_mb + u4_pix_x + (u4_pix_y * i4_pred_strd);
1111 if(u4_pix_x == 0)
1112 {
1113 i4_ref_strd_left = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1114 pu1_mb_ref_left =
1115 ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
1116 (u4_pix_y * i4_ref_strd_left);
1117 }
1118 else
1119 {
1120 i4_ref_strd_left = i4_pred_strd;
1121 pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
1122 }
1123 if(u4_pix_y == 0)
1124 {
1125 i4_ref_strd_top = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1126 pu1_mb_ref_top =
1127 ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
1128 (u4_pix_y * i4_ref_strd_top);
1129 }
1130 else
1131 {
1132 i4_ref_strd_top = i4_pred_strd;
1133 pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
1134 }
1135
1136 pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */
1137 pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
1138 pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
1139 if(u4_pix_y == 0)
1140 pu1_mb_d = pu1_mb_b - 1;
1141 else
1142 pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
1143
1144 /* locating neighbors that are available for prediction */
1145 /* TODO : update the neighbor availability information basing on
1146 * constrained intra pred information */
1147 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
1148 * split in to distinct routines */
1149 /* basing on neighbors available and hence evade the computation of
1150 * neighbor availability totally. */
1151
1152 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1153 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
1154 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
1155 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
1156 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
1157 /* set valid intra modes for evaluation */
1158 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
1159
1160 /* if top partition is available and top right is not available for intra
1161 * prediction, then */
1162 /* padd top right samples using top sample and make top right also
1163 * available */
1164 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
1165 * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
1166 * s_ngbr_avbl.u1_mb_c) << 3); */
1167
1168 /* gather prediction pels from the neighbors */
1169 if(s_ngbr_avbl.u1_mb_a)
1170 {
1171 for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_ref_strd_left];
1172 }
1173 else
1174 {
1175 memset(pu1_ngbr_pels_i4, 0, 4);
1176 }
1177 if(s_ngbr_avbl.u1_mb_b)
1178 {
1179 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1180 }
1181 else
1182 {
1183 memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
1184 }
1185 if(s_ngbr_avbl.u1_mb_d)
1186 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1187 else
1188 pu1_ngbr_pels_i4[4] = 0;
1189 if(s_ngbr_avbl.u1_mb_c)
1190 {
1191 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1192 }
1193 else if(s_ngbr_avbl.u1_mb_b)
1194 {
1195 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1196 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
1197 }
1198
1199 i4_partition_cost_least = INT_MAX;
1200
1201 /* predict the intra 4x4 mode for the current partition (for evaluating
1202 * cost) */
1203 if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
1204 {
1205 u4_estimated_intra_4x4_mode = DC_I4x4;
1206 }
1207 else
1208 {
1209 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
1210 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
1211
1212 if(u4_pix_x == 0)
1213 {
1214 if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
1215 {
1216 u4_left_intra_4x4_mode =
1217 ps_proc->s_nbr_info.ps_left_mb_intra_modes
1218 ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
1219 }
1220 else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
1221 {
1222 u4_left_intra_4x4_mode =
1223 ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
1224 }
1225 }
1226 else
1227 {
1228 u4_left_intra_4x4_mode =
1229 ps_proc->au1_intra_luma_mb_4x4_modes
1230 [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
1231 }
1232
1233 if(u4_pix_y == 0)
1234 {
1235 if(ps_top_mb->u2_mb_type == I4x4)
1236 {
1237 u4_top_intra_4x4_mode =
1238 pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
1239 }
1240 else if(ps_top_mb->u2_mb_type == I8x8)
1241 {
1242 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
1243 }
1244 }
1245 else
1246 {
1247 u4_top_intra_4x4_mode =
1248 ps_proc->au1_intra_luma_mb_4x4_modes
1249 [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
1250 }
1251
1252 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
1253 }
1254
1255 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
1256 u4_estimated_intra_4x4_mode;
1257
1258 /*mode evaluation and prediction*/
1259 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
1260 pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
1261 &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
1262 u4_estimated_intra_4x4_mode);
1263
1264 i4_partition_distortion_least =
1265 i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
1266 ? u4_cost_one_bit
1267 : u4_cost_four_bits);
1268
1269 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
1270 u4_best_intra_4x4_mode);
1271
1272 /* macroblock distortion */
1273 i4_total_distortion += i4_partition_distortion_least;
1274 i4_total_cost += i4_partition_cost_least;
1275
1276 /* mb partition mode */
1277 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1278
1279 /********************************************************/
1280 /* error estimation, */
1281 /* transform */
1282 /* quantization */
1283 /********************************************************/
1284 s_src.pv_data = pu1_mb_curr;
1285 s_src.i4_data_stride = i4_src_strd;
1286
1287 s_pred.pv_data = pu1_pred_mb;
1288 s_pred.i4_data_stride = i4_pred_strd;
1289
1290 s_quant_coeffs.pv_data = pi2_quant_coeffs;
1291 s_quant_coeffs.i4_data_stride = 4;
1292
1293 ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
1294 &s_src, &s_pred, &s_quant_coeffs, &s_res_pred,
1295 /* No op stride, this implies a buff of lenght 1x16 */
1296 &s_resi_trans_quant_constants, pu1_nnz, &i2_dc_dummy, 0);
1297
1298 /********************************************************/
1299 /* ierror estimation, */
1300 /* itransform */
1301 /* iquantization */
1302 /********************************************************/
1303
1304 /* Tx blk coeffs are stored blk by blk */
1305 /* Hence, in order to access rows of each Tx blk, one needs to stride of
1306 * TxxSize */
1307 s_quant_coeffs.i4_data_stride = 4;
1308
1309 s_recon.pv_data = pu1_ref_mb_intra_4x4;
1310 s_recon.i4_data_stride = i4_pred_strd;
1311
1312 ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
1313 &s_quant_coeffs, &s_pred, &s_res_pred, &s_res_pred, &s_recon,
1314 &s_iq_it_res_rec_constants, ps_proc->pv_scratch_buff, s_quant_coeffs.pv_data, 0, 0);
1315 }
1316 }
1317
1318 /* update the type of the mb if necessary */
1319 if(i4_total_cost < ps_proc->i4_mb_cost)
1320 {
1321 ps_proc->i4_mb_cost = i4_total_cost;
1322 ps_proc->i4_mb_distortion = i4_total_distortion;
1323 ps_proc->ps_mb_info->u2_mb_type = I4x4;
1324 }
1325 }
1326
1327 /**
1328 ******************************************************************************
1329 *
1330 * @brief
1331 * evaluate best chroma intra 8x8 mode (rate distortion opt off)
1332 *
1333 * @par Description
1334 * This function evaluates all the possible chroma intra 8x8 modes and finds
1335 * the mode that best represents the macroblock (least distortion) and occupies
1336 * fewer bits in the bitstream.
1337 *
1338 * @param[in] ps_proc_ctxt
1339 * pointer to macroblock context (handle)
1340 *
1341 * @remarks
1342 * For chroma best intra pred mode is calculated based only on SAD
1343 *
1344 * @returns none
1345 *
1346 ******************************************************************************
1347 */
1348
isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t * ps_proc)1349 void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
1350 {
1351 /* Codec Context */
1352 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1353 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1354 mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
1355
1356 /* SAD(distortion metric) of an 8x8 block */
1357 WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
1358
1359 /* intra mode */
1360 UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
1361
1362 /* neighbor pels for intra prediction */
1363 UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
1364
1365 /* pointer to curr macro block */
1366 UWORD8 *pu1_curr_mb = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data);
1367 UWORD8 *pu1_ref_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data);
1368
1369 /* pointer to prediction macro block */
1370 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
1371 UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
1372
1373 /* strides */
1374 WORD32 i4_src_strd_c = ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride;
1375 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1376 WORD32 i4_rec_strd_c = ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride;
1377
1378 /* neighbors left, top, top left */
1379 UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
1380 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
1381 UWORD8 *pu1_mb_d = pu1_mb_b - 2;
1382
1383 /* neighbor availability */
1384 const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15};
1385 WORD32 i4_ngbr_avbl;
1386
1387 /* valid intra modes map */
1388 UWORD32 u4_valid_intra_modes;
1389 isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1390
1391 /* temp var */
1392 UWORD8 i;
1393 UWORD32 u4_constrained_intra_pred =
1394 ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
1395 UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
1396 /* locating neighbors that are available for prediction */
1397
1398 /* gather prediction pels from the neighbors */
1399 /* left pels */
1400 u1_mb_a =
1401 ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
1402 (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
1403 !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
1404 : 1));
1405 if(u1_mb_a)
1406 {
1407 for(i = 0; i < 16; i += 2)
1408 {
1409 pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
1410 pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
1411 }
1412 }
1413 else
1414 {
1415 ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
1416 }
1417
1418 /* top pels */
1419 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
1420 (u4_constrained_intra_pred
1421 ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
1422 : 1));
1423 if(u1_mb_b)
1424 {
1425 ps_mem_fxns->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
1426 }
1427 else
1428 {
1429 ps_mem_fxns->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
1430 }
1431
1432 /* top left pels */
1433 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
1434 (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
1435 !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
1436 : 1));
1437 if(u1_mb_d)
1438 {
1439 pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
1440 pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
1441 }
1442 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
1443 ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
1444
1445 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
1446
1447 if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
1448 ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
1449 u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
1450
1451 i4_chroma_mb_distortion = INT_MAX;
1452
1453 /* perform intra mode chroma 8x8 evaluation */
1454 /* intra prediction */
1455 ps_codec->pf_ih264e_evaluate_intra_chroma_modes(
1456 pu1_curr_mb, pu1_ngbr_pels_c_i8x8, pu1_pred_mb, i4_src_strd_c, i4_pred_strd, i4_ngbr_avbl,
1457 &u4_best_chroma_intra_8x8_mode, &i4_chroma_mb_distortion, u4_valid_intra_modes);
1458
1459 if(u4_valid_intra_modes & 8) /* if Chroma PLANE is valid*/
1460 {
1461 (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0,
1462 i4_pred_strd, i4_ngbr_avbl);
1463
1464 /* evaluate distortion(sad) */
1465 ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd,
1466 i4_chroma_mb_distortion, &i4_mb_distortion);
1467
1468 /* update the least distortion information if necessary */
1469 if(i4_mb_distortion < i4_chroma_mb_distortion)
1470 {
1471 i4_chroma_mb_distortion = i4_mb_distortion;
1472 u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
1473 }
1474 }
1475
1476 DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion,
1477 u4_best_chroma_intra_8x8_mode);
1478
1479 ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
1480 }
1481
1482 /**
1483 ******************************************************************************
1484 *
1485 * @brief
1486 * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1487 * prediction.
1488 *
1489 * @par Description
1490 * This function evaluates first three 16x16 modes and compute corresponding sad
1491 * and return the buffer predicted with best mode.
1492 *
1493 * @param[in] pu1_src
1494 * UWORD8 pointer to the source
1495 *
1496 * @param[in] pu1_ngbr_pels_i16
1497 * UWORD8 pointer to neighbouring pels
1498 *
1499 * @param[out] pu1_dst
1500 * UWORD8 pointer to the destination
1501 *
1502 * @param[in] src_strd
1503 * integer source stride
1504 *
1505 * @param[in] dst_strd
1506 * integer destination stride
1507 *
1508 * @param[in] u4_n_avblty
1509 * availability of neighbouring pixels
1510 *
1511 * @param[in] u4_intra_mode
1512 * Pointer to the variable in which best mode is returned
1513 *
1514 * @param[in] pu4_sadmin
1515 * Pointer to the variable in which minimum sad is returned
1516 *
1517 * @param[in] u4_valid_intra_modes
1518 * Says what all modes are valid
1519 *
1520 * @returns none
1521 *
1522 ******************************************************************************
1523 */
isvce_evaluate_intra16x16_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels_i16,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)1524 void isvce_evaluate_intra16x16_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
1525 UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
1526 UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
1527 UWORD32 u4_valid_intra_modes)
1528 {
1529 UWORD8 *pu1_neighbour;
1530 UWORD8 *pu1_src_temp = pu1_src;
1531 UWORD8 left = 0, top = 0;
1532 WORD32 u4_dcval = 0;
1533 WORD32 i, j;
1534 WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
1535 UWORD8 val;
1536
1537 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1538 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1539
1540 /* left available */
1541 if(left)
1542 {
1543 i4_sad_horz = 0;
1544
1545 for(i = 0; i < 16; i++)
1546 {
1547 val = pu1_ngbr_pels_i16[15 - i];
1548
1549 u4_dcval += val;
1550
1551 for(j = 0; j < 16; j++)
1552 {
1553 i4_sad_horz += ABS(val - pu1_src_temp[j]);
1554 }
1555
1556 pu1_src_temp += src_strd;
1557 }
1558 u4_dcval += 8;
1559 }
1560
1561 pu1_src_temp = pu1_src;
1562 /* top available */
1563 if(top)
1564 {
1565 i4_sad_vert = 0;
1566
1567 for(i = 0; i < 16; i++)
1568 {
1569 u4_dcval += pu1_ngbr_pels_i16[17 + i];
1570
1571 for(j = 0; j < 16; j++)
1572 {
1573 i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
1574 }
1575 pu1_src_temp += src_strd;
1576 }
1577 u4_dcval += 8;
1578 }
1579
1580 u4_dcval = (u4_dcval) >> (3 + left + top);
1581
1582 pu1_src_temp = pu1_src;
1583
1584 /* none available */
1585 u4_dcval += (left == 0) * (top == 0) * 128;
1586
1587 i4_sad_dc = 0;
1588
1589 for(i = 0; i < 16; i++)
1590 {
1591 for(j = 0; j < 16; j++)
1592 {
1593 i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
1594 }
1595 pu1_src_temp += src_strd;
1596 }
1597
1598 if((u4_valid_intra_modes & 04) == 0) /* If DC is disabled */
1599 i4_sad_dc = INT_MAX;
1600
1601 if((u4_valid_intra_modes & 01) == 0) /* If VERT is disabled */
1602 i4_sad_vert = INT_MAX;
1603
1604 if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled */
1605 i4_sad_horz = INT_MAX;
1606
1607 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
1608
1609 /* Finding Minimum sad and doing corresponding prediction */
1610 if(i4_min_sad < *pu4_sadmin)
1611 {
1612 *pu4_sadmin = i4_min_sad;
1613 if(i4_min_sad == i4_sad_vert)
1614 {
1615 *u4_intra_mode = VERT_I16x16;
1616 pu1_neighbour = pu1_ngbr_pels_i16 + 17;
1617 for(j = 0; j < 16; j++)
1618 {
1619 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
1620 pu1_dst += dst_strd;
1621 }
1622 }
1623 else if(i4_min_sad == i4_sad_horz)
1624 {
1625 *u4_intra_mode = HORZ_I16x16;
1626 for(j = 0; j < 16; j++)
1627 {
1628 val = pu1_ngbr_pels_i16[15 - j];
1629 memset(pu1_dst, val, MB_SIZE);
1630 pu1_dst += dst_strd;
1631 }
1632 }
1633 else
1634 {
1635 *u4_intra_mode = DC_I16x16;
1636 for(j = 0; j < 16; j++)
1637 {
1638 memset(pu1_dst, u4_dcval, MB_SIZE);
1639 pu1_dst += dst_strd;
1640 }
1641 }
1642 }
1643 }
1644
1645 /**
1646 ******************************************************************************
1647 *
1648 * @brief
1649 * Evaluate best intra 4x4 mode and perform prediction.
1650 *
1651 * @par Description
1652 * This function evaluates 4x4 modes and compute corresponding sad
1653 * and return the buffer predicted with best mode.
1654 *
1655 * @param[in] pu1_src
1656 * UWORD8 pointer to the source
1657 *
1658 * @param[in] pu1_ngbr_pels
1659 * UWORD8 pointer to neighbouring pels
1660 *
1661 * @param[out] pu1_dst
1662 * UWORD8 pointer to the destination
1663 *
1664 * @param[in] src_strd
1665 * integer source stride
1666 *
1667 * @param[in] dst_strd
1668 * integer destination stride
1669 *
1670 * @param[in] u4_n_avblty
1671 * availability of neighbouring pixels
1672 *
 * @param[out] u4_intra_mode
 *  Pointer to the variable in which best mode is returned
 *
 * @param[out] pu4_sadmin
 *  Pointer to the variable in which minimum cost is returned
1678 *
1679 * @param[in] u4_valid_intra_modes
1680 * Says what all modes are valid
1681 *
 * @param[in] u4_lambda
 *  Lambda value for computing cost from SAD
1684 *
1685 * @param[in] u4_predictd_mode
1686 * Predicted mode for cost computation
1687 *
1688 * @returns none
1689 *
1690 ******************************************************************************
1691 */
isvce_evaluate_intra_4x4_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes,UWORD32 u4_lambda,UWORD32 u4_predictd_mode)1692 void isvce_evaluate_intra_4x4_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
1693 UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
1694 UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
1695 UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
1696 UWORD32 u4_predictd_mode)
1697 {
1698 UWORD8 *pu1_src_temp = pu1_src;
1699 UWORD8 *pu1_pred = pu1_ngbr_pels;
1700 UWORD8 left = 0, top = 0;
1701 UWORD8 u1_pred_val = 0;
1702 UWORD8 u1_pred_vals[4] = {0};
1703 UWORD8 *pu1_pred_val = NULL;
1704 /* To store FILT121 operated values*/
1705 UWORD8 u1_pred_vals_diag_121[15] = {0};
1706 /* To store FILT11 operated values*/
1707 UWORD8 u1_pred_vals_diag_11[15] = {0};
1708 UWORD8 u1_pred_vals_vert_r[8] = {0};
1709 UWORD8 u1_pred_vals_horz_d[10] = {0};
1710 UWORD8 u1_pred_vals_horz_u[10] = {0};
1711 WORD32 u4_dcval = 0;
1712 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1713 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1714
1715 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1716 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1717 WORD32 i, i4_min_cost = INT_MAX;
1718
1719 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1720 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1721
1722 /* Computing SAD */
1723
1724 /* VERT mode valid */
1725 if(u4_valid_intra_modes & 1)
1726 {
1727 pu1_pred = pu1_ngbr_pels + 5;
1728 i4_sad[VERT_I4x4] = 0;
1729 i4_cost[VERT_I4x4] = 0;
1730
1731 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1732 pu1_src_temp += src_strd;
1733 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1734 pu1_src_temp += src_strd;
1735 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1736 pu1_src_temp += src_strd;
1737 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1738
1739 i4_cost[VERT_I4x4] =
1740 i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : 4 * u4_lambda);
1741 }
1742
1743 /* HORZ mode valid */
1744 if(u4_valid_intra_modes & 2)
1745 {
1746 i4_sad[HORZ_I4x4] = 0;
1747 i4_cost[HORZ_I4x4] = 0;
1748 pu1_src_temp = pu1_src;
1749
1750 u1_pred_val = pu1_ngbr_pels[3];
1751
1752 i4_sad[HORZ_I4x4] +=
1753 ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1754 ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1755 pu1_src_temp += src_strd;
1756
1757 u1_pred_val = pu1_ngbr_pels[2];
1758
1759 i4_sad[HORZ_I4x4] +=
1760 ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1761 ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1762 pu1_src_temp += src_strd;
1763
1764 u1_pred_val = pu1_ngbr_pels[1];
1765
1766 i4_sad[HORZ_I4x4] +=
1767 ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1768 ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1769 pu1_src_temp += src_strd;
1770
1771 u1_pred_val = pu1_ngbr_pels[0];
1772
1773 i4_sad[HORZ_I4x4] +=
1774 ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1775 ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1776
1777 i4_cost[HORZ_I4x4] =
1778 i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda : 4 * u4_lambda);
1779 }
1780
1781 /* DC mode valid */
1782 if(u4_valid_intra_modes & 4)
1783 {
1784 i4_sad[DC_I4x4] = 0;
1785 i4_cost[DC_I4x4] = 0;
1786 pu1_src_temp = pu1_src;
1787
1788 if(left)
1789 u4_dcval =
1790 pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + pu1_ngbr_pels[3] + 2;
1791 if(top)
1792 u4_dcval +=
1793 pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + pu1_ngbr_pels[8] + 2;
1794
1795 u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1796
1797 /* none available */
1798 memset(u1_pred_vals, u4_dcval, 4);
1799 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1800 pu1_src_temp += src_strd;
1801 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1802 pu1_src_temp += src_strd;
1803 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1804 pu1_src_temp += src_strd;
1805 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1806 pu1_src_temp += src_strd;
1807
1808 i4_cost[DC_I4x4] =
1809 i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : 4 * u4_lambda);
1810 }
1811
1812 /* if modes other than VERT, HORZ and DC are valid */
1813 if(u4_valid_intra_modes > 7)
1814 {
1815 pu1_pred = pu1_ngbr_pels;
1816 pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1817
1818 /* Performing FILT121 and FILT11 operation for all neighbour values*/
1819 for(i = 0; i < 13; i++)
1820 {
1821 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1822 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1823
1824 pu1_pred++;
1825 }
1826
1827 if(u4_valid_intra_modes & 8) /* DIAG_DL */
1828 {
1829 i4_sad[DIAG_DL_I4x4] = 0;
1830 i4_cost[DIAG_DL_I4x4] = 0;
1831 pu1_src_temp = pu1_src;
1832 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1833
1834 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1835 pu1_src_temp += src_strd;
1836 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1837 pu1_src_temp += src_strd;
1838 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1839 pu1_src_temp += src_strd;
1840 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1841 pu1_src_temp += src_strd;
1842 i4_cost[DIAG_DL_I4x4] =
1843 i4_sad[DIAG_DL_I4x4] +
1844 ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda : 4 * u4_lambda);
1845 }
1846
1847 if(u4_valid_intra_modes & 16) /* DIAG_DR */
1848 {
1849 i4_sad[DIAG_DR_I4x4] = 0;
1850 i4_cost[DIAG_DR_I4x4] = 0;
1851 pu1_src_temp = pu1_src;
1852 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1853
1854 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1855 pu1_src_temp += src_strd;
1856 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1857 pu1_src_temp += src_strd;
1858 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1859 pu1_src_temp += src_strd;
1860 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1861 pu1_src_temp += src_strd;
1862 i4_cost[DIAG_DR_I4x4] =
1863 i4_sad[DIAG_DR_I4x4] +
1864 ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : 4 * u4_lambda);
1865 }
1866
1867 if(u4_valid_intra_modes & 32) /* VERT_R mode valid ????*/
1868 {
1869 i4_sad[VERT_R_I4x4] = 0;
1870
1871 pu1_src_temp = pu1_src;
1872 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1873 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1874 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1875 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1876
1877 pu1_pred_val = u1_pred_vals_diag_11 + 4;
1878 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1879 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1880 pu1_src_temp += src_strd;
1881 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1882 pu1_src_temp += src_strd;
1883 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1884 pu1_src_temp += src_strd;
1885 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), i4_sad[VERT_R_I4x4]);
1886
1887 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] +
1888 ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda : 4 * u4_lambda);
1889 }
1890
1891 if(u4_valid_intra_modes & 64) /* HORZ_D mode valid ????*/
1892 {
1893 i4_sad[HORZ_D_I4x4] = 0;
1894
1895 pu1_src_temp = pu1_src;
1896 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1897 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1898 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1899 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1900 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1901 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1902 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1903 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1904
1905 pu1_pred_val = u1_pred_vals_horz_d;
1906 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1907 pu1_src_temp += src_strd;
1908 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1909 pu1_src_temp += src_strd;
1910 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1911 pu1_src_temp += src_strd;
1912 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1913
1914 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] +
1915 ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : 4 * u4_lambda);
1916 }
1917
1918 if(u4_valid_intra_modes & 128) /* VERT_L mode valid ????*/
1919 {
1920 i4_sad[VERT_L_I4x4] = 0;
1921 pu1_src_temp = pu1_src;
1922 pu1_pred_val = u1_pred_vals_diag_11 + 5;
1923 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1924 pu1_src_temp += src_strd;
1925 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1926 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1927 pu1_src_temp += src_strd;
1928 pu1_pred_val = u1_pred_vals_diag_11 + 6;
1929 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1930 pu1_src_temp += src_strd;
1931 pu1_pred_val = u1_pred_vals_diag_121 + 6;
1932 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1933
1934 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] +
1935 ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda : 4 * u4_lambda);
1936 }
1937
1938 if(u4_valid_intra_modes & 256) /* HORZ_U mode valid ????*/
1939 {
1940 i4_sad[HORZ_U_I4x4] = 0;
1941 pu1_src_temp = pu1_src;
1942 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1943 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1944 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1945 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1946 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1947 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1948
1949 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
1950
1951 pu1_pred_val = u1_pred_vals_horz_u;
1952 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
1953 pu1_src_temp += src_strd;
1954 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
1955 pu1_src_temp += src_strd;
1956 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
1957 pu1_src_temp += src_strd;
1958 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
1959
1960 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] +
1961 ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda : 4 * u4_lambda);
1962 }
1963
1964 i4_min_cost =
1965 MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
1966 MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
1967 }
1968 else
1969 {
1970 /* Only first three modes valid */
1971 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
1972 }
1973
1974 *pu4_sadmin = i4_min_cost;
1975
1976 if(i4_min_cost == i4_cost[0])
1977 {
1978 *u4_intra_mode = VERT_I4x4;
1979 pu1_pred_val = pu1_ngbr_pels + 5;
1980 memcpy(pu1_dst, (pu1_pred_val), 4);
1981 pu1_dst += dst_strd;
1982 memcpy(pu1_dst, (pu1_pred_val), 4);
1983 pu1_dst += dst_strd;
1984 memcpy(pu1_dst, (pu1_pred_val), 4);
1985 pu1_dst += dst_strd;
1986 memcpy(pu1_dst, (pu1_pred_val), 4);
1987 }
1988 else if(i4_min_cost == i4_cost[1])
1989 {
1990 *u4_intra_mode = HORZ_I4x4;
1991 memset(pu1_dst, pu1_ngbr_pels[3], 4);
1992 pu1_dst += dst_strd;
1993 memset(pu1_dst, pu1_ngbr_pels[2], 4);
1994 pu1_dst += dst_strd;
1995 memset(pu1_dst, pu1_ngbr_pels[1], 4);
1996 pu1_dst += dst_strd;
1997 memset(pu1_dst, pu1_ngbr_pels[0], 4);
1998 }
1999 else if(i4_min_cost == i4_cost[2])
2000 {
2001 *u4_intra_mode = DC_I4x4;
2002 memset(pu1_dst, u4_dcval, 4);
2003 pu1_dst += dst_strd;
2004 memset(pu1_dst, u4_dcval, 4);
2005 pu1_dst += dst_strd;
2006 memset(pu1_dst, u4_dcval, 4);
2007 pu1_dst += dst_strd;
2008 memset(pu1_dst, u4_dcval, 4);
2009 }
2010
2011 else if(i4_min_cost == i4_cost[3])
2012 {
2013 *u4_intra_mode = DIAG_DL_I4x4;
2014 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2015 memcpy(pu1_dst, (pu1_pred_val), 4);
2016 pu1_dst += dst_strd;
2017 memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2018 pu1_dst += dst_strd;
2019 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2020 pu1_dst += dst_strd;
2021 memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2022 }
2023 else if(i4_min_cost == i4_cost[4])
2024 {
2025 *u4_intra_mode = DIAG_DR_I4x4;
2026 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2027
2028 memcpy(pu1_dst, (pu1_pred_val), 4);
2029 pu1_dst += dst_strd;
2030 memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2031 pu1_dst += dst_strd;
2032 memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2033 pu1_dst += dst_strd;
2034 memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2035 }
2036
2037 else if(i4_min_cost == i4_cost[5])
2038 {
2039 *u4_intra_mode = VERT_R_I4x4;
2040 pu1_pred_val = u1_pred_vals_diag_11 + 4;
2041 memcpy(pu1_dst, (pu1_pred_val), 4);
2042 pu1_dst += dst_strd;
2043 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2044 memcpy(pu1_dst, (pu1_pred_val), 4);
2045 pu1_dst += dst_strd;
2046 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2047 pu1_dst += dst_strd;
2048 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2049 }
2050 else if(i4_min_cost == i4_cost[6])
2051 {
2052 *u4_intra_mode = HORZ_D_I4x4;
2053 pu1_pred_val = u1_pred_vals_horz_d;
2054 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2055 pu1_dst += dst_strd;
2056 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2057 pu1_dst += dst_strd;
2058 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2059 pu1_dst += dst_strd;
2060 memcpy(pu1_dst, (pu1_pred_val), 4);
2061 pu1_dst += dst_strd;
2062 }
2063 else if(i4_min_cost == i4_cost[7])
2064 {
2065 *u4_intra_mode = VERT_L_I4x4;
2066 pu1_pred_val = u1_pred_vals_diag_11 + 5;
2067 memcpy(pu1_dst, (pu1_pred_val), 4);
2068 pu1_dst += dst_strd;
2069 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2070 memcpy(pu1_dst, (pu1_pred_val), 4);
2071 pu1_dst += dst_strd;
2072 pu1_pred_val = u1_pred_vals_diag_11 + 6;
2073 memcpy(pu1_dst, (pu1_pred_val), 4);
2074 pu1_dst += dst_strd;
2075 pu1_pred_val = u1_pred_vals_diag_121 + 6;
2076 memcpy(pu1_dst, (pu1_pred_val), 4);
2077 }
2078 else if(i4_min_cost == i4_cost[8])
2079 {
2080 *u4_intra_mode = HORZ_U_I4x4;
2081 pu1_pred_val = u1_pred_vals_horz_u;
2082 memcpy(pu1_dst, (pu1_pred_val), 4);
2083 pu1_dst += dst_strd;
2084 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2085 pu1_dst += dst_strd;
2086 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2087 pu1_dst += dst_strd;
2088 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2089 pu1_dst += dst_strd;
2090 }
2091
2092 return;
2093 }
2094
2095 /**
2096 ******************************************************************************
2097 *
2098 * @brief:
2099 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the
2100 *prediction.
2101 *
2102 * @par Description
2103 * This function evaluates first three intra chroma modes and compute
2104 *corresponding sad and return the buffer predicted with best mode.
2105 *
2106 * @param[in] pu1_src
2107 * UWORD8 pointer to the source
2108 *
2109 * @param[in] pu1_ngbr_pels
2110 * UWORD8 pointer to neighbouring pels
2111 *
2112 * @param[out] pu1_dst
2113 * UWORD8 pointer to the destination
2114 *
2115 * @param[in] src_strd
2116 * integer source stride
2117 *
2118 * @param[in] dst_strd
2119 * integer destination stride
2120 *
2121 * @param[in] u4_n_avblty
2122 * availability of neighbouring pixels
2123 *
2124 * @param[in] u4_intra_mode
2125 * Pointer to the variable in which best mode is returned
2126 *
2127 * @param[in] pu4_sadmin
2128 * Pointer to the variable in which minimum sad is returned
2129 *
2130 * @param[in] u4_valid_intra_modes
2131 * Says what all modes are valid
2132 *
2133 * @return none
2134 *
2135 ******************************************************************************
2136 */
isvce_evaluate_intra_chroma_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)2137 void isvce_evaluate_intra_chroma_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
2138 UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
2139 UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
2140 UWORD32 u4_valid_intra_modes)
2141 {
2142 UWORD8 *pu1_neighbour;
2143 UWORD8 *pu1_src_temp = pu1_src;
2144 UWORD8 left = 0, top = 0;
2145 WORD32 u4_dcval_u_l[2] = {0, 0}, /*sum left neighbours for 'U' ,two separate sets - sum of
2146 first four from top,and sum of four values from bottom */
2147 u4_dcval_u_t[2] = {0, 0}; /*sum top neighbours for 'U'*/
2148
2149 WORD32 u4_dcval_v_l[2] = {0, 0}, /*sum left neighbours for 'V'*/
2150 u4_dcval_v_t[2] = {0, 0}; /*sum top neighbours for 'V'*/
2151
2152 WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
2153 i4_min_sad = INT_MAX;
2154 UWORD8 val_u, val_v;
2155
2156 WORD32 u4_dc_val[2][2][2]; /* -----------
2157 | | | Chroma can have four
2158 | 00 | 01 | separate dc value...
2159 ----------- u4_dc_val corresponds to this dc
2160 values | | | with u4_dc_val[2][2][U] and
2161 u4_dc_val[2][2][V] | 10 | 11 |
2162 ----------- */
2163 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2164 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2165
2166 /*Evaluating HORZ*/
2167 if(left) /* Ifleft available*/
2168 {
2169 i4_sad_horz = 0;
2170
2171 for(i = 0; i < 8; i++)
2172 {
2173 val_v = pu1_ngbr_pels[15 - 2 * i];
2174 val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2175 row = i / 4;
2176 u4_dcval_u_l[row] += val_u;
2177 u4_dcval_v_l[row] += val_v;
2178 for(j = 0; j < 8; j++)
2179 {
2180 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for HORZ mode*/
2181 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2182 }
2183
2184 pu1_src_temp += src_strd;
2185 }
2186 u4_dcval_u_l[0] += 2;
2187 u4_dcval_u_l[1] += 2;
2188 u4_dcval_v_l[0] += 2;
2189 u4_dcval_v_l[1] += 2;
2190 }
2191
2192 /*Evaluating VERT**/
2193 pu1_src_temp = pu1_src;
2194 if(top) /* top available*/
2195 {
2196 i4_sad_vert = 0;
2197
2198 for(i = 0; i < 8; i++)
2199 {
2200 col = i / 4;
2201
2202 val_u = pu1_ngbr_pels[18 + i * 2];
2203 val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2204 u4_dcval_u_t[col] += val_u;
2205 u4_dcval_v_t[col] += val_v;
2206
2207 for(j = 0; j < 16; j++)
2208 {
2209 i4_sad_vert +=
2210 ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]); /* Finding SAD for VERT mode*/
2211 }
2212 pu1_src_temp += src_strd;
2213 }
2214 u4_dcval_u_t[0] += 2;
2215 u4_dcval_u_t[1] += 2;
2216 u4_dcval_v_t[0] += 2;
2217 u4_dcval_v_t[1] += 2;
2218 }
2219
2220 /* computing DC value*/
2221 /* Equation 8-128 in spec*/
2222 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2223 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2224 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2225 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2226
2227 if(top)
2228 {
2229 /* Equation 8-132 in spec*/
2230 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2231 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2232 }
2233 else
2234 {
2235 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2236 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2237 }
2238
2239 if(left)
2240 {
2241 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2242 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2243 }
2244 else
2245 {
2246 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2247 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2248 }
2249
2250 if(!(left || top))
2251 {
2252 /*none available*/
2253 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2254 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2255 }
2256
2257 /* Evaluating DC */
2258 pu1_src_temp = pu1_src;
2259 i4_sad_dc = 0;
2260 for(i = 0; i < 8; i++)
2261 {
2262 for(j = 0; j < 8; j++)
2263 {
2264 col = j / 4;
2265 row = i / 4;
2266 val_u = u4_dc_val[row][col][0];
2267 val_v = u4_dc_val[row][col][1];
2268
2269 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for DC mode*/
2270 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2271 }
2272 pu1_src_temp += src_strd;
2273 }
2274
2275 if((u4_valid_intra_modes & 01) == 0) /* If DC is disabled*/
2276 i4_sad_dc = INT_MAX;
2277 if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled*/
2278 i4_sad_horz = INT_MAX;
2279 if((u4_valid_intra_modes & 04) == 0) /* If VERT is disabled*/
2280 i4_sad_vert = INT_MAX;
2281
2282 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2283
2284 /* Finding Minimum sad and doing corresponding prediction*/
2285 if(i4_min_sad < *pu4_sadmin)
2286 {
2287 *pu4_sadmin = i4_min_sad;
2288
2289 if(i4_min_sad == i4_sad_dc)
2290 {
2291 *u4_intra_mode = DC_CH_I8x8;
2292 for(i = 0; i < 8; i++)
2293 {
2294 for(j = 0; j < 8; j++)
2295 {
2296 col = j / 4;
2297 row = i / 4;
2298
2299 pu1_dst[2 * j] = u4_dc_val[row][col][0];
2300 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2301 }
2302 pu1_dst += dst_strd;
2303 }
2304 }
2305 else if(i4_min_sad == i4_sad_horz)
2306 {
2307 *u4_intra_mode = HORZ_CH_I8x8;
2308 for(j = 0; j < 8; j++)
2309 {
2310 val_v = pu1_ngbr_pels[15 - 2 * j];
2311 val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2312
2313 for(i = 0; i < 8; i++)
2314 {
2315 pu1_dst[2 * i] = val_u;
2316 pu1_dst[2 * i + 1] = val_v;
2317 }
2318 pu1_dst += dst_strd;
2319 }
2320 }
2321 else
2322 {
2323 *u4_intra_mode = VERT_CH_I8x8;
2324 pu1_neighbour = pu1_ngbr_pels + 18;
2325 for(j = 0; j < 8; j++)
2326 {
2327 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2328 pu1_dst += dst_strd;
2329 }
2330 }
2331 }
2332
2333 return;
2334 }
2335