xref: /aosp_15_r20/external/libavc/encoder/svc/isvce_intra_modes_eval.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  isvce_intra_modes_eval.c
25 *
26 * @brief
27 *  This file contains definitions of routines that perform rate distortion
28 *  analysis on a macroblock if they are to be coded as intra.
29 *
30 * @author
31 *  ittiam
32 *
33 * @par List of Functions:
34 *  - isvce_derive_neighbor_availability_of_mbs()
35 *  - isvce_derive_ngbr_avbl_of_mb_partitions()
36 *  - isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
37 *  - isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
38 *  - isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
39 *  - isvce_evaluate_intra4x4_modes_for_least_cost_rdopton()
40 *  - isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
41 *  - isvce_evaluate_intra16x16_modes()
42 *  - isvce_evaluate_intra4x4_modes()
43 *  - isvce_evaluate_intra_chroma_modes()
44 *
45 * @remarks
46 *  None
47 *
48 *******************************************************************************
49 */
50 
51 /*****************************************************************************/
52 /* File Includes                                                             */
53 /*****************************************************************************/
54 
55 /* System include files */
56 #include <stdio.h>
57 #include <string.h>
58 #include <limits.h>
59 #include <assert.h>
60 
61 /* User include files */
62 #include "ih264e_config.h"
63 #include "ih264_typedefs.h"
64 #include "iv2.h"
65 #include "ive2.h"
66 #include "ih264_debug.h"
67 #include "isvc_defs.h"
68 #include "isvc_macros.h"
69 #include "ih264_intra_pred_filters.h"
70 #include "isvc_structs.h"
71 #include "isvc_common_tables.h"
72 #include "isvc_trans_quant_itrans_iquant.h"
73 #include "isvc_inter_pred_filters.h"
74 #include "isvc_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_size_defs.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "isvc_cabac_tables.h"
79 #include "isvce_defs.h"
80 #include "ime_distortion_metrics.h"
81 #include "ih264e_error.h"
82 #include "ih264e_bitstream.h"
83 #include "ime_defs.h"
84 #include "ime_structs.h"
85 #include "irc_cntrl_param.h"
86 #include "irc_frame_info_collector.h"
87 #include "isvce_rate_control.h"
88 #include "isvce_cabac_structs.h"
89 #include "isvce_structs.h"
90 #include "ih264e_intra_modes_eval.h"
91 #include "isvce_globals.h"
92 #include "ime_platform_macros.h"
93 
94 /*****************************************************************************/
95 /* Function Definitions                                                      */
96 /*****************************************************************************/
97 
98 /**
99 ******************************************************************************
100 *
101 * @brief
102 *  derivation process for subblock/partition availability
103 *
104 * @par   Description
105 *  Calculates the availability of the left, top, topright and topleft subblock
106 *  or partitions.
107 *
108 * @param[in]    ps_ngbr_avbl
109 *  pointer to the neighbor availability flags of the current macroblock
110 *
111 * @param[in]    i1_pel_pos_x
112 *  column position of the pel wrt the current block
113 *
114 * @param[in]    i1_pel_pos_y
115 *  row position of the pel in wrt current block
116 *
117 * @remarks     Assumptions: before calling this function it is assumed that
118 *   the neighbor availability of the current macroblock is already derived.
119 *   Based on table 6-3 of H264 specification
120 *
121 * @return      availability status (yes or no)
122 *
123 ******************************************************************************
124 */
isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t * ps_ngbr_avbl,WORD8 i1_pel_pos_x,WORD8 i1_pel_pos_y)125 UWORD8 isvce_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, WORD8 i1_pel_pos_x,
126                                                WORD8 i1_pel_pos_y)
127 {
128     UWORD8 u1_neighbor_avail = 0;
129 
130     /**********************************************************************/
131     /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to   */
132     /* various columns of a macroblock                                    */
133     /*                                                                    */
134     /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to   */
135     /* various rows of a macroblock                                       */
136     /*                                                                    */
137     /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements    */
138     /* outside the bound of an mb ie., represents its neighbors.          */
139     /**********************************************************************/
140     if(i1_pel_pos_x < 0)
141     { /* column(-1) */
142         if(i1_pel_pos_y < 0)
143         {                                              /* row(-1) */
144             u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
145         }
146         else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
147         {                                              /* all rows of a macroblock */
148             u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
149         }
150         else                       /* if (i1_pel_pos_y >= 16) */
151         {                          /* rows(+16) */
152             u1_neighbor_avail = 0; /* current mb bottom left availability */
153         }
154     }
155     else if(i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
156     { /* all columns of a macroblock */
157         if(i1_pel_pos_y < 0)
158         {                                              /* row(-1) */
159             u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
160         }
161         else if(i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
162         {                          /* all rows of a macroblock */
163             u1_neighbor_avail = 1; /* current mb availability */
164             /* availability of the partition is dependent on the position of the
165              * partition inside the mb */
166             /* although the availability is declared as 1 in all cases these needs to
167              * be corrected somewhere else and this is not done in here */
168         }
169         else                       /* if (i1_pel_pos_y >= 16) */
170         {                          /* rows(+16) */
171             u1_neighbor_avail = 0; /* current mb bottom availability */
172         }
173     }
174     else if(i1_pel_pos_x >= 16)
175     { /* column(+16) */
176         if(i1_pel_pos_y < 0)
177         {                                              /* row(-1) */
178             u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
179         }
180         else                       /* if (i1_pel_pos_y >= 0) */
181         {                          /* all other rows */
182             u1_neighbor_avail = 0; /* current mb right & bottom right availability */
183         }
184     }
185 
186     return u1_neighbor_avail;
187 }
188 
189 /**
190 ******************************************************************************
191 *
192 * @brief
193 *  evaluate best intra 16x16 mode (rate distortion opt off)
194 *
195 * @par Description
196 *  This function evaluates all the possible intra 16x16 modes and finds the mode
197 *  that best represents the macro-block (least distortion) and occupies fewer
198 *  bits in the bit-stream.
199 *
200 * @param[in]   ps_proc
201 *  pointer to process context (handle)
202 *
203 * @remarks
204 *  Ideally the cost of encoding a macroblock is calculated as
205 *  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
206 *  input block and the reconstructed block and rate is the number of bits taken
207 *  to place the macroblock in the bit-stream. In this routine the rate does not
208 *  exactly point to the total number of bits it takes, rather it points to
209 *header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
210 *bits and residual bits fall in to texture bits the number of bits taken to
211 *encoding mbtype is considered as rate, we compute cost. Further we will
212 *approximate the distortion as the deviation b/w input and the predicted block
213 *as opposed to input and reconstructed block.
214 *
215 *  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
216 *  the SAD and cost are one and the same.
217 *
218 * @return     none
219 *
220 ******************************************************************************
221 */
222 
void isvce_evaluate_intra16x16_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
{
    /* Bitmap of intra 16x16 modes to evaluate, indexed by the neighbor
     * availability word (bit 0 = left, bit 1 = top-left, bit 2 = top) */
    const UWORD8 au1_mode_map[8] = {4, 6, 4, 6, 5, 7, 5, 15};

    /* codec context and memory helpers */
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
    mem_fxns_t *ps_mem_fxns = &ps_codec->s_isa_dependent_fxns.s_mem_fxns;

    /* constrained intra prediction switch of the current spatial layer */
    UWORD32 u4_cip = ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];

    /* syntax info of the mb above the current one */
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;

    /* source mb, recon mb and the two prediction buffers */
    UWORD8 *pu1_src = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
    UWORD8 *pu1_rec = ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_16x16;
    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;

    /* strides */
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
    WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* recon neighbors: left column, top row and top-left pel */
    UWORD8 *pu1_left_col = pu1_rec - 1;
    UWORD8 *pu1_top_row = pu1_rec - i4_rec_strd;
    UWORD8 *pu1_top_left = pu1_top_row - 1;

    /* gathered neighbor pels: [0..15] left column stored bottom to top,
     * [16] top-left pel, [17..32] top row */
    UWORD8 *pu1_nbr_pels = ps_proc->au1_ngbr_pels;

    /* lambda */
    UWORD32 u4_lambda = ps_proc->u4_lambda;

    /* SAD (distortion metric) of the 16x16 block */
    WORD32 i4_plane_sad = INT_MAX, i4_best_sad = INT_MAX;

    /* cost = distortion + lambda*rate */
    WORD32 i4_mb_cost;

    WORD32 i4_avail;
    UWORD32 u4_mode, u4_valid_modes, u4_row;
    UWORD32 u4_mb_type_offset = 0, u4_fast_sad = 0;
    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok;

    if(ISLICE != ps_proc->i4_slice_type)
    {
        /* offset of the intra 16x16 mb types in the mb type code table */
        u4_mb_type_offset = (PSLICE == ps_proc->i4_slice_type) ? 5 : 23;
        u4_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
    }

    /* A neighbor is usable when it exists and, with constrained intra
     * prediction on, is an intra mb that is not coded in base mode */
    u1_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_a != 0);
    if(u1_left_ok && u4_cip)
    {
        u1_left_ok = (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
                      !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag);
    }

    u1_top_ok = (ps_proc->ps_ngbr_avbl->u1_mb_b != 0);
    if(u1_top_ok && u4_cip)
    {
        u1_top_ok = (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag);
    }

    u1_top_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_d != 0);
    if(u1_top_left_ok && u4_cip)
    {
        u1_top_left_ok = (ps_top_mb[-1].u1_is_intra && !ps_top_mb[-1].u1_base_mode_flag);
    }

    /* gather the prediction pels; a set that is not usable is zeroed */
    if(u1_left_ok)
    {
        for(u4_row = 0; u4_row < 16; u4_row++)
        {
            pu1_nbr_pels[16 - 1 - u4_row] = pu1_left_col[u4_row * i4_rec_strd];
        }
    }
    else
    {
        ps_mem_fxns->pf_mem_set_mul8(pu1_nbr_pels, 0, MB_SIZE);
    }

    if(u1_top_ok)
    {
        ps_mem_fxns->pf_mem_cpy_mul8(pu1_nbr_pels + 16 + 1, pu1_top_row, 16);
    }
    else
    {
        ps_mem_fxns->pf_mem_set_mul8(pu1_nbr_pels + 16 + 1, 0, MB_SIZE);
    }

    pu1_nbr_pels[16] = u1_top_left_ok ? *pu1_top_left : 0;

    /* pack the availability word and publish it in the process context */
    i4_avail = (u1_left_ok) + (u1_top_ok << 2) + (u1_top_left_ok << 1);
    ps_proc->i4_ngbr_avbl_16x16_mb = i4_avail;

    /* modes worth evaluating; the fast presets never try the plane mode */
    u4_valid_modes = au1_mode_map[i4_avail];
    if(IVE_FAST == ps_codec->s_cfg.u4_enc_speed_preset ||
       IVE_FASTEST == ps_codec->s_cfg.u4_enc_speed_preset)
    {
        u4_valid_modes &= ~(1 << PLANE_I16x16);
    }

    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
    ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_src, pu1_nbr_pels, pu1_pred, i4_src_strd,
                                                  i4_pred_strd, i4_avail, &u4_mode, &i4_best_sad,
                                                  u4_valid_modes);

    /* bit 3 of the map is set only by the all-neighbors-available entry of the
     * table above and is the bit cleared for the fast presets, i.e. the plane
     * mode */
    if((u4_valid_modes >> 3) & 1)
    {
        /* build the plane prediction in its own buffer */
        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_nbr_pels, pu1_pred_plane, 0,
                                                      i4_pred_strd, i4_avail);

        /* SAD of the plane prediction; the best SAD found so far is handed in
         * as the fifth argument */
        ps_codec->apf_compute_sad_16x16[u4_fast_sad](pu1_src, pu1_pred_plane, i4_src_strd,
                                                     i4_pred_strd, i4_best_sad, &i4_plane_sad);

        /* at this stage cost and SAD are the same quantity */
        if(i4_plane_sad < i4_best_sad)
        {
            u4_mode = PLANE_I16x16;
            i4_best_sad = i4_plane_sad;
        }
    }

    DEBUG("%d partition cost, %d intra mode\n", i4_best_sad * 32, u4_mode);

    ps_proc->u1_l_i16_mode = u4_mode;

    /* cost = distortion + lambda*rate, rate being the mb type code length */
    i4_mb_cost = i4_best_sad + u4_lambda * u1_uev_codelength[u4_mb_type_offset + u4_mode];

    /* take over the mb only if intra 16x16 beats the best cost so far */
    if(i4_mb_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_mb_cost;
        ps_proc->i4_mb_distortion = i4_best_sad;
        ps_proc->ps_mb_info->u2_mb_type = I16x16;
    }
}
390 
391 /**
392 ******************************************************************************
393 *
394 * @brief
395 *  evaluate best intra 8x8 mode (rate distortion opt off)
396 *
397 * @par Description
398 *  This function evaluates all the possible intra 8x8 modes and finds the mode
399 *  that best represents the macro-block (least distortion) and occupies fewer
400 *  bits in the bit-stream.
401 *
402 * @param[in]    ps_proc
403 *  pointer to proc ctxt
404 *
405 * @remarks Ideally the cost of encoding a macroblock is calculated as
406 *  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
407 *  input block and the reconstructed block and rate is the number of bits taken
408 *  to place the macroblock in the bit-stream. In this routine the rate does not
409 *  exactly point to the total number of bits it takes, rather it points to
410 *header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
411 *bits and residual bits fall in to texture bits the number of bits taken to
412 *encoding mbtype is considered as rate, we compute cost. Further we will
413 *approximate the distortion as the deviation b/w input and the predicted block
414 *as opposed to input and reconstructed block.
415 *
416 *  NOTE: TODO: This function needs to be tested
417 *
418 *  @return      none
419 *
420 ******************************************************************************
421 */
void isvce_evaluate_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
{
    /* codec context */
    isvce_codec_t *ps_codec = ps_proc->ps_codec;

    /* lambda and the two mode signalling costs: one bit when the chosen mode
     * equals the estimated mode, four bits otherwise */
    UWORD32 u4_lambda = ps_proc->u4_lambda;
    UWORD32 u4_cost_est_mode = u4_lambda;
    UWORD32 u4_cost_other_mode = 4 * u4_lambda;

    /* constrained intra prediction switch of the current spatial layer */
    UWORD32 u4_cip = ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];

    /* source mb, scratch neighbor pels and prediction buffer */
    UWORD8 *pu1_src_mb = (UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
    UWORD8 *pu1_nbr_pels = ps_proc->au1_ngbr_pels;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb;

    /* strides */
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* syntax info and intra modes of the neighboring mbs in the row above */
    UWORD8 *pu1_top_mb_modes =
        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
    isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;

    /* neighbor availability at mb level and at 8x8 block level */
    block_neighbors_t s_mb_nbrs;
    block_neighbors_t s_blk_nbrs;

    /* mb totals; the cost starts at lambda */
    WORD32 i4_mb_cost = u4_lambda;
    WORD32 i4_mb_sad = 0;

    /* per block results; the best SAD and best mode deliberately persist
     * across blocks exactly as declared here */
    WORD32 i4_blk_sad;
    WORD32 i4_blk_best_sad = INT_MAX;
    UWORD32 u4_blk_best_mode = DC_I8x8;

    UWORD32 u4_blk;

    /* A neighboring mb is usable when it exists and, with constrained intra
     * prediction on, is an intra mb that is not coded in base mode */
    s_mb_nbrs.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
                         (!u4_cip || (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
                                      !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)));

    s_mb_nbrs.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
                         (!u4_cip || (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag)));

    s_mb_nbrs.u1_mb_d =
        ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
         (!u4_cip || (ps_top_mb[-1].u1_is_intra && !ps_top_mb[-1].u1_base_mode_flag)));

    s_mb_nbrs.u1_mb_c =
        ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
         (!u4_cip || (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag)));

    for(u4_blk = 0; u4_blk < 4; u4_blk++)
    {
        /* pel offset of this 8x8 block inside the mb */
        UWORD32 u4_x = (u4_blk & 0x01) << 3;
        UWORD32 u4_y = (u4_blk >> 1) << 3;

        /* With rdopt off the prediction is built from the input picture rather
         * than from recon pels (open loop intra prediction) */
        UWORD8 *pu1_blk = pu1_src_mb + u4_x + (u4_y * i4_src_strd);
        UWORD8 *pu1_left = pu1_blk - 1;
        UWORD8 *pu1_top = pu1_blk - i4_src_strd;
        UWORD8 *pu1_top_left = pu1_top - 1;

        UWORD32 u4_modes = 0x1ff;
        UWORD32 u4_est_mode = DC_I8x8;
        UWORD32 u4_mode;
        WORD32 i4_blk_best_cost = INT_MAX;
        WORD32 i4_avail;

        /* availability of the left, top, top-right and top-left neighbors of
         * this block */
        s_blk_nbrs.u1_mb_a =
            isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x - 1, u4_y);
        s_blk_nbrs.u1_mb_b =
            isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x, u4_y - 1);
        s_blk_nbrs.u1_mb_c =
            isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x + 8, u4_y - 1);
        s_blk_nbrs.u1_mb_d =
            isvce_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_x - 1, u4_y - 1);

        /* bit 0 = left, bit 1 = top-left, bit 2 = top, bit 3 = top-right;
         * bit 4 repeats the left flag.
         * NOTE(review): bit 4 presumably stands in for an extra availability
         * bit expected by the reference filtering routine - confirm */
        i4_avail = (s_blk_nbrs.u1_mb_a) + (s_blk_nbrs.u1_mb_d << 1) + (s_blk_nbrs.u1_mb_b << 2) +
                   (s_blk_nbrs.u1_mb_c << 3) + (s_blk_nbrs.u1_mb_a << 4);
        ps_proc->ai4_neighbor_avail_8x8_subblks[u4_blk] = i4_avail;

        /* prepare the neighbor pels used by all 8x8 predictors */
        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_left, pu1_top, pu1_top_left, pu1_nbr_pels,
                                                     i4_src_strd, i4_avail);

        /* drop the modes whose reference pels are missing */
        if(!s_blk_nbrs.u1_mb_b)
        {
            u4_modes &= ~((1 << VERT_I4x4) | (1 << DIAG_DL_I4x4) | (1 << VERT_L_I4x4));
        }
        if(!s_blk_nbrs.u1_mb_a)
        {
            u4_modes &= ~((1 << HORZ_I4x4) | (1 << HORZ_U_I4x4));
        }
        if(!(s_blk_nbrs.u1_mb_a && s_blk_nbrs.u1_mb_b && s_blk_nbrs.u1_mb_d))
        {
            u4_modes &= ~((1 << DIAG_DR_I4x4) | (1 << VERT_R_I4x4) | (1 << HORZ_D_I4x4));
        }

        /* Estimated mode (used only for costing): DC unless both the left and
         * the top block are available, else the smaller of their modes */
        if(s_blk_nbrs.u1_mb_a && s_blk_nbrs.u1_mb_b)
        {
            UWORD32 u4_left_mode = DC_I8x8;
            UWORD32 u4_top_mode = DC_I8x8;

            if(0 != u4_x)
            {
                /* left block lies inside the current mb */
                u4_left_mode = ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk - 1];
            }
            else if(I8x8 == ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type)
            {
                u4_left_mode =
                    ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[u4_blk + 1];
            }
            else if(I4x4 == ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type)
            {
                /* NOTE(review): the same "* 4 + 2" sub-block index is used for
                 * the left and the top neighbor - verify against the intra
                 * 8x8 pred mode derivation of the H.264 specification */
                u4_left_mode = ps_proc->s_nbr_info.ps_left_mb_intra_modes
                                   ->au1_intra_modes[(u4_blk + 1) * 4 + 2];
            }

            if(0 != u4_y)
            {
                /* top block lies inside the current mb */
                u4_top_mode = ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk - 2];
            }
            else if(I8x8 == ps_top_mb->u2_mb_type)
            {
                u4_top_mode = pu1_top_mb_modes[u4_blk + 2];
            }
            else if(I4x4 == ps_top_mb->u2_mb_type)
            {
                u4_top_mode = pu1_top_mb_modes[(u4_blk + 2) * 4 + 2];
            }

            u4_est_mode = MIN(u4_left_mode, u4_top_mode);
        }

        /* walk the mode bitmap from VERT_I8x8 upwards, one bit per mode */
        for(u4_mode = VERT_I8x8; u4_modes != 0; u4_mode++, u4_modes >>= 1)
        {
            if(u4_modes & 1)
            {
                UWORD32 u4_mode_cost;
                WORD32 i4_blk_cost;

                /* intra prediction */
                (ps_codec->apf_intra_pred_8_l)[u4_mode](pu1_nbr_pels, pu1_pred, 0, i4_pred_strd,
                                                        i4_avail);

                /* SAD between the source block and the prediction; the least
                 * cost so far is handed in as the fifth argument */
                ime_compute_sad_8x8(pu1_blk, pu1_pred, i4_src_strd, i4_pred_strd,
                                    i4_blk_best_cost, &i4_blk_sad);

                u4_mode_cost = (u4_est_mode == u4_mode) ? u4_cost_est_mode : u4_cost_other_mode;
                i4_blk_cost = i4_blk_sad + u4_mode_cost;

                /* keep the cheapest mode */
                if(i4_blk_cost < i4_blk_best_cost)
                {
                    i4_blk_best_cost = i4_blk_cost;
                    i4_blk_best_sad = i4_blk_sad;
                    u4_blk_best_mode = u4_mode;
                }
            }
        }

        /* accumulate mb totals and record the mode of this block */
        i4_mb_cost += i4_blk_best_cost;
        i4_mb_sad += i4_blk_best_sad;
        ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk] = u4_blk_best_mode;
    }

    /* take over the mb only if intra 8x8 beats the best cost so far */
    if(i4_mb_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_mb_cost;
        ps_proc->i4_mb_distortion = i4_mb_sad;
        ps_proc->ps_mb_info->u2_mb_type = I8x8;
    }
}
671 
672 /**
673 ******************************************************************************
674 *
675 * @brief
676 *  evaluate best intra 4x4 mode (rate distortion opt off)
677 *
678 * @par Description
679 *  This function evaluates all the possible intra 4x4 modes and finds the mode
680 *  that best represents the macro-block (least distortion) and occupies fewer
681 *  bits in the bit-stream.
682 *
683 * @param[in]    ps_proc_ctxt
684 *  pointer to proc ctxt
685 *
686 * @remarks
687 *  Ideally the cost of encoding a macroblock is calculated as
688 *  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
689 *  input block and the reconstructed block and rate is the number of bits taken
690 *  to place the macroblock in the bit-stream. In this routine the rate does not
691 *  exactly point to the total number of bits it takes, rather it points to
692 *header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
693 *bits and residual bits fall in to texture bits the number of bits taken to
694 *encoding mbtype is considered as rate, we compute cost. Further we will
695 *approximate the distortion as the deviation b/w input and the predicted block
696 *as opposed to input and reconstructed block.
697 *
698 *  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
699 *  24*lambda is added to the SAD before comparison with the best SAD for
700 *  inter prediction. This is an empirical value to prevent using too many intra
701 *  blocks.
702 *
703 * @return      none
704 *
705 ******************************************************************************
706 */
void isvce_evaluate_intra4x4_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
{
    /*
     * Open loop evaluation: the neighbouring pels used for prediction are read
     * from the source picture, not from reconstructed data. For each of the
     * sixteen 4x4 blocks the best mode is stored in
     * ps_proc->au1_intra_luma_mb_4x4_modes and the predicted mode in
     * ps_proc->au1_predicted_intra_luma_mb_4x4_modes. The MB type, cost and
     * distortion in ps_proc are updated only if I4x4 beats the current cost.
     */

    /* Codec Context */
    isvce_codec_t *ps_codec = ps_proc->ps_codec;

    /* SAD(distortion metric) of a 4x4 block, and its sum over the macroblock */
    WORD32 i4_partition_distortion_least, i4_total_distortion = 0;

    /* lambda */
    UWORD32 u4_lambda = ps_proc->u4_lambda;

    /* cost = distortion + lambda*rate; the total starts at (24 + 1) * lambda */
    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;

    /* mode signalling cost removed from the partition cost to obtain the */
    /* distortion: one bit if best mode == predicted mode, four bits otherwise */
    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;

    /* intra mode */
    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;

    /* neighbor pels for intra prediction */
    /* layout: [0..3] left column (bottom to top), [4] top left, */
    /*         [5..8] top row, [9..12] top right row */
    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;

    /* pointer to curr partition */
    UWORD8 *pu1_mb_curr;

    /* pointer to prediction macro block */
    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;

    /* strides */
    WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* neighbors left, top, top right, top left */
    UWORD8 *pu1_mb_a;
    UWORD8 *pu1_mb_b;
    UWORD8 *pu1_mb_c;
    UWORD8 *pu1_mb_d;

    /* neighbor availability */
    WORD32 i4_ngbr_avbl;
    block_neighbors_t s_ngbr_avbl;

    /* temp vars */
    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;

    /* ngbr sub mb modes */
    UWORD8 *pu1_top_mb_intra_modes =
        ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
    isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
    /* only dereferenced below when the top right MB is flagged as available */
    isvce_mb_info_t *ps_top_right_mb_syn_ele = ps_top_mb_syn_ele + 1;

    /* valid intra modes map, indexed by (left) | (top left << 1) | (top << 2) */
    UWORD32 u4_valid_intra_modes;
    const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};

    UWORD32 u4_constrained_intra_pred =
        ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
    UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;

    /* With constrained intra pred, a neighbor MB is usable only if it is */
    /* intra coded and does not have base mode flag set */

    /* left pels */
    u1_mb_a =
        ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
         (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
                                       !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
                                    : 1));

    /* top pels */
    u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
               (u4_constrained_intra_pred
                    ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
                    : 1));

    /* topleft pels */
    u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
               (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
                                             !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
                                          : 1));

    /* top right */
    u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
               (u4_constrained_intra_pred ? (ps_top_right_mb_syn_ele->u1_is_intra &&
                                             !ps_top_right_mb_syn_ele->u1_base_mode_flag)
                                          : 1));

    /* expand the MB level availability into per 4x4 sub block availability */
    i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);

    for(b8 = 0; b8 < 4; b8++)
    {
        u4_blk_x = (b8 & 0x01) << 3;
        u4_blk_y = (b8 >> 1) << 3;
        for(b4 = 0; b4 < 4; b4++)
        {
            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);

            pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
                          u4_pix_x + (u4_pix_y * i4_src_strd);
            /* when rdopt is off, we use the input as reference for constructing
             * prediction buffer */
            /* as opposed to using the recon pels. (open loop intra prediction) */
            pu1_mb_a = pu1_mb_curr - 1;           /* pointer to left block */
            pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top block */
            pu1_mb_c = pu1_mb_b + 4;              /* pointer to top right block */
            pu1_mb_d = pu1_mb_b - 1;              /* pointer to top left block */

            /* locating neighbors that are available for prediction */
            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
             * split in to distinct routines */
            /* basing on neighbors available and hence evade the computation of
             * neighbor availability totally. */

            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
            /* set valid intra modes for evaluation */
            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];

            /* gather prediction pels from the neighbors */
            if(s_ngbr_avbl.u1_mb_a)
            {
                /* left column is stored bottom to top */
                for(i = 0; i < 4; i++)
                {
                    pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_src_strd];
                }
            }
            else
            {
                memset(pu1_ngbr_pels_i4, 0, 4);
            }

            if(s_ngbr_avbl.u1_mb_b)
            {
                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
            }
            else
            {
                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
            }

            if(s_ngbr_avbl.u1_mb_d)
            {
                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
            }
            else
            {
                pu1_ngbr_pels_i4[4] = 0;
            }

            if(s_ngbr_avbl.u1_mb_c)
            {
                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
            }
            else if(s_ngbr_avbl.u1_mb_b)
            {
                /* top is available but top right is not: pad the top right pels */
                /* with the last top pel and treat top right as available. */
                /* i4_ngbr_avbl passed to the evaluation routine is not updated */
                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
            }
            /* if neither top nor top right is available, the top right pels */
            /* are left untouched */

            i4_partition_cost_least = INT_MAX;

            /* predict the intra 4x4 mode for the current partition (for evaluating
             * cost) */
            if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
            {
                u4_estimated_intra_4x4_mode = DC_I4x4;
            }
            else
            {
                /* neighbors that are neither I4x4 nor I8x8 contribute DC */
                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;

                if(u4_pix_x == 0)
                {
                    /* left neighbor lies in the left macroblock */
                    if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
                    {
                        u4_left_intra_4x4_mode =
                            ps_proc->s_nbr_info.ps_left_mb_intra_modes
                                ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
                    }
                    else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
                    {
                        u4_left_intra_4x4_mode =
                            ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
                    }
                }
                else
                {
                    /* left neighbor lies in the current macroblock */
                    u4_left_intra_4x4_mode =
                        ps_proc->au1_intra_luma_mb_4x4_modes
                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
                }

                if(u4_pix_y == 0)
                {
                    /* top neighbor lies in the top macroblock */
                    if(ps_top_mb_syn_ele->u2_mb_type == I4x4)
                    {
                        u4_top_intra_4x4_mode =
                            pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
                    }
                    else if(ps_top_mb_syn_ele->u2_mb_type == I8x8)
                    {
                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
                    }
                }
                else
                {
                    /* top neighbor lies in the current macroblock */
                    u4_top_intra_4x4_mode =
                        ps_proc->au1_intra_luma_mb_4x4_modes
                            [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
                }

                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
            }

            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
                u4_estimated_intra_4x4_mode;

            /* mode evaluation and prediction */
            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
                pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
                &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
                u4_estimated_intra_4x4_mode);

            /* distortion = cost - cost of signalling the mode */
            i4_partition_distortion_least =
                i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
                                               ? u4_cost_one_bit
                                               : u4_cost_four_bits);

            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
                  u4_best_intra_4x4_mode);
            /* macroblock distortion */
            i4_total_distortion += i4_partition_distortion_least;
            i4_total_cost += i4_partition_cost_least;
            /* mb partition mode */
            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
        }
    }

    /* update the type of the mb if necessary */
    if(i4_total_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_total_cost;
        ps_proc->i4_mb_distortion = i4_total_distortion;
        ps_proc->ps_mb_info->u2_mb_type = I4x4;
    }
}
963 
964 /**
965 ******************************************************************************
966 *
967 * @brief evaluate best intra 4x4 mode (rate distortion opt on)
968 *
969 * @par Description
970 *  This function evaluates all the possible intra 4x4 modes and finds the mode
971 *  that best represents the macro-block (least distortion) and occupies fewer
972 *  bits in the bit-stream.
973 *
974 * @param[in]    ps_proc_ctxt
975 *  pointer to proc ctxt
976 *
977 * @remarks
978 *  Ideally the cost of encoding a macroblock is calculated as
979 *  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
980 *  input block and the reconstructed block and rate is the number of bits taken
981 *  to place the macroblock in the bit-stream. In this routine the rate does not
982 *  exactly point to the total number of bits it takes, rather it points to
983 *header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
984 *bits and residual bits fall in to texture bits the number of bits taken to
985 *encoding mbtype is considered as rate, we compute cost. Further we will
986 *approximate the distortion as the deviation b/w input and the predicted block
987 *as opposed to input and reconstructed block.
988 *
989 *  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
990 *  24*lambda is added to the SAD before comparison with the best SAD for
991 *  inter prediction. This is an empirical value to prevent using too many intra
992 *  blocks.
993 *
994 * @return      none
995 *
996 ******************************************************************************
997 */
isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t * ps_proc)998 void isvce_evaluate_intra4x4_modes_for_least_cost_rdopton(isvce_process_ctxt_t *ps_proc)
999 {
1000     block_neighbors_t s_ngbr_avbl;
1001     buffer_container_t s_src;
1002     buffer_container_t s_pred;
1003     buffer_container_t s_recon;
1004     buffer_container_t s_quant_coeffs;
1005     buffer_container_t s_res_pred;
1006 
1007     /* neighbors left, top, top right, top left */
1008     UWORD8 *pu1_mb_a;
1009     UWORD8 *pu1_mb_b;
1010     UWORD8 *pu1_mb_c;
1011     UWORD8 *pu1_mb_d;
1012     UWORD8 *pu1_mb_curr;
1013     UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
1014     UWORD8 *pu1_ref_mb_intra_4x4;
1015     WORD32 i4_ref_strd_left, i4_ref_strd_top;
1016     WORD32 i4_ngbr_avbl;
1017     UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
1018     /* valid intra modes map */
1019     UWORD32 u4_valid_intra_modes;
1020     /* Dummy variable for 4x4 trans function */
1021     WORD16 i2_dc_dummy;
1022     UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
1023 
1024     isvce_codec_t *ps_codec = ps_proc->ps_codec;
1025     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1026     isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1027     isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1028     isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1029     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1030     enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
1031     resi_trans_quant_constants_t s_resi_trans_quant_constants = {
1032         .pu2_scale_matrix = ps_qp_params->pu2_scale_mat,
1033         .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat,
1034         .u4_qbits = ps_qp_params->u1_qbits,
1035         .u4_round_factor = ps_qp_params->u4_dead_zone};
1036     iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
1037         .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
1038         .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
1039         .u4_qp_div_6 = ps_qp_params->u1_qp_div};
1040 
1041     const UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
1042     WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
1043     UWORD32 u4_lambda = ps_proc->u4_lambda;
1044     WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
1045     /* cost due to mbtype */
1046     UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
1047     UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
1048     UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1049     WORD16 *pi2_quant_coeffs = ps_proc->pi2_res_buf_intra_4x4;
1050     UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1051     WORD32 i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
1052     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1053     UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4;
1054     UWORD8 *pu1_top_mb_intra_modes =
1055         ps_proc->s_nbr_info.ps_top_mb_intra_modes[ps_proc->i4_mb_x].au1_intra_modes;
1056     UWORD32 u4_constrained_intra_pred =
1057         ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
1058     UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0);
1059     UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0);
1060 
1061     s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
1062 
1063     /* compute ngbr availability for sub blks */
1064     if(ps_proc->ps_ngbr_avbl->u1_mb_c)
1065     {
1066         ps_top_right_mb = ps_top_mb + 1;
1067     }
1068 
1069     /* left pels */
1070     u1_mb_a =
1071         ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
1072          (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
1073                                        !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
1074                                     : 1));
1075 
1076     /* top pels */
1077     u1_mb_b =
1078         ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
1079          (u4_constrained_intra_pred ? (ps_top_mb->u1_is_intra && !ps_top_mb->u1_base_mode_flag)
1080                                     : 1));
1081 
1082     /* topleft pels */
1083     u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
1084                (u4_constrained_intra_pred
1085                     ? (ps_top_left_mb->u1_is_intra && !ps_top_left_mb->u1_base_mode_flag)
1086                     : 1));
1087 
1088     /* top right pels */
1089     u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) &&
1090                (u4_constrained_intra_pred
1091                     ? (ps_top_right_mb->u1_is_intra && !ps_top_right_mb->u1_base_mode_flag)
1092                     : 1));
1093 
1094     i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
1095     memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
1096 
1097     for(b8 = 0; b8 < 4; b8++)
1098     {
1099         u4_blk_x = (b8 & 0x01) << 3;
1100         u4_blk_y = (b8 >> 1) << 3;
1101         for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_quant_coeffs += MB_SIZE)
1102         {
1103             u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
1104             u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
1105 
1106             pu1_ref_mb_intra_4x4 =
1107                 ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
1108             pu1_mb_curr = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) +
1109                           u4_pix_x + (u4_pix_y * i4_src_strd);
1110             pu1_pred_mb = ps_proc->pu1_pred_mb + u4_pix_x + (u4_pix_y * i4_pred_strd);
1111             if(u4_pix_x == 0)
1112             {
1113                 i4_ref_strd_left = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1114                 pu1_mb_ref_left =
1115                     ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
1116                     (u4_pix_y * i4_ref_strd_left);
1117             }
1118             else
1119             {
1120                 i4_ref_strd_left = i4_pred_strd;
1121                 pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
1122             }
1123             if(u4_pix_y == 0)
1124             {
1125                 i4_ref_strd_top = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1126                 pu1_mb_ref_top =
1127                     ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + u4_pix_x +
1128                     (u4_pix_y * i4_ref_strd_top);
1129             }
1130             else
1131             {
1132                 i4_ref_strd_top = i4_pred_strd;
1133                 pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
1134             }
1135 
1136             pu1_mb_a = pu1_mb_ref_left - 1;              /* pointer to left macro block */
1137             pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
1138             pu1_mb_c = pu1_mb_b + 4;                     /* pointer to top right macro block */
1139             if(u4_pix_y == 0)
1140                 pu1_mb_d = pu1_mb_b - 1;
1141             else
1142                 pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
1143 
1144             /* locating neighbors that are available for prediction */
1145             /* TODO : update the neighbor availability information basing on
1146              * constrained intra pred information */
1147             /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be
1148              * split in to distinct routines */
1149             /* basing on neighbors available and hence evade the computation of
1150              * neighbor availability totally. */
1151 
1152             i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1153             s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
1154             s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
1155             s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
1156             s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
1157             /* set valid intra modes for evaluation */
1158             u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
1159 
1160             /* if top partition is available and top right is not available for intra
1161              * prediction, then */
1162             /* padd top right samples using top sample and make top right also
1163              * available */
1164             /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) +
1165              * (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b |
1166              * s_ngbr_avbl.u1_mb_c) << 3); */
1167 
1168             /* gather prediction pels from the neighbors */
1169             if(s_ngbr_avbl.u1_mb_a)
1170             {
1171                 for(i = 0; i < 4; i++) pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_ref_strd_left];
1172             }
1173             else
1174             {
1175                 memset(pu1_ngbr_pels_i4, 0, 4);
1176             }
1177             if(s_ngbr_avbl.u1_mb_b)
1178             {
1179                 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1180             }
1181             else
1182             {
1183                 memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
1184             }
1185             if(s_ngbr_avbl.u1_mb_d)
1186                 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1187             else
1188                 pu1_ngbr_pels_i4[4] = 0;
1189             if(s_ngbr_avbl.u1_mb_c)
1190             {
1191                 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1192             }
1193             else if(s_ngbr_avbl.u1_mb_b)
1194             {
1195                 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1196                 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
1197             }
1198 
1199             i4_partition_cost_least = INT_MAX;
1200 
1201             /* predict the intra 4x4 mode for the current partition (for evaluating
1202              * cost) */
1203             if(!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
1204             {
1205                 u4_estimated_intra_4x4_mode = DC_I4x4;
1206             }
1207             else
1208             {
1209                 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
1210                 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
1211 
1212                 if(u4_pix_x == 0)
1213                 {
1214                     if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I4x4)
1215                     {
1216                         u4_left_intra_4x4_mode =
1217                             ps_proc->s_nbr_info.ps_left_mb_intra_modes
1218                                 ->au1_intra_modes[gau1_raster_to_zscan_map[3 + u4_pix_y]];
1219                     }
1220                     else if(ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == I8x8)
1221                     {
1222                         u4_left_intra_4x4_mode =
1223                             ps_proc->s_nbr_info.ps_left_mb_intra_modes->au1_intra_modes[b8 + 1];
1224                     }
1225                 }
1226                 else
1227                 {
1228                     u4_left_intra_4x4_mode =
1229                         ps_proc->au1_intra_luma_mb_4x4_modes
1230                             [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 1]];
1231                 }
1232 
1233                 if(u4_pix_y == 0)
1234                 {
1235                     if(ps_top_mb->u2_mb_type == I4x4)
1236                     {
1237                         u4_top_intra_4x4_mode =
1238                             pu1_top_mb_intra_modes[gau1_raster_to_zscan_map[12 + (u4_pix_x >> 2)]];
1239                     }
1240                     else if(ps_top_mb->u2_mb_type == I8x8)
1241                     {
1242                         u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
1243                     }
1244                 }
1245                 else
1246                 {
1247                     u4_top_intra_4x4_mode =
1248                         ps_proc->au1_intra_luma_mb_4x4_modes
1249                             [gau1_raster_to_zscan_map[(u4_pix_x >> 2) + u4_pix_y - 4]];
1250                 }
1251 
1252                 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
1253             }
1254 
1255             ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] =
1256                 u4_estimated_intra_4x4_mode;
1257 
1258             /*mode evaluation and prediction*/
1259             ps_codec->pf_ih264e_evaluate_intra_4x4_modes(
1260                 pu1_mb_curr, pu1_ngbr_pels_i4, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_ngbr_avbl,
1261                 &u4_best_intra_4x4_mode, &i4_partition_cost_least, u4_valid_intra_modes, u4_lambda,
1262                 u4_estimated_intra_4x4_mode);
1263 
1264             i4_partition_distortion_least =
1265                 i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)
1266                                                ? u4_cost_one_bit
1267                                                : u4_cost_four_bits);
1268 
1269             DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least,
1270                   u4_best_intra_4x4_mode);
1271 
1272             /* macroblock distortion */
1273             i4_total_distortion += i4_partition_distortion_least;
1274             i4_total_cost += i4_partition_cost_least;
1275 
1276             /* mb partition mode */
1277             ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1278 
1279             /********************************************************/
1280             /*  error estimation,                                   */
1281             /*  transform                                           */
1282             /*  quantization                                        */
1283             /********************************************************/
1284             s_src.pv_data = pu1_mb_curr;
1285             s_src.i4_data_stride = i4_src_strd;
1286 
1287             s_pred.pv_data = pu1_pred_mb;
1288             s_pred.i4_data_stride = i4_pred_strd;
1289 
1290             s_quant_coeffs.pv_data = pi2_quant_coeffs;
1291             s_quant_coeffs.i4_data_stride = 4;
1292 
1293             ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
1294                 &s_src, &s_pred, &s_quant_coeffs, &s_res_pred,
1295                 /* No op stride, this implies a buff of lenght 1x16 */
1296                 &s_resi_trans_quant_constants, pu1_nnz, &i2_dc_dummy, 0);
1297 
1298             /********************************************************/
1299             /*  ierror estimation,                                  */
1300             /*  itransform                                          */
1301             /*  iquantization                                       */
1302             /********************************************************/
1303 
1304             /* Tx blk coeffs are stored blk by blk */
1305             /* Hence, in order to access rows of each Tx blk, one needs to stride of
1306              * TxxSize */
1307             s_quant_coeffs.i4_data_stride = 4;
1308 
1309             s_recon.pv_data = pu1_ref_mb_intra_4x4;
1310             s_recon.i4_data_stride = i4_pred_strd;
1311 
1312             ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
1313                 &s_quant_coeffs, &s_pred, &s_res_pred, &s_res_pred, &s_recon,
1314                 &s_iq_it_res_rec_constants, ps_proc->pv_scratch_buff, s_quant_coeffs.pv_data, 0, 0);
1315         }
1316     }
1317 
1318     /* update the type of the mb if necessary */
1319     if(i4_total_cost < ps_proc->i4_mb_cost)
1320     {
1321         ps_proc->i4_mb_cost = i4_total_cost;
1322         ps_proc->i4_mb_distortion = i4_total_distortion;
1323         ps_proc->ps_mb_info->u2_mb_type = I4x4;
1324     }
1325 }
1326 
1327 /**
1328 ******************************************************************************
1329 *
1330 * @brief
1331 *  evaluate best chroma intra 8x8 mode (rate distortion opt off)
1332 *
1333 * @par Description
1334 *  This function evaluates all the possible chroma intra 8x8 modes and finds
1335 *  the mode that best represents the macroblock (least distortion) and occupies
1336 *  fewer bits in the bitstream.
1337 *
1338 * @param[in] ps_proc_ctxt
1339 *  pointer to macroblock context (handle)
1340 *
1341 * @remarks
*  For chroma, the best intra prediction mode is selected based on SAD alone.
1343 *
1344 * @returns none
1345 *
1346 ******************************************************************************
1347 */
1348 
isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t * ps_proc)1349 void isvce_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(isvce_process_ctxt_t *ps_proc)
1350 {
1351     /* Codec Context */
1352     isvce_codec_t *ps_codec = ps_proc->ps_codec;
1353     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1354     mem_fxns_t *ps_mem_fxns = &ps_isa_dependent_fxns->s_mem_fxns;
1355 
1356     /* SAD(distortion metric) of an 8x8 block */
1357     WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
1358 
1359     /* intra mode */
1360     UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
1361 
1362     /* neighbor pels for intra prediction */
1363     UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
1364 
1365     /* pointer to curr macro block */
1366     UWORD8 *pu1_curr_mb = ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data);
1367     UWORD8 *pu1_ref_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data);
1368 
1369     /* pointer to prediction macro block */
1370     UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
1371     UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
1372 
1373     /* strides */
1374     WORD32 i4_src_strd_c = ps_proc->s_src_buf_props.as_component_bufs[1].i4_data_stride;
1375     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1376     WORD32 i4_rec_strd_c = ps_proc->s_rec_buf_props.as_component_bufs[1].i4_data_stride;
1377 
1378     /* neighbors left, top, top left */
1379     UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
1380     UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
1381     UWORD8 *pu1_mb_d = pu1_mb_b - 2;
1382 
1383     /* neighbor availability */
1384     const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15};
1385     WORD32 i4_ngbr_avbl;
1386 
1387     /* valid intra modes map */
1388     UWORD32 u4_valid_intra_modes;
1389     isvce_mb_info_t *ps_top_mb_syn_ele = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1390 
1391     /* temp var */
1392     UWORD8 i;
1393     UWORD32 u4_constrained_intra_pred =
1394         ps_codec->au4_constrained_intra_pred[ps_proc->u1_spatial_layer_id];
1395     UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
1396     /* locating neighbors that are available for prediction */
1397 
1398     /* gather prediction pels from the neighbors */
1399     /* left pels */
1400     u1_mb_a =
1401         ((ps_proc->ps_ngbr_avbl->u1_mb_a) &&
1402          (u4_constrained_intra_pred ? (ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra &&
1403                                        !ps_proc->s_nbr_info.ps_left_mb_info->u1_base_mode_flag)
1404                                     : 1));
1405     if(u1_mb_a)
1406     {
1407         for(i = 0; i < 16; i += 2)
1408         {
1409             pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
1410             pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
1411         }
1412     }
1413     else
1414     {
1415         ps_mem_fxns->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
1416     }
1417 
1418     /* top pels */
1419     u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) &&
1420                (u4_constrained_intra_pred
1421                     ? (ps_top_mb_syn_ele->u1_is_intra && !ps_top_mb_syn_ele->u1_base_mode_flag)
1422                     : 1));
1423     if(u1_mb_b)
1424     {
1425         ps_mem_fxns->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
1426     }
1427     else
1428     {
1429         ps_mem_fxns->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
1430     }
1431 
1432     /* top left pels */
1433     u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) &&
1434                (u4_constrained_intra_pred ? (ps_top_mb_syn_ele[-1].u1_is_intra &&
1435                                              !ps_top_mb_syn_ele[-1].u1_base_mode_flag)
1436                                           : 1));
1437     if(u1_mb_d)
1438     {
1439         pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
1440         pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
1441     }
1442     i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
1443     ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
1444 
1445     u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
1446 
1447     if(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
1448        ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
1449         u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
1450 
1451     i4_chroma_mb_distortion = INT_MAX;
1452 
1453     /* perform intra mode chroma  8x8 evaluation */
1454     /* intra prediction */
1455     ps_codec->pf_ih264e_evaluate_intra_chroma_modes(
1456         pu1_curr_mb, pu1_ngbr_pels_c_i8x8, pu1_pred_mb, i4_src_strd_c, i4_pred_strd, i4_ngbr_avbl,
1457         &u4_best_chroma_intra_8x8_mode, &i4_chroma_mb_distortion, u4_valid_intra_modes);
1458 
1459     if(u4_valid_intra_modes & 8) /* if Chroma PLANE is valid*/
1460     {
1461         (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0,
1462                                                     i4_pred_strd, i4_ngbr_avbl);
1463 
1464         /* evaluate distortion(sad) */
1465         ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd,
1466                                       i4_chroma_mb_distortion, &i4_mb_distortion);
1467 
1468         /* update the least distortion information if necessary */
1469         if(i4_mb_distortion < i4_chroma_mb_distortion)
1470         {
1471             i4_chroma_mb_distortion = i4_mb_distortion;
1472             u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
1473         }
1474     }
1475 
1476     DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion,
1477           u4_best_chroma_intra_8x8_mode);
1478 
1479     ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
1480 }
1481 
1482 /**
1483 ******************************************************************************
1484 *
1485 * @brief
1486 *  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1487 *  prediction.
1488 *
1489 * @par Description
1490 *  This function evaluates first three 16x16 modes and compute corresponding sad
1491 *  and return the buffer predicted with best mode.
1492 *
1493 * @param[in] pu1_src
1494 *  UWORD8 pointer to the source
1495 *
1496 * @param[in] pu1_ngbr_pels_i16
1497 *  UWORD8 pointer to neighbouring pels
1498 *
1499 * @param[out] pu1_dst
1500 *  UWORD8 pointer to the destination
1501 *
1502 * @param[in] src_strd
1503 *  integer source stride
1504 *
1505 * @param[in] dst_strd
1506 *  integer destination stride
1507 *
1508 * @param[in] u4_n_avblty
1509 *  availability of neighbouring pixels
1510 *
1511 * @param[in] u4_intra_mode
1512 *  Pointer to the variable in which best mode is returned
1513 *
1514 * @param[in] pu4_sadmin
1515 *  Pointer to the variable in which minimum sad is returned
1516 *
1517 * @param[in] u4_valid_intra_modes
1518 *  Says what all modes are valid
1519 *
1520 * @returns      none
1521 *
1522 ******************************************************************************
1523 */
isvce_evaluate_intra16x16_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels_i16,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)1524 void isvce_evaluate_intra16x16_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
1525                                      UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
1526                                      UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
1527                                      UWORD32 u4_valid_intra_modes)
1528 {
1529     UWORD8 *pu1_neighbour;
1530     UWORD8 *pu1_src_temp = pu1_src;
1531     UWORD8 left = 0, top = 0;
1532     WORD32 u4_dcval = 0;
1533     WORD32 i, j;
1534     WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
1535     UWORD8 val;
1536 
1537     left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1538     top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1539 
1540     /* left available */
1541     if(left)
1542     {
1543         i4_sad_horz = 0;
1544 
1545         for(i = 0; i < 16; i++)
1546         {
1547             val = pu1_ngbr_pels_i16[15 - i];
1548 
1549             u4_dcval += val;
1550 
1551             for(j = 0; j < 16; j++)
1552             {
1553                 i4_sad_horz += ABS(val - pu1_src_temp[j]);
1554             }
1555 
1556             pu1_src_temp += src_strd;
1557         }
1558         u4_dcval += 8;
1559     }
1560 
1561     pu1_src_temp = pu1_src;
1562     /* top available */
1563     if(top)
1564     {
1565         i4_sad_vert = 0;
1566 
1567         for(i = 0; i < 16; i++)
1568         {
1569             u4_dcval += pu1_ngbr_pels_i16[17 + i];
1570 
1571             for(j = 0; j < 16; j++)
1572             {
1573                 i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
1574             }
1575             pu1_src_temp += src_strd;
1576         }
1577         u4_dcval += 8;
1578     }
1579 
1580     u4_dcval = (u4_dcval) >> (3 + left + top);
1581 
1582     pu1_src_temp = pu1_src;
1583 
1584     /* none available */
1585     u4_dcval += (left == 0) * (top == 0) * 128;
1586 
1587     i4_sad_dc = 0;
1588 
1589     for(i = 0; i < 16; i++)
1590     {
1591         for(j = 0; j < 16; j++)
1592         {
1593             i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
1594         }
1595         pu1_src_temp += src_strd;
1596     }
1597 
1598     if((u4_valid_intra_modes & 04) == 0) /* If DC is disabled */
1599         i4_sad_dc = INT_MAX;
1600 
1601     if((u4_valid_intra_modes & 01) == 0) /* If VERT is disabled */
1602         i4_sad_vert = INT_MAX;
1603 
1604     if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled */
1605         i4_sad_horz = INT_MAX;
1606 
1607     i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
1608 
1609     /* Finding Minimum sad and doing corresponding prediction */
1610     if(i4_min_sad < *pu4_sadmin)
1611     {
1612         *pu4_sadmin = i4_min_sad;
1613         if(i4_min_sad == i4_sad_vert)
1614         {
1615             *u4_intra_mode = VERT_I16x16;
1616             pu1_neighbour = pu1_ngbr_pels_i16 + 17;
1617             for(j = 0; j < 16; j++)
1618             {
1619                 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
1620                 pu1_dst += dst_strd;
1621             }
1622         }
1623         else if(i4_min_sad == i4_sad_horz)
1624         {
1625             *u4_intra_mode = HORZ_I16x16;
1626             for(j = 0; j < 16; j++)
1627             {
1628                 val = pu1_ngbr_pels_i16[15 - j];
1629                 memset(pu1_dst, val, MB_SIZE);
1630                 pu1_dst += dst_strd;
1631             }
1632         }
1633         else
1634         {
1635             *u4_intra_mode = DC_I16x16;
1636             for(j = 0; j < 16; j++)
1637             {
1638                 memset(pu1_dst, u4_dcval, MB_SIZE);
1639                 pu1_dst += dst_strd;
1640             }
1641         }
1642     }
1643 }
1644 
1645 /**
1646 ******************************************************************************
1647 *
1648 * @brief
1649 *  Evaluate best intra 4x4 mode and perform prediction.
1650 *
1651 * @par Description
1652 *  This function evaluates  4x4 modes and compute corresponding sad
1653 *  and return the buffer predicted with best mode.
1654 *
1655 * @param[in] pu1_src
1656 *  UWORD8 pointer to the source
1657 *
1658 * @param[in] pu1_ngbr_pels
1659 *  UWORD8 pointer to neighbouring pels
1660 *
1661 * @param[out] pu1_dst
1662 *  UWORD8 pointer to the destination
1663 *
1664 * @param[in] src_strd
1665 *  integer source stride
1666 *
1667 * @param[in] dst_strd
1668 *  integer destination stride
1669 *
1670 * @param[in] u4_n_avblty
1671 *  availability of neighbouring pixels
1672 *
1673 * @param[in] u4_intra_mode
1674 *  Pointer to the variable in which best mode is returned
1675 *
1676 * @param[in] pu4_sadmin
1677 *  Pointer to the variable in which minimum cost is returned
1678 *
1679 * @param[in] u4_valid_intra_modes
1680 *  Says what all modes are valid
1681 *
1682 * @param[in] u4_lambda
1683 *  Lamda value for computing cost from SAD
1684 *
1685 * @param[in] u4_predictd_mode
1686 *  Predicted mode for cost computation
1687 *
1688 * @returns      none
1689 *
1690 ******************************************************************************
1691 */
isvce_evaluate_intra_4x4_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes,UWORD32 u4_lambda,UWORD32 u4_predictd_mode)1692 void isvce_evaluate_intra_4x4_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
1693                                     UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
1694                                     UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
1695                                     UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
1696                                     UWORD32 u4_predictd_mode)
1697 {
1698     UWORD8 *pu1_src_temp = pu1_src;
1699     UWORD8 *pu1_pred = pu1_ngbr_pels;
1700     UWORD8 left = 0, top = 0;
1701     UWORD8 u1_pred_val = 0;
1702     UWORD8 u1_pred_vals[4] = {0};
1703     UWORD8 *pu1_pred_val = NULL;
1704     /* To store FILT121 operated values*/
1705     UWORD8 u1_pred_vals_diag_121[15] = {0};
1706     /* To store FILT11 operated values*/
1707     UWORD8 u1_pred_vals_diag_11[15] = {0};
1708     UWORD8 u1_pred_vals_vert_r[8] = {0};
1709     UWORD8 u1_pred_vals_horz_d[10] = {0};
1710     UWORD8 u1_pred_vals_horz_u[10] = {0};
1711     WORD32 u4_dcval = 0;
1712     WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1713                                INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1714 
1715     WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1716                                 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1717     WORD32 i, i4_min_cost = INT_MAX;
1718 
1719     left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1720     top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1721 
1722     /* Computing SAD */
1723 
1724     /* VERT mode valid */
1725     if(u4_valid_intra_modes & 1)
1726     {
1727         pu1_pred = pu1_ngbr_pels + 5;
1728         i4_sad[VERT_I4x4] = 0;
1729         i4_cost[VERT_I4x4] = 0;
1730 
1731         USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1732         pu1_src_temp += src_strd;
1733         USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1734         pu1_src_temp += src_strd;
1735         USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1736         pu1_src_temp += src_strd;
1737         USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1738 
1739         i4_cost[VERT_I4x4] =
1740             i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : 4 * u4_lambda);
1741     }
1742 
1743     /* HORZ mode valid */
1744     if(u4_valid_intra_modes & 2)
1745     {
1746         i4_sad[HORZ_I4x4] = 0;
1747         i4_cost[HORZ_I4x4] = 0;
1748         pu1_src_temp = pu1_src;
1749 
1750         u1_pred_val = pu1_ngbr_pels[3];
1751 
1752         i4_sad[HORZ_I4x4] +=
1753             ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1754             ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1755         pu1_src_temp += src_strd;
1756 
1757         u1_pred_val = pu1_ngbr_pels[2];
1758 
1759         i4_sad[HORZ_I4x4] +=
1760             ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1761             ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1762         pu1_src_temp += src_strd;
1763 
1764         u1_pred_val = pu1_ngbr_pels[1];
1765 
1766         i4_sad[HORZ_I4x4] +=
1767             ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1768             ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1769         pu1_src_temp += src_strd;
1770 
1771         u1_pred_val = pu1_ngbr_pels[0];
1772 
1773         i4_sad[HORZ_I4x4] +=
1774             ABS(pu1_src_temp[0] - u1_pred_val) + ABS(pu1_src_temp[1] - u1_pred_val) +
1775             ABS(pu1_src_temp[2] - u1_pred_val) + ABS(pu1_src_temp[3] - u1_pred_val);
1776 
1777         i4_cost[HORZ_I4x4] =
1778             i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda : 4 * u4_lambda);
1779     }
1780 
1781     /* DC mode valid */
1782     if(u4_valid_intra_modes & 4)
1783     {
1784         i4_sad[DC_I4x4] = 0;
1785         i4_cost[DC_I4x4] = 0;
1786         pu1_src_temp = pu1_src;
1787 
1788         if(left)
1789             u4_dcval =
1790                 pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] + pu1_ngbr_pels[3] + 2;
1791         if(top)
1792             u4_dcval +=
1793                 pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] + pu1_ngbr_pels[8] + 2;
1794 
1795         u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1796 
1797         /* none available */
1798         memset(u1_pred_vals, u4_dcval, 4);
1799         USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1800         pu1_src_temp += src_strd;
1801         USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1802         pu1_src_temp += src_strd;
1803         USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1804         pu1_src_temp += src_strd;
1805         USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1806         pu1_src_temp += src_strd;
1807 
1808         i4_cost[DC_I4x4] =
1809             i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : 4 * u4_lambda);
1810     }
1811 
1812     /* if modes other than VERT, HORZ and DC are  valid */
1813     if(u4_valid_intra_modes > 7)
1814     {
1815         pu1_pred = pu1_ngbr_pels;
1816         pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1817 
1818         /* Performing FILT121 and FILT11 operation for all neighbour values*/
1819         for(i = 0; i < 13; i++)
1820         {
1821             u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1822             u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1823 
1824             pu1_pred++;
1825         }
1826 
1827         if(u4_valid_intra_modes & 8) /* DIAG_DL */
1828         {
1829             i4_sad[DIAG_DL_I4x4] = 0;
1830             i4_cost[DIAG_DL_I4x4] = 0;
1831             pu1_src_temp = pu1_src;
1832             pu1_pred_val = u1_pred_vals_diag_121 + 5;
1833 
1834             USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1835             pu1_src_temp += src_strd;
1836             USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1837             pu1_src_temp += src_strd;
1838             USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1839             pu1_src_temp += src_strd;
1840             USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1841             pu1_src_temp += src_strd;
1842             i4_cost[DIAG_DL_I4x4] =
1843                 i4_sad[DIAG_DL_I4x4] +
1844                 ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda : 4 * u4_lambda);
1845         }
1846 
1847         if(u4_valid_intra_modes & 16) /* DIAG_DR */
1848         {
1849             i4_sad[DIAG_DR_I4x4] = 0;
1850             i4_cost[DIAG_DR_I4x4] = 0;
1851             pu1_src_temp = pu1_src;
1852             pu1_pred_val = u1_pred_vals_diag_121 + 3;
1853 
1854             USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1855             pu1_src_temp += src_strd;
1856             USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1857             pu1_src_temp += src_strd;
1858             USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1859             pu1_src_temp += src_strd;
1860             USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1861             pu1_src_temp += src_strd;
1862             i4_cost[DIAG_DR_I4x4] =
1863                 i4_sad[DIAG_DR_I4x4] +
1864                 ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : 4 * u4_lambda);
1865         }
1866 
1867         if(u4_valid_intra_modes & 32) /* VERT_R mode valid ????*/
1868         {
1869             i4_sad[VERT_R_I4x4] = 0;
1870 
1871             pu1_src_temp = pu1_src;
1872             u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1873             memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1874             u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1875             memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1876 
1877             pu1_pred_val = u1_pred_vals_diag_11 + 4;
1878             USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1879             pu1_pred_val = u1_pred_vals_diag_121 + 3;
1880             pu1_src_temp += src_strd;
1881             USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1882             pu1_src_temp += src_strd;
1883             USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1884             pu1_src_temp += src_strd;
1885             USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), i4_sad[VERT_R_I4x4]);
1886 
1887             i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] +
1888                                    ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda : 4 * u4_lambda);
1889         }
1890 
1891         if(u4_valid_intra_modes & 64) /* HORZ_D mode valid ????*/
1892         {
1893             i4_sad[HORZ_D_I4x4] = 0;
1894 
1895             pu1_src_temp = pu1_src;
1896             u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1897             memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1898             u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1899             u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1900             u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1901             u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1902             u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1903             u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1904 
1905             pu1_pred_val = u1_pred_vals_horz_d;
1906             USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1907             pu1_src_temp += src_strd;
1908             USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1909             pu1_src_temp += src_strd;
1910             USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1911             pu1_src_temp += src_strd;
1912             USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1913 
1914             i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] +
1915                                    ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : 4 * u4_lambda);
1916         }
1917 
1918         if(u4_valid_intra_modes & 128) /* VERT_L mode valid ????*/
1919         {
1920             i4_sad[VERT_L_I4x4] = 0;
1921             pu1_src_temp = pu1_src;
1922             pu1_pred_val = u1_pred_vals_diag_11 + 5;
1923             USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1924             pu1_src_temp += src_strd;
1925             pu1_pred_val = u1_pred_vals_diag_121 + 5;
1926             USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1927             pu1_src_temp += src_strd;
1928             pu1_pred_val = u1_pred_vals_diag_11 + 6;
1929             USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1930             pu1_src_temp += src_strd;
1931             pu1_pred_val = u1_pred_vals_diag_121 + 6;
1932             USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1933 
1934             i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] +
1935                                    ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda : 4 * u4_lambda);
1936         }
1937 
1938         if(u4_valid_intra_modes & 256) /* HORZ_U mode valid ????*/
1939         {
1940             i4_sad[HORZ_U_I4x4] = 0;
1941             pu1_src_temp = pu1_src;
1942             u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1943             u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1944             u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1945             u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1946             u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1947             u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1948 
1949             memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
1950 
1951             pu1_pred_val = u1_pred_vals_horz_u;
1952             USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
1953             pu1_src_temp += src_strd;
1954             USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
1955             pu1_src_temp += src_strd;
1956             USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
1957             pu1_src_temp += src_strd;
1958             USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
1959 
1960             i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] +
1961                                    ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda : 4 * u4_lambda);
1962         }
1963 
1964         i4_min_cost =
1965             MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
1966                  MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
1967     }
1968     else
1969     {
1970         /* Only first three modes valid */
1971         i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
1972     }
1973 
1974     *pu4_sadmin = i4_min_cost;
1975 
1976     if(i4_min_cost == i4_cost[0])
1977     {
1978         *u4_intra_mode = VERT_I4x4;
1979         pu1_pred_val = pu1_ngbr_pels + 5;
1980         memcpy(pu1_dst, (pu1_pred_val), 4);
1981         pu1_dst += dst_strd;
1982         memcpy(pu1_dst, (pu1_pred_val), 4);
1983         pu1_dst += dst_strd;
1984         memcpy(pu1_dst, (pu1_pred_val), 4);
1985         pu1_dst += dst_strd;
1986         memcpy(pu1_dst, (pu1_pred_val), 4);
1987     }
1988     else if(i4_min_cost == i4_cost[1])
1989     {
1990         *u4_intra_mode = HORZ_I4x4;
1991         memset(pu1_dst, pu1_ngbr_pels[3], 4);
1992         pu1_dst += dst_strd;
1993         memset(pu1_dst, pu1_ngbr_pels[2], 4);
1994         pu1_dst += dst_strd;
1995         memset(pu1_dst, pu1_ngbr_pels[1], 4);
1996         pu1_dst += dst_strd;
1997         memset(pu1_dst, pu1_ngbr_pels[0], 4);
1998     }
1999     else if(i4_min_cost == i4_cost[2])
2000     {
2001         *u4_intra_mode = DC_I4x4;
2002         memset(pu1_dst, u4_dcval, 4);
2003         pu1_dst += dst_strd;
2004         memset(pu1_dst, u4_dcval, 4);
2005         pu1_dst += dst_strd;
2006         memset(pu1_dst, u4_dcval, 4);
2007         pu1_dst += dst_strd;
2008         memset(pu1_dst, u4_dcval, 4);
2009     }
2010 
2011     else if(i4_min_cost == i4_cost[3])
2012     {
2013         *u4_intra_mode = DIAG_DL_I4x4;
2014         pu1_pred_val = u1_pred_vals_diag_121 + 5;
2015         memcpy(pu1_dst, (pu1_pred_val), 4);
2016         pu1_dst += dst_strd;
2017         memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2018         pu1_dst += dst_strd;
2019         memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2020         pu1_dst += dst_strd;
2021         memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2022     }
2023     else if(i4_min_cost == i4_cost[4])
2024     {
2025         *u4_intra_mode = DIAG_DR_I4x4;
2026         pu1_pred_val = u1_pred_vals_diag_121 + 3;
2027 
2028         memcpy(pu1_dst, (pu1_pred_val), 4);
2029         pu1_dst += dst_strd;
2030         memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2031         pu1_dst += dst_strd;
2032         memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2033         pu1_dst += dst_strd;
2034         memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2035     }
2036 
2037     else if(i4_min_cost == i4_cost[5])
2038     {
2039         *u4_intra_mode = VERT_R_I4x4;
2040         pu1_pred_val = u1_pred_vals_diag_11 + 4;
2041         memcpy(pu1_dst, (pu1_pred_val), 4);
2042         pu1_dst += dst_strd;
2043         pu1_pred_val = u1_pred_vals_diag_121 + 3;
2044         memcpy(pu1_dst, (pu1_pred_val), 4);
2045         pu1_dst += dst_strd;
2046         memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2047         pu1_dst += dst_strd;
2048         memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2049     }
2050     else if(i4_min_cost == i4_cost[6])
2051     {
2052         *u4_intra_mode = HORZ_D_I4x4;
2053         pu1_pred_val = u1_pred_vals_horz_d;
2054         memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2055         pu1_dst += dst_strd;
2056         memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2057         pu1_dst += dst_strd;
2058         memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2059         pu1_dst += dst_strd;
2060         memcpy(pu1_dst, (pu1_pred_val), 4);
2061         pu1_dst += dst_strd;
2062     }
2063     else if(i4_min_cost == i4_cost[7])
2064     {
2065         *u4_intra_mode = VERT_L_I4x4;
2066         pu1_pred_val = u1_pred_vals_diag_11 + 5;
2067         memcpy(pu1_dst, (pu1_pred_val), 4);
2068         pu1_dst += dst_strd;
2069         pu1_pred_val = u1_pred_vals_diag_121 + 5;
2070         memcpy(pu1_dst, (pu1_pred_val), 4);
2071         pu1_dst += dst_strd;
2072         pu1_pred_val = u1_pred_vals_diag_11 + 6;
2073         memcpy(pu1_dst, (pu1_pred_val), 4);
2074         pu1_dst += dst_strd;
2075         pu1_pred_val = u1_pred_vals_diag_121 + 6;
2076         memcpy(pu1_dst, (pu1_pred_val), 4);
2077     }
2078     else if(i4_min_cost == i4_cost[8])
2079     {
2080         *u4_intra_mode = HORZ_U_I4x4;
2081         pu1_pred_val = u1_pred_vals_horz_u;
2082         memcpy(pu1_dst, (pu1_pred_val), 4);
2083         pu1_dst += dst_strd;
2084         memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2085         pu1_dst += dst_strd;
2086         memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2087         pu1_dst += dst_strd;
2088         memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2089         pu1_dst += dst_strd;
2090     }
2091 
2092     return;
2093 }
2094 
2095 /**
2096 ******************************************************************************
2097 *
2098 * @brief:
2099 *  Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the
2100 *  prediction.
2101 *
2102 * @par Description
2103 *  This function evaluates the first three intra chroma modes, computes the
2104 *  corresponding SAD and returns the buffer predicted with the best mode.
2105 *
2106 * @param[in] pu1_src
2107 *  UWORD8 pointer to the source
2108 *
2109 * @param[in] pu1_ngbr_pels
2110 *  UWORD8 pointer to neighbouring pels
2111 *
2112 * @param[out] pu1_dst
2113 *  UWORD8 pointer to the destination
2114 *
2115 * @param[in] src_strd
2116 *  integer source stride
2117 *
2118 * @param[in] dst_strd
2119 *  integer destination stride
2120 *
2121 * @param[in] u4_n_avblty
2122 *  availability of neighbouring pixels
2123 *
2124 * @param[out] u4_intra_mode
2125 *  Pointer to the variable in which best mode is returned
2126 *
2127 * @param[in] pu4_sadmin
2128 *  Pointer to the variable in which minimum sad is returned
2129 *
2130 * @param[in] u4_valid_intra_modes
2131 *  Says what all modes are valid
2132 *
2133 * @return      none
2134 *
2135 ******************************************************************************
2136 */
isvce_evaluate_intra_chroma_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)2137 void isvce_evaluate_intra_chroma_modes(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
2138                                        UWORD32 src_strd, UWORD32 dst_strd, WORD32 u4_n_avblty,
2139                                        UWORD32 *u4_intra_mode, WORD32 *pu4_sadmin,
2140                                        UWORD32 u4_valid_intra_modes)
2141 {
2142     UWORD8 *pu1_neighbour;
2143     UWORD8 *pu1_src_temp = pu1_src;
2144     UWORD8 left = 0, top = 0;
2145     WORD32 u4_dcval_u_l[2] = {0, 0}, /*sum left neighbours for 'U' ,two separate sets - sum of
2146                                         first four from top,and sum of four values from bottom */
2147         u4_dcval_u_t[2] = {0, 0};    /*sum top neighbours for 'U'*/
2148 
2149     WORD32 u4_dcval_v_l[2] = {0, 0}, /*sum left neighbours for 'V'*/
2150         u4_dcval_v_t[2] = {0, 0};    /*sum top neighbours for 'V'*/
2151 
2152     WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
2153                            i4_min_sad = INT_MAX;
2154     UWORD8 val_u, val_v;
2155 
2156     WORD32 u4_dc_val[2][2][2]; /*  -----------
2157                                    |    |    |  Chroma can have four
2158                                    | 00 | 01 |  separate dc value...
2159                                    -----------  u4_dc_val corresponds to this dc
2160                                   values |    |    |  with u4_dc_val[2][2][U] and
2161                                   u4_dc_val[2][2][V] | 10 | 11 |
2162                                    -----------                */
2163     left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2164     top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2165 
2166     /*Evaluating HORZ*/
2167     if(left) /* Ifleft available*/
2168     {
2169         i4_sad_horz = 0;
2170 
2171         for(i = 0; i < 8; i++)
2172         {
2173             val_v = pu1_ngbr_pels[15 - 2 * i];
2174             val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2175             row = i / 4;
2176             u4_dcval_u_l[row] += val_u;
2177             u4_dcval_v_l[row] += val_v;
2178             for(j = 0; j < 8; j++)
2179             {
2180                 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for HORZ mode*/
2181                 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2182             }
2183 
2184             pu1_src_temp += src_strd;
2185         }
2186         u4_dcval_u_l[0] += 2;
2187         u4_dcval_u_l[1] += 2;
2188         u4_dcval_v_l[0] += 2;
2189         u4_dcval_v_l[1] += 2;
2190     }
2191 
2192     /*Evaluating VERT**/
2193     pu1_src_temp = pu1_src;
2194     if(top) /* top available*/
2195     {
2196         i4_sad_vert = 0;
2197 
2198         for(i = 0; i < 8; i++)
2199         {
2200             col = i / 4;
2201 
2202             val_u = pu1_ngbr_pels[18 + i * 2];
2203             val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2204             u4_dcval_u_t[col] += val_u;
2205             u4_dcval_v_t[col] += val_v;
2206 
2207             for(j = 0; j < 16; j++)
2208             {
2209                 i4_sad_vert +=
2210                     ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]); /* Finding SAD for VERT mode*/
2211             }
2212             pu1_src_temp += src_strd;
2213         }
2214         u4_dcval_u_t[0] += 2;
2215         u4_dcval_u_t[1] += 2;
2216         u4_dcval_v_t[0] += 2;
2217         u4_dcval_v_t[1] += 2;
2218     }
2219 
2220     /* computing DC value*/
2221     /* Equation  8-128 in spec*/
2222     u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2223     u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2224     u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2225     u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2226 
2227     if(top)
2228     {
2229         /* Equation  8-132 in spec*/
2230         u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2231         u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2232     }
2233     else
2234     {
2235         u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2236         u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2237     }
2238 
2239     if(left)
2240     {
2241         u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2242         u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2243     }
2244     else
2245     {
2246         u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2247         u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2248     }
2249 
2250     if(!(left || top))
2251     {
2252         /*none available*/
2253         u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2254             u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2255     }
2256 
2257     /* Evaluating DC */
2258     pu1_src_temp = pu1_src;
2259     i4_sad_dc = 0;
2260     for(i = 0; i < 8; i++)
2261     {
2262         for(j = 0; j < 8; j++)
2263         {
2264             col = j / 4;
2265             row = i / 4;
2266             val_u = u4_dc_val[row][col][0];
2267             val_v = u4_dc_val[row][col][1];
2268 
2269             i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]); /* Finding SAD for DC mode*/
2270             i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2271         }
2272         pu1_src_temp += src_strd;
2273     }
2274 
2275     if((u4_valid_intra_modes & 01) == 0) /* If DC is disabled*/
2276         i4_sad_dc = INT_MAX;
2277     if((u4_valid_intra_modes & 02) == 0) /* If HORZ is disabled*/
2278         i4_sad_horz = INT_MAX;
2279     if((u4_valid_intra_modes & 04) == 0) /* If VERT is disabled*/
2280         i4_sad_vert = INT_MAX;
2281 
2282     i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2283 
2284     /* Finding Minimum sad and doing corresponding prediction*/
2285     if(i4_min_sad < *pu4_sadmin)
2286     {
2287         *pu4_sadmin = i4_min_sad;
2288 
2289         if(i4_min_sad == i4_sad_dc)
2290         {
2291             *u4_intra_mode = DC_CH_I8x8;
2292             for(i = 0; i < 8; i++)
2293             {
2294                 for(j = 0; j < 8; j++)
2295                 {
2296                     col = j / 4;
2297                     row = i / 4;
2298 
2299                     pu1_dst[2 * j] = u4_dc_val[row][col][0];
2300                     pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2301                 }
2302                 pu1_dst += dst_strd;
2303             }
2304         }
2305         else if(i4_min_sad == i4_sad_horz)
2306         {
2307             *u4_intra_mode = HORZ_CH_I8x8;
2308             for(j = 0; j < 8; j++)
2309             {
2310                 val_v = pu1_ngbr_pels[15 - 2 * j];
2311                 val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2312 
2313                 for(i = 0; i < 8; i++)
2314                 {
2315                     pu1_dst[2 * i] = val_u;
2316                     pu1_dst[2 * i + 1] = val_v;
2317                 }
2318                 pu1_dst += dst_strd;
2319             }
2320         }
2321         else
2322         {
2323             *u4_intra_mode = VERT_CH_I8x8;
2324             pu1_neighbour = pu1_ngbr_pels + 18;
2325             for(j = 0; j < 8; j++)
2326             {
2327                 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2328                 pu1_dst += dst_strd;
2329             }
2330         }
2331     }
2332 
2333     return;
2334 }
2335