xref: /aosp_15_r20/external/libavc/encoder/ih264e_utils.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_utils.c
25 *
26 * @brief
27 *  Contains miscellaneous utility functions used by the encoder
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - ih264e_input_queue_update
34 *  - ih264e_get_min_level
35 *  - ih264e_get_lvl_idx
36 *  - ih264e_get_dpb_size
37 *  - ih264e_get_total_pic_buf_size
38 *  - ih264e_get_pic_mv_bank_size
39 *  - ih264e_pic_buf_mgr_add_bufs
40 *  - ih264e_mv_buf_mgr_add_bufs
41 *  - ih264e_init_quant_params
42 *  - ih264e_init_air_map
43 *  - ih264e_codec_init
44 *  - ih264e_pic_init
45 *
46 * @remarks
47 *  none
48 *
49 *******************************************************************************
50 */
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System Include Files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <assert.h>
62 
63 /* User Include Files */
64 #include "ih264e_config.h"
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ithread.h"
69 
70 #include "ih264_debug.h"
71 #include "ih264_macros.h"
72 #include "ih264_error.h"
73 #include "ih264_defs.h"
74 #include "ih264_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_structs.h"
77 #include "ih264_size_defs.h"
78 #include "ih264_trans_quant_itrans_iquant.h"
79 #include "ih264_inter_pred_filters.h"
80 #include "ih264_intra_pred_filters.h"
81 #include "ih264_deblk_edge_filters.h"
82 #include "ih264_common_tables.h"
83 #include "ih264_trans_data.h"
84 #include "ih264_cavlc_tables.h"
85 #include "ih264_cabac_tables.h"
86 #include "ih264_buf_mgr.h"
87 #include "ih264_list.h"
88 #include "ih264_dpb_mgr.h"
89 
90 #include "ime_defs.h"
91 #include "ime_distortion_metrics.h"
92 #include "ime_structs.h"
93 #include "ime.h"
94 #include "ime_statistics.h"
95 
96 #include "irc_mem_req_and_acq.h"
97 #include "irc_cntrl_param.h"
98 #include "irc_frame_info_collector.h"
99 #include "irc_rate_control_api.h"
100 
101 #include "psnr.h"
102 
103 #include "ih264e.h"
104 #include "ih264e_error.h"
105 #include "ih264e_version.h"
106 #include "ih264e_defs.h"
107 #include "ih264e_globals.h"
108 #include "ih264e_time_stamp.h"
109 #include "ih264e_modify_frm_rate.h"
110 #include "ih264e_rate_control.h"
111 #include "ih264e_bitstream.h"
112 #include "ih264e_cabac_structs.h"
113 #include "ih264e_structs.h"
114 #include "ih264e_me.h"
115 #include "ih264e_utils.h"
116 #include "ih264e_core_coding.h"
117 #include "ih264e_encode_header.h"
118 #include "ih264e_cavlc.h"
119 #include "ih264e_cabac.h"
120 #include "ih264e_master.h"
121 #include "ih264e_process.h"
122 #include "ih264e_fmt_conv.h"
123 #include "ih264e_statistics.h"
124 #include "ih264e_trace.h"
125 
126 
127 /*****************************************************************************/
128 /* Function Definitions                                                      */
129 /*****************************************************************************/
130 
131 /**
132 *******************************************************************************
133 *
134 * @brief
135 *  Queues the current buffer, gets back a another buffer for encoding with
136 *  current picture type
137 *
138 * @par Description:
139 *  This function performs 3 distinct but related functions.
140 *  1) Maintains an input queue [Note the the term queue do not imply a first-in
141 *  first-out logic here] that queues input and dequeues them so that input
142 *  frames can be encoded at any predetermined encoding order
143 *  2) Uses RC library to decide which frame must be encoded in current pass
144 *  and which picture type it must be encoded to.
145 *  3) Uses RC library to decide the QP at which current frame has to be encoded
146 *  4) Determines if the current picture must be encoded or not based on PRE-ENC
147 *  skip
148 *
149 *  Input queue is used for storing input buffers till they are used for
150 *  encoding. This queue is maintained at ps_codec->as_inp_list. Whenever a
151 *  valid input comes, it is added to the end of queue. This same input is
152 *  added to RC queue using the identifier as ps_codec->i4_pic_cnt. Hence any
153 *  pic from RC can be located in the input queue easily.
154 *
155 *  The dequeue operation does not start till we have ps_codec->s_cfg.u4_max_num_bframes
156 *  frames in the queue. This is done in order to ensure that once output
157 *  starts we will have a constant stream of output with no gaps.
158 *
159 *  The output frame order is governed by RC library. When ever we dequeue a
160 *  buffer from RC library, it ensures that we will get them in encoding order
161 *  With the output of RC library, we can use the picture id to dequeue the
162 *  corresponding buffer from input queue and encode it.
163 *
164 *  Condition at the end of stream:
165 *  -------------------------------
166 *  At the last valid buffer from the app, we will get ps_ive_ip->u4_is_last
167 *  to be set. This will the given to lib when appropriate input buffer is
168 *  given to encoding.
169 *
170 *  Since we have to output is not in sync with input, we will have frames to
171 *  encode even after we receive the last valid input buffer. Hence we have to
172 *  make sure that we do not queue any new buffers once we get the flag [It may
173 *  mess up GOP ?]. This is achieved by setting ps_codec->i4_last_inp_buff_received
174 *  to act as a permanent marker for last frame received [This may not be needed,
175 *  because in our current app, all buffers after the last are marked as last.
176 *  But can we rely on that?] . Hence after this flag is set no new buffers are
177 *  queued.
178 *
179 * @param[in] ps_codec
180 *  Pointer to codec descriptor
181 *
182 * @param[in] ps_ive_ip
183 *  Current input buffer to the encoder
184 *
185 * @param[out] ps_inp
186 *  Buffer to be encoded in the current pass
187 *
188 * @returns
189 *  Flag indicating if we have a pre-enc skip or not
190 *
191 * @remarks
192 *  TODO (bpic) : The check for null and is last is redundant. Need to see if we
193 *  can remove it
194 *
195 *******************************************************************************
196 */
ih264e_input_queue_update(codec_t * ps_codec,ive_video_encode_ip_t * ps_ive_ip,inp_buf_t * ps_enc_buff)197 WORD32 ih264e_input_queue_update(codec_t *ps_codec,
198                                  ive_video_encode_ip_t *ps_ive_ip,
199                                  inp_buf_t *ps_enc_buff)
200 {
201 
202     inp_buf_t *ps_inp_buf;
203     picture_type_e e_pictype;
204     WORD32 i4_skip;
205     UWORD32 ctxt_sel, u4_pic_id, u4_pic_disp_id;
206     UWORD8 u1_frame_qp, i;
207     UWORD32 max_frame_bits = 0x7FFFFFFF;
208 
209     /*  Mark that the last input frame has been received */
210     if (ps_ive_ip->u4_is_last == 1)
211     {
212         ps_codec->i4_last_inp_buff_received = 1;
213     }
214 
215     if (ps_ive_ip->s_inp_buf.apv_bufs[0] == NULL
216                     && !ps_codec->i4_last_inp_buff_received)
217     {
218         ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL;
219         ps_enc_buff->u4_is_last = ps_ive_ip->u4_is_last;
220         ps_codec->i4_pic_cnt -= 1;
221         return 0;
222     }
223 
224     /***************************************************************************
225      * Check for pre enc skip
226      *   When src and target frame rates donot match, we skip some frames to
227      *   maintain the relation ship between them
228      **************************************************************************/
229     {
230         WORD32 skip_src;
231 
232         skip_src = ih264e_update_rc_framerates(
233                         ps_codec->s_rate_control.pps_rate_control_api,
234                         ps_codec->s_rate_control.pps_pd_frm_rate,
235                         ps_codec->s_rate_control.pps_time_stamp,
236                         ps_codec->s_rate_control.pps_frame_time);
237 
238         if (skip_src)
239         {
240             ps_enc_buff->u4_is_last = ps_ive_ip->u4_is_last;
241             ps_codec->i4_pic_cnt -= 1;
242             return 1;
243         }
244     }
245 
246     /***************************************************************************
247      * Queue the input to the queue
248      **************************************************************************/
249     ps_inp_buf = &(ps_codec->as_inp_list[ps_codec->i4_pic_cnt
250                                          % MAX_NUM_INP_FRAMES]);
251 
252     /* copy input info. to internal structure */
253     ps_inp_buf->s_raw_buf = ps_ive_ip->s_inp_buf;
254     ps_inp_buf->u4_timestamp_low = ps_ive_ip->u4_timestamp_low;
255     ps_inp_buf->u4_timestamp_high = ps_ive_ip->u4_timestamp_high;
256     ps_inp_buf->u4_is_last = ps_ive_ip->u4_is_last;
257     ps_inp_buf->pv_mb_info = ps_ive_ip->pv_mb_info;
258     ps_inp_buf->u4_mb_info_type = ps_ive_ip->u4_mb_info_type;
259     ps_inp_buf->pv_pic_info = ps_ive_ip->pv_pic_info;
260     ps_inp_buf->u4_pic_info_type = ps_ive_ip->u4_pic_info_type;
261 
262     ps_inp_buf->u1_sei_ccv_params_present_flag =
263                 ps_codec->s_cfg.s_sei.u1_sei_ccv_params_present_flag;
264     ps_inp_buf->s_sei_ccv = ps_codec->s_cfg.s_sei.s_sei_ccv_params;
265 
266     ps_inp_buf->u1_sei_sii_params_present_flag =
267         ps_codec->s_cfg.s_sei.u1_sei_sii_params_present_flag;
268     ps_inp_buf->s_sei_sii = ps_codec->s_cfg.s_sei.s_sei_sii_params;
269 
270     /***************************************************************************
271      * Now we should add the picture to RC stack here
272      **************************************************************************/
273     /*
274      * If an I frame has been requested, ask  RC to force it
275      * For IDR requests, we have to ask RC to force I and set IDR by our selves
276      * since RC Donot know about IDR. For forcing an IDR at dequeue stage we
277      * should record that an IDR has been requested some where. Hence we will
278      * store it in the u4_idr_inp_list at a position same as that of input frame
279      */
280     {
281         WORD32 i4_force_idr, i4_force_i;
282 
283         i4_force_idr = (ps_codec->force_curr_frame_type == IV_IDR_FRAME);
284         i4_force_idr |= !(ps_codec->i4_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval);
285 
286         i4_force_i = (ps_codec->force_curr_frame_type == IV_I_FRAME);
287 
288         ps_codec->i4_pending_idr_flag |= i4_force_idr;
289 
290         if ((ps_codec->i4_pic_cnt > 0) && (i4_force_idr || i4_force_i))
291         {
292             irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api);
293         }
294         ps_codec->force_curr_frame_type = IV_NA_FRAME;
295     }
296 
297     irc_add_picture_to_stack(ps_codec->s_rate_control.pps_rate_control_api,
298                              ps_codec->i4_pic_cnt);
299 
300 
301     /* Delay */
302     if (ps_codec->i4_pic_cnt < (WORD32)(ps_codec->s_cfg.u4_num_bframes))
303     {
304         ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL;
305         ps_enc_buff->u4_is_last = 0;
306         return 0;
307     }
308 
309     /***************************************************************************
310      * Get a new pic to encode
311      **************************************************************************/
312     /* Query the picture_type */
313     e_pictype = ih264e_rc_get_picture_details(
314                     ps_codec->s_rate_control.pps_rate_control_api, (WORD32 *)(&u4_pic_id),
315                     (WORD32 *)(&u4_pic_disp_id));
316 
317     switch (e_pictype)
318     {
319         case I_PIC:
320             ps_codec->pic_type = PIC_I;
321             break;
322         case P_PIC:
323             ps_codec->pic_type = PIC_P;
324             break;
325         case B_PIC:
326             ps_codec->pic_type = PIC_B;
327             break;
328         default:
329             ps_codec->pic_type = PIC_NA;
330             ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL;
331             return 0;
332     }
333 
334     /* Set IDR if it has been requested */
335     if (ps_codec->pic_type == PIC_I)
336     {
337         ps_codec->pic_type = ps_codec->i4_pending_idr_flag ?
338                                     PIC_IDR : ps_codec->pic_type;
339         ps_codec->i4_pending_idr_flag = 0;
340     }
341 
342     /* Get current frame Qp */
343     u1_frame_qp = (UWORD8)irc_get_frame_level_qp(
344                     ps_codec->s_rate_control.pps_rate_control_api, e_pictype,
345                     max_frame_bits);
346     ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp];
347 
348     /*
349      * copy the pic id to poc because the display order is assumed to be same
350      * as input order
351      */
352     ps_codec->i4_poc = u4_pic_id;
353 
354     /***************************************************************************
355      * Now retrieve the correct picture from the queue
356      **************************************************************************/
357     /* Mark the skip flag   */
358     i4_skip = 0;
359     ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
360     ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = i4_skip;
361 
362     /* Get a buffer to encode */
363     ps_inp_buf = &(ps_codec->as_inp_list[u4_pic_id % MAX_NUM_INP_FRAMES]);
364 
365     /* copy dequeued input to output */
366     ps_enc_buff->s_raw_buf = ps_inp_buf->s_raw_buf;
367     ps_enc_buff->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
368     ps_enc_buff->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
369     ps_enc_buff->u4_is_last = ps_inp_buf->u4_is_last;
370     ps_enc_buff->pv_mb_info = ps_inp_buf->pv_mb_info;
371     ps_enc_buff->u4_mb_info_type = ps_inp_buf->u4_mb_info_type;
372     ps_enc_buff->pv_pic_info = ps_inp_buf->pv_pic_info;
373     ps_enc_buff->u4_pic_info_type = ps_inp_buf->u4_pic_info_type;
374 
375     ps_enc_buff->u1_sei_ccv_params_present_flag = ps_inp_buf->u1_sei_ccv_params_present_flag;
376     ps_enc_buff->s_sei_ccv = ps_inp_buf->s_sei_ccv;
377     ps_enc_buff->u1_sei_sii_params_present_flag = ps_inp_buf->u1_sei_sii_params_present_flag;
378     ps_enc_buff->s_sei_sii = ps_inp_buf->s_sei_sii;
379 
380     /* Special case for encoding trailing B frames
381      *
382      * In encoding streams with B frames it may happen that we have a B frame
383      * at the end without a P/I frame after it. Hence when we are dequeing from
384      * the RC, it will return the P frame [next in display order but before in
385      * encoding order] first. Since the dequeue happens for an invalid frame we
386      * will get a frame with null buff and set u4_is_last. Hence lib with return
387      * last frame flag at this point and will stop encoding.
388      *
389      * Since for the last B frame, we does not have the forward ref frame
390      * it makes sense to force it into P.
391      *
392      * To solve this, in case the current frame is P and if the last frame flag
393      * is set, we need to see if there is and pending B frames. If there are any,
394      * we should just encode that picture as the current P frame and set
395      * that B frame as the last frame. Hence the encoder will terminate naturally
396      * once that B-frame is encoded after all the in between frames.
397      *
398      * Since we cannot touch RC stack directly, the option of actually swapping
399      * frames in RC is ruled out. We have to modify the as_inp_list to simulate
400      * such a behavior by RC. We can do that by
401      *  1) Search through as_inp_list to locate the largest u4_timestamp_low less
402      *     than current u4_timestamp_low. This will give us the last B frame before
403      *     the current P frame. Note that this will handle pre encode skip too since
404      *     queue happens after pre enc skip.
405      *  2) Swap the position in as_inp_list. Hence now the last B frame is
406      *     encoded as P frame. And the new last B frame will have u4_is_last
407      *     set so that encoder will end naturally once we reached that B frame
408      *     or any subsequent frame. Also the current GOP will have 1 less B frame
409      *     Since we are swapping, the poc will also be in-order.
410      *  3) In case we have an IPP stream, the result of our search will be an
411      *     I/P frame which is already encoded. Thus swap and encode will result
412      *     in encoding of duplicate frames. Hence to avoid this we will only
413      *     have this work around in case of u4_num_bframes > 0.
414      *
415      *     In case we have forced an I/IDR frame In between this P frame and
416      *     the last B frame -> This cannot happen as the current P frame is
417      *     supposed to have u4_is_last set. Thus forcing an I/ IDR after this
418      *     is illogical.
419      *
420      *     In cae if we have forced an I such that the frame just before last frame
421      *     in is I/P -> This case will never arise. Since we have a closed GOP now,
422      *     once we force an I, the gop gets reset, hence there will be a B between
423      *     I/P and I/P.
424      */
425     if (ps_enc_buff->u4_is_last && (ps_codec->pic_type == PIC_P)
426                     && ps_codec->s_cfg.u4_num_bframes)
427     {
428         WORD32 cntr;
429         WORD32 lst_bframe = -1;
430         UWORD32 u4_timestamp_low = 0;
431         UWORD32 u4_timestamp_high = 0;
432         inp_buf_t *ps_swap_buff, *ps_inp_list;
433 
434         ps_inp_list = &ps_codec->as_inp_list[0];
435 
436         /* Now search the inp list for highest timestamp */
437         for(cntr = 0; cntr < MAX_NUM_INP_FRAMES; cntr++)
438         {
439             if(ps_inp_list[cntr].s_raw_buf.apv_bufs[0] != NULL)
440             {
441                 if ((ps_inp_list[cntr].u4_timestamp_high  > u4_timestamp_high) ||
442                     (ps_inp_list[cntr].u4_timestamp_high  == u4_timestamp_high &&
443                      ps_inp_list[cntr].u4_timestamp_low  > u4_timestamp_low))
444                 {
445                     u4_timestamp_low = ps_inp_list[cntr].u4_timestamp_low;
446                     u4_timestamp_high = ps_inp_list[cntr].u4_timestamp_high;
447                     lst_bframe = cntr;
448                 }
449             }
450         }
451 
452         if(lst_bframe != -1)
453         {
454             ps_swap_buff = &(ps_codec->as_inp_list[lst_bframe]);
455 
456             /* copy the last B buffer to output */
457             *ps_enc_buff = *ps_swap_buff;
458 
459             /* Store the current buf into the queue in place of last B buf */
460             *ps_swap_buff = *ps_inp_buf;
461         }
462     }
463 
464     /* The buffer in the queue is set to NULL to specify that encoding is done for that frame */
465     for(i = 0; i < 3; i++)
466     {
467         ps_inp_buf->s_raw_buf.apv_bufs[i] = NULL;
468     }
469 
470     /* Return the buffer status */
471     return (0);
472 }
473 
474 /**
475 *******************************************************************************
476 *
477 * @brief
478 *  Used to get minimum level index for a given picture size
479 *
480 * @par Description:
481 *  Gets the minimum level index and then gets corresponding level.
482 *  Also used to ignore invalid levels like 2.3, 3.3 etc
483 *
484 * @param[in] level
485 *  Level of the stream
486 *
487 * @returns  Level index for a given level
488 *
489 * @remarks
490 *
491 *******************************************************************************
492 */
ih264e_get_min_level(WORD32 wd,WORD32 ht)493 WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht)
494 {
495     WORD32 lvl_idx = MAX_LEVEL, i;
496     WORD32 pic_size = wd * ht;
497     WORD32 max = MAX(wd, ht);
498 
499     for (i = 0; i < MAX_LEVEL; i++)
500     {
501         if ((pic_size <= gai4_ih264_max_luma_pic_size[i]) &&
502             (max <= gai4_ih264_max_wd_ht[i]))
503         {
504             lvl_idx = i;
505             break;
506         }
507     }
508     return gai4_ih264_levels[lvl_idx];
509 }
510 
511 /**
512 *******************************************************************************
513 *
514 * @brief
515 *  Used to get level index for a given level
516 *
517 * @par Description:
518 *  Converts from level_idc (which is multiplied by 30) to an index that can be
519 *  used as a lookup. Also used to ignore invalid levels like 2.2 , 3.2 etc
520 *
521 * @param[in] level
522 *  Level of the stream
523 *
524 * @returns  Level index for a given level
525 *
526 * @remarks
527 *
528 *******************************************************************************
529 */
ih264e_get_lvl_idx(WORD32 level)530 WORD32 ih264e_get_lvl_idx(WORD32 level)
531 {
532     WORD32 lvl_idx = 0;
533 
534     if (level < IH264_LEVEL_11)
535     {
536         lvl_idx = 0;
537     }
538     else if (level < IH264_LEVEL_12)
539     {
540         lvl_idx = 1;
541     }
542     else if (level < IH264_LEVEL_13)
543     {
544         lvl_idx = 2;
545     }
546     else if (level < IH264_LEVEL_20)
547     {
548         lvl_idx = 3;
549     }
550     else if (level < IH264_LEVEL_21)
551     {
552         lvl_idx = 4;
553     }
554     else if (level < IH264_LEVEL_22)
555     {
556         lvl_idx = 5;
557     }
558     else if (level < IH264_LEVEL_30)
559     {
560         lvl_idx = 6;
561     }
562     else if (level < IH264_LEVEL_31)
563     {
564         lvl_idx = 7;
565     }
566     else if (level < IH264_LEVEL_32)
567     {
568         lvl_idx = 8;
569     }
570     else if (level < IH264_LEVEL_40)
571     {
572         lvl_idx = 9;
573     }
574     else if (level < IH264_LEVEL_41)
575     {
576         lvl_idx = 10;
577     }
578     else if (level < IH264_LEVEL_42)
579     {
580         lvl_idx = 11;
581     }
582     else if (level < IH264_LEVEL_50)
583     {
584         lvl_idx = 12;
585     }
586     else if (level < IH264_LEVEL_51)
587     {
588         lvl_idx = 13;
589     }
590     else
591     {
592         lvl_idx = 14;
593     }
594 
595     return (lvl_idx);
596 }
597 
598 /**
599 *******************************************************************************
600 *
601 * @brief returns maximum number of pictures allowed in dpb for a given level
602 *
603 * @par Description:
604 *  For given width, height and level, number of pictures allowed in decoder
605 *  picture buffer is computed as per Annex A.3.1
606 *
607 * @param[in] level
608 *  level of the bit-stream
609 *
610 * @param[in] pic_size
611 *  width * height
612 *
613 * @returns  Number of buffers in DPB
614 *
615 * @remarks
616 *  From annexure A.3.1 of H264 specification,
617 *  max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
618 *  Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
619 *  MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
620 *  presented in the look up table gas_ih264_lvl_tbl is in units of 512
621 *  bytes. Hence the expression is modified accordingly.
622 *
623 *******************************************************************************
624 */
ih264e_get_dpb_size(WORD32 level,WORD32 pic_size)625 WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size)
626 {
627     /* dpb size */
628     WORD32 max_dpb_size_bytes = 0;
629 
630     /* dec frame buffering */
631     WORD32 max_dpb_size_frames = 0;
632 
633     /* temp var */
634     WORD32 i;
635 
636     /* determine max luma samples */
637     for (i = 0; i < 16; i++)
638         if (level == (WORD32)gas_ih264_lvl_tbl[i].u4_level_idc)
639             max_dpb_size_bytes = gas_ih264_lvl_tbl[i].u4_max_dpb_size;
640 
641     /* from Annexure A.3.1 h264 specification */
642     max_dpb_size_frames =
643                     MIN( 1024 * max_dpb_size_bytes / ( pic_size * 3 ), MAX_DPB_SIZE );
644 
645     return max_dpb_size_frames;
646 }
647 
648 /**
649 *******************************************************************************
650 *
651 * @brief
652 *  Used to get reference picture buffer size for a given level and
653 *  and padding used
654 *
655 * @par Description:
656 *  Used to get reference picture buffer size for a given level and padding used
657 *  Each picture is padded on all four sides
658 *
659 * @param[in] pic_size
660 *  Number of luma samples (Width * Height)
661 *
662 * @param[in] level
663 *  Level
664 *
665 * @param[in] horz_pad
666 *  Total padding used in horizontal direction
667 *
668 * @param[in] vert_pad
669 *  Total padding used in vertical direction
670 *
671 * @returns  Total picture buffer size
672 *
673 * @remarks
674 *
675 *******************************************************************************
676 */
ih264e_get_total_pic_buf_size(WORD32 pic_size,WORD32 level,WORD32 horz_pad,WORD32 vert_pad,WORD32 num_ref_frames,WORD32 num_reorder_frames)677 WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size,
678                                      WORD32 level,
679                                      WORD32 horz_pad,
680                                      WORD32 vert_pad,
681                                      WORD32 num_ref_frames,
682                                      WORD32 num_reorder_frames)
683 {
684     WORD32 size;
685     WORD32 num_luma_samples;
686     WORD32 lvl_idx;
687     WORD32 max_wd, min_ht;
688     WORD32 num_samples;
689     WORD32 max_num_bufs;
690     WORD32 pad = MAX(horz_pad, vert_pad);
691 
692     /*
693      * If num_ref_frames and num_reorder_frmaes is specified
694      * Use minimum value
695      */
696     max_num_bufs = (num_ref_frames + num_reorder_frames + MAX_CTXT_SETS);
697 
698     /* Get level index */
699     lvl_idx = ih264e_get_lvl_idx(level);
700 
701     /* Maximum number of luma samples in a picture at given level */
702     num_luma_samples = gai4_ih264_max_luma_pic_size[lvl_idx];
703     num_luma_samples = MAX(num_luma_samples, pic_size);
704 
705     /* Account for chroma */
706     num_samples = num_luma_samples * 3 / 2;
707 
708     /* Maximum width of luma samples in a picture at given level */
709     max_wd = gai4_ih264_max_wd_ht[lvl_idx];
710 
711     /* Minimum height of luma samples in a picture at given level */
712     min_ht = gai4_ih264_min_wd_ht[lvl_idx];
713 
714     /* Allocation is required for
715      * (Wd + horz_pad) * (Ht + vert_pad) * (2 * max_dpb_size + 1)
716      *
717      * Above expanded as
718      * ((Wd * Ht) + (horz_pad * vert_pad) + Wd * vert_pad + Ht * horz_pad) * (2 * max_dpb_size + 1)
719      * (Wd * Ht) * (2 * max_dpb_size + 1) + ((horz_pad * vert_pad) + Wd * vert_pad + Ht * horz_pad) * (2 * max_dpb_size + 1)
720      * Now  max_dpb_size increases with smaller Wd and Ht, but Wd * ht * max_dpb_size will still be lesser or equal to max_wd * max_ht * dpb_size
721      *
722      * In the above equation (Wd * Ht) * (2 * max_dpb_size + 1) is accounted by using num_samples * (2 * max_dpb_size + 1) below
723      *
724      * For the padded area use MAX(horz_pad, vert_pad) as pad
725      * ((pad * pad) + pad * (Wd + Ht)) * (2 * max_dpb_size + 1) has to accounted from the above for padding
726      *
727      * Since Width and Height can change worst Wd + Ht is when One of the dimensions is max and other is min
728      * So use max_wd and min_ht
729      */
730 
731     /* Number of bytes in reference pictures */
732     size = num_samples * max_num_bufs;
733 
734     /* Account for padding area */
735     size += ((pad * pad) + pad * (max_wd + min_ht)) * 3 / 2 * max_num_bufs;
736 
737     return size;
738 }
739 
740 /**
741 *******************************************************************************
742 *
743 * @brief Returns MV bank buffer size for a given number of luma samples
744 *
745 * @par Description:
746 *  For given number of luma samples  one MV bank size is computed.
747 *  Each MV bank includes pu_map and enc_pu_t for all the min PUs(4x4) in a picture
748 *
749 * @param[in] num_luma_samples
750 *  Max number of luma pixels in the frame
751 *
752 * @returns  Total MV Bank size
753 *
754 * @remarks
755 *
756 *******************************************************************************
757 */
ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)758 WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
759 {
760     /* mv bank buffer size */
761     WORD32 mv_bank_size = 0;
762 
763     /* number of sub mb partitions possible */
764     WORD32 num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
765 
766     /* number of mbs */
767     WORD32 num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
768 
769     /* Size for storing enc_pu_t start index each MB */
770     /* One extra entry is needed to compute number of PUs in the last MB */
771     mv_bank_size += num_mb * sizeof(WORD32);
772 
773     /* Size for pu_map */
774     mv_bank_size += ALIGN4(num_pu);
775 
776     /* Size for storing enc_pu_t for each PU */
777     mv_bank_size += ALIGN4(num_pu * sizeof(enc_pu_t));
778 
779     return mv_bank_size;
780 }
781 
782 /**
783 *******************************************************************************
784 *
785 * @brief
786 *  Function to initialize ps_pic_buf structs add pic buffers to
787 *  buffer manager in case of non-shared mode
788 *
789 * @par Description:
790 *  Function to initialize ps_pic_buf structs add pic buffers to
791 *  buffer manager in case of non-shared mode
792 *  To be called once per stream or for every reset
793 *
794 * @param[in] ps_codec
795 *  Pointer to codec context
796 *
797 * @returns  error status
798 *
799 * @remarks
800 *
801 *******************************************************************************
802 */
ih264e_pic_buf_mgr_add_bufs(codec_t * ps_codec)803 IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec)
804 {
805     /* error status */
806     IH264E_ERROR_T ret = IH264E_SUCCESS;
807 
808     /* max ref buffer cnt */
809     WORD32 max_num_bufs = ps_codec->i4_ref_buf_cnt;
810 
811     /* total size for pic buffers */
812     WORD32 pic_buf_size_allocated = ps_codec->i4_total_pic_buf_size
813                     - BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
814 
815     /* temp var */
816     UWORD8 *pu1_buf = (UWORD8 *) ps_codec->ps_pic_buf;
817     pic_buf_t *ps_pic_buf = (pic_buf_t *) ps_codec->ps_pic_buf;
818     WORD32 i;
819 
820     pu1_buf += BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
821 
822     /* In case of non-shared mode, add picture buffers to buffer manager
823      * In case of shared mode, buffers are added in the run-time
824      */
825     {
826         WORD32 buf_ret;
827 
828         WORD32 luma_samples = (ps_codec->i4_rec_strd)
829                         * (ps_codec->s_cfg.u4_ht + PAD_HT);
830 
831         WORD32 chroma_samples = luma_samples >> 1;
832 
833         /* Try and add as many buffers as possible for the memory that is allocated */
834         /* If the number of buffers that can be added is less than max_num_bufs
835          * return with an error */
836         for (i = 0; i < max_num_bufs; i++)
837         {
838             pic_buf_size_allocated -= (luma_samples + chroma_samples);
839 
840             if (pic_buf_size_allocated < 0)
841             {
842                 return IH264E_INSUFFICIENT_MEM_PICBUF;
843             }
844 
845             ps_pic_buf->pu1_luma = pu1_buf + ps_codec->i4_rec_strd * PAD_TOP
846                             + PAD_LEFT;
847             pu1_buf += luma_samples;
848 
849             ps_pic_buf->pu1_chroma = pu1_buf
850                             + ps_codec->i4_rec_strd * (PAD_TOP / 2)+ PAD_LEFT;
851             pu1_buf += chroma_samples;
852 
853             buf_ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
854                                         ps_pic_buf, i);
855 
856             if (0 != buf_ret)
857             {
858                 return IH264E_BUF_MGR_ERROR;
859             }
860             pu1_buf += (HPEL_PLANES_CNT - 1) * (chroma_samples + luma_samples);
861             ps_pic_buf++;
862         }
863     }
864 
865     return ret;
866 }
867 
868 /**
869 *******************************************************************************
870 *
871 * @brief Function to add buffers to MV Bank buffer manager
872 *
873 * @par Description:
874 *  Function to add buffers to MV Bank buffer manager.  To be called once per
875 *  stream or for every reset
876 *
877 * @param[in] ps_codec
878 *  Pointer to codec context
879 *
880 * @returns  error status
881 *
882 * @remarks
883 *
884 *******************************************************************************
885 */
ih264e_mv_buf_mgr_add_bufs(codec_t * ps_codec)886 IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
887 {
888     /* error status */
889     IH264E_ERROR_T error_status = IH264E_SUCCESS;
890     IH264_ERROR_T ret;
891 
892     /* max dpb size in frames */
893     WORD32 max_dpb_size = 0;
894 
895     /* mv bank size for the entire dpb */
896     WORD32 mv_bank_size_allocated = 0;
897 
898     /* mv bank size per pic */
899     WORD32 pic_mv_bank_size = 0;
900 
901     /* mv buffer ptr */
902     mv_buf_t *ps_mv_buf = NULL;
903 
904     /* num of luma samples */
905     WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
906                             * ALIGN16(ps_codec->s_cfg.u4_ht);
907 
908     /* number of mb's & frame partitions */
909     WORD32 num_pu, num_mb;
910 
911     /* temp var */
912     UWORD8 *pu1_buf = NULL;
913     WORD32 i;
914 
915     /* Compute the number of MB Bank buffers needed */
916     max_dpb_size = ps_codec->i4_ref_buf_cnt;
917 
918     /* allocate memory for mv buffer array */
919     ps_codec->ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
920     pu1_buf = ps_codec->pv_mv_bank_buf_base;
921     pu1_buf += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
922 
923     /********************************************************************/
924     /* allocate memory for individual elements of mv buffer ptr         */
925     /********************************************************************/
926     mv_bank_size_allocated = ps_codec->i4_total_mv_bank_size
927                     - (BUF_MGR_MAX_CNT * sizeof(mv_buf_t));
928 
929     /* compute MV bank size per picture */
930     pic_mv_bank_size = ih264e_get_pic_mv_bank_size(num_luma_samples);
931 
932     num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
933     num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
934     i = 0;
935     ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
936 
937     while (i < max_dpb_size)
938     {
939         mv_bank_size_allocated -= pic_mv_bank_size;
940 
941         if (mv_bank_size_allocated < 0)
942         {
943             return IH264E_INSUFFICIENT_MEM_MVBANK;
944         }
945 
946         ps_mv_buf->pu4_mb_pu_cnt = (UWORD32 *) pu1_buf;
947         pu1_buf += num_mb * sizeof(WORD32);
948 
949         ps_mv_buf->pu1_pic_pu_map = pu1_buf;
950         pu1_buf += ALIGN4(num_pu);
951 
952         ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf);
953         pu1_buf += ALIGN4(num_pu * sizeof(enc_pu_t));
954 
955         ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
956                                 ps_mv_buf, i);
957 
958         if (IH264_SUCCESS != ret)
959         {
960             return IH264E_BUF_MGR_ERROR;
961         }
962 
963         ps_mv_buf++;
964         i++;
965     }
966 
967     return error_status;
968 }
969 
970 /**
971 *******************************************************************************
972 *
973 * @brief Function to initialize quant params structure
974 *
975 * @par Description:
976 *  The forward quantization modules depends on qp/6, qp mod 6, forward scale
977 *  matrix, forward threshold matrix, weight list. The inverse quantization
978 *  modules depends on qp/6, qp mod 6, inverse scale matrix, weight list.
979 *  These params are initialized in this function.
980 *
981 * @param[in] ps_proc
982 *  pointer to process context
983 *
984 * @param[in] qp
985 *  quantization parameter
986 *
987 * @returns none
988 *
989 * @remarks
990 *
991 *******************************************************************************
992 */
ih264e_init_quant_params(process_ctxt_t * ps_proc,int qp)993 void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp)
994 {
995     /* quant params */
996     quant_params_t *ps_qp_params;
997 
998     /* ptr to forward quant threshold matrix */
999     const UWORD16 *pu2_thres_mat = NULL;
1000 
1001     /* ptr to forward scale matrix */
1002     const UWORD16 *pu2_scale_mat = gu2_quant_scale_matrix_4x4;
1003 
1004     /* ptr to inverse scale matrix */
1005     const UWORD16 *pu2_iscale_mat = gau2_ih264_iquant_scale_matrix_4x4;
1006 
1007     /* temp var */
1008     UWORD32 u4_qp[3], u4_qp_div6, u4_qp_mod6;
1009     COMPONENT_TYPE plane;
1010     WORD32 i;
1011     UWORD32 u4_satdq_t;
1012     const UWORD16 *pu2_smat;
1013 
1014     /********************************************************************/
1015     /* init quant params for all planes Y, U and V                      */
1016     /********************************************************************/
1017     /* luma qp */
1018     u4_qp[Y] = qp;
1019 
1020     /* chroma qp
1021      * TODO_LATER : just in case if the chroma planes use different qp's this
1022      * needs to be corrected accordingly.
1023      */
1024     u4_qp[U] = gu1_qpc_fqpi[qp];
1025     u4_qp[V] = gu1_qpc_fqpi[qp];
1026 
1027     plane = Y;
1028     while (plane <= V)
1029     {
1030         u4_qp_div6 = (u4_qp[plane] / 6);
1031         u4_qp_mod6 = (u4_qp[plane] % 6);
1032 
1033         ps_qp_params = ps_proc->ps_qp_params[plane];
1034 
1035         /* mb qp */
1036         ps_qp_params->u1_mb_qp = u4_qp[plane];
1037 
1038         /* mb qp / 6 */
1039         ps_qp_params->u1_qp_div = u4_qp_div6;
1040 
1041         /* mb qp % 6 */
1042         ps_qp_params->u1_qp_rem = u4_qp_mod6;
1043 
1044         /* QP bits */
1045         ps_qp_params->u1_qbits = QP_BITS_h264_4x4 + u4_qp_div6;
1046 
1047         /* forward scale matrix */
1048         ps_qp_params->pu2_scale_mat = pu2_scale_mat + (u4_qp_mod6 * 16);
1049 
1050         /* threshold matrix & weight for quantization */
1051         pu2_thres_mat = gu2_forward_quant_threshold_4x4 + (u4_qp_mod6 * 16);
1052         for (i = 0; i < 16; i++)
1053         {
1054             ps_qp_params->pu2_thres_mat[i] = pu2_thres_mat[i]
1055                             >> (8 - u4_qp_div6);
1056             ps_qp_params->pu2_weigh_mat[i] = 16;
1057         }
1058 
1059         /* qp dependent rounding constant */
1060         ps_qp_params->u4_dead_zone =
1061                         gu4_forward_quant_round_factor_4x4[u4_qp_div6];
1062 
1063         /* slice dependent rounding constant */
1064         if (ps_proc->i4_slice_type != ISLICE
1065                         && ps_proc->i4_slice_type != SISLICE)
1066         {
1067             ps_qp_params->u4_dead_zone >>= 1;
1068         }
1069 
1070         /* SATQD threshold for zero block prediction */
1071         if (ps_proc->ps_codec->s_cfg.u4_enable_satqd)
1072         {
1073             pu2_smat = ps_qp_params->pu2_scale_mat;
1074 
1075             u4_satdq_t = ((1 << (ps_qp_params->u1_qbits)) - ps_qp_params->u4_dead_zone);
1076 
1077             ps_qp_params->pu2_sad_thrsh[0] = u4_satdq_t / MAX(pu2_smat[3], pu2_smat[11]);
1078             ps_qp_params->pu2_sad_thrsh[1] = u4_satdq_t / MAX(pu2_smat[1], pu2_smat[9]);
1079             ps_qp_params->pu2_sad_thrsh[2] = u4_satdq_t / pu2_smat[15];
1080             ps_qp_params->pu2_sad_thrsh[3] = u4_satdq_t / pu2_smat[7];
1081             ps_qp_params->pu2_sad_thrsh[4] = u4_satdq_t / MAX(pu2_smat[12], pu2_smat[14]);
1082             ps_qp_params->pu2_sad_thrsh[5] = u4_satdq_t / MAX(pu2_smat[4], pu2_smat[6]);
1083             ps_qp_params->pu2_sad_thrsh[6] = u4_satdq_t / pu2_smat[13];
1084             ps_qp_params->pu2_sad_thrsh[7] = u4_satdq_t / pu2_smat[5];
1085             ps_qp_params->pu2_sad_thrsh[8] = u4_satdq_t / MAX(MAX3(pu2_smat[0], pu2_smat[2], pu2_smat[8]), pu2_smat[10]);
1086         }
1087 
1088         /* inverse scale matrix */
1089         ps_qp_params->pu2_iscale_mat = pu2_iscale_mat + (u4_qp_mod6 * 16);
1090 
1091         plane += 1;
1092     }
1093     return ;
1094 }
1095 
1096 /**
1097 *******************************************************************************
1098 *
1099 * @brief
1100 *  Initialize AIR mb frame Map
1101 *
1102 * @par Description:
1103 *  Initialize AIR mb frame map.  MB frame map indicates which MB in a frame
1104 *  should be coded as intra according to AIR
1105 *
1106 * @param[in] ps_codec
1107 *  Pointer to codec context
1108 *
1109 * @returns  error_status
1110 *
1111 * @remarks
1112 *
1113 *******************************************************************************
1114 */
ih264e_init_air_map(codec_t * ps_codec)1115 IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec)
1116 {
1117     /* intra refresh map */
1118     UWORD16 *pu2_intr_rfrsh_map = ps_codec->pu2_intr_rfrsh_map;
1119 
1120     /* air mode */
1121     IVE_AIR_MODE_T air_mode = ps_codec->s_cfg.e_air_mode;
1122 
1123     /* refresh period */
1124     UWORD32 air_period = ps_codec->s_cfg.u4_air_refresh_period;
1125 
1126     /* mb cnt */
1127     UWORD32 u4_mb_cnt = ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs;
1128 
1129     /* temp var */
1130     UWORD32 curr_mb, seed_rand = 1;
1131 
1132     switch (air_mode)
1133     {
1134         case IVE_AIR_MODE_CYCLIC:
1135 
1136             for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++)
1137             {
1138                 pu2_intr_rfrsh_map[curr_mb] = curr_mb % air_period;
1139             }
1140             break;
1141 
1142         case IVE_AIR_MODE_RANDOM:
1143 
1144             for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++)
1145             {
1146                 seed_rand = (seed_rand * 32719 + 3) % 32749;
1147                 pu2_intr_rfrsh_map[curr_mb] = seed_rand % air_period;
1148             }
1149             break;
1150 
1151         default:
1152 
1153             break;
1154     }
1155 
1156     return IH264E_SUCCESS;
1157 }
1158 
1159 /**
1160 *******************************************************************************
1161 *
1162 * @brief Speed preset side effects
1163 *
1164 * @par Description:
1165 *  Force apply the configuration options basing on the configured speed preset
1166 *
1167 * @param[in] ps_codec
1168 *  Pointer to codec context
1169 *
1170 * @returns none
1171 *
1172 * @remarks
1173 *
1174 *******************************************************************************
1175 */
ih264e_speed_preset_side_effects(codec_t * ps_codec)1176 void ih264e_speed_preset_side_effects(codec_t *ps_codec)
1177 {
1178     cfg_params_t *ps_cfg = &ps_codec->s_cfg;
1179 
1180     if (ps_cfg->u4_enc_speed_preset == IVE_SLOWEST)
1181     {/* high quality */
1182         /* enable diamond search */
1183         ps_cfg->u4_me_speed_preset = DMND_SRCH;
1184         ps_cfg->u4_enable_fast_sad = 0;
1185 
1186         /* disable intra 4x4 */
1187         ps_cfg->u4_enable_intra_4x4 = 1;
1188         ps_codec->luma_energy_compaction[1] =
1189                         ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
1190 
1191         /* sub pel off */
1192         ps_cfg->u4_enable_hpel = 1;
1193 
1194         /* deblocking off */
1195         ps_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
1196 
1197         /* disabled intra inter gating in Inter slices */
1198         ps_codec->u4_inter_gate = 0;
1199     }
1200     else if (ps_cfg->u4_enc_speed_preset == IVE_NORMAL)
1201     {/* normal */
1202         /* enable diamond search */
1203         ps_cfg->u4_me_speed_preset = DMND_SRCH;
1204         ps_cfg->u4_enable_fast_sad = 0;
1205 
1206         /* disable intra 4x4 */
1207         ps_cfg->u4_enable_intra_4x4 = 1;
1208 
1209         /* sub pel off */
1210         ps_cfg->u4_enable_hpel = 1;
1211 
1212         /* deblocking off */
1213         ps_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
1214 
1215         /* disabled intra inter gating in Inter slices */
1216         ps_codec->u4_inter_gate = 0;
1217     }
1218     else if (ps_cfg->u4_enc_speed_preset == IVE_FAST)
1219     {/* fast */
1220          /* enable diamond search */
1221          ps_cfg->u4_me_speed_preset = DMND_SRCH;
1222          ps_cfg->u4_enable_fast_sad = 0;
1223 
1224          /* disable intra 4x4 */
1225          ps_cfg->u4_enable_intra_4x4 = 0;
1226 
1227          /* sub pel off */
1228          ps_cfg->u4_enable_hpel = 1;
1229 
1230          /* deblocking off */
1231          ps_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
1232 
1233          /* disabled intra inter gating in Inter slices */
1234          ps_codec->u4_inter_gate = 1;
1235      }
1236     else if (ps_cfg->u4_enc_speed_preset == IVE_HIGH_SPEED)
1237     {/* high speed */
1238         /* enable diamond search */
1239         ps_cfg->u4_me_speed_preset = DMND_SRCH;
1240         ps_cfg->u4_enable_fast_sad = 0;
1241 
1242         /* disable intra 4x4 */
1243         ps_cfg->u4_enable_intra_4x4 = 0;
1244 
1245         /* sub pel off */
1246         ps_cfg->u4_enable_hpel = 0;
1247 
1248         /* deblocking off */
1249         ps_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
1250 
1251         /* disabled intra inter gating in Inter slices */
1252         ps_codec->u4_inter_gate = 0;
1253     }
1254     else if (ps_cfg->u4_enc_speed_preset == IVE_FASTEST)
1255     {/* fastest */
1256         /* enable diamond search */
1257         ps_cfg->u4_me_speed_preset = DMND_SRCH;
1258 
1259         /* disable intra 4x4 */
1260         ps_cfg->u4_enable_intra_4x4 = 0;
1261 
1262         /* sub pel off */
1263         ps_cfg->u4_enable_hpel = 0;
1264 
1265         /* deblocking off */
1266         ps_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
1267 
1268         /* disabled intra inter gating in Inter slices */
1269         ps_codec->u4_inter_gate = 1;
1270     }
1271 }
1272 
1273 /**
1274 *******************************************************************************
1275 *
1276 * @brief
1277 *  Codec level initializations
1278 *
1279 * @par Description:
1280 *  Initializes the codec with parameters that needs to be set before encoding
1281 *  first frame
1282 *
1283 * @param[in] ps_codec
1284 *  Pointer to codec context
1285 *
1286 * @param[in] ps_inp_buf
1287 *  Pointer to input buffer context
1288 *
1289 * @returns  error_status
1290 *
1291 * @remarks
1292 *
1293 *******************************************************************************
1294 */
ih264e_codec_init(codec_t * ps_codec)1295 IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec)
1296 {
1297     /********************************************************************
1298      *                     INITIALIZE CODEC CONTEXT                     *
1299      ********************************************************************/
1300     /* encoder presets */
1301     if (ps_codec->s_cfg.u4_enc_speed_preset != IVE_CONFIG)
1302     {
1303         ih264e_speed_preset_side_effects(ps_codec);
1304     }
1305 
1306     /*****************************************************************
1307      * Initialize AIR inside codec
1308      *****************************************************************/
1309     if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
1310     {
1311         ih264e_init_air_map(ps_codec);
1312 
1313         ps_codec->i4_air_pic_cnt = -1;
1314     }
1315 
1316     /*****************************************************************/
1317     /*                    Initialize Intra Cost Map                  */
1318     /*****************************************************************/
1319     memset(ps_codec->pi4_mb_intra_cost, 0, ps_codec->s_cfg.i4_wd_mbs *
1320            ps_codec->s_cfg.i4_ht_mbs * sizeof(*ps_codec->pi4_mb_intra_cost));
1321 
1322     /****************************************************/
1323     /*           INITIALIZE RATE CONTROL                */
1324     /****************************************************/
1325     {
1326         /* init qp */
1327         UWORD8 au1_init_qp[MAX_PIC_TYPE];
1328 
1329         /* min max qp */
1330         UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
1331 
1332         /* init i,p,b qp */
1333         au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
1334         au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
1335         au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
1336 
1337         /* init min max qp */
1338         au1_min_max_qp[2 * I_PIC] =
1339                         gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
1340         au1_min_max_qp[2 * I_PIC + 1] =
1341                         gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
1342 
1343         au1_min_max_qp[2 * P_PIC] =
1344                         gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
1345         au1_min_max_qp[2 * P_PIC + 1] =
1346                         gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
1347 
1348         au1_min_max_qp[2 * B_PIC] =
1349                         gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
1350         au1_min_max_qp[2 * B_PIC + 1] =
1351                         gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
1352 
1353         /* get rc mode */
1354         switch (ps_codec->s_cfg.e_rc_mode)
1355         {
1356             case IVE_RC_STORAGE:
1357                 ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
1358                 break;
1359             case IVE_RC_CBR_NON_LOW_DELAY:
1360                 ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
1361                 break;
1362             case IVE_RC_CBR_LOW_DELAY:
1363                 ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
1364                 break;
1365             case IVE_RC_NONE:
1366                 ps_codec->s_rate_control.e_rc_type = CONST_QP;
1367                 break;
1368             default:
1369                 break;
1370         }
1371 
1372         /* init rate control */
1373         ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
1374                        ps_codec->s_rate_control.pps_frame_time,
1375                        ps_codec->s_rate_control.pps_time_stamp,
1376                        ps_codec->s_rate_control.pps_pd_frm_rate,
1377                        ps_codec->s_cfg.u4_max_framerate,
1378                        ps_codec->s_cfg.u4_src_frame_rate,
1379                        ps_codec->s_cfg.u4_tgt_frame_rate,
1380                        ps_codec->s_rate_control.e_rc_type,
1381                        ps_codec->s_cfg.u4_target_bitrate,
1382                        ps_codec->s_cfg.u4_max_bitrate,
1383                        ps_codec->s_cfg.u4_vbv_buffer_delay,
1384                        ps_codec->s_cfg.u4_i_frm_interval,
1385                        ps_codec->s_cfg.u4_num_bframes + 1, au1_init_qp,
1386                        ps_codec->s_cfg.u4_num_bframes + 2 , au1_min_max_qp,
1387                        MAX(ps_codec->s_cfg.u4_max_level,
1388                                (UWORD32)ih264e_get_min_level(ps_codec->s_cfg.u4_max_wd, ps_codec->s_cfg.u4_max_ht)));
1389     }
1390 
1391     /* recon stride */
1392     ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD;
1393 
1394     /* max ref and reorder cnt */
1395     ps_codec->i4_ref_buf_cnt = ps_codec->s_cfg.u4_max_ref_cnt
1396                     + ps_codec->s_cfg.u4_max_reorder_cnt;
1397     ps_codec->i4_ref_buf_cnt += MAX_CTXT_SETS;
1398 
1399     DEBUG_HISTOGRAM_INIT();
1400 
1401     /* Init dependecy vars */
1402     ps_codec->i4_last_inp_buff_received = 0;
1403 
1404     /* At codec start no IDR is pending */
1405     ps_codec->i4_pending_idr_flag = 0;
1406 
1407     return IH264E_SUCCESS;
1408 }
1409 
1410 /**
1411 *******************************************************************************
1412 *
1413 * @brief
1414 *  Picture level initializations
1415 *
1416 * @par Description:
1417 *  Before beginning to encode the frame, the current function initializes all
1418 *  the ctxts (proc, entropy, me, ...) basing on the input configured params.
1419 *  It locates space for storing recon in the encoder picture buffer set, fetches
1420 *  reference frame from encoder picture buffer set. Calls RC pre-enc to get
1421 *  qp and pic type for the current frame. Queues proc jobs so that
1422 *  the other threads can begin encoding. In brief, this function sets up the
1423 *  tone for the entire encoder.
1424 *
1425 * @param[in] ps_codec
1426 *  Pointer to codec context
1427 *
1428 * @param[in] ps_inp_buf
1429 *  Pointer to input buffer context
1430 *
1431 * @returns  error_status
1432 *
1433 * @remarks
1434 *
1435 *******************************************************************************
1436 */
ih264e_pic_init(codec_t * ps_codec,inp_buf_t * ps_inp_buf)1437 IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
1438 {
1439     /* error status */
1440     IH264E_ERROR_T error_status = IH264E_SUCCESS;
1441     IH264_ERROR_T ret = IH264_SUCCESS;
1442 
1443     /* mv buff bank */
1444     mv_buf_t *ps_mv_buf = NULL;
1445     WORD32 cur_mv_bank_buf_id;
1446 
1447     /* recon buffer set */
1448     pic_buf_t *ps_cur_pic;
1449     WORD32 cur_pic_buf_id;
1450     UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma;
1451 
1452     /* ref buffer set */
1453     pic_buf_t *aps_ref_pic[MAX_REF_PIC_CNT] = {NULL, NULL};
1454     mv_buf_t *aps_mv_buf[MAX_REF_PIC_CNT] = {NULL, NULL};
1455     WORD32 ref_set_id;
1456 
1457     /* pic time stamp */
1458     UWORD32 u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
1459     UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
1460 
1461     /* indices to access curr/prev frame info */
1462     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1463 
1464     /* curr pic type */
1465     PIC_TYPE_T *pic_type = &ps_codec->pic_type;
1466 
1467     /* Diamond search Iteration Max Cnt */
1468     UWORD32 u4_num_layers =
1469                     (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ?
1470                                     (NUM_LAYERS >> 2) : NUM_LAYERS;
1471 
1472     /* enable fast sad */
1473     UWORD32 u4_enable_fast_sad = ps_codec->s_cfg.u4_enable_fast_sad;
1474 
1475     /********************************************************************/
1476     /*                     INITIALIZE CODEC CONTEXT                     */
1477     /********************************************************************/
1478     /* slice_type */
1479     if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type))
1480     {
1481         ps_codec->i4_slice_type = ISLICE;
1482     }
1483     else if (PIC_P == *pic_type)
1484     {
1485         ps_codec->i4_slice_type = PSLICE;
1486     }
1487     else if(PIC_B == *pic_type)
1488     {
1489         ps_codec->i4_slice_type = BSLICE;
1490     }
1491 
1492 
1493     /***************************************************************************
1494      * Set up variables for sending frame number, poc and reference
1495      *   a) Set up alt ref too
1496      **************************************************************************/
1497 
1498     /* Check and set if the current frame is reference or not */
1499     ps_codec->u4_is_curr_frm_ref = 0;
1500 
1501     /* This frame is reference if its not a B pic, pending approval from alt ref */
1502     ps_codec->u4_is_curr_frm_ref = (*pic_type != PIC_B);
1503 
1504     /* In case if its a P pic, we will decide according to alt ref also */
1505     if (ps_codec->s_cfg.u4_enable_alt_ref && (*pic_type == PIC_P)
1506                     && (ps_codec->i4_pic_cnt
1507                                     % (ps_codec->s_cfg.u4_enable_alt_ref + 1)))
1508     {
1509         ps_codec->u4_is_curr_frm_ref = 0;
1510     }
1511 
1512     /*
1513      * Override everything in case of IDR
1514      * Note that in case of IDR, at this point ps_codec->u4_is_curr_frm_ref must
1515      * be 1
1516      */
1517 
1518     /* is this an IDR pic */
1519     ps_codec->u4_is_idr = 0;
1520 
1521     if (PIC_IDR == *pic_type)
1522     {
1523         /* set idr flag */
1524         ps_codec->u4_is_idr = 1;
1525 
1526         ps_codec->i4_restore_frame_num = ps_codec->i4_frame_num;
1527         /* reset frame num */
1528         ps_codec->i4_frame_num = 0;
1529 
1530         /* idr_pic_id */
1531         ps_codec->i4_idr_pic_id++;
1532     }
1533 
1534     /***************************************************************************
1535      * Set up Deblock
1536      **************************************************************************/
1537 
1538     /* set deblock disable flags based on disable deblock level */
1539     ps_codec->i4_disable_deblk_pic = 1;
1540 
1541     if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_0)
1542     {
1543         /* enable deblocking */
1544         ps_codec->i4_disable_deblk_pic = 0;
1545     }
1546     else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_2)
1547     {
1548         /* enable deblocking after a period of frames */
1549         if (ps_codec->i4_disable_deblk_pic_cnt == DISABLE_DEBLOCK_INTERVAL
1550                         || ps_codec->i4_slice_type == ISLICE)
1551         {
1552             ps_codec->i4_disable_deblk_pic = 0;
1553         }
1554     }
1555     else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_3)
1556     {
1557         if (ps_codec->i4_slice_type == ISLICE)
1558         {
1559             ps_codec->i4_disable_deblk_pic = 0;
1560         }
1561     }
1562 
1563     if (ps_codec->i4_disable_deblk_pic)
1564     {
1565         ps_codec->i4_disable_deblk_pic_cnt++;
1566     }
1567     else
1568     {
1569         ps_codec->i4_disable_deblk_pic_cnt = 0;
1570     }
1571 
1572     /* In slice mode - lets not deblk mb edges that lie along slice boundaries */
1573     if (ps_codec->i4_disable_deblk_pic == 0)
1574     {
1575         if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
1576         {
1577             ps_codec->i4_disable_deblk_pic = 2;
1578         }
1579     }
1580 
1581     /* error status */
1582     ps_codec->i4_error_code = IH264E_SUCCESS;
1583 
1584     /* populate header */
1585     if (ps_codec->i4_gen_header)
1586     {
1587         /* sps */
1588         sps_t *ps_sps = NULL;
1589 
1590         /* pps */
1591         pps_t *ps_pps = NULL;
1592 
1593         /*ps_codec->i4_pps_id ++;*/
1594         ps_codec->i4_pps_id %= MAX_PPS_CNT;
1595 
1596         /*ps_codec->i4_sps_id ++;*/
1597         ps_codec->i4_sps_id %= MAX_SPS_CNT;
1598 
1599         /* populate sps header */
1600         ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
1601         ih264e_populate_sps(ps_codec, ps_sps);
1602 
1603         /* populate pps header */
1604         ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
1605         ih264e_populate_pps(ps_codec, ps_pps);
1606     }
1607 
1608     /***************************************************************************
1609      *  Reference and MV bank Buffer Manager
1610      *  Here we will
1611      *      1) Find the correct ref pics for the current frame
1612      *      2) Free the ref pic that is not going to be used anywhere
1613      *      3) Find a free buff from the list and assign it as the recon of
1614      *         current frame
1615      *
1616      *  1) Finding correct ref pic
1617      *      All pics needed for future are arranged in a picture list called
1618      *      ps_codec->as_ref_set. Each picture in this will have a pic buffer and
1619      *      MV buffer that is marked appropriately as BUF_MGR_REF, BUF_MGR_IO or
1620      *      BUF_MGR_CODEC. Also the pic_cnt and poc will also be present.
1621      *      Hence to find the ref pic we will loop through the list and find
1622      *      2 pictures with maximum i4_pic_cnt .
1623      *
1624      *      note that i4_pic_cnt == -1 is used to filter uninit ref pics.
1625      *      Now since we only have max two ref pics, we will always find max 2
1626      *      ref pics.
1627      *
1628      *  2), 3) Self explanatory
1629      ***************************************************************************/
1630     {
1631         /* Search for buffs with maximum pic cnt */
1632 
1633         WORD32 max_pic_cnt[] = { -1, -1 };
1634 
1635         mv_buf_t *ps_mv_buf_to_free[] = { NULL, NULL };
1636 
1637         /* temp var */
1638         WORD32 i, buf_status;
1639 
1640         for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
1641         {
1642             if (ps_codec->as_ref_set[i].i4_pic_cnt == -1)
1643                 continue;
1644 
1645             buf_status = ih264_buf_mgr_get_status(
1646                             ps_codec->pv_ref_buf_mgr,
1647                             ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
1648 
1649             /* Ideally we should look for buffer status of MV BUFF also. But since
1650              * the correponding MV buffs also will be at the same state. It dosent
1651              * matter as of now. But the check will make the logic better */
1652             if ((max_pic_cnt[0] < ps_codec->as_ref_set[i].i4_pic_cnt)
1653                             && (buf_status & BUF_MGR_REF))
1654             {
1655                 if (max_pic_cnt[1] < ps_codec->as_ref_set[i].i4_pic_cnt)
1656                 {
1657                     max_pic_cnt[0] = max_pic_cnt[1];
1658                     aps_ref_pic[0] = aps_ref_pic[1];
1659                     aps_mv_buf[0] = aps_mv_buf[1];
1660 
1661                     ps_mv_buf_to_free[0] = ps_mv_buf_to_free[1];
1662 
1663                     max_pic_cnt[1] = ps_codec->as_ref_set[i].i4_pic_cnt;
1664                     aps_ref_pic[1] = ps_codec->as_ref_set[i].ps_pic_buf;
1665                     aps_mv_buf[1] = ps_codec->as_ref_set[i].ps_mv_buf;
1666                     ps_mv_buf_to_free[1] = ps_codec->as_ref_set[i].ps_mv_buf;
1667 
1668                 }
1669                 else
1670                 {
1671                     max_pic_cnt[0] = ps_codec->as_ref_set[i].i4_pic_cnt;
1672                     aps_ref_pic[0] = ps_codec->as_ref_set[i].ps_pic_buf;
1673                     aps_mv_buf[0] = ps_codec->as_ref_set[i].ps_mv_buf;
1674                     ps_mv_buf_to_free[0] = ps_codec->as_ref_set[i].ps_mv_buf;
1675                 }
1676             }
1677         }
1678 
1679         /*
1680          * Now if the current picture is I or P, we discard the back ref pic and
1681          * assign forward ref as backward ref
1682          */
1683         if (*pic_type != PIC_B)
1684         {
1685             if (ps_mv_buf_to_free[0])
1686             {
1687                 /* release this frame from reference list */
1688                 ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
1689                                       ps_mv_buf_to_free[0]->i4_buf_id,
1690                                       BUF_MGR_REF);
1691 
1692                 ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
1693                                       aps_ref_pic[0]->i4_buf_id, BUF_MGR_REF);
1694             }
1695 
1696             max_pic_cnt[0] = max_pic_cnt[1];
1697             aps_ref_pic[0] = aps_ref_pic[1];
1698             aps_mv_buf[0] = aps_mv_buf[1];
1699 
1700             /* Dummy */
1701             max_pic_cnt[1] = -1;
1702         }
1703 
1704         /*
1705          * Mark all reference pic with unused buffers to be free
1706          * We need this step since each one, ie ref, recon io etc only unset their
1707          * respective flags. Hence we need to combine togather and mark the ref set
1708          * accordingly
1709          */
1710         ref_set_id = -1;
1711         for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
1712         {
1713             if (ps_codec->as_ref_set[i].i4_pic_cnt == -1)
1714             {
1715                 ref_set_id = i;
1716                 continue;
1717             }
1718 
1719             buf_status = ih264_buf_mgr_get_status(
1720                             ps_codec->pv_ref_buf_mgr,
1721                             ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
1722 
1723             if ((buf_status & (BUF_MGR_REF | BUF_MGR_CODEC | BUF_MGR_IO)) == 0)
1724             {
1725                 ps_codec->as_ref_set[i].i4_pic_cnt = -1;
1726                 ps_codec->as_ref_set[i].i4_poc = 32768;
1727 
1728                 ref_set_id = i;
1729             }
1730         }
1731         /* An asssert failure here means we donot have any free buffs */
1732         ASSERT(ref_set_id >= 0);
1733     }
1734 
1735     {
1736         /*****************************************************************/
1737         /* Get free MV Bank to hold current picture's motion vector data */
1738         /* If there are no free buffers then return with an error code.  */
1739         /* If the buffer is to be freed by another thread, change the    */
1740         /* following to call thread yield and wait for buffer to be freed*/
1741         /*****************************************************************/
1742         ps_mv_buf = (mv_buf_t *) ih264_buf_mgr_get_next_free(
1743                         (buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
1744                         &cur_mv_bank_buf_id);
1745 
1746         if (NULL == ps_mv_buf)
1747         {
1748             return IH264E_NO_FREE_MVBANK;
1749         }
1750 
1751         /* mark the buffer as needed for reference if the curr pic is available for ref */
1752         if (ps_codec->u4_is_curr_frm_ref)
1753         {
1754             ih264_buf_mgr_set_status(ps_codec->pv_mv_buf_mgr,
1755                                      cur_mv_bank_buf_id, BUF_MGR_REF);
1756         }
1757 
1758         /* Set current ABS poc to ps_mv_buf, so that while freeing a reference buffer
1759          * corresponding mv buffer can be found by looping through ps_codec->ps_mv_buf array
1760          * and getting a buffer id to free
1761          */
1762         ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
1763         ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id;
1764     }
1765 
1766     {
1767         /*****************************************************************/
1768         /* Get free pic buf to hold current picture's recon data         */
1769         /* If there are no free buffers then return with an error code.  */
1770         /* If the buffer is to be freed by another thread, change the    */
1771         /* following to call thread yield and wait for buffer to be freed*/
1772         /*****************************************************************/
1773         ps_cur_pic = (pic_buf_t *) ih264_buf_mgr_get_next_free(
1774                         (buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
1775                         &cur_pic_buf_id);
1776 
1777         if (NULL == ps_cur_pic)
1778         {
1779             return IH264E_NO_FREE_PICBUF;
1780         }
1781 
1782         /* mark the buffer as needed for reference if the curr pic is available for ref */
1783         if (ps_codec->u4_is_curr_frm_ref)
1784         {
1785             ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
1786                                      BUF_MGR_REF);
1787         }
1788 
1789         /* Mark the current buffer as needed for IO if recon is enabled */
1790         if (1 == ps_codec->s_cfg.u4_enable_recon)
1791         {
1792             ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
1793                                      BUF_MGR_IO);
1794         }
1795 
1796         /* Associate input timestamp with current buffer */
1797         ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
1798         ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
1799 
1800         ps_cur_pic->i4_abs_poc = ps_codec->i4_poc;
1801         ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb;
1802 
1803         ps_cur_pic->i4_buf_id = cur_pic_buf_id;
1804 
1805         pu1_cur_pic_luma = ps_cur_pic->pu1_luma;
1806         pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma;
1807     }
1808 
1809     /*
1810      * Add the current picture to ref list independent of the fact that it is used
1811      * as reference or not. This is because, now recon is not in sync with output
1812      * hence we may need the current recon after some delay. By adding it to ref list
1813      * we can retrieve the recon any time we want. The information that it is used
1814      * for ref can still be found by checking the buffer status of pic buf.
1815      */
1816     {
1817         ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt;
1818         ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_poc;
1819         ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf;
1820         ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic;
1821     }
1822 
1823     /********************************************************************/
1824     /*                     INITIALIZE PROCESS CONTEXT                   */
1825     /********************************************************************/
1826     {
1827         /* temp var */
1828         WORD32 i, j = 0;
1829 
1830         /* curr proc ctxt */
1831         process_ctxt_t *ps_proc = NULL;
1832 
1833         j = ctxt_sel * MAX_PROCESS_THREADS;
1834 
1835         /* begin init */
1836         for (i = j; i < (j + MAX_PROCESS_THREADS); i++)
1837         {
1838             ps_proc = &ps_codec->as_process[i];
1839 
1840             /* luma src buffer */
1841             if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1842             {
1843                 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1844             }
1845             else
1846             {
1847                 ps_proc->pu1_src_buf_luma_base =
1848                                 ps_inp_buf->s_raw_buf.apv_bufs[0];
1849             }
1850 
1851             /* chroma src buffer */
1852             if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE
1853                             || ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P)
1854             {
1855                 ps_proc->pu1_src_buf_chroma_base =
1856                                 ps_codec->pu1_uv_csc_buf_base;
1857             }
1858             else
1859             {
1860                 ps_proc->pu1_src_buf_chroma_base =
1861                                 ps_inp_buf->s_raw_buf.apv_bufs[1];
1862             }
1863 
1864             /* luma rec buffer */
1865             ps_proc->pu1_rec_buf_luma_base = pu1_cur_pic_luma;
1866 
1867             /* chroma rec buffer */
1868             ps_proc->pu1_rec_buf_chroma_base = pu1_cur_pic_chroma;
1869 
1870             /* rec stride */
1871             ps_proc->i4_rec_strd = ps_codec->i4_rec_strd;
1872 
1873             /* frame num */
1874             ps_proc->i4_frame_num = ps_codec->i4_frame_num;
1875 
1876             /* is idr */
1877             ps_proc->u4_is_idr = ps_codec->u4_is_idr;
1878 
1879             /* idr pic id */
1880             ps_proc->u4_idr_pic_id = ps_codec->i4_idr_pic_id;
1881 
1882             /* slice_type */
1883             ps_proc->i4_slice_type = ps_codec->i4_slice_type;
1884 
1885             /* Input width in mbs */
1886             ps_proc->i4_wd_mbs = ps_codec->s_cfg.i4_wd_mbs;
1887 
1888             /* Input height in mbs */
1889             ps_proc->i4_ht_mbs = ps_codec->s_cfg.i4_ht_mbs;
1890 
1891             /* Half x plane offset from pic buf */
1892             ps_proc->u4_half_x_offset = 0;
1893 
1894             /* Half y plane offset from half x plane */
1895             ps_proc->u4_half_y_offset = 0;
1896 
1897             /* Half x plane offset from half y plane */
1898             ps_proc->u4_half_xy_offset = 0;
1899 
1900             /* top row syntax elements */
1901             ps_proc->ps_top_row_mb_syntax_ele =
1902                             ps_proc->ps_top_row_mb_syntax_ele_base;
1903 
1904             ps_proc->pu1_top_mb_intra_modes =
1905                             ps_proc->pu1_top_mb_intra_modes_base;
1906 
1907             ps_proc->ps_top_row_pu = ps_proc->ps_top_row_pu_base;
1908 
1909             /* initialize quant params */
1910             ps_proc->u4_frame_qp = ps_codec->u4_frame_qp;
1911             ps_proc->u4_mb_qp = ps_codec->u4_frame_qp;
1912             ih264e_init_quant_params(ps_proc, ps_proc->u4_frame_qp);
1913 
1914             /* previous mb qp*/
1915             ps_proc->u4_mb_qp_prev = ps_proc->u4_frame_qp;
1916 
1917             /* Reset frame info */
1918             memset(&ps_proc->s_frame_info, 0, sizeof(frame_info_t));
1919 
1920             /* initialize proc, deblk and ME map */
1921             if (i == j)
1922             {
1923                 /* row '-1' */
1924                 memset(ps_proc->pu1_proc_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
1925                 /* row 0 to ht in mbs */
1926                 memset(ps_proc->pu1_proc_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
1927 
1928                 /* row '-1' */
1929                 memset(ps_proc->pu1_deblk_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
1930                 /* row 0 to ht in mbs */
1931                 memset(ps_proc->pu1_deblk_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
1932 
1933                 /* row '-1' */
1934                 memset(ps_proc->pu1_me_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
1935                 /* row 0 to ht in mbs */
1936                 memset(ps_proc->pu1_me_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
1937 
1938                 /* at the start of air refresh period, reset intra coded map */
1939                 if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
1940                 {
1941                     ps_codec->i4_air_pic_cnt = (ps_codec->i4_air_pic_cnt + 1)
1942                                     % ps_codec->s_cfg.u4_air_refresh_period;
1943 
1944                     if (!ps_codec->i4_air_pic_cnt)
1945                     {
1946                         memset(ps_proc->pu1_is_intra_coded, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
1947                     }
1948                 }
1949             }
1950 
1951             /* deblock level */
1952             ps_proc->u4_disable_deblock_level = ps_codec->i4_disable_deblk_pic;
1953 
1954             /* slice index map */
1955             /* no slice */
1956             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_NONE)
1957             {
1958                 memset(ps_proc->pu1_slice_idx, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
1959             }
1960             /* generate slices for every 'n' rows, 'n' is given through slice param */
1961             else if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
1962             {
1963                 /* slice idx map */
1964                 UWORD8 *pu1_slice_idx = ps_proc->pu1_slice_idx;
1965 
1966                 /* temp var */
1967                 WORD32 i4_mb_y = 0, slice_idx = 0, cnt;
1968 
1969                 while (i4_mb_y < ps_proc->i4_ht_mbs)
1970                 {
1971                     if (i4_mb_y +(WORD32)ps_codec->s_cfg.u4_slice_param < ps_proc->i4_ht_mbs)
1972                     {
1973                         cnt = ps_codec->s_cfg.u4_slice_param * ps_proc->i4_wd_mbs;
1974                         i4_mb_y += ps_codec->s_cfg.u4_slice_param;
1975                     }
1976                     else
1977                     {
1978                         cnt = (ps_proc->i4_ht_mbs - i4_mb_y) * ps_proc->i4_wd_mbs;
1979                         i4_mb_y += (ps_proc->i4_ht_mbs - i4_mb_y);
1980                     }
1981                     memset(pu1_slice_idx, slice_idx, cnt);
1982                     slice_idx++;
1983                     pu1_slice_idx += cnt;
1984                 }
1985             }
1986 
1987             /* Current MV Bank's buffer ID */
1988             ps_proc->i4_cur_mv_bank_buf_id = cur_mv_bank_buf_id;
1989 
1990             /* Pointer to current picture buffer structure */
1991             ps_proc->ps_cur_pic = ps_cur_pic;
1992 
1993             /* Pointer to current pictures mv buffers */
1994             ps_proc->ps_cur_mv_buf = ps_mv_buf;
1995 
1996             /*
1997              * pointer to ref picture
1998              * 0    : Temporal back reference
1999              * 1    : Temporal forward reference
2000              */
2001             ps_proc->aps_ref_pic[PRED_L0] = aps_ref_pic[PRED_L0];
2002             ps_proc->aps_ref_pic[PRED_L1] = aps_ref_pic[PRED_L1];
2003             if (ps_codec->pic_type == PIC_B)
2004             {
2005                 ps_proc->aps_mv_buf[PRED_L0] = aps_mv_buf[PRED_L0];
2006                 ps_proc->aps_mv_buf[PRED_L1] = aps_mv_buf[PRED_L1];
2007             }
2008             else
2009             {
2010                 /*
2011                  * Else is dummy since for non B pic we does not need this
2012                  * But an assignment here will help in not having a segfault
2013                  * when we calcualte colpic in P slices
2014                  */
2015                 ps_proc->aps_mv_buf[PRED_L0] = ps_mv_buf;
2016                 ps_proc->aps_mv_buf[PRED_L1] = ps_mv_buf;
2017             }
2018 
2019             if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
2020             {
2021                 /* temporal back an forward  ref pointer luma and chroma */
2022                 ps_proc->apu1_ref_buf_luma_base[PRED_L0] = aps_ref_pic[PRED_L0]->pu1_luma;
2023                 ps_proc->apu1_ref_buf_chroma_base[PRED_L0] = aps_ref_pic[PRED_L0]->pu1_chroma;
2024 
2025                 ps_proc->apu1_ref_buf_luma_base[PRED_L1] = aps_ref_pic[PRED_L1]->pu1_luma;
2026                 ps_proc->apu1_ref_buf_chroma_base[PRED_L1] = aps_ref_pic[PRED_L1]->pu1_chroma;
2027             }
2028 
2029             /* Structure for current input buffer */
2030             ps_proc->s_inp_buf = *ps_inp_buf;
2031 
2032             /* Number of encode frame API calls made */
2033             ps_proc->i4_encode_api_call_cnt = ps_codec->i4_encode_api_call_cnt;
2034 
2035             /* Current Picture count */
2036             ps_proc->i4_pic_cnt = ps_codec->i4_pic_cnt;
2037 
2038             /* error status */
2039             ps_proc->i4_error_code = 0;
2040 
2041             /********************************************************************/
2042             /*                     INITIALIZE ENTROPY CONTEXT                   */
2043             /********************************************************************/
2044             {
2045                 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
2046 
2047                 /* start of frame */
2048                 ps_entropy->i4_sof = 0;
2049 
2050                 /* end of frame */
2051                 ps_entropy->i4_eof = 0;
2052 
2053                 /* generate header */
2054                 ps_entropy->i4_gen_header = ps_codec->i4_gen_header;
2055 
2056                 /* sps ref_set_id */
2057                 ps_entropy->u4_sps_id = ps_codec->i4_sps_id;
2058 
2059                 /* sps base */
2060                 ps_entropy->ps_sps_base = ps_codec->ps_sps_base;
2061 
2062                 /* sps id */
2063                 ps_entropy->u4_pps_id = ps_codec->i4_pps_id;
2064 
2065                 /* sps base */
2066                 ps_entropy->ps_pps_base = ps_codec->ps_pps_base;
2067 
2068                 /* slice map */
2069                 ps_entropy->pu1_slice_idx = ps_proc->pu1_slice_idx;
2070 
2071                 /* slice hdr base */
2072                 ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base;
2073 
2074                 /* Abs poc */
2075                 ps_entropy->i4_abs_pic_order_cnt = ps_proc->ps_codec->i4_poc;
2076 
2077                 /* initialize entropy map */
2078                 if (i == j)
2079                 {
2080                     /* row '-1' */
2081                     memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
2082                     /* row 0 to ht in mbs */
2083                     memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
2084 
2085                     /* intialize cabac tables */
2086                     ih264e_init_cabac_table(ps_entropy);
2087                 }
2088 
2089                 /* wd in mbs */
2090                 ps_entropy->i4_wd_mbs = ps_proc->i4_wd_mbs;
2091 
2092                 /* ht in mbs */
2093                 ps_entropy->i4_ht_mbs = ps_proc->i4_ht_mbs;
2094 
2095                 /* transform_8x8_mode_flag */
2096                 ps_entropy->i1_transform_8x8_mode_flag = 0;
2097 
2098                 /* entropy_coding_mode_flag */
2099                 ps_entropy->u1_entropy_coding_mode_flag =
2100                                 ps_codec->s_cfg.u4_entropy_coding_mode;
2101 
2102                 /* error code */
2103                 ps_entropy->i4_error_code = IH264E_SUCCESS;
2104 
2105                 /* mb skip run */
2106                 *(ps_proc->s_entropy.pi4_mb_skip_run) = 0;
2107 
2108                 /* last frame to encode */
2109                 ps_proc->s_entropy.u4_is_last = ps_inp_buf->u4_is_last;
2110 
2111                 /* Current Picture count */
2112                 ps_proc->s_entropy.i4_pic_cnt = ps_codec->i4_pic_cnt;
2113 
2114                 /* time stamps */
2115                 ps_entropy->u4_timestamp_low = u4_timestamp_low;
2116                 ps_entropy->u4_timestamp_high = u4_timestamp_high;
2117 
2118                 /* init frame statistics */
2119                 ps_entropy->u4_header_bits[MB_TYPE_INTRA] = 0;
2120                 ps_entropy->u4_header_bits[MB_TYPE_INTER] = 0;
2121                 ps_entropy->u4_residue_bits[MB_TYPE_INTRA] = 0;
2122                 ps_entropy->u4_residue_bits[MB_TYPE_INTER] = 0;
2123             }
2124 
2125             /********************************************************************/
2126             /*                     INITIALIZE DEBLOCK CONTEXT                   */
2127             /********************************************************************/
2128             {
2129                 /* deblk ctxt */
2130                 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
2131 
2132                 /* slice idx map */
2133                 ps_deblk->pu1_slice_idx = ps_proc->pu1_slice_idx;
2134             }
2135 
2136             /********************************************************************/
2137             /*                     INITIALIZE ME CONTEXT                        */
2138             /********************************************************************/
2139             {
2140                 /* me ctxt */
2141                 me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
2142 
2143                 /* srch range x */
2144                 ps_me_ctxt->ai2_srch_boundaries[0] =
2145                                 ps_codec->s_cfg.u4_srch_rng_x;
2146 
2147                 /* srch range y */
2148                 ps_me_ctxt->ai2_srch_boundaries[1] =
2149                                 ps_codec->s_cfg.u4_srch_rng_y;
2150 
2151                 /* rec stride */
2152                 ps_me_ctxt->i4_rec_strd = ps_codec->i4_rec_strd;
2153 
2154                 /* Half x plane offset from pic buf */
2155                 ps_me_ctxt->u4_half_x_offset = ps_proc->u4_half_x_offset;
2156 
2157                 /* Half y plane offset from half x plane */
2158                 ps_me_ctxt->u4_half_y_offset = ps_proc->u4_half_y_offset;
2159 
2160                 /* Half x plane offset from half y plane */
2161                 ps_me_ctxt->u4_half_xy_offset = ps_proc->u4_half_xy_offset;
2162 
2163                 /* enable fast sad */
2164                 ps_me_ctxt->u4_enable_fast_sad = u4_enable_fast_sad;
2165 
2166                 /* half pel */
2167                 ps_me_ctxt->u4_enable_hpel = ps_codec->s_cfg.u4_enable_hpel;
2168 
2169                 /* Diamond search Iteration Max Cnt */
2170                 ps_me_ctxt->u4_num_layers = u4_num_layers;
2171 
2172                 /* me speed preset */
2173                 ps_me_ctxt->u4_me_speed_preset =
2174                                 ps_codec->s_cfg.u4_me_speed_preset;
2175 
2176                 /* qp */
2177                 ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp;
2178 
2179                 if ((i == j) && (0 == ps_codec->i4_poc))
2180                 {
2181                     /* init mv bits tables */
2182                     ih264e_init_mv_bits(ps_me_ctxt);
2183                 }
2184             }
2185 
2186             ps_proc->ps_ngbr_avbl = &(ps_proc->s_ngbr_avbl);
2187 
2188         }
2189 
2190         /* reset encoder header */
2191         ps_codec->i4_gen_header = 0;
2192     }
2193 
2194     /********************************************************************/
2195     /*                       ADD JOBS TO THE QUEUE                      */
2196     /********************************************************************/
2197     {
2198         /* job structures */
2199         job_t s_job;
2200 
2201         /* temp var */
2202         WORD32 i;
2203 
2204         /* job class */
2205         s_job.i4_cmd = CMD_PROCESS;
2206 
2207         /* number of mbs to be processed in the current job */
2208         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
2209 
2210         /* job start index x */
2211         s_job.i2_mb_x = 0;
2212 
2213         /* proc base idx */
2214         s_job.i2_proc_base_idx = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2215 
2216         for (i = 0; i < (WORD32)ps_codec->s_cfg.i4_ht_mbs; i++)
2217         {
2218             /* job start index y */
2219             s_job.i2_mb_y = i;
2220 
2221             /* queue the job */
2222             ret = ih264_list_queue(ps_codec->pv_proc_jobq, &s_job, 1);
2223             if (ret != IH264_SUCCESS)
2224             {
2225                 return IH264E_FAIL;
2226             }
2227         }
2228 
2229         /* Once all the jobs are queued, terminate the queue */
2230         /* Since the threads are created and deleted in each call, terminating
2231         here is not an issue */
2232         ih264_list_terminate(ps_codec->pv_proc_jobq);
2233     }
2234 
2235     return error_status;
2236 }
2237 
2238 /**
2239 *******************************************************************************
2240 *
2241 * @brief
2242 *  Calculate the per-pic and global PSNR
2243 *
2244 * @par Description:
2245 *  This function takes the source and recon luma/chroma buffer pointers from the
2246 *  codec context and calculates the per-pic and global PSNR for the current encoding
2247 *  frame.
2248 *
2249 * @param[in] ps_codec
2250 *  Pointer to process context
2251 *
2252 * @returns  none
2253 *
2254 * @remarks
2255 *
2256 *
2257 *******************************************************************************
2258 */
ih264e_compute_quality_stats(process_ctxt_t * ps_proc)2259 void ih264e_compute_quality_stats(process_ctxt_t *ps_proc)
2260 {
2261     codec_t *ps_codec = ps_proc->ps_codec;
2262     WORD32 wd = ps_codec->s_cfg.u4_wd;
2263     WORD32 ht = ps_codec->s_cfg.u4_ht;
2264     WORD32 disp_wd = ps_codec->s_cfg.u4_disp_wd;
2265     WORD32 disp_ht = ps_codec->s_cfg.u4_disp_ht;
2266     WORD32 src_strds = ps_proc->i4_src_strd;
2267     WORD32 rec_strds = ps_proc->i4_rec_strd;
2268     quality_stats_t *ps_pic_quality_stats = NULL;
2269     double sum_squared_error[3] = {0.0, 0.0, 0.0};
2270     double total_samples[3];
2271     WORD32 i;
2272     for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
2273     {
2274         if (ps_codec->as_ref_set[i].i4_pic_cnt != -1 &&
2275             ps_codec->as_ref_set[i].i4_poc == ps_codec->i4_poc)
2276         {
2277             ps_pic_quality_stats = &ps_codec->as_ref_set[i].s_pic_quality_stats;
2278             break;
2279         }
2280     }
2281 
2282     if(ps_pic_quality_stats == NULL) return;
2283 
2284     get_sse(
2285         ps_proc->pu1_src_buf_luma_base, ps_proc->pu1_rec_buf_luma_base,
2286         ps_proc->pu1_src_buf_chroma_base, ps_proc->pu1_rec_buf_chroma_base,
2287         src_strds, rec_strds, wd, ht, sum_squared_error);
2288 
2289     total_samples[0] = disp_wd * disp_ht;
2290     total_samples[1] = total_samples[2] = total_samples[0] / 4;
2291 
2292     ps_pic_quality_stats->total_frames = 1;
2293     ps_codec->s_global_quality_stats.total_frames += 1;
2294     for (i = 0; i < 3; i++)
2295     {
2296         double psnr = sse_to_psnr(total_samples[i], sum_squared_error[i]);
2297         ps_pic_quality_stats->total_samples[i] = total_samples[i];
2298         ps_pic_quality_stats->total_sse[i] = sum_squared_error[i];
2299         ps_pic_quality_stats->global_psnr[i] = ps_pic_quality_stats->avg_psnr[i] =
2300             ps_pic_quality_stats->total_psnr[i] = psnr;
2301         ps_codec->s_global_quality_stats.total_sse[i] += sum_squared_error[i];
2302         ps_codec->s_global_quality_stats.global_psnr[i] =
2303             sse_to_psnr(ps_codec->s_global_quality_stats.total_samples[i],
2304                 ps_codec->s_global_quality_stats.total_sse[i]);
2305         ps_codec->s_global_quality_stats.total_psnr[i] += psnr;
2306         ps_codec->s_global_quality_stats.avg_psnr[i] =
2307             ps_codec->s_global_quality_stats.total_psnr[i] /
2308                 ps_codec->s_global_quality_stats.total_frames;
2309     }
2310 }
2311 
2312