xref: /aosp_15_r20/external/libavc/encoder/ih264e_process.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_process.c
25 *
26 * @brief
27 *  Contains functions for codec thread
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps
34 * - ih264e_init_entropy_ctxt
35 * - ih264e_entropy
36 * - ih264e_pack_header_data
37 * - ih264e_update_proc_ctxt
38 * - ih264e_init_proc_ctxt
39 * - ih264e_pad_recon_buffer
40 * - ih264e_dblk_pad_hpel_processing_n_mbs
41 * - ih264e_process
42 * - ih264e_update_rc_post_enc
43 * - ih264e_process_thread
44 *
45 * @remarks
46 *  none
47 *
48 *******************************************************************************
49 */
50 
51 /*****************************************************************************/
52 /* File Includes                                                             */
53 /*****************************************************************************/
54 
55 /* System Include Files */
56 #include <stdio.h>
57 #include <stddef.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <limits.h>
61 #include <assert.h>
62 
63 /* User Include Files */
64 #include "ih264e_config.h"
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ithread.h"
69 
70 #include "ih264_debug.h"
71 #include "ih264_macros.h"
72 #include "ih264_error.h"
73 #include "ih264_defs.h"
74 #include "ih264_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_structs.h"
77 #include "ih264_trans_quant_itrans_iquant.h"
78 #include "ih264_inter_pred_filters.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_common_tables.h"
82 #include "ih264_cavlc_tables.h"
83 #include "ih264_cabac_tables.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264_list.h"
86 #include "ih264_platform_macros.h"
87 
88 #include "ime_defs.h"
89 #include "ime_distortion_metrics.h"
90 #include "ime_structs.h"
91 #include "ime_statistics.h"
92 
93 #include "irc_mem_req_and_acq.h"
94 #include "irc_cntrl_param.h"
95 #include "irc_frame_info_collector.h"
96 #include "irc_rate_control_api.h"
97 
98 #include "ih264e_error.h"
99 #include "ih264e_defs.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_rate_control.h"
102 #include "ih264e_bitstream.h"
103 #include "ih264e_cabac_structs.h"
104 #include "ih264e_structs.h"
105 #include "ih264e_deblk.h"
106 #include "ih264e_encode_header.h"
107 #include "ih264e_utils.h"
108 #include "ih264e_me.h"
109 #include "ih264e_intra_modes_eval.h"
110 #include "ih264e_cavlc.h"
111 #include "ih264e_cabac.h"
112 #include "ih264e_master.h"
113 #include "ih264e_process.h"
114 #include "ih264e_trace.h"
115 #include "ih264e_statistics.h"
116 #include "ih264e_platform_macros.h"
117 
118 
119 /*****************************************************************************/
120 /* Function Definitions                                                      */
121 /*****************************************************************************/
122 
123 /**
124 ******************************************************************************
125 *
126 * @brief This function generates sps, pps set on request
127 *
128 * @par   Description
129 *  When the encoder is set in header generation mode, the following function
130 *  is called. This generates sps and pps headers and returns the control back
131 *  to caller.
132 *
133 * @param[in]    ps_codec
134 *  pointer to codec context
135 *
136 * @return      success or failure error code
137 *
138 ******************************************************************************
139 */
ih264e_generate_sps_pps(codec_t * ps_codec)140 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
141 {
142     /* choose between ping-pong process buffer set */
143     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
144 
145     /* entropy ctxt */
146     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
147 
148     /* Bitstream structure */
149     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
150 
151     /* sps */
152     sps_t *ps_sps = NULL;
153 
154     /* pps */
155     pps_t *ps_pps = NULL;
156 
157     /* output buff */
158     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
159 
160 
161     /********************************************************************/
162     /*      initialize the bit stream buffer                            */
163     /********************************************************************/
164     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
165 
166     /********************************************************************/
167     /*                    BEGIN HEADER GENERATION                       */
168     /********************************************************************/
169     /*ps_codec->i4_pps_id ++;*/
170     ps_codec->i4_pps_id %= MAX_PPS_CNT;
171 
172     /*ps_codec->i4_sps_id ++;*/
173     ps_codec->i4_sps_id %= MAX_SPS_CNT;
174 
175     /* populate sps header */
176     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
177     ih264e_populate_sps(ps_codec, ps_sps);
178 
179     /* populate pps header */
180     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
181     ih264e_populate_pps(ps_codec, ps_pps);
182 
183     ps_entropy->i4_error_code = IH264E_SUCCESS;
184 
185     /* generate sps */
186     ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
187                                                      &ps_codec->s_cfg.s_vui);
188     if(ps_entropy->i4_error_code != IH264E_SUCCESS)
189     {
190         return ps_entropy->i4_error_code;
191     }
192     /* generate pps */
193     ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
194 
195     /* queue output buffer */
196     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
197 
198     return ps_entropy->i4_error_code;
199 }
200 
201 /**
202 *******************************************************************************
203 *
204 * @brief   initialize entropy context.
205 *
206 * @par Description:
207 *  Before invoking the call to perform to entropy coding the entropy context
208 *  associated with the job needs to be initialized. This involves the start
209 *  mb address, end mb address, slice index and the pointer to location at
210 *  which the mb residue info and mb header info are packed.
211 *
212 * @param[in] ps_proc
213 *  Pointer to the current process context
214 *
215 * @returns error status
216 *
217 * @remarks none
218 *
219 *******************************************************************************
220 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)221 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
222 {
223     /* codec context */
224     codec_t *ps_codec = ps_proc->ps_codec;
225 
226     /* entropy ctxt */
227     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
228 
229     /* start address */
230     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
231 
232     /* end address */
233     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
234 
235     /* slice index */
236     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
237 
238     /* sof */
239     /* @ start of frame or start of a new slice, set sof flag */
240     if (ps_entropy->i4_mb_start_add == 0)
241     {
242         ps_entropy->i4_sof = 1;
243     }
244 
245     if (ps_entropy->i4_mb_x == 0)
246     {
247         /* packed mb coeff data */
248         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
249                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
250 
251         /* packed mb header data */
252         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
253                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
254     }
255 
256     return IH264E_SUCCESS;
257 }
258 
259 /**
260 *******************************************************************************
261 *
262 * @brief entry point for entropy coding
263 *
264 * @par Description
265 *  This function calls lower level functions to perform entropy coding for a
266 *  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
267 *  back the control, updates the ctxt and calls lower level functions again.
268 *  This process is repeated till all the rows or group of mb's (which ever is
269 *  minimum) are coded
270 *
271 * @param[in] ps_proc
272 *  process context
273 *
274 * @returns  error status
275 *
276 * @remarks
277 *
278 *******************************************************************************
279 */
ih264e_entropy(process_ctxt_t * ps_proc)280 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
281 {
282     /* codec context */
283     codec_t *ps_codec = ps_proc->ps_codec;
284 
285     /* entropy context */
286     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
287 
288     /* cabac context */
289     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
290 
291     /* sps */
292     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
293 
294     /* pps */
295     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
296 
297     /* slice header */
298     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
299 
300     /* slice type */
301     WORD32 i4_slice_type = ps_proc->i4_slice_type;
302 
303     /* Bitstream structure */
304     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
305 
306     /* output buff */
307     out_buf_t s_out_buf;
308 
309     /* sei params */
310     sei_params_t s_sei;
311 
312     /* proc map */
313     UWORD8  *pu1_proc_map;
314 
315     /* entropy map */
316     UWORD8  *pu1_entropy_map_curr;
317 
318     /* proc base idx */
319     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
320 
321     /* temp var */
322     WORD32 i4_wd_mbs, i4_ht_mbs;
323     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx, u4_insert_per_idr;
324     WORD32 bitstream_start_offset, bitstream_end_offset;
325     /********************************************************************/
326     /*                            BEGIN INIT                            */
327     /********************************************************************/
328 
329     /* entropy encode start address */
330     u4_mb_idx = ps_entropy->i4_mb_start_add;
331 
332     /* entropy encode end address */
333     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
334 
335     /* width in mbs */
336     i4_wd_mbs = ps_entropy->i4_wd_mbs;
337 
338     /* height in mbs */
339     i4_ht_mbs = ps_entropy->i4_ht_mbs;
340 
341     /* total mb cnt */
342     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
343 
344     /* proc map */
345     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
346 
347     /* entropy map */
348     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
349 
350     /********************************************************************/
351     /* @ start of frame / slice,                                        */
352     /*      initialize the output buffer,                               */
353     /*      initialize the bit stream buffer,                           */
354     /*      check if sps and pps headers have to be generated,          */
355     /*      populate and generate slice header                          */
356     /********************************************************************/
357     if (ps_entropy->i4_sof)
358     {
359         /********************************************************************/
360         /*      initialize the output buffer                                */
361         /********************************************************************/
362         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
363 
364         /* is last frame to encode */
365         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
366 
367         /* frame idx */
368         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
369         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
370 
371         /********************************************************************/
372         /*      initialize the bit stream buffer                            */
373         /********************************************************************/
374         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
375 
376         /********************************************************************/
377         /*                    BEGIN HEADER GENERATION                       */
378         /********************************************************************/
379         if (1 == ps_entropy->i4_gen_header)
380         {
381             /* generate sps */
382             ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
383                                                              &ps_codec->s_cfg.s_vui);
384             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
385             /* generate pps */
386             ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
387             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
388 
389             /* reset i4_gen_header */
390             ps_entropy->i4_gen_header = 0;
391         }
392 
393         /* populate slice header */
394         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
395 
396         /* Starting bitstream offset for header in bits */
397         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
398 
399         /* generate sei */
400         u4_insert_per_idr = (NAL_SLICE_IDR == ps_slice_hdr->i1_nal_unit_type);
401 
402         memset(&s_sei, 0, sizeof(sei_params_t));
403         s_sei.u1_sei_mdcv_params_present_flag =
404                     ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag;
405         s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
406         s_sei.u1_sei_cll_params_present_flag =
407                     ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag;
408         s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
409         s_sei.u1_sei_ave_params_present_flag =
410                     ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag;
411         s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
412         s_sei.u1_sei_ccv_params_present_flag = 0;
413         s_sei.s_sei_ccv_params =
414                     ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].s_sei_ccv;
415         s_sei.u1_sei_sii_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_sii_params_present_flag;
416         s_sei.s_sei_sii_params = ps_codec->s_cfg.s_sei.s_sei_sii_params;
417 
418         if((1 == ps_sps->i1_vui_parameters_present_flag) &&
419            (1 == ps_codec->s_cfg.s_vui.u1_video_signal_type_present_flag) &&
420            (1 == ps_codec->s_cfg.s_vui.u1_colour_description_present_flag) &&
421            (2 != ps_codec->s_cfg.s_vui.u1_colour_primaries) &&
422            (2 != ps_codec->s_cfg.s_vui.u1_matrix_coefficients) &&
423            (2 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
424            (4 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
425            (5 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics))
426         {
427             s_sei.u1_sei_ccv_params_present_flag =
428             ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag;
429         }
430 
431         if((1 == s_sei.u1_sei_mdcv_params_present_flag && u4_insert_per_idr) ||
432            (1 == s_sei.u1_sei_cll_params_present_flag && u4_insert_per_idr) ||
433            (1 == s_sei.u1_sei_ave_params_present_flag && u4_insert_per_idr) ||
434            (1 == s_sei.u1_sei_ccv_params_present_flag) ||
435            (1 == s_sei.u1_sei_sii_params_present_flag))
436         {
437             ps_entropy->i4_error_code =
438                     ih264e_generate_sei(ps_bitstrm, &s_sei, u4_insert_per_idr);
439             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
440         }
441         ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag = 0;
442 
443         /* generate slice header */
444         ps_entropy->i4_error_code = ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
445                                                                   ps_pps, ps_sps);
446         RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
447         /* once start of frame / slice is done, you can reset it */
448         /* it is the responsibility of the caller to set this flag */
449         ps_entropy->i4_sof = 0;
450 
451         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
452         {
453             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
454             BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
455             RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
456             ih264e_init_cabac_ctxt(ps_entropy);
457         }
458 
459         /* Ending bitstream offset for header in bits */
460         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
461         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
462                         bitstream_end_offset - bitstream_start_offset;
463     }
464 
465     /* begin entropy coding for the mb set */
466     while (u4_mb_idx < u4_mb_end_idx)
467     {
468         /* init ptrs/indices */
469         if (ps_entropy->i4_mb_x == i4_wd_mbs)
470         {
471             ps_entropy->i4_mb_y++;
472             ps_entropy->i4_mb_x = 0;
473 
474             /* packed mb coeff data */
475             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
476                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
477 
478             /* packed mb header data */
479             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
480                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
481 
482             /* proc map */
483             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
484 
485             /* entropy map */
486             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
487         }
488 
489         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
490         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
491         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
492 
493         /* wait until the curr mb is core coded */
494         /* The wait for curr mb to be core coded is essential when entropy is launched
495          * as a separate job
496          */
497         while (1)
498         {
499             volatile UWORD8 *pu1_buf1;
500             WORD32 idx = ps_entropy->i4_mb_x;
501 
502             pu1_buf1 = pu1_proc_map + idx;
503             if (*pu1_buf1)
504                 break;
505             ithread_yield();
506         }
507 
508 
509         /* write mb layer */
510         ps_entropy->i4_error_code = ps_codec->pf_write_mb_syntax_layer
511                         [ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
512         RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
513 
514         /* Starting bitstream offset for header in bits */
515         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
516 
517         /* set entropy map */
518         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
519 
520         u4_mb_idx++;
521         ps_entropy->i4_mb_x++;
522         /* check for eof */
523         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
524         {
525             if (ps_entropy->i4_mb_x < i4_wd_mbs)
526             {
527                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528             }
529         }
530 
531         if (ps_entropy->i4_mb_x == i4_wd_mbs)
532         {
533             /* if slices are enabled */
534             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
535             {
536                 /* current slice index */
537                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
538 
539                 /* slice map */
540                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
541 
542                 /* No need to open a slice at end of frame. The current slice can be closed at the time
543                  * of signaling eof flag.
544                  */
545                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
546                                                 != pu1_slice_idx[u4_mb_idx]))
547                 {
548                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
549                     { /* mb skip run */
550                         if ((i4_slice_type != ISLICE)
551                                         && *ps_entropy->pi4_mb_skip_run)
552                         {
553                             if (*ps_entropy->pi4_mb_skip_run)
554                             {
555                                 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
556                                             ps_entropy->i4_error_code, "mb skip run");
557                                 *ps_entropy->pi4_mb_skip_run = 0;
558                                 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
559                             }
560                         }
561                         /* put rbsp trailing bits for the previous slice */
562                         ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
563                         RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
564                     }
565                     else
566                     {
567                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
568                     }
569 
570                     /* update slice header pointer */
571                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
572                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
573                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
574 
575                     /* populate slice header */
576                     ps_entropy->i4_mb_start_add = u4_mb_idx;
577                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
578                                                  ps_sps);
579 
580                     /* generate slice header */
581                     ps_entropy->i4_error_code = ih264e_generate_slice_header(
582                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
583                     RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
584                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
585                     {
586                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
587                         BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
588                         ih264e_init_cabac_ctxt(ps_entropy);
589                     }
590                 }
591                 else
592                 {
593                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
594                                     && u4_mb_idx != u4_mb_cnt)
595                     {
596                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
597                     }
598                 }
599             }
600         }
601 
602         /* Ending bitstream offset for header in bits */
603         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
604         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
605                         bitstream_end_offset - bitstream_start_offset;
606     }
607 
608     /* check for eof */
609     if (u4_mb_idx == u4_mb_cnt)
610     {
611         /* set end of frame flag */
612         ps_entropy->i4_eof = 1;
613     }
614     else
615     {
616         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
617                         && ps_codec->s_cfg.e_slice_mode
618                                         != IVE_SLICE_MODE_BLOCKS)
619         {
620             bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
621             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
622             bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
623             ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
624                             bitstream_end_offset - bitstream_start_offset;
625         }
626     }
627 
628     if (ps_entropy->i4_eof)
629     {
630         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
631         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
632         {
633             /* mb skip run */
634             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
635             {
636                 if (*ps_entropy->pi4_mb_skip_run)
637                 {
638                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
639                                  ps_entropy->i4_error_code, "mb skip run");
640                     *ps_entropy->pi4_mb_skip_run = 0;
641                     RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
642                 }
643             }
644             /* put rbsp trailing bits */
645              ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
646              RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
647         }
648         else
649         {
650             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
651         }
652         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
653         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
654                         bitstream_end_offset - bitstream_start_offset;
655 
656         DEBUG("entropy status %x", ps_entropy->i4_error_code);
657     }
658 
659     return ps_entropy->i4_error_code;
660 }
661 
662 /**
663 *******************************************************************************
664 *
665 * @brief Packs header information of a mb in to a buffer
666 *
667 * @par Description:
668 *  After deciding the mode info of a macroblock, the syntax elements
669 *  associated with the mb are packed and stored. The entropy thread unpacks
670 *  this buffer and generates the final bit stream.
671 *
672 * @param[in] ps_proc
673 *  Pointer to the current process context
674 *
675 * @returns error status
676 *
677 * @remarks none
678 *
679 *******************************************************************************
680 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)681 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
682 {
683     /* curr mb type */
684     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
685 
686     /* pack mb syntax layer of curr mb (used for entropy coding) */
687     if (u4_mb_type == I4x4)
688     {
689         /* pointer to mb header storage space */
690         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
691         mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
692 
693         /* temp var */
694         WORD32 i4, byte;
695 
696         /* mb type plus mode */
697         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
698 
699         /* cbp */
700         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
701 
702         /* mb qp delta */
703         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
704 
705         /* sub mb modes */
706         for (i4 = 0; i4 < 16; i4 ++)
707         {
708             byte = 0;
709 
710             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
711                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
712             {
713                 byte |= 1;
714             }
715             else
716             {
717 
718                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
719                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
720                 {
721                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
722                 }
723                 else
724                 {
725                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
726                 }
727             }
728 
729             i4++;
730 
731             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
732                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
733             {
734                 byte |= 16;
735             }
736             else
737             {
738 
739                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
740                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
741                 {
742                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
743                 }
744                 else
745                 {
746                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
747                 }
748             }
749 
750             ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] =  byte;
751         }
752 
753         /* end of mb layer */
754         pu1_ptr += sizeof(mb_hdr_i4x4_t);
755         ps_proc->pv_mb_header_data = pu1_ptr;
756     }
757     else if (u4_mb_type == I16x16)
758     {
759         /* pointer to mb header storage space */
760         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
761         mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
762 
763         /* mb type plus mode */
764         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
765 
766         /* cbp */
767         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
768 
769         /* mb qp delta */
770         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
771 
772         /* end of mb layer */
773         pu1_ptr += sizeof(mb_hdr_i16x16_t);
774         ps_proc->pv_mb_header_data = pu1_ptr;
775     }
776     else if (u4_mb_type == P16x16)
777     {
778         /* pointer to mb header storage space */
779         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
780         mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
781 
782         /* mb type */
783         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
784 
785         /* cbp */
786         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
787 
788         /* mb qp delta */
789         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
790 
791         ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
792 
793         ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
794 
795         /* end of mb layer */
796         pu1_ptr += sizeof(mb_hdr_p16x16_t);
797         ps_proc->pv_mb_header_data = pu1_ptr;
798     }
799     else if (u4_mb_type == PSKIP)
800     {
801         /* pointer to mb header storage space */
802         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
803         mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
804 
805         /* mb type */
806         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
807 
808         /* end of mb layer */
809         pu1_ptr += sizeof(mb_hdr_pskip_t);
810         ps_proc->pv_mb_header_data = pu1_ptr;
811     }
812     else if(u4_mb_type == B16x16)
813     {
814 
815         /* pointer to mb header storage space */
816         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
817         mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
818 
819         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
820 
821         /* mb type plus mode */
822         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
823 
824         /* cbp */
825         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
826 
827         /* mb qp delta */
828         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
829 
830         /* l0 & l1 me data */
831         if (u4_pred_mode != PRED_L1)
832         {
833             ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
834                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
835 
836             ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
837                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
838         }
839         if (u4_pred_mode != PRED_L0)
840         {
841             ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
842                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
843 
844             ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
845                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
846         }
847 
848         /* end of mb layer */
849         pu1_ptr += sizeof(mb_hdr_b16x16_t);
850         ps_proc->pv_mb_header_data = pu1_ptr;
851 
852     }
853     else if(u4_mb_type == BDIRECT)
854     {
855         /* pointer to mb header storage space */
856         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
857         mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
858 
859         /* mb type plus mode */
860         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
861 
862         /* cbp */
863         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
864 
865         /* mb qp delta */
866         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
867 
868         /* end of mb layer */
869         pu1_ptr += sizeof(mb_hdr_bdirect_t);
870         ps_proc->pv_mb_header_data = pu1_ptr;
871 
872     }
873     else if(u4_mb_type == BSKIP)
874     {
875         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
876 
877         /* pointer to mb header storage space */
878         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
879         mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
880 
881         /* mb type plus mode */
882         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
883 
884         /* end of mb layer */
885         pu1_ptr += sizeof(mb_hdr_bskip_t);
886         ps_proc->pv_mb_header_data = pu1_ptr;
887     }
888 
889     return IH264E_SUCCESS;
890 }
891 
892 /**
893 *******************************************************************************
894 *
895 * @brief   update process context after encoding an mb. This involves preserving
896 * the current mb information for later use, initialize the proc ctxt elements to
897 * encode next mb.
898 *
899 * @par Description:
900 *  This function performs house keeping tasks after encoding an mb.
901 *  After encoding an mb, various elements of the process context needs to be
902 *  updated to encode the next mb. For instance, the source, recon and reference
903 *  pointers, mb indices have to be adjusted to the next mb. The slice index of
904 *  the current mb needs to be updated. If mb qp modulation is enabled, then if
905 *  the qp changes the quant param structure needs to be updated. Also to encoding
906 *  the next mb, the current mb info is used as part of mode prediction or mv
907 *  prediction. Hence the current mb info has to preserved at top/top left/left
908 *  locations.
909 *
910 * @param[in] ps_proc
911 *  Pointer to the current process context
912 *
913 * @returns none
914 *
915 * @remarks none
916 *
917 *******************************************************************************
918 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)919 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
920 {
921     /* error status */
922     WORD32 error_status = IH264_SUCCESS;
923 
924     /* codec context */
925     codec_t *ps_codec = ps_proc->ps_codec;
926 
927     /* curr mb indices */
928     WORD32 i4_mb_x = ps_proc->i4_mb_x;
929     WORD32 i4_mb_y = ps_proc->i4_mb_y;
930 
931     /* mb syntax elements of neighbors */
932     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
933     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
934     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
935 
936     /* curr mb type */
937     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
938 
939     /* curr mb type */
940     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
941 
942     /* width in mbs */
943     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
944 
945     /*height in mbs*/
946     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
947 
948     /* proc map */
949     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
950 
951     /* deblk context */
952     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
953 
954     /* deblk bs context */
955     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
956 
957     /* top row motion vector info */
958     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
959 
960     /* top left mb motion vector */
961     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
962 
963     /* left mb motion vector */
964     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
965 
966     /* sub mb modes */
967     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
968 
969     /*************************************************************/
970     /* During MV prediction, when top right mb is not available, */
971     /* top left mb info. is used for prediction. Hence the curr  */
972     /* top, which will be top left for the next mb needs to be   */
973     /* preserved before updating it with curr mb info.           */
974     /*************************************************************/
975 
976     /* mb type, mb class, csbp */
977     *ps_top_left_syn = *ps_top_syn;
978 
979     if (ps_proc->i4_slice_type != ISLICE)
980     {
981         /*****************************************/
982         /* update top left with top info results */
983         /*****************************************/
984         /* mv */
985         *ps_top_left_mb_pu = *ps_top_row_pu;
986     }
987 
988     /*************************************************/
989     /* update top and left with curr mb info results */
990     /*************************************************/
991 
992     /* mb type */
993     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
994 
995     /* mb class */
996     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
997 
998     /* csbp */
999     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
1000 
1001     /* distortion */
1002     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
1003 
1004     if (u4_is_intra)
1005     {
1006         /* mb / sub mb modes */
1007         if (I16x16 == u4_mb_type)
1008         {
1009             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
1010         }
1011         else if (I4x4 == u4_mb_type)
1012         {
1013             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1014             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1015         }
1016         else if (I8x8 == u4_mb_type)
1017         {
1018             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1019             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1020         }
1021 
1022         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
1023         {
1024             /* mv */
1025             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1026         }
1027 
1028         *ps_proc->pu4_mb_pu_cnt = 1;
1029     }
1030     else
1031     {
1032         /* mv */
1033         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1034     }
1035 
1036     /*
1037      * Mark that the MB has been coded intra
1038      * So that future AIRs can skip it
1039      */
1040     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1041 
1042     /**************************************************/
1043     /* pack mb header info. for entropy coding        */
1044     /**************************************************/
1045     ih264e_pack_header_data(ps_proc);
1046 
1047     /* update previous mb qp */
1048     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1049 
1050     /* store qp */
1051     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1052 
1053     /*
1054      * We need to sync the cache to make sure that the nmv content of proc
1055      * is updated to cache properly
1056      */
1057     DATA_SYNC();
1058 
1059     /* Just before finishing the row, enqueue the job in to entropy queue.
1060      * The master thread depending on its convenience shall dequeue it and
1061      * performs entropy.
1062      *
1063      * WARN !! Placing this block post proc map update can cause queuing of
1064      * entropy jobs in out of order.
1065      */
1066     if (i4_mb_x == i4_wd_mbs - 1)
1067     {
1068         /* job structures */
1069         job_t s_job;
1070 
1071         /* job class */
1072         s_job.i4_cmd = CMD_ENTROPY;
1073 
1074         /* number of mbs to be processed in the current job */
1075         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1076 
1077         /* job start index x */
1078         s_job.i2_mb_x = 0;
1079 
1080         /* job start index y */
1081         s_job.i2_mb_y = ps_proc->i4_mb_y;
1082 
1083         /* proc base idx */
1084         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1085 
1086         /* queue the job */
1087         error_status = ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1088         if(error_status != IH264_SUCCESS)
1089         {
1090             return error_status;
1091         }
1092         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1093             ih264_list_terminate(ps_codec->pv_entropy_jobq);
1094     }
1095 
1096     /* update intra cost if valid */
1097     if (ps_proc->i4_mb_intra_cost != INT_MAX)
1098     {
1099         ps_codec->pi4_mb_intra_cost[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->i4_mb_intra_cost;
1100     }
1101 
1102     /* update proc map */
1103     pu1_proc_map[i4_mb_x] = 1;
1104 
1105     /**************************************************/
1106     /* update proc ctxt elements for encoding next mb */
1107     /**************************************************/
1108     /* update indices */
1109     i4_mb_x ++;
1110     ps_proc->i4_mb_x = i4_mb_x;
1111 
1112     if (ps_proc->i4_mb_x == i4_wd_mbs)
1113     {
1114         ps_proc->i4_mb_y++;
1115         ps_proc->i4_mb_x = 0;
1116     }
1117 
1118     /* update slice index */
1119     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1120 
1121     /* update buffers pointers */
1122     ps_proc->pu1_src_buf_luma += MB_SIZE;
1123     ps_proc->pu1_rec_buf_luma += MB_SIZE;
1124     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1125     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1126 
1127     /*
1128      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1129      * the stride per MB is MB_SIZE
1130      */
1131     ps_proc->pu1_src_buf_chroma += MB_SIZE;
1132     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1133     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1134     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1135 
1136     /* Reset cost, distortion params */
1137     ps_proc->i4_mb_cost = INT_MAX;
1138     ps_proc->i4_mb_intra_cost = INT_MAX;
1139     ps_proc->i4_mb_distortion = SHRT_MAX;
1140 
1141     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1142 
1143     ps_proc->pu4_mb_pu_cnt += 1;
1144 
1145     /* Update colocated pu */
1146     if (ps_proc->i4_slice_type == BSLICE)
1147         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1148 
1149     /* deblk ctxts */
1150     if (ps_proc->u4_disable_deblock_level != 1)
1151     {
1152         /* indices */
1153         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1154         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1155 
1156 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1157         ps_deblk->i4_mb_x ++;
1158 
1159         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1160         /*
1161          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1162          * the stride per MB is MB_SIZE
1163          */
1164         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1165 #endif
1166     }
1167 
1168     return error_status;
1169 }
1170 
1171 /**
1172 *******************************************************************************
1173 *
1174 * @brief   initialize process context.
1175 *
1176 * @par Description:
1177 *  Before dispatching the current job to process thread, the process context
1178 *  associated with the job is initialized. Usually every job aims to encode one
1179 *  row of mb's. Basing on the row indices provided by the job, the process
1180 *  context's buffer ptrs, slice indices and other elements that are necessary
1181 *  during core-coding are initialized.
1182 *
1183 * @param[in] ps_proc
1184 *  Pointer to the current process context
1185 *
1186 * @returns error status
1187 *
1188 * @remarks none
1189 *
1190 *******************************************************************************
1191 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1192 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1193 {
1194     /* codec context */
1195     codec_t *ps_codec = ps_proc->ps_codec;
1196 
1197     /* nmb processing context*/
1198     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1199 
1200     /* indices */
1201     WORD32 i4_mb_x, i4_mb_y;
1202 
1203     /* strides */
1204     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1205     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1206     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1207 
1208     /* quant params */
1209     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1210 
1211     /* deblk ctxt */
1212     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1213 
1214     /* deblk bs context */
1215     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1216 
1217     /* Pointer to mv_buffer of current frame */
1218     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1219 
1220     /* Pointers for color space conversion */
1221     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1222 
1223     /* Pad the MB to support non standard sizes */
1224     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1225     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1226     UWORD16 u2_num_rows = MB_SIZE;
1227     WORD32 convert_uv_only;
1228 
1229     /********************************************************************/
1230     /*                            BEGIN INIT                            */
1231     /********************************************************************/
1232 
1233     i4_mb_x = ps_proc->i4_mb_x;
1234     i4_mb_y = ps_proc->i4_mb_y;
1235 
1236     /* Number of mbs processed in one loop of process function */
1237     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1238     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1239 
1240     /* init buffer pointers */
1241     convert_uv_only = 1;
1242     if (u4_pad_bottom_sz || u4_pad_right_sz ||
1243         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1244         ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1245         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1246     {
1247         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1248             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1249         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1250         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1251         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1252         convert_uv_only = 0;
1253     }
1254     else
1255     {
1256         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1257         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1258     }
1259 
1260 
1261     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1262         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1263         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1264         ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1265         u4_pad_bottom_sz || u4_pad_right_sz)
1266     {
1267         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1268             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1269             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1270 
1271         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1272         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1273     }
1274     else
1275     {
1276         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1277         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1278     }
1279 
1280     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1281     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1282 
1283     /* Temporal back and forward reference buffer */
1284     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1285     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1286     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1287     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1288 
1289     /*
1290      * Do color space conversion
1291      * NOTE : We assume there that the number of MB's to process will not span multiple rows
1292      */
1293     switch (ps_codec->s_cfg.e_inp_color_fmt)
1294     {
1295         case IV_YUV_420SP_UV:
1296         case IV_YUV_420SP_VU:
1297             /* In case of 420 semi-planar input, copy last few rows to intermediate
1298                buffer as few SIMD functions access upto 16 more bytes.
1299                This data will be padded if required */
1300             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1301             {
1302                 WORD32 num_rows = MB_SIZE;
1303                 UWORD8 *pu1_src;
1304                 UWORD8 *pu1_dst;
1305                 WORD32 i;
1306                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1307                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1308 
1309                 pu1_dst = ps_proc->pu1_src_buf_luma;
1310 
1311                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1312                     num_rows = MB_SIZE - u4_pad_bottom_sz;
1313                 for (i = 0; i < num_rows; i++)
1314                 {
1315                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1316                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1317                     pu1_dst += ps_proc->i4_src_strd;
1318                 }
1319                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1320                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1321                 pu1_dst = ps_proc->pu1_src_buf_chroma;
1322 
1323                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1324                  * due to interleaved input
1325                  */
1326                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1327                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1328                 else
1329                     num_rows = BLK8x8SIZE;
1330                 for (i = 0; i < num_rows; i++)
1331                 {
1332                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1333                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1334                     pu1_dst += ps_proc->i4_src_chroma_strd;
1335                 }
1336 
1337             }
1338             break;
1339 
1340         case IV_YUV_420P :
1341             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1342                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1343 
1344             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1345                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1346 
1347             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1348                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1349 
1350             ps_codec->pf_ih264e_conv_420p_to_420sp(
1351                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1352                             ps_proc->pu1_src_buf_luma,
1353                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
1354                             ps_codec->s_cfg.u4_disp_wd,
1355                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1356                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1357                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1358                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1359                             convert_uv_only);
1360             break;
1361 
1362         case IV_YUV_422ILE :
1363             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1364                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1365 
1366             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1367                             ps_proc->pu1_src_buf_luma,
1368                             ps_proc->pu1_src_buf_chroma,
1369                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1370                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1371                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1372                             ps_proc->i4_src_chroma_strd,
1373                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1374             break;
1375 
1376         default:
1377             break;
1378     }
1379 
1380     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1381     {
1382         UWORD32 u4_pad_wd, u4_pad_ht;
1383         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1384         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1385         u4_pad_ht = MB_SIZE;
1386         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1387             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1388 
1389         ih264_pad_right_luma(
1390                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1391                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1392 
1393         ih264_pad_right_chroma(
1394                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1395                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1396     }
1397 
1398     if (ps_proc->i4_mb_y && ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) {
1399         UWORD8 *pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] +
1400                         ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE) -
1401                         ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1402         UWORD8 *pu1_dst = ps_proc->pu1_src_buf_luma - ps_proc->i4_src_strd;
1403         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1404         if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) {
1405             pu1_dst += ps_codec->s_cfg.u4_disp_wd;
1406             memset(pu1_dst, pu1_dst[-1], u4_pad_right_sz);
1407         }
1408     }
1409 
1410     /* pad bottom edge */
1411     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1412     {
1413         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1414                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1415 
1416         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1417                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1418     }
1419 
1420 
1421     /* packed mb coeff data */
1422     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1423 
1424     /* packed mb header data */
1425     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1426 
1427     /* slice index */
1428     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1429 
1430     /*********************************************************************/
1431     /* ih264e_init_quant_params() routine is called at the pic init level*/
1432     /* this would have initialized the qp.                               */
1433     /* TODO_LATER: currently it is assumed that quant params donot change*/
1434     /* across mb's. When they do calculate update ps_qp_params accordingly*/
1435     /*********************************************************************/
1436 
1437     /* init mv buffer ptr */
1438     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1439                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1440 
1441     /* Init co-located mv buffer */
1442     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1443                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1444 
1445     if (i4_mb_y == 0)
1446     {
1447         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1448     }
1449     else
1450     {
1451         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1452                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1453     }
1454 
1455     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1456 
1457     /* mb type */
1458     ps_proc->u4_mb_type = I16x16;
1459 
1460     /* lambda */
1461     if (ps_codec->pic_type == PIC_B)
1462     {
1463         ps_proc->u4_lambda = gu1_qp_lambdaB[ps_qp_params->u1_mb_qp];
1464     }
1465     else
1466     {
1467         ps_proc->u4_lambda = gu1_qp_lambdaIP[ps_qp_params->u1_mb_qp];
1468     }
1469 
1470     /* mb distortion */
1471     ps_proc->i4_mb_distortion = SHRT_MAX;
1472 
1473     if (i4_mb_x == 0)
1474     {
1475         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1476 
1477         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1478 
1479         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1480 
1481         if (i4_mb_y == 0)
1482         {
1483             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1484         }
1485     }
1486 
1487     /* mb cost */
1488     ps_proc->i4_mb_cost = INT_MAX;
1489     ps_proc->i4_mb_intra_cost = INT_MAX;
1490 
1491     /**********************/
1492     /* init deblk context */
1493     /**********************/
1494     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1495     /* deblk lags the current mb proc by 1 row */
1496     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1497     /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1498     /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1499     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1500 
1501     /* buffer ptrs */
1502     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1503     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1504 
1505     /* init deblk bs context */
1506     /* mb indices */
1507     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1508     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1509 
1510     /* init n_mb_process  context */
1511     ps_n_mb_ctxt->i4_mb_x = 0;
1512     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1513     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1514 
1515     return IH264E_SUCCESS;
1516 }
1517 
1518 /**
1519 *******************************************************************************
1520 *
1521 * @brief This function performs luma & chroma padding
1522 *
1523 * @par Description:
1524 *
1525 * @param[in] ps_proc
1526 *  Process context corresponding to the job
1527 *
1528 * @param[in] pu1_curr_pic_luma
1529 *  Pointer to luma buffer
1530 *
1531 * @param[in] pu1_curr_pic_chroma
1532 *  Pointer to chroma buffer
1533 *
1534 * @param[in] i4_mb_x
1535 *  mb index x
1536 *
1537 * @param[in] i4_mb_y
1538 *  mb index y
1539 *
1540 *  @param[in] i4_pad_ht
1541 *  number of rows to be padded
1542 *
1543 * @returns  error status
1544 *
1545 * @remarks none
1546 *
1547 *******************************************************************************
1548 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1549 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1550                                        UWORD8 *pu1_curr_pic_luma,
1551                                        UWORD8 *pu1_curr_pic_chroma,
1552                                        WORD32 i4_mb_x,
1553                                        WORD32 i4_mb_y,
1554                                        WORD32 i4_pad_ht)
1555 {
1556     /* codec context */
1557     codec_t *ps_codec = ps_proc->ps_codec;
1558 
1559     /* strides */
1560     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1561 
1562     if (i4_mb_x == 0)
1563     {
1564         /* padding left luma */
1565         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1566 
1567         /* padding left chroma */
1568         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1569     }
1570     if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1571     {
1572         /* padding right luma */
1573         ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1574 
1575         /* padding right chroma */
1576         ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1577 
1578         if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1579         {
1580             UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1581             UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1582 
1583             /* padding bottom luma */
1584             ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1585 
1586             /* padding bottom chroma */
1587             ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1588         }
1589     }
1590 
1591     if (i4_mb_y == 0)
1592     {
1593         UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1594         UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1595         WORD32 wd = MB_SIZE;
1596 
1597         if (i4_mb_x == 0)
1598         {
1599             pu1_rec_luma -= PAD_LEFT;
1600             pu1_rec_chroma -= PAD_LEFT;
1601 
1602             wd += PAD_LEFT;
1603         }
1604         if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1605         {
1606             wd += PAD_RIGHT;
1607         }
1608 
1609         /* padding top luma */
1610         ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1611 
1612         /* padding top chroma */
1613         ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1614     }
1615 
1616     return IH264E_SUCCESS;
1617 }
1618 
1619 /**
1620 *******************************************************************************
1621 *
1622 * @brief This function performs deblocking, padding and halfpel generation for
1623 *  'n' MBs
1624 *
1625 * @par Description:
1626 *
1627 * @param[in] ps_proc
1628 *  Process context corresponding to the job
1629 *
1630 * @param[in] pu1_curr_pic_luma
1631 * Current MB being processed(Luma)
1632 *
1633 * @param[in] pu1_curr_pic_chroma
1634 * Current MB being processed(Chroma)
1635 *
1636 * @param[in] i4_mb_x
1637 * Column value of current MB processed
1638 *
1639 * @param[in] i4_mb_y
1640 * Curent row processed
1641 *
1642 * @returns  error status
1643 *
1644 * @remarks none
1645 *
1646 *******************************************************************************
1647 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1648 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1649                                                      UWORD8 *pu1_curr_pic_luma,
1650                                                      UWORD8 *pu1_curr_pic_chroma,
1651                                                      WORD32 i4_mb_x,
1652                                                      WORD32 i4_mb_y)
1653 {
1654     /* codec context */
1655     codec_t *ps_codec = ps_proc->ps_codec;
1656 
1657     /* n_mb processing context */
1658     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1659 
1660     /* deblk context */
1661     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1662 
1663     /* strides */
1664     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1665 
1666     /* loop variables */
1667     WORD32 row, i, j, col;
1668 
1669     /* Padding Width */
1670     UWORD32 u4_pad_wd;
1671 
1672     /* deblk_map of the row being deblocked */
1673     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1674 
1675     /* deblk_map_previous row */
1676     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1677 
1678     WORD32 u4_pad_top = 0;
1679 
1680     WORD32 u4_deblk_prev_row = 0;
1681 
1682     /* Number of mbs to be processed */
1683     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1684 
1685     /* Number of mbs  actually processed
1686      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1687     WORD32 i4_n_mb_process_count = 0;
1688 
1689     UWORD8 *pu1_pad_bottom_src = NULL;
1690 
1691     UWORD8 *pu1_pad_src_luma = NULL;
1692     UWORD8 *pu1_pad_src_chroma = NULL;
1693 
1694     if (ps_proc->u4_disable_deblock_level == 1)
1695     {
1696         /* If left most MB is processed, then pad left */
1697         if (i4_mb_x == 0)
1698         {
1699             /* padding left luma */
1700             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1701 
1702             /* padding left chroma */
1703             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1704         }
1705         /*last col*/
1706         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1707         {
1708             /* padding right luma */
1709             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1710 
1711             /* padding right chroma */
1712             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1713         }
1714     }
1715 
1716     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1717     {
1718         /* if number of mb's to be processed are less than 'N', go back.
1719          * exception to the above clause is end of row */
1720         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1721         {
1722             return IH264E_SUCCESS;
1723         }
1724         else
1725         {
1726             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1727 
1728             /* performing deblocking for required number of MBs */
1729             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1730             {
1731                 u4_deblk_prev_row = 1;
1732 
1733                 /* checking whether the top rows are deblocked */
1734                 for (col = 0; col < i4_n_mb_process_count; col++)
1735                 {
1736                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1737                 }
1738 
1739                 /* checking whether the top right MB is deblocked */
1740                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1741                 {
1742                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1743                 }
1744 
1745                 /* Top or Top right MBs not deblocked */
1746                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1747                 {
1748                     return IH264E_SUCCESS;
1749                 }
1750 
1751                 for (row = 0; row < i4_n_mb_process_count; row++)
1752                 {
1753                     ih264e_deblock_mb(ps_proc, ps_deblk);
1754 
1755                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1756 
1757                     if (ps_deblk->i4_mb_y > 0)
1758                     {
1759                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1760                         {
1761                             /* padding left luma */
1762                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1763 
1764                             /* padding left chroma */
1765                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1766                         }
1767 
1768                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1769                         {
1770                             /* padding right luma */
1771                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1772 
1773                             /* padding right chroma */
1774                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1775                         }
1776                     }
1777                     ps_deblk->i4_mb_x++;
1778 
1779                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1780                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1781 
1782                 }
1783             }
1784             else if(i4_mb_y > 0)
1785             {
1786                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1787 
1788                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1789                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1790             }
1791 
1792             if (i4_mb_y == 2)
1793             {
1794                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1795                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1796 
1797                 if (ps_n_mb_ctxt->i4_mb_x == 0)
1798                 {
1799                     u4_pad_wd += PAD_LEFT;
1800                     u4_pad_top = -PAD_LEFT;
1801                 }
1802 
1803                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1804                 {
1805                     u4_pad_wd += PAD_RIGHT;
1806                 }
1807 
1808                 /* padding top luma */
1809                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1810 
1811                 /* padding top chroma */
1812                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1813             }
1814 
1815             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1816 
1817             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1818             {
1819                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1820                 {
1821                     /* Bottom Padding is done in one stretch for the entire width */
1822                     if (ps_proc->u4_disable_deblock_level != 1)
1823                     {
1824                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1825 
1826                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1827 
1828                         ps_n_mb_ctxt->i4_mb_x = 0;
1829                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1830                         ps_deblk->i4_mb_x = 0;
1831                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1832 
1833                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1834                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1835 
1836                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1837 
1838                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1839 
1840                         for (i = 0; i < j; i++)
1841                         {
1842                             for (col = 0; col < i4_n_mbs; col++)
1843                             {
1844                                 ih264e_deblock_mb(ps_proc, ps_deblk);
1845 
1846                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1847 
1848                                 ps_deblk->i4_mb_x++;
1849                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1850                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1851                                 ps_n_mb_ctxt->i4_mb_x++;
1852                             }
1853                         }
1854 
1855                         for (col = 0; col < i4_n_mb_process_count; col++)
1856                         {
1857                             ih264e_deblock_mb(ps_proc, ps_deblk);
1858 
1859                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1860 
1861                             ps_deblk->i4_mb_x++;
1862                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1863                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1864                             ps_n_mb_ctxt->i4_mb_x++;
1865                         }
1866 
1867                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1868 
1869                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1870 
1871                         /* padding left luma */
1872                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1873 
1874                         /* padding left chroma */
1875                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1876 
1877                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1878                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1879 
1880                         /* padding left luma */
1881                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1882 
1883                         /* padding left chroma */
1884                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1885 
1886                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1887 
1888                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1889 
1890                         /* padding right luma */
1891                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1892 
1893                         /* padding right chroma */
1894                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1895 
1896                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1897                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1898 
1899                         /* padding right luma */
1900                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1901 
1902                         /* padding right chroma */
1903                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1904 
1905                     }
1906 
1907                     /* In case height is less than 2 MBs pad top */
1908                     if (ps_proc->i4_ht_mbs <= 2)
1909                     {
1910                         UWORD8 *pu1_pad_top_src;
1911                         /* padding top luma */
1912                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1913                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1914 
1915                         /* padding top chroma */
1916                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1917                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1918                     }
1919 
1920                     /* padding bottom luma */
1921                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1922                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1923 
1924                     /* padding bottom chroma */
1925                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1926                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1927                 }
1928             }
1929         }
1930     }
1931 
1932     return IH264E_SUCCESS;
1933 }
1934 
1935 
1936 /**
1937 *******************************************************************************
1938 *
1939 * @brief This function performs luma & chroma encoding for a set of mb's.
1940 *
1941 * @par Description:
1942 *  The mb to be coded is taken and is evaluated over a predefined set of modes
1943 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1944 *  is selected and using intra/inter prediction filters, prediction is carried out.
1945 *  The deviation between src and pred signal constitutes error signal. This error
1946 *  signal is transformed (hierarchical transform if necessary) and quantized. The
1947 *  quantized residue is packed into the entropy buffer for entropy coding. This is
1948 *  repeated for all the mb's enlisted under the job.
1949 *
1950 * @param[in] ps_proc
1951 *  Process context corresponding to the job
1952 *
1953 * @returns  error status
1954 *
1955 * @remarks none
1956 *
1957 *******************************************************************************
1958 */
ih264e_process(process_ctxt_t * ps_proc)1959 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1960 {
1961     /* error status */
1962     WORD32 error_status = IH264_SUCCESS;
1963 
1964     /* codec context */
1965     codec_t *ps_codec = ps_proc->ps_codec;
1966 
1967     /* cbp luma, chroma */
1968     UWORD32 u4_cbp_l, u4_cbp_c;
1969 
1970     /* width in mbs */
1971     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1972 
1973     /* loop var */
1974     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1975 
1976     /* valid modes */
1977     UWORD32 u4_valid_modes = 0;
1978 
1979     /* gate threshold */
1980     WORD32 i4_gate_threshold = 0;
1981 
1982     /* is intra */
1983     WORD32 luma_idx, chroma_idx, is_intra;
1984 
1985     /* temp variables */
1986     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1987 
1988     /*
1989      * list of modes for evaluation
1990      * -------------------------------------------------------------------------
1991      * Note on enabling I4x4 and I16x16
1992      * At very low QP's the hadamard transform in I16x16 will push up the maximum
1993      * coeff value very high. CAVLC may not be able to represent the value and
1994      * hence the stream may not be decodable in some clips.
1995      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1996      */
1997     if (ps_proc->i4_slice_type == ISLICE)
1998     {
1999         if (ps_proc->u4_frame_qp > 10)
2000         {
2001             /* enable intra 16x16 */
2002             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2003 
2004             /* enable intra 8x8 */
2005             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
2006         }
2007 
2008         /* enable intra 4x4 */
2009         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2010         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2011 
2012     }
2013     else if (ps_proc->i4_slice_type == PSLICE)
2014     {
2015         if (ps_proc->u4_frame_qp > 10)
2016         {
2017             /* enable intra 16x16 */
2018             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2019         }
2020 
2021         /* enable intra 4x4 */
2022         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2023         {
2024             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2025         }
2026         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2027 
2028         /* enable inter P16x16 */
2029         u4_valid_modes |= (1 << P16x16);
2030     }
2031     else if (ps_proc->i4_slice_type == BSLICE)
2032     {
2033         if (ps_proc->u4_frame_qp > 10)
2034         {
2035             /* enable intra 16x16 */
2036             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2037         }
2038 
2039         /* enable intra 4x4 */
2040         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2041         {
2042             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2043         }
2044         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2045 
2046         /* enable inter B16x16 */
2047         u4_valid_modes |= (1 << B16x16);
2048     }
2049 
2050     /* init entropy */
2051     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
2052     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
2053     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
2054 
2055     /* compute recon when :
2056      *   1. current frame is to be used as a reference
2057      *   2. dump recon for bit stream sanity check
2058      */
2059     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2060                                 ps_codec->s_cfg.u4_enable_recon ||
2061                                 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR;
2062 
2063     /* Encode 'n' macroblocks,
2064      * 'n' being the number of mbs dictated by current proc ctxt */
2065     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2066     {
2067         /* since we have not yet found sad, we have not yet got min sad */
2068         /* we need to initialize these variables for each MB */
2069         /* TODO how to get the min sad into the codec */
2070         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2071         ps_proc->u4_min_sad_reached = 0;
2072 
2073         /* mb analysis */
2074         {
2075             /* temp var */
2076             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2077 
2078             /* force intra refresh ? */
2079             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2080                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2081 
2082             /* evaluate inter 16x16 modes */
2083             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2084             {
2085                 /* compute nmb me */
2086                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2087                 {
2088                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2089                                                        i4_wd_mbs - ps_proc->i4_mb_x));
2090                 }
2091 
2092                 /* set pointers to ME data appropriately for other modules to use */
2093                 {
2094                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2095 
2096                     /* get the min sad condition for current mb */
2097                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2098                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2099                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2100                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2101                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2102 
2103                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2104                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2105                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2106 
2107                     /* get the best sub pel buffer */
2108                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2109                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2110                 }
2111                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2112             }
2113             else
2114             {
2115                 /* Derive neighbor availability for the current macroblock */
2116                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2117 
2118                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2119             }
2120 
2121             /*
2122              * If air says intra, we need to force the following code path to evaluate intra
2123              * The easy way is just to say that the inter cost is too much
2124              */
2125             if (!i4_air_enable_inter)
2126             {
2127                 ps_proc->u4_min_sad_reached = 0;
2128                 ps_proc->i4_mb_cost = INT_MAX;
2129                 ps_proc->i4_mb_distortion = INT_MAX;
2130             }
2131             else if (ps_proc->u4_mb_type == PSKIP)
2132             {
2133                 goto UPDATE_MB_INFO;
2134             }
2135 
2136             /* wait until the proc of [top + 1] mb is computed.
2137              * We wait till the proc dependencies are satisfied */
2138              if(ps_proc->i4_mb_y > 0)
2139              {
2140                 /* proc map */
2141                 UWORD8  *pu1_proc_map_top;
2142 
2143                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2144 
2145                 while (1)
2146                 {
2147                     volatile UWORD8 *pu1_buf;
2148                     WORD32 idx = i4_mb_idx + 1;
2149 
2150                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2151                     pu1_buf =  pu1_proc_map_top + idx;
2152                     if(*pu1_buf)
2153                         break;
2154                     ithread_yield();
2155                 }
2156             }
2157 
2158             /* If we already have the minimum sad, there is no point in searching for sad again */
2159             if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2160             {
2161                 /* intra gating in inter slices */
2162                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2163                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2164                 {
2165                     /* distortion of neighboring blocks */
2166                     WORD32 i4_distortion[4];
2167 
2168                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2169 
2170                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2171 
2172                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2173 
2174                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2175 
2176                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2177 
2178                 }
2179 
2180 
2181                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2182                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2183                 {
2184                     /* evaluate intra 4x4 modes */
2185                     if (u4_valid_modes & (1 << I4x4))
2186                     {
2187                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2188                         {
2189                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2190                         }
2191                         else
2192                         {
2193                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2194                         }
2195                     }
2196 
2197                     /* evaluate intra 16x16 modes */
2198                     if (u4_valid_modes & (1 << I16x16))
2199                     {
2200                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2201                     }
2202 
2203                     /* evaluate intra 8x8 modes */
2204                     if (u4_valid_modes & (1 << I8x8))
2205                     {
2206                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2207                     }
2208 
2209                 }
2210             }
2211         }
2212 
2213         /* is intra */
2214         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2215         {
2216             luma_idx = ps_proc->u4_mb_type;
2217             chroma_idx = 0;
2218             is_intra = 1;
2219 
2220             /* evaluate chroma blocks for intra */
2221             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2222         }
2223         else
2224         {
2225             luma_idx = 3;
2226             chroma_idx = 1;
2227             is_intra = 0;
2228         }
2229         ps_proc->u4_is_intra = is_intra;
2230         ps_proc->ps_pu->b1_intra_flag = is_intra;
2231 
2232         /* redo MV pred of neighbors in the case intra mb */
2233         /* TODO : currently called unconditionally, needs to be called only in the case of intra
2234          * to modify neighbors */
2235         if (ps_proc->i4_slice_type != ISLICE)
2236         {
2237             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2238         }
2239 
2240         /* Perform luma mb core coding */
2241         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2242 
2243         /* Perform chroma mb core coding */
2244         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2245 
2246         /* coded block pattern */
2247         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2248 
2249         if (!ps_proc->u4_is_intra)
2250         {
2251             if (ps_proc->i4_slice_type == BSLICE)
2252             {
2253                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2254                 {
2255                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2256                 }
2257             }
2258             else if(!ps_proc->u4_cbp)
2259             {
2260                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2261                 {
2262                     ps_proc->u4_mb_type = PSKIP;
2263                 }
2264             }
2265         }
2266 
2267 UPDATE_MB_INFO:
2268 
2269         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2270         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2271 
2272         /**********************************************************************/
2273         /* if disable deblock level is '0' this implies enable deblocking for */
2274         /* all edges of all macroblocks with out any restrictions             */
2275         /*                                                                    */
2276         /* if disable deblock level is '1' this implies disable deblocking for*/
2277         /* all edges of all macroblocks with out any restrictions             */
2278         /*                                                                    */
2279         /* if disable deblock level is '2' this implies enable deblocking for */
2280         /* all edges of all macroblocks except edges overlapping with slice   */
2281         /* boundaries. This option is not currently supported by the encoder  */
2282         /* hence the slice map should be of no significance to perform debloc */
2283         /* king                                                               */
2284         /**********************************************************************/
2285 
2286         if (ps_proc->u4_compute_recon)
2287         {
2288             /* deblk context */
2289             /* src pointers */
2290             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2291             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2292 
2293             /* src indices */
2294             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2295             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2296 
2297             /* compute blocking strength */
2298             if (ps_proc->u4_disable_deblock_level != 1)
2299             {
2300                 ih264e_compute_bs(ps_proc);
2301             }
2302 
2303             /* nmb deblocking and hpel and padding */
2304             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2305                                                   pu1_cur_pic_chroma, i4_mb_x,
2306                                                   i4_mb_y);
2307         }
2308 
2309         /* update the context after for coding next mb */
2310         error_status = ih264e_update_proc_ctxt(ps_proc);
2311         if(error_status != IH264E_SUCCESS)
2312         {
2313             return error_status;
2314         }
2315         /* Once the last row is processed, mark the buffer status appropriately */
2316         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2317         {
2318             /* Pointer to current picture buffer structure */
2319             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2320 
2321             /* Pointer to current picture's mv buffer structure */
2322             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2323 
2324             /**********************************************************************/
2325             /* if disable deblock level is '0' this implies enable deblocking for */
2326             /* all edges of all macroblocks with out any restrictions             */
2327             /*                                                                    */
2328             /* if disable deblock level is '1' this implies disable deblocking for*/
2329             /* all edges of all macroblocks with out any restrictions             */
2330             /*                                                                    */
2331             /* if disable deblock level is '2' this implies enable deblocking for */
2332             /* all edges of all macroblocks except edges overlapping with slice   */
2333             /* boundaries. This option is not currently supported by the encoder  */
2334             /* hence the slice map should be of no significance to perform debloc */
2335             /* king                                                               */
2336             /**********************************************************************/
2337             error_status = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
2338                                                 ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2339             if(error_status != IH264E_SUCCESS)
2340             {
2341                 return error_status;
2342             }
2343             error_status = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
2344                                                 ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2345             if(error_status != IH264E_SUCCESS)
2346             {
2347                 return error_status;
2348             }
2349             if (ps_codec->s_cfg.u4_enable_recon)
2350             {
2351                 /* pic cnt */
2352                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2353 
2354                 /* rec buffers */
2355                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2356 
2357                 /* is last? */
2358                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2359 
2360                 /* frame time stamp */
2361                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2362                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2363             }
2364 
2365         }
2366     }
2367 
2368     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2369 
2370     return error_status;
2371 }
2372 
2373 /**
2374 *******************************************************************************
2375 *
2376 * @brief
2377 *  Function to update rc context after encoding
2378 *
2379 * @par   Description
2380 *  This function updates the rate control context after the frame is encoded.
2381 *  Number of bits consumed by the current frame, frame distortion, frame cost,
2382 *  number of intra/inter mb's, ... are passed on to rate control context for
2383 *  updating the rc model.
2384 *
2385 * @param[in] ps_codec
2386 *  Handle to codec context
2387 *
2388 * @param[in] ctxt_sel
2389 *  frame context selector
2390 *
2391 * @param[in] pic_cnt
2392 *  pic count
2393 *
2394 * @returns i4_stuffing_byte
2395 *  number of stuffing bytes (if necessary)
2396 *
2397 * @remarks
2398 *
2399 *******************************************************************************
2400 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2401 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2402 {
2403     /* proc set base idx */
2404     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2405 
2406     /* proc ctxt */
2407     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2408 
2409     /* entropy context */
2410     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
2411 
2412     /* Bitstream structure */
2413     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
2414 
2415     /* frame qp */
2416     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2417 
2418     /* cbr rc return status */
2419     WORD32 i4_stuffing_byte = 0;
2420 
2421     /* current frame stats */
2422     frame_info_t s_frame_info;
2423     picture_type_e rc_pic_type;
2424 
2425     /* temp var */
2426     WORD32 i, j;
2427 
2428     /********************************************************************/
2429     /*                            BEGIN INIT                            */
2430     /********************************************************************/
2431 
2432     /* init frame info */
2433     irc_init_frame_info(&s_frame_info);
2434 
2435     /* get frame info */
2436     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2437     {
2438         /*****************************************************************/
2439         /* One frame can be encoded by max of u4_num_cores threads       */
2440         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2441         /* u4_num_cores threads                                          */
2442         /*****************************************************************/
2443         for (j = 0; j< MAX_MB_TYPE; j++)
2444         {
2445             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2446 
2447             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2448 
2449             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2450         }
2451 
2452         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2453 
2454         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2455 
2456         /*****************************************************************/
2457         /* gather number of residue and header bits consumed by the frame*/
2458         /*****************************************************************/
2459         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2460     }
2461 
2462     /* get pic type */
2463     switch (ps_codec->pic_type)
2464     {
2465         case PIC_I:
2466         case PIC_IDR:
2467             rc_pic_type = I_PIC;
2468             break;
2469         case PIC_P:
2470             rc_pic_type = P_PIC;
2471             break;
2472         case PIC_B:
2473             rc_pic_type = B_PIC;
2474             break;
2475         default:
2476             assert(0);
2477             break;
2478     }
2479 
2480     /* update rc lib with current frame stats */
2481     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2482                                           &(s_frame_info),
2483                                           ps_codec->s_rate_control.pps_pd_frm_rate,
2484                                           ps_codec->s_rate_control.pps_time_stamp,
2485                                           ps_codec->s_rate_control.pps_frame_time,
2486                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2487                                           &rc_pic_type,
2488                                           i4_is_first_frm,
2489                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2490                                           u1_frame_qp,
2491                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
2492                                           &ps_codec->s_rate_control.i4_avg_activity);
2493 
2494     /* cbr rc - house keeping */
2495     if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
2496     {
2497          ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
2498          // If an IDR frame was skipped, restore frame num and IDR pic id
2499          if (ps_codec->u4_is_idr == 1)
2500          {
2501              ps_codec->i4_frame_num = ps_codec->i4_restore_frame_num;
2502              ps_codec->i4_idr_pic_id--;
2503          }
2504     }
2505     else if (i4_stuffing_byte)
2506     {
2507         /* add filler nal units */
2508         ps_entropy->i4_error_code = ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuffing_byte);
2509     }
2510 
2511     /*
2512      * Frame number is to be incremented only if the current frame is a
2513      * reference frame. After each successful frame encode, we increment
2514      * frame number by 1
2515      */
2516     if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
2517                     && ps_codec->u4_is_curr_frm_ref)
2518     {
2519         ps_codec->i4_frame_num++;
2520     }
2521     /********************************************************************/
2522     /*      signal the output                                           */
2523     /********************************************************************/
2524     ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
2525                     ps_entropy->ps_bitstrm->u4_strm_buf_offset;
2526 
2527     return ps_entropy->i4_error_code;
2528 }
2529 
2530 /**
2531 *******************************************************************************
2532 *
2533 * @brief
2534 *  entry point of a spawned encoder thread
2535 *
2536 * @par Description:
2537 *  The encoder thread dequeues a proc/entropy job from the encoder queue and
2538 *  calls necessary routines.
2539 *
2540 * @param[in] pv_proc
2541 *  Process context corresponding to the thread
2542 *
2543 * @returns  error status
2544 *
2545 * @remarks
2546 *
2547 *******************************************************************************
2548 */
ih264e_process_thread(void * pv_proc)2549 WORD32 ih264e_process_thread(void *pv_proc)
2550 {
2551     /* error status */
2552     IH264_ERROR_T ret = IH264_SUCCESS;
2553     WORD32 error_status = IH264_SUCCESS;
2554 
2555     /* proc ctxt */
2556     process_ctxt_t *ps_proc = pv_proc;
2557 
2558     /* codec ctxt */
2559     codec_t *ps_codec = ps_proc->ps_codec;
2560 
2561     /* structure to represent a processing job entry */
2562     job_t s_job;
2563 
2564     /* blocking call : entropy dequeue is non-blocking till all
2565      * the proc jobs are processed */
2566     WORD32 is_blocking = 0;
2567 
2568     /* codec context selector */
2569     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2570 
2571     /* set affinity */
2572     ithread_set_affinity(ps_proc->i4_id);
2573 
2574     ps_proc->i4_error_code = IH264_SUCCESS;
2575     while(1)
2576     {
2577         /* dequeue a job from the entropy queue */
2578         {
2579             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2580 
2581             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2582 
2583             /* have the lock */
2584             if (error == 0)
2585             {
2586                 if (*pu4_buf == 0)
2587                 {
2588                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
2589                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2590                     if (IH264_SUCCESS == ret)
2591                     {
2592                         *pu4_buf = 1;
2593                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2594                         goto WORKER;
2595                     }
2596                     else if(is_blocking)
2597                     {
2598                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2599                         break;
2600                     }
2601                 }
2602                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2603             }
2604         }
2605 
2606         /* dequeue a job from the process queue */
2607         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2608         if (IH264_SUCCESS != ret)
2609         {
2610             if(ps_proc->i4_id)
2611                 break;
2612             else
2613             {
2614                 is_blocking = 1;
2615                 continue;
2616             }
2617         }
2618 
2619 WORKER:
2620         /* choose appropriate proc context based on proc_base_idx */
2621         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2622 
2623         switch (s_job.i4_cmd)
2624         {
2625             case CMD_PROCESS:
2626                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2627                 ps_proc->i4_mb_x = s_job.i2_mb_x;
2628                 ps_proc->i4_mb_y = s_job.i2_mb_y;
2629 
2630                 /* init process context */
2631                 ih264e_init_proc_ctxt(ps_proc);
2632 
2633                 /* core code all mbs enlisted under the current job */
2634                 error_status = ih264e_process(ps_proc);
2635                 if(error_status !=IH264_SUCCESS)
2636                 {
2637                     ps_proc->i4_error_code = error_status;
2638                     return ret;
2639                 }
2640                 break;
2641 
2642             case CMD_ENTROPY:
2643                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2644                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2645                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2646 
2647                 /* init entropy */
2648                 ih264e_init_entropy_ctxt(ps_proc);
2649 
2650                 /* entropy code all mbs enlisted under the current job */
2651                 error_status = ih264e_entropy(ps_proc);
2652 
2653                 /* Dont execute any further instructions until store synchronization took place */
2654                 DATA_SYNC();
2655 
2656                 /* allow threads to dequeue entropy jobs */
2657                 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
2658 
2659                 if (error_status != IH264_SUCCESS)
2660                 {
2661                     ps_proc->i4_error_code = error_status;
2662                     return ret;
2663                 }
2664                 break;
2665 
2666             default:
2667                 ps_proc->i4_error_code = IH264_FAIL;
2668                 return ret;
2669         }
2670     }
2671 
2672     return ret;
2673 }
2674