1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_process.c
25 *
26 * @brief
27 * Contains functions for codec thread
28 *
29 * @author
30 * ittiam
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps
34 * - ih264e_init_entropy_ctxt
35 * - ih264e_entropy
36 * - ih264e_pack_header_data
37 * - ih264e_update_proc_ctxt
38 * - ih264e_init_proc_ctxt
39 * - ih264e_pad_recon_buffer
40 * - ih264e_dblk_pad_hpel_processing_n_mbs
41 * - ih264e_process
42 * - ih264e_update_rc_post_enc
43 * - ih264e_process_thread
44 *
45 * @remarks
46 * none
47 *
48 *******************************************************************************
49 */
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* System Include Files */
56 #include <stdio.h>
57 #include <stddef.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <limits.h>
61 #include <assert.h>
62
63 /* User Include Files */
64 #include "ih264e_config.h"
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ithread.h"
69
70 #include "ih264_debug.h"
71 #include "ih264_macros.h"
72 #include "ih264_error.h"
73 #include "ih264_defs.h"
74 #include "ih264_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_structs.h"
77 #include "ih264_trans_quant_itrans_iquant.h"
78 #include "ih264_inter_pred_filters.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_common_tables.h"
82 #include "ih264_cavlc_tables.h"
83 #include "ih264_cabac_tables.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264_list.h"
86 #include "ih264_platform_macros.h"
87
88 #include "ime_defs.h"
89 #include "ime_distortion_metrics.h"
90 #include "ime_structs.h"
91 #include "ime_statistics.h"
92
93 #include "irc_mem_req_and_acq.h"
94 #include "irc_cntrl_param.h"
95 #include "irc_frame_info_collector.h"
96 #include "irc_rate_control_api.h"
97
98 #include "ih264e_error.h"
99 #include "ih264e_defs.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_rate_control.h"
102 #include "ih264e_bitstream.h"
103 #include "ih264e_cabac_structs.h"
104 #include "ih264e_structs.h"
105 #include "ih264e_deblk.h"
106 #include "ih264e_encode_header.h"
107 #include "ih264e_utils.h"
108 #include "ih264e_me.h"
109 #include "ih264e_intra_modes_eval.h"
110 #include "ih264e_cavlc.h"
111 #include "ih264e_cabac.h"
112 #include "ih264e_master.h"
113 #include "ih264e_process.h"
114 #include "ih264e_trace.h"
115 #include "ih264e_statistics.h"
116 #include "ih264e_platform_macros.h"
117
118
119 /*****************************************************************************/
120 /* Function Definitions */
121 /*****************************************************************************/
122
123 /**
124 ******************************************************************************
125 *
126 * @brief This function generates sps, pps set on request
127 *
128 * @par Description
129 * When the encoder is set in header generation mode, the following function
130 * is called. This generates sps and pps headers and returns the control back
131 * to caller.
132 *
133 * @param[in] ps_codec
134 * pointer to codec context
135 *
136 * @return success or failure error code
137 *
138 ******************************************************************************
139 */
ih264e_generate_sps_pps(codec_t * ps_codec)140 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
141 {
142 /* choose between ping-pong process buffer set */
143 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
144
145 /* entropy ctxt */
146 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
147
148 /* Bitstream structure */
149 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
150
151 /* sps */
152 sps_t *ps_sps = NULL;
153
154 /* pps */
155 pps_t *ps_pps = NULL;
156
157 /* output buff */
158 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
159
160
161 /********************************************************************/
162 /* initialize the bit stream buffer */
163 /********************************************************************/
164 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
165
166 /********************************************************************/
167 /* BEGIN HEADER GENERATION */
168 /********************************************************************/
169 /*ps_codec->i4_pps_id ++;*/
170 ps_codec->i4_pps_id %= MAX_PPS_CNT;
171
172 /*ps_codec->i4_sps_id ++;*/
173 ps_codec->i4_sps_id %= MAX_SPS_CNT;
174
175 /* populate sps header */
176 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
177 ih264e_populate_sps(ps_codec, ps_sps);
178
179 /* populate pps header */
180 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
181 ih264e_populate_pps(ps_codec, ps_pps);
182
183 ps_entropy->i4_error_code = IH264E_SUCCESS;
184
185 /* generate sps */
186 ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
187 &ps_codec->s_cfg.s_vui);
188 if(ps_entropy->i4_error_code != IH264E_SUCCESS)
189 {
190 return ps_entropy->i4_error_code;
191 }
192 /* generate pps */
193 ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
194
195 /* queue output buffer */
196 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
197
198 return ps_entropy->i4_error_code;
199 }
200
201 /**
202 *******************************************************************************
203 *
204 * @brief initialize entropy context.
205 *
206 * @par Description:
207 * Before invoking the call to perform to entropy coding the entropy context
208 * associated with the job needs to be initialized. This involves the start
209 * mb address, end mb address, slice index and the pointer to location at
210 * which the mb residue info and mb header info are packed.
211 *
212 * @param[in] ps_proc
213 * Pointer to the current process context
214 *
215 * @returns error status
216 *
217 * @remarks none
218 *
219 *******************************************************************************
220 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)221 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
222 {
223 /* codec context */
224 codec_t *ps_codec = ps_proc->ps_codec;
225
226 /* entropy ctxt */
227 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
228
229 /* start address */
230 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
231
232 /* end address */
233 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
234
235 /* slice index */
236 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
237
238 /* sof */
239 /* @ start of frame or start of a new slice, set sof flag */
240 if (ps_entropy->i4_mb_start_add == 0)
241 {
242 ps_entropy->i4_sof = 1;
243 }
244
245 if (ps_entropy->i4_mb_x == 0)
246 {
247 /* packed mb coeff data */
248 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
249 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
250
251 /* packed mb header data */
252 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
253 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
254 }
255
256 return IH264E_SUCCESS;
257 }
258
259 /**
260 *******************************************************************************
261 *
262 * @brief entry point for entropy coding
263 *
264 * @par Description
265 * This function calls lower level functions to perform entropy coding for a
266 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes
267 * back the control, updates the ctxt and calls lower level functions again.
268 * This process is repeated till all the rows or group of mb's (which ever is
269 * minimum) are coded
270 *
271 * @param[in] ps_proc
272 * process context
273 *
274 * @returns error status
275 *
276 * @remarks
277 *
278 *******************************************************************************
279 */
ih264e_entropy(process_ctxt_t * ps_proc)280 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
281 {
282 /* codec context */
283 codec_t *ps_codec = ps_proc->ps_codec;
284
285 /* entropy context */
286 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
287
288 /* cabac context */
289 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
290
291 /* sps */
292 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
293
294 /* pps */
295 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
296
297 /* slice header */
298 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
299
300 /* slice type */
301 WORD32 i4_slice_type = ps_proc->i4_slice_type;
302
303 /* Bitstream structure */
304 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
305
306 /* output buff */
307 out_buf_t s_out_buf;
308
309 /* sei params */
310 sei_params_t s_sei;
311
312 /* proc map */
313 UWORD8 *pu1_proc_map;
314
315 /* entropy map */
316 UWORD8 *pu1_entropy_map_curr;
317
318 /* proc base idx */
319 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
320
321 /* temp var */
322 WORD32 i4_wd_mbs, i4_ht_mbs;
323 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx, u4_insert_per_idr;
324 WORD32 bitstream_start_offset, bitstream_end_offset;
325 /********************************************************************/
326 /* BEGIN INIT */
327 /********************************************************************/
328
329 /* entropy encode start address */
330 u4_mb_idx = ps_entropy->i4_mb_start_add;
331
332 /* entropy encode end address */
333 u4_mb_end_idx = ps_entropy->i4_mb_end_add;
334
335 /* width in mbs */
336 i4_wd_mbs = ps_entropy->i4_wd_mbs;
337
338 /* height in mbs */
339 i4_ht_mbs = ps_entropy->i4_ht_mbs;
340
341 /* total mb cnt */
342 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
343
344 /* proc map */
345 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
346
347 /* entropy map */
348 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
349
350 /********************************************************************/
351 /* @ start of frame / slice, */
352 /* initialize the output buffer, */
353 /* initialize the bit stream buffer, */
354 /* check if sps and pps headers have to be generated, */
355 /* populate and generate slice header */
356 /********************************************************************/
357 if (ps_entropy->i4_sof)
358 {
359 /********************************************************************/
360 /* initialize the output buffer */
361 /********************************************************************/
362 s_out_buf = ps_codec->as_out_buf[ctxt_sel];
363
364 /* is last frame to encode */
365 s_out_buf.u4_is_last = ps_entropy->u4_is_last;
366
367 /* frame idx */
368 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
369 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
370
371 /********************************************************************/
372 /* initialize the bit stream buffer */
373 /********************************************************************/
374 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
375
376 /********************************************************************/
377 /* BEGIN HEADER GENERATION */
378 /********************************************************************/
379 if (1 == ps_entropy->i4_gen_header)
380 {
381 /* generate sps */
382 ps_entropy->i4_error_code = ih264e_generate_sps(ps_bitstrm, ps_sps,
383 &ps_codec->s_cfg.s_vui);
384 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
385 /* generate pps */
386 ps_entropy->i4_error_code = ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
387 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
388
389 /* reset i4_gen_header */
390 ps_entropy->i4_gen_header = 0;
391 }
392
393 /* populate slice header */
394 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
395
396 /* Starting bitstream offset for header in bits */
397 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
398
399 /* generate sei */
400 u4_insert_per_idr = (NAL_SLICE_IDR == ps_slice_hdr->i1_nal_unit_type);
401
402 memset(&s_sei, 0, sizeof(sei_params_t));
403 s_sei.u1_sei_mdcv_params_present_flag =
404 ps_codec->s_cfg.s_sei.u1_sei_mdcv_params_present_flag;
405 s_sei.s_sei_mdcv_params = ps_codec->s_cfg.s_sei.s_sei_mdcv_params;
406 s_sei.u1_sei_cll_params_present_flag =
407 ps_codec->s_cfg.s_sei.u1_sei_cll_params_present_flag;
408 s_sei.s_sei_cll_params = ps_codec->s_cfg.s_sei.s_sei_cll_params;
409 s_sei.u1_sei_ave_params_present_flag =
410 ps_codec->s_cfg.s_sei.u1_sei_ave_params_present_flag;
411 s_sei.s_sei_ave_params = ps_codec->s_cfg.s_sei.s_sei_ave_params;
412 s_sei.u1_sei_ccv_params_present_flag = 0;
413 s_sei.s_sei_ccv_params =
414 ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].s_sei_ccv;
415 s_sei.u1_sei_sii_params_present_flag = ps_codec->s_cfg.s_sei.u1_sei_sii_params_present_flag;
416 s_sei.s_sei_sii_params = ps_codec->s_cfg.s_sei.s_sei_sii_params;
417
418 if((1 == ps_sps->i1_vui_parameters_present_flag) &&
419 (1 == ps_codec->s_cfg.s_vui.u1_video_signal_type_present_flag) &&
420 (1 == ps_codec->s_cfg.s_vui.u1_colour_description_present_flag) &&
421 (2 != ps_codec->s_cfg.s_vui.u1_colour_primaries) &&
422 (2 != ps_codec->s_cfg.s_vui.u1_matrix_coefficients) &&
423 (2 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
424 (4 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics) &&
425 (5 != ps_codec->s_cfg.s_vui.u1_transfer_characteristics))
426 {
427 s_sei.u1_sei_ccv_params_present_flag =
428 ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag;
429 }
430
431 if((1 == s_sei.u1_sei_mdcv_params_present_flag && u4_insert_per_idr) ||
432 (1 == s_sei.u1_sei_cll_params_present_flag && u4_insert_per_idr) ||
433 (1 == s_sei.u1_sei_ave_params_present_flag && u4_insert_per_idr) ||
434 (1 == s_sei.u1_sei_ccv_params_present_flag) ||
435 (1 == s_sei.u1_sei_sii_params_present_flag))
436 {
437 ps_entropy->i4_error_code =
438 ih264e_generate_sei(ps_bitstrm, &s_sei, u4_insert_per_idr);
439 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
440 }
441 ps_codec->as_inp_list[ps_codec->i4_poc % MAX_NUM_BFRAMES].u1_sei_ccv_params_present_flag = 0;
442
443 /* generate slice header */
444 ps_entropy->i4_error_code = ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
445 ps_pps, ps_sps);
446 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
447 /* once start of frame / slice is done, you can reset it */
448 /* it is the responsibility of the caller to set this flag */
449 ps_entropy->i4_sof = 0;
450
451 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
452 {
453 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
454 BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
455 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
456 ih264e_init_cabac_ctxt(ps_entropy);
457 }
458
459 /* Ending bitstream offset for header in bits */
460 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
461 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
462 bitstream_end_offset - bitstream_start_offset;
463 }
464
465 /* begin entropy coding for the mb set */
466 while (u4_mb_idx < u4_mb_end_idx)
467 {
468 /* init ptrs/indices */
469 if (ps_entropy->i4_mb_x == i4_wd_mbs)
470 {
471 ps_entropy->i4_mb_y++;
472 ps_entropy->i4_mb_x = 0;
473
474 /* packed mb coeff data */
475 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
476 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
477
478 /* packed mb header data */
479 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
480 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
481
482 /* proc map */
483 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
484
485 /* entropy map */
486 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
487 }
488
489 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
490 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
491 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
492
493 /* wait until the curr mb is core coded */
494 /* The wait for curr mb to be core coded is essential when entropy is launched
495 * as a separate job
496 */
497 while (1)
498 {
499 volatile UWORD8 *pu1_buf1;
500 WORD32 idx = ps_entropy->i4_mb_x;
501
502 pu1_buf1 = pu1_proc_map + idx;
503 if (*pu1_buf1)
504 break;
505 ithread_yield();
506 }
507
508
509 /* write mb layer */
510 ps_entropy->i4_error_code = ps_codec->pf_write_mb_syntax_layer
511 [ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
512 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
513
514 /* Starting bitstream offset for header in bits */
515 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
516
517 /* set entropy map */
518 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
519
520 u4_mb_idx++;
521 ps_entropy->i4_mb_x++;
522 /* check for eof */
523 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
524 {
525 if (ps_entropy->i4_mb_x < i4_wd_mbs)
526 {
527 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528 }
529 }
530
531 if (ps_entropy->i4_mb_x == i4_wd_mbs)
532 {
533 /* if slices are enabled */
534 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
535 {
536 /* current slice index */
537 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
538
539 /* slice map */
540 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
541
542 /* No need to open a slice at end of frame. The current slice can be closed at the time
543 * of signaling eof flag.
544 */
545 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
546 != pu1_slice_idx[u4_mb_idx]))
547 {
548 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
549 { /* mb skip run */
550 if ((i4_slice_type != ISLICE)
551 && *ps_entropy->pi4_mb_skip_run)
552 {
553 if (*ps_entropy->pi4_mb_skip_run)
554 {
555 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
556 ps_entropy->i4_error_code, "mb skip run");
557 *ps_entropy->pi4_mb_skip_run = 0;
558 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
559 }
560 }
561 /* put rbsp trailing bits for the previous slice */
562 ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
563 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
564 }
565 else
566 {
567 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
568 }
569
570 /* update slice header pointer */
571 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
572 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
573 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
574
575 /* populate slice header */
576 ps_entropy->i4_mb_start_add = u4_mb_idx;
577 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
578 ps_sps);
579
580 /* generate slice header */
581 ps_entropy->i4_error_code = ih264e_generate_slice_header(
582 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
583 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
584 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
585 {
586 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
587 BITSTREAM_FLUSH(ps_bitstrm, ps_entropy->i4_error_code);
588 ih264e_init_cabac_ctxt(ps_entropy);
589 }
590 }
591 else
592 {
593 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
594 && u4_mb_idx != u4_mb_cnt)
595 {
596 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
597 }
598 }
599 }
600 }
601
602 /* Ending bitstream offset for header in bits */
603 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
604 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
605 bitstream_end_offset - bitstream_start_offset;
606 }
607
608 /* check for eof */
609 if (u4_mb_idx == u4_mb_cnt)
610 {
611 /* set end of frame flag */
612 ps_entropy->i4_eof = 1;
613 }
614 else
615 {
616 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
617 && ps_codec->s_cfg.e_slice_mode
618 != IVE_SLICE_MODE_BLOCKS)
619 {
620 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
621 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
622 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
623 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
624 bitstream_end_offset - bitstream_start_offset;
625 }
626 }
627
628 if (ps_entropy->i4_eof)
629 {
630 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
631 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
632 {
633 /* mb skip run */
634 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
635 {
636 if (*ps_entropy->pi4_mb_skip_run)
637 {
638 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
639 ps_entropy->i4_error_code, "mb skip run");
640 *ps_entropy->pi4_mb_skip_run = 0;
641 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
642 }
643 }
644 /* put rbsp trailing bits */
645 ps_entropy->i4_error_code = ih264e_put_rbsp_trailing_bits(ps_bitstrm);
646 RETURN_ENTROPY_IF_ERROR(ps_codec, ps_entropy, ctxt_sel);
647 }
648 else
649 {
650 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
651 }
652 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
653 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
654 bitstream_end_offset - bitstream_start_offset;
655
656 DEBUG("entropy status %x", ps_entropy->i4_error_code);
657 }
658
659 return ps_entropy->i4_error_code;
660 }
661
662 /**
663 *******************************************************************************
664 *
665 * @brief Packs header information of a mb in to a buffer
666 *
667 * @par Description:
668 * After the deciding the mode info of a macroblock, the syntax elements
669 * associated with the mb are packed and stored. The entropy thread unpacks
670 * this buffer and generates the end bit stream.
671 *
672 * @param[in] ps_proc
673 * Pointer to the current process context
674 *
675 * @returns error status
676 *
677 * @remarks none
678 *
679 *******************************************************************************
680 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)681 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
682 {
683 /* curr mb type */
684 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
685
686 /* pack mb syntax layer of curr mb (used for entropy coding) */
687 if (u4_mb_type == I4x4)
688 {
689 /* pointer to mb header storage space */
690 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
691 mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
692
693 /* temp var */
694 WORD32 i4, byte;
695
696 /* mb type plus mode */
697 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
698
699 /* cbp */
700 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
701
702 /* mb qp delta */
703 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
704
705 /* sub mb modes */
706 for (i4 = 0; i4 < 16; i4 ++)
707 {
708 byte = 0;
709
710 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
711 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
712 {
713 byte |= 1;
714 }
715 else
716 {
717
718 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
719 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
720 {
721 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
722 }
723 else
724 {
725 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
726 }
727 }
728
729 i4++;
730
731 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
732 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
733 {
734 byte |= 16;
735 }
736 else
737 {
738
739 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
740 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
741 {
742 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
743 }
744 else
745 {
746 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
747 }
748 }
749
750 ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] = byte;
751 }
752
753 /* end of mb layer */
754 pu1_ptr += sizeof(mb_hdr_i4x4_t);
755 ps_proc->pv_mb_header_data = pu1_ptr;
756 }
757 else if (u4_mb_type == I16x16)
758 {
759 /* pointer to mb header storage space */
760 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
761 mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
762
763 /* mb type plus mode */
764 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
765
766 /* cbp */
767 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
768
769 /* mb qp delta */
770 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
771
772 /* end of mb layer */
773 pu1_ptr += sizeof(mb_hdr_i16x16_t);
774 ps_proc->pv_mb_header_data = pu1_ptr;
775 }
776 else if (u4_mb_type == P16x16)
777 {
778 /* pointer to mb header storage space */
779 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
780 mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
781
782 /* mb type */
783 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
784
785 /* cbp */
786 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
787
788 /* mb qp delta */
789 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
790
791 ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
792
793 ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
794
795 /* end of mb layer */
796 pu1_ptr += sizeof(mb_hdr_p16x16_t);
797 ps_proc->pv_mb_header_data = pu1_ptr;
798 }
799 else if (u4_mb_type == PSKIP)
800 {
801 /* pointer to mb header storage space */
802 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
803 mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
804
805 /* mb type */
806 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
807
808 /* end of mb layer */
809 pu1_ptr += sizeof(mb_hdr_pskip_t);
810 ps_proc->pv_mb_header_data = pu1_ptr;
811 }
812 else if(u4_mb_type == B16x16)
813 {
814
815 /* pointer to mb header storage space */
816 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
817 mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
818
819 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
820
821 /* mb type plus mode */
822 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
823
824 /* cbp */
825 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
826
827 /* mb qp delta */
828 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
829
830 /* l0 & l1 me data */
831 if (u4_pred_mode != PRED_L1)
832 {
833 ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
834 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
835
836 ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
837 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
838 }
839 if (u4_pred_mode != PRED_L0)
840 {
841 ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
842 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
843
844 ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
845 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
846 }
847
848 /* end of mb layer */
849 pu1_ptr += sizeof(mb_hdr_b16x16_t);
850 ps_proc->pv_mb_header_data = pu1_ptr;
851
852 }
853 else if(u4_mb_type == BDIRECT)
854 {
855 /* pointer to mb header storage space */
856 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
857 mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
858
859 /* mb type plus mode */
860 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
861
862 /* cbp */
863 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
864
865 /* mb qp delta */
866 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
867
868 /* end of mb layer */
869 pu1_ptr += sizeof(mb_hdr_bdirect_t);
870 ps_proc->pv_mb_header_data = pu1_ptr;
871
872 }
873 else if(u4_mb_type == BSKIP)
874 {
875 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
876
877 /* pointer to mb header storage space */
878 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
879 mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
880
881 /* mb type plus mode */
882 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
883
884 /* end of mb layer */
885 pu1_ptr += sizeof(mb_hdr_bskip_t);
886 ps_proc->pv_mb_header_data = pu1_ptr;
887 }
888
889 return IH264E_SUCCESS;
890 }
891
892 /**
893 *******************************************************************************
894 *
895 * @brief update process context after encoding an mb. This involves preserving
896 * the current mb information for later use, initialize the proc ctxt elements to
897 * encode next mb.
898 *
899 * @par Description:
900 * This function performs house keeping tasks after encoding an mb.
901 * After encoding an mb, various elements of the process context needs to be
902 * updated to encode the next mb. For instance, the source, recon and reference
903 * pointers, mb indices have to be adjusted to the next mb. The slice index of
904 * the current mb needs to be updated. If mb qp modulation is enabled and the
905 * qp changes, the quant param structure needs to be updated. Also, when encoding
906 * the next mb, the current mb info is used as part of mode prediction or mv
907 * prediction. Hence the current mb info has to be preserved at the top/top
908 * left/left locations.
909 *
910 * @param[in] ps_proc
911 * Pointer to the current process context
912 *
913 * @returns none
914 *
915 * @remarks none
916 *
917 *******************************************************************************
918 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)919 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
920 {
921 /* error status */
922 WORD32 error_status = IH264_SUCCESS;
923
924 /* codec context */
925 codec_t *ps_codec = ps_proc->ps_codec;
926
927 /* curr mb indices */
928 WORD32 i4_mb_x = ps_proc->i4_mb_x;
929 WORD32 i4_mb_y = ps_proc->i4_mb_y;
930
931 /* mb syntax elements of neighbors */
932 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
933 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
934 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
935
936 /* curr mb type */
937 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
938
939 /* curr mb type */
940 UWORD32 u4_is_intra = ps_proc->u4_is_intra;
941
942 /* width in mbs */
943 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
944
945 /*height in mbs*/
946 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
947
948 /* proc map */
949 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
950
951 /* deblk context */
952 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
953
954 /* deblk bs context */
955 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
956
957 /* top row motion vector info */
958 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
959
960 /* top left mb motion vector */
961 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
962
963 /* left mb motion vector */
964 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
965
966 /* sub mb modes */
967 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
968
969 /*************************************************************/
970 /* During MV prediction, when top right mb is not available, */
971 /* top left mb info. is used for prediction. Hence the curr */
972 /* top, which will be top left for the next mb needs to be */
973 /* preserved before updating it with curr mb info. */
974 /*************************************************************/
975
976 /* mb type, mb class, csbp */
977 *ps_top_left_syn = *ps_top_syn;
978
979 if (ps_proc->i4_slice_type != ISLICE)
980 {
981 /*****************************************/
982 /* update top left with top info results */
983 /*****************************************/
984 /* mv */
985 *ps_top_left_mb_pu = *ps_top_row_pu;
986 }
987
988 /*************************************************/
989 /* update top and left with curr mb info results */
990 /*************************************************/
991
992 /* mb type */
993 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
994
995 /* mb class */
996 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
997
998 /* csbp */
999 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
1000
1001 /* distortion */
1002 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
1003
1004 if (u4_is_intra)
1005 {
1006 /* mb / sub mb modes */
1007 if (I16x16 == u4_mb_type)
1008 {
1009 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
1010 }
1011 else if (I4x4 == u4_mb_type)
1012 {
1013 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1014 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
1015 }
1016 else if (I8x8 == u4_mb_type)
1017 {
1018 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1019 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
1020 }
1021
1022 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
1023 {
1024 /* mv */
1025 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1026 }
1027
1028 *ps_proc->pu4_mb_pu_cnt = 1;
1029 }
1030 else
1031 {
1032 /* mv */
1033 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
1034 }
1035
1036 /*
1037 * Mark that the MB has been coded intra
1038 * So that future AIRs can skip it
1039 */
1040 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1041
1042 /**************************************************/
1043 /* pack mb header info. for entropy coding */
1044 /**************************************************/
1045 ih264e_pack_header_data(ps_proc);
1046
1047 /* update previous mb qp */
1048 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1049
1050 /* store qp */
1051 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1052
1053 /*
1054 * We need to sync the cache to make sure that the nmv content of proc
1055 * is updated to cache properly
1056 */
1057 DATA_SYNC();
1058
1059 /* Just before finishing the row, enqueue the job in to entropy queue.
1060 * The master thread depending on its convenience shall dequeue it and
1061 * performs entropy.
1062 *
1063 * WARN !! Placing this block post proc map update can cause queuing of
1064 * entropy jobs in out of order.
1065 */
1066 if (i4_mb_x == i4_wd_mbs - 1)
1067 {
1068 /* job structures */
1069 job_t s_job;
1070
1071 /* job class */
1072 s_job.i4_cmd = CMD_ENTROPY;
1073
1074 /* number of mbs to be processed in the current job */
1075 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1076
1077 /* job start index x */
1078 s_job.i2_mb_x = 0;
1079
1080 /* job start index y */
1081 s_job.i2_mb_y = ps_proc->i4_mb_y;
1082
1083 /* proc base idx */
1084 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1085
1086 /* queue the job */
1087 error_status = ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1088 if(error_status != IH264_SUCCESS)
1089 {
1090 return error_status;
1091 }
1092 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1093 ih264_list_terminate(ps_codec->pv_entropy_jobq);
1094 }
1095
1096 /* update intra cost if valid */
1097 if (ps_proc->i4_mb_intra_cost != INT_MAX)
1098 {
1099 ps_codec->pi4_mb_intra_cost[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->i4_mb_intra_cost;
1100 }
1101
1102 /* update proc map */
1103 pu1_proc_map[i4_mb_x] = 1;
1104
1105 /**************************************************/
1106 /* update proc ctxt elements for encoding next mb */
1107 /**************************************************/
1108 /* update indices */
1109 i4_mb_x ++;
1110 ps_proc->i4_mb_x = i4_mb_x;
1111
1112 if (ps_proc->i4_mb_x == i4_wd_mbs)
1113 {
1114 ps_proc->i4_mb_y++;
1115 ps_proc->i4_mb_x = 0;
1116 }
1117
1118 /* update slice index */
1119 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1120
1121 /* update buffers pointers */
1122 ps_proc->pu1_src_buf_luma += MB_SIZE;
1123 ps_proc->pu1_rec_buf_luma += MB_SIZE;
1124 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1125 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1126
1127 /*
1128 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1129 * the stride per MB is MB_SIZE
1130 */
1131 ps_proc->pu1_src_buf_chroma += MB_SIZE;
1132 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1133 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1134 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1135
1136 /* Reset cost, distortion params */
1137 ps_proc->i4_mb_cost = INT_MAX;
1138 ps_proc->i4_mb_intra_cost = INT_MAX;
1139 ps_proc->i4_mb_distortion = SHRT_MAX;
1140
1141 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1142
1143 ps_proc->pu4_mb_pu_cnt += 1;
1144
1145 /* Update colocated pu */
1146 if (ps_proc->i4_slice_type == BSLICE)
1147 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1148
1149 /* deblk ctxts */
1150 if (ps_proc->u4_disable_deblock_level != 1)
1151 {
1152 /* indices */
1153 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1154 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1155
1156 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1157 ps_deblk->i4_mb_x ++;
1158
1159 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1160 /*
1161 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1162 * the stride per MB is MB_SIZE
1163 */
1164 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1165 #endif
1166 }
1167
1168 return error_status;
1169 }
1170
1171 /**
1172 *******************************************************************************
1173 *
1174 * @brief initialize process context.
1175 *
1176 * @par Description:
1177 * Before dispatching the current job to process thread, the process context
1178 * associated with the job is initialized. Usually every job aims to encode one
1179 * row of mb's. Basing on the row indices provided by the job, the process
1180 * context's buffer ptrs, slice indices and other elements that are necessary
1181 * during core-coding are initialized.
1182 *
1183 * @param[in] ps_proc
1184 * Pointer to the current process context
1185 *
1186 * @returns error status
1187 *
1188 * @remarks none
1189 *
1190 *******************************************************************************
1191 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1192 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1193 {
1194 /* codec context */
1195 codec_t *ps_codec = ps_proc->ps_codec;
1196
1197 /* nmb processing context*/
1198 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1199
1200 /* indices */
1201 WORD32 i4_mb_x, i4_mb_y;
1202
1203 /* strides */
1204 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1205 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1206 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1207
1208 /* quant params */
1209 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1210
1211 /* deblk ctxt */
1212 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1213
1214 /* deblk bs context */
1215 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1216
1217 /* Pointer to mv_buffer of current frame */
1218 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1219
1220 /* Pointers for color space conversion */
1221 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1222
1223 /* Pad the MB to support non standard sizes */
1224 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1225 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1226 UWORD16 u2_num_rows = MB_SIZE;
1227 WORD32 convert_uv_only;
1228
1229 /********************************************************************/
1230 /* BEGIN INIT */
1231 /********************************************************************/
1232
1233 i4_mb_x = ps_proc->i4_mb_x;
1234 i4_mb_y = ps_proc->i4_mb_y;
1235
1236 /* Number of mbs processed in one loop of process function */
1237 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1238 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1239
1240 /* init buffer pointers */
1241 convert_uv_only = 1;
1242 if (u4_pad_bottom_sz || u4_pad_right_sz ||
1243 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1244 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1245 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1246 {
1247 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1248 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1249 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1250 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1251 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1252 convert_uv_only = 0;
1253 }
1254 else
1255 {
1256 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1257 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1258 }
1259
1260
1261 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1262 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1263 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1264 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR ||
1265 u4_pad_bottom_sz || u4_pad_right_sz)
1266 {
1267 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1268 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1269 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1270
1271 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1272 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1273 }
1274 else
1275 {
1276 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1277 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1278 }
1279
1280 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1281 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1282
1283 /* Temporal back and forward reference buffer */
1284 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1285 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1286 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1287 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1288
1289 /*
1290 * Do color space conversion
1291 * NOTE : We assume there that the number of MB's to process will not span multiple rows
1292 */
1293 switch (ps_codec->s_cfg.e_inp_color_fmt)
1294 {
1295 case IV_YUV_420SP_UV:
1296 case IV_YUV_420SP_VU:
1297 /* In case of 420 semi-planar input, copy last few rows to intermediate
1298 buffer as few SIMD functions access upto 16 more bytes.
1299 This data will be padded if required */
1300 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1301 {
1302 WORD32 num_rows = MB_SIZE;
1303 UWORD8 *pu1_src;
1304 UWORD8 *pu1_dst;
1305 WORD32 i;
1306 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1307 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1308
1309 pu1_dst = ps_proc->pu1_src_buf_luma;
1310
1311 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1312 num_rows = MB_SIZE - u4_pad_bottom_sz;
1313 for (i = 0; i < num_rows; i++)
1314 {
1315 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1316 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1317 pu1_dst += ps_proc->i4_src_strd;
1318 }
1319 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1320 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1321 pu1_dst = ps_proc->pu1_src_buf_chroma;
1322
1323 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1324 * due to interleaved input
1325 */
1326 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1327 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1328 else
1329 num_rows = BLK8x8SIZE;
1330 for (i = 0; i < num_rows; i++)
1331 {
1332 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1333 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1334 pu1_dst += ps_proc->i4_src_chroma_strd;
1335 }
1336
1337 }
1338 break;
1339
1340 case IV_YUV_420P :
1341 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1342 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1343
1344 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1345 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1346
1347 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1348 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1349
1350 ps_codec->pf_ih264e_conv_420p_to_420sp(
1351 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1352 ps_proc->pu1_src_buf_luma,
1353 ps_proc->pu1_src_buf_chroma, u2_num_rows,
1354 ps_codec->s_cfg.u4_disp_wd,
1355 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1356 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1357 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1358 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1359 convert_uv_only);
1360 break;
1361
1362 case IV_YUV_422ILE :
1363 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1364 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1365
1366 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1367 ps_proc->pu1_src_buf_luma,
1368 ps_proc->pu1_src_buf_chroma,
1369 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1370 ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1371 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1372 ps_proc->i4_src_chroma_strd,
1373 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1374 break;
1375
1376 default:
1377 break;
1378 }
1379
1380 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1381 {
1382 UWORD32 u4_pad_wd, u4_pad_ht;
1383 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1384 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1385 u4_pad_ht = MB_SIZE;
1386 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1387 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1388
1389 ih264_pad_right_luma(
1390 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1391 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1392
1393 ih264_pad_right_chroma(
1394 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1395 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1396 }
1397
1398 if (ps_proc->i4_mb_y && ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) {
1399 UWORD8 *pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] +
1400 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE) -
1401 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1402 UWORD8 *pu1_dst = ps_proc->pu1_src_buf_luma - ps_proc->i4_src_strd;
1403 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
1404 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) {
1405 pu1_dst += ps_codec->s_cfg.u4_disp_wd;
1406 memset(pu1_dst, pu1_dst[-1], u4_pad_right_sz);
1407 }
1408 }
1409
1410 /* pad bottom edge */
1411 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1412 {
1413 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1414 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1415
1416 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1417 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1418 }
1419
1420
1421 /* packed mb coeff data */
1422 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1423
1424 /* packed mb header data */
1425 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1426
1427 /* slice index */
1428 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1429
1430 /*********************************************************************/
1431 /* ih264e_init_quant_params() routine is called at the pic init level*/
1432 /* this would have initialized the qp. */
1433 /* TODO_LATER: currently it is assumed that quant params donot change*/
1434 /* across mb's. When they do calculate update ps_qp_params accordingly*/
1435 /*********************************************************************/
1436
1437 /* init mv buffer ptr */
1438 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1439 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1440
1441 /* Init co-located mv buffer */
1442 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1443 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1444
1445 if (i4_mb_y == 0)
1446 {
1447 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1448 }
1449 else
1450 {
1451 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1452 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1453 }
1454
1455 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1456
1457 /* mb type */
1458 ps_proc->u4_mb_type = I16x16;
1459
1460 /* lambda */
1461 if (ps_codec->pic_type == PIC_B)
1462 {
1463 ps_proc->u4_lambda = gu1_qp_lambdaB[ps_qp_params->u1_mb_qp];
1464 }
1465 else
1466 {
1467 ps_proc->u4_lambda = gu1_qp_lambdaIP[ps_qp_params->u1_mb_qp];
1468 }
1469
1470 /* mb distortion */
1471 ps_proc->i4_mb_distortion = SHRT_MAX;
1472
1473 if (i4_mb_x == 0)
1474 {
1475 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1476
1477 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1478
1479 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1480
1481 if (i4_mb_y == 0)
1482 {
1483 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1484 }
1485 }
1486
1487 /* mb cost */
1488 ps_proc->i4_mb_cost = INT_MAX;
1489 ps_proc->i4_mb_intra_cost = INT_MAX;
1490
1491 /**********************/
1492 /* init deblk context */
1493 /**********************/
1494 ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1495 /* deblk lags the current mb proc by 1 row */
1496 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1497 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1498 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1499 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1500
1501 /* buffer ptrs */
1502 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1503 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1504
1505 /* init deblk bs context */
1506 /* mb indices */
1507 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1508 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1509
1510 /* init n_mb_process context */
1511 ps_n_mb_ctxt->i4_mb_x = 0;
1512 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1513 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1514
1515 return IH264E_SUCCESS;
1516 }
1517
1518 /**
1519 *******************************************************************************
1520 *
1521 * @brief This function performs luma & chroma padding
1522 *
1523 * @par Description:
1524 *
1525 * @param[in] ps_proc
1526 * Process context corresponding to the job
1527 *
1528 * @param[in] pu1_curr_pic_luma
1529 * Pointer to luma buffer
1530 *
1531 * @param[in] pu1_curr_pic_chroma
1532 * Pointer to chroma buffer
1533 *
1534 * @param[in] i4_mb_x
1535 * mb index x
1536 *
1537 * @param[in] i4_mb_y
1538 * mb index y
1539 *
1540 * @param[in] i4_pad_ht
1541 * number of rows to be padded
1542 *
1543 * @returns error status
1544 *
1545 * @remarks none
1546 *
1547 *******************************************************************************
1548 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1549 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1550 UWORD8 *pu1_curr_pic_luma,
1551 UWORD8 *pu1_curr_pic_chroma,
1552 WORD32 i4_mb_x,
1553 WORD32 i4_mb_y,
1554 WORD32 i4_pad_ht)
1555 {
1556 /* codec context */
1557 codec_t *ps_codec = ps_proc->ps_codec;
1558
1559 /* strides */
1560 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1561
1562 if (i4_mb_x == 0)
1563 {
1564 /* padding left luma */
1565 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1566
1567 /* padding left chroma */
1568 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1569 }
1570 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1571 {
1572 /* padding right luma */
1573 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1574
1575 /* padding right chroma */
1576 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1577
1578 if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1579 {
1580 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1581 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1582
1583 /* padding bottom luma */
1584 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1585
1586 /* padding bottom chroma */
1587 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1588 }
1589 }
1590
1591 if (i4_mb_y == 0)
1592 {
1593 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1594 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1595 WORD32 wd = MB_SIZE;
1596
1597 if (i4_mb_x == 0)
1598 {
1599 pu1_rec_luma -= PAD_LEFT;
1600 pu1_rec_chroma -= PAD_LEFT;
1601
1602 wd += PAD_LEFT;
1603 }
1604 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1605 {
1606 wd += PAD_RIGHT;
1607 }
1608
1609 /* padding top luma */
1610 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1611
1612 /* padding top chroma */
1613 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1614 }
1615
1616 return IH264E_SUCCESS;
1617 }
1618
1619 /**
1620 *******************************************************************************
1621 *
1622 * @brief This function performs deblocking, padding and halfpel generation for
1623 * 'n' MBs
1624 *
1625 * @par Description:
1626 *
1627 * @param[in] ps_proc
1628 * Process context corresponding to the job
1629 *
1630 * @param[in] pu1_curr_pic_luma
1631 * Current MB being processed(Luma)
1632 *
1633 * @param[in] pu1_curr_pic_chroma
1634 * Current MB being processed(Chroma)
1635 *
1636 * @param[in] i4_mb_x
1637 * Column value of current MB processed
1638 *
1639 * @param[in] i4_mb_y
1640 * Current row processed
1641 *
1642 * @returns error status
1643 *
1644 * @remarks none
1645 *
1646 *******************************************************************************
1647 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1648 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1649 UWORD8 *pu1_curr_pic_luma,
1650 UWORD8 *pu1_curr_pic_chroma,
1651 WORD32 i4_mb_x,
1652 WORD32 i4_mb_y)
1653 {
1654 /* codec context */
1655 codec_t *ps_codec = ps_proc->ps_codec;
1656
1657 /* n_mb processing context */
1658 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1659
1660 /* deblk context */
1661 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1662
1663 /* strides */
1664 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1665
1666 /* loop variables */
1667 WORD32 row, i, j, col;
1668
1669 /* Padding Width */
1670 UWORD32 u4_pad_wd;
1671
1672 /* deblk_map of the row being deblocked */
1673 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1674
1675 /* deblk_map_previous row */
1676 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1677
1678 WORD32 u4_pad_top = 0;
1679
1680 WORD32 u4_deblk_prev_row = 0;
1681
1682 /* Number of mbs to be processed */
1683 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1684
1685 /* Number of mbs actually processed
1686 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1687 WORD32 i4_n_mb_process_count = 0;
1688
1689 UWORD8 *pu1_pad_bottom_src = NULL;
1690
1691 UWORD8 *pu1_pad_src_luma = NULL;
1692 UWORD8 *pu1_pad_src_chroma = NULL;
1693
1694 if (ps_proc->u4_disable_deblock_level == 1)
1695 {
1696 /* If left most MB is processed, then pad left */
1697 if (i4_mb_x == 0)
1698 {
1699 /* padding left luma */
1700 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1701
1702 /* padding left chroma */
1703 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1704 }
1705 /*last col*/
1706 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1707 {
1708 /* padding right luma */
1709 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1710
1711 /* padding right chroma */
1712 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1713 }
1714 }
1715
1716 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1717 {
1718 /* if number of mb's to be processed are less than 'N', go back.
1719 * exception to the above clause is end of row */
1720 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1721 {
1722 return IH264E_SUCCESS;
1723 }
1724 else
1725 {
1726 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1727
1728 /* performing deblocking for required number of MBs */
1729 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1730 {
1731 u4_deblk_prev_row = 1;
1732
1733 /* checking whether the top rows are deblocked */
1734 for (col = 0; col < i4_n_mb_process_count; col++)
1735 {
1736 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1737 }
1738
1739 /* checking whether the top right MB is deblocked */
1740 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1741 {
1742 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1743 }
1744
1745 /* Top or Top right MBs not deblocked */
1746 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1747 {
1748 return IH264E_SUCCESS;
1749 }
1750
1751 for (row = 0; row < i4_n_mb_process_count; row++)
1752 {
1753 ih264e_deblock_mb(ps_proc, ps_deblk);
1754
1755 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1756
1757 if (ps_deblk->i4_mb_y > 0)
1758 {
1759 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1760 {
1761 /* padding left luma */
1762 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1763
1764 /* padding left chroma */
1765 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1766 }
1767
1768 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1769 {
1770 /* padding right luma */
1771 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1772
1773 /* padding right chroma */
1774 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1775 }
1776 }
1777 ps_deblk->i4_mb_x++;
1778
1779 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1780 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1781
1782 }
1783 }
1784 else if(i4_mb_y > 0)
1785 {
1786 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1787
1788 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1789 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1790 }
1791
1792 if (i4_mb_y == 2)
1793 {
1794 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1795 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1796
1797 if (ps_n_mb_ctxt->i4_mb_x == 0)
1798 {
1799 u4_pad_wd += PAD_LEFT;
1800 u4_pad_top = -PAD_LEFT;
1801 }
1802
1803 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1804 {
1805 u4_pad_wd += PAD_RIGHT;
1806 }
1807
1808 /* padding top luma */
1809 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1810
1811 /* padding top chroma */
1812 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1813 }
1814
1815 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1816
1817 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1818 {
1819 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1820 {
1821 /* Bottom Padding is done in one stretch for the entire width */
1822 if (ps_proc->u4_disable_deblock_level != 1)
1823 {
1824 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1825
1826 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1827
1828 ps_n_mb_ctxt->i4_mb_x = 0;
1829 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1830 ps_deblk->i4_mb_x = 0;
1831 ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1832
1833 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1834 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1835
1836 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1837
1838 j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1839
1840 for (i = 0; i < j; i++)
1841 {
1842 for (col = 0; col < i4_n_mbs; col++)
1843 {
1844 ih264e_deblock_mb(ps_proc, ps_deblk);
1845
1846 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1847
1848 ps_deblk->i4_mb_x++;
1849 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1850 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1851 ps_n_mb_ctxt->i4_mb_x++;
1852 }
1853 }
1854
1855 for (col = 0; col < i4_n_mb_process_count; col++)
1856 {
1857 ih264e_deblock_mb(ps_proc, ps_deblk);
1858
1859 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1860
1861 ps_deblk->i4_mb_x++;
1862 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1863 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1864 ps_n_mb_ctxt->i4_mb_x++;
1865 }
1866
1867 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1868
1869 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1870
1871 /* padding left luma */
1872 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1873
1874 /* padding left chroma */
1875 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1876
1877 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1878 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1879
1880 /* padding left luma */
1881 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1882
1883 /* padding left chroma */
1884 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1885
1886 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1887
1888 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1889
1890 /* padding right luma */
1891 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1892
1893 /* padding right chroma */
1894 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1895
1896 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1897 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1898
1899 /* padding right luma */
1900 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1901
1902 /* padding right chroma */
1903 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1904
1905 }
1906
1907 /* In case height is less than 2 MBs pad top */
1908 if (ps_proc->i4_ht_mbs <= 2)
1909 {
1910 UWORD8 *pu1_pad_top_src;
1911 /* padding top luma */
1912 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1913 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1914
1915 /* padding top chroma */
1916 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1917 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1918 }
1919
1920 /* padding bottom luma */
1921 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1922 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1923
1924 /* padding bottom chroma */
1925 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1926 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1927 }
1928 }
1929 }
1930 }
1931
1932 return IH264E_SUCCESS;
1933 }
1934
1935
1936 /**
1937 *******************************************************************************
1938 *
1939 * @brief This function performs luma & chroma encoding for a set of mb's.
1940 *
1941 * @par Description:
1942 * The mb to be coded is taken and is evaluated over a predefined set of modes
1943 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1944 * is selected and using intra/inter prediction filters, prediction is carried out.
1945 * The deviation between src and pred signal constitutes error signal. This error
1946 * signal is transformed (hierarchical transform if necessary) and quantized. The
1947 * quantized residue is packed in to entropy buffer for entropy coding. This is
1948 * repeated for all the mb's enlisted under the job.
1949 *
1950 * @param[in] ps_proc
1951 * Process context corresponding to the job
1952 *
1953 * @returns error status
1954 *
1955 * @remarks none
1956 *
1957 *******************************************************************************
1958 */
ih264e_process(process_ctxt_t * ps_proc)1959 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1960 {
1961 /* error status */
1962 WORD32 error_status = IH264_SUCCESS;
1963
1964 /* codec context */
1965 codec_t *ps_codec = ps_proc->ps_codec;
1966
1967 /* cbp luma, chroma */
1968 UWORD32 u4_cbp_l, u4_cbp_c;
1969
1970 /* width in mbs */
1971 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1972
1973 /* loop var */
1974 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1975
1976 /* valid modes */
1977 UWORD32 u4_valid_modes = 0;
1978
1979 /* gate threshold */
1980 WORD32 i4_gate_threshold = 0;
1981
1982 /* is intra */
1983 WORD32 luma_idx, chroma_idx, is_intra;
1984
1985 /* temp variables */
1986 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1987
1988 /*
1989 * list of modes for evaluation
1990 * -------------------------------------------------------------------------
1991 * Note on enabling I4x4 and I16x16
1992 * At very low QP's the hadamard transform in I16x16 will push up the maximum
1993 * coeff value very high. CAVLC may not be able to represent the value and
1994 * hence the stream may not be decodable in some clips.
1995 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1996 */
1997 if (ps_proc->i4_slice_type == ISLICE)
1998 {
1999 if (ps_proc->u4_frame_qp > 10)
2000 {
2001 /* enable intra 16x16 */
2002 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2003
2004 /* enable intra 8x8 */
2005 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
2006 }
2007
2008 /* enable intra 4x4 */
2009 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2010 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2011
2012 }
2013 else if (ps_proc->i4_slice_type == PSLICE)
2014 {
2015 if (ps_proc->u4_frame_qp > 10)
2016 {
2017 /* enable intra 16x16 */
2018 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2019 }
2020
2021 /* enable intra 4x4 */
2022 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2023 {
2024 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2025 }
2026 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2027
2028 /* enable inter P16x16 */
2029 u4_valid_modes |= (1 << P16x16);
2030 }
2031 else if (ps_proc->i4_slice_type == BSLICE)
2032 {
2033 if (ps_proc->u4_frame_qp > 10)
2034 {
2035 /* enable intra 16x16 */
2036 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
2037 }
2038
2039 /* enable intra 4x4 */
2040 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2041 {
2042 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
2043 }
2044 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
2045
2046 /* enable inter B16x16 */
2047 u4_valid_modes |= (1 << B16x16);
2048 }
2049
2050 /* init entropy */
2051 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
2052 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
2053 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
2054
2055 /* compute recon when :
2056 * 1. current frame is to be used as a reference
2057 * 2. dump recon for bit stream sanity check
2058 */
2059 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2060 ps_codec->s_cfg.u4_enable_recon ||
2061 ps_codec->s_cfg.u4_enable_quality_metrics & QUALITY_MASK_PSNR;
2062
2063 /* Encode 'n' macroblocks,
2064 * 'n' being the number of mbs dictated by current proc ctxt */
2065 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2066 {
2067 /* since we have not yet found sad, we have not yet got min sad */
2068 /* we need to initialize these variables for each MB */
2069 /* TODO how to get the min sad into the codec */
2070 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2071 ps_proc->u4_min_sad_reached = 0;
2072
2073 /* mb analysis */
2074 {
2075 /* temp var */
2076 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2077
2078 /* force intra refresh ? */
2079 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2080 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2081
2082 /* evaluate inter 16x16 modes */
2083 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2084 {
2085 /* compute nmb me */
2086 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2087 {
2088 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2089 i4_wd_mbs - ps_proc->i4_mb_x));
2090 }
2091
2092 /* set pointers to ME data appropriately for other modules to use */
2093 {
2094 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2095
2096 /* get the min sad condition for current mb */
2097 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2098 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2099 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2100 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2101 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2102
2103 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2104 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2105 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2106
2107 /* get the best sub pel buffer */
2108 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2109 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2110 }
2111 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2112 }
2113 else
2114 {
2115 /* Derive neighbor availability for the current macroblock */
2116 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2117
2118 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2119 }
2120
2121 /*
2122 * If air says intra, we need to force the following code path to evaluate intra
2123 * The easy way is just to say that the inter cost is too much
2124 */
2125 if (!i4_air_enable_inter)
2126 {
2127 ps_proc->u4_min_sad_reached = 0;
2128 ps_proc->i4_mb_cost = INT_MAX;
2129 ps_proc->i4_mb_distortion = INT_MAX;
2130 }
2131 else if (ps_proc->u4_mb_type == PSKIP)
2132 {
2133 goto UPDATE_MB_INFO;
2134 }
2135
2136 /* wait until the proc of [top + 1] mb is computed.
2137 * We wait till the proc dependencies are satisfied */
2138 if(ps_proc->i4_mb_y > 0)
2139 {
2140 /* proc map */
2141 UWORD8 *pu1_proc_map_top;
2142
2143 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2144
2145 while (1)
2146 {
2147 volatile UWORD8 *pu1_buf;
2148 WORD32 idx = i4_mb_idx + 1;
2149
2150 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2151 pu1_buf = pu1_proc_map_top + idx;
2152 if(*pu1_buf)
2153 break;
2154 ithread_yield();
2155 }
2156 }
2157
2158 /* If we already have the minimum sad, there is no point in searching for sad again */
2159 if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2160 {
2161 /* intra gating in inter slices */
2162 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2163 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2164 {
2165 /* distortion of neighboring blocks */
2166 WORD32 i4_distortion[4];
2167
2168 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2169
2170 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2171
2172 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2173
2174 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2175
2176 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2177
2178 }
2179
2180
2181 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2182 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2183 {
2184 /* evaluate intra 4x4 modes */
2185 if (u4_valid_modes & (1 << I4x4))
2186 {
2187 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2188 {
2189 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2190 }
2191 else
2192 {
2193 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2194 }
2195 }
2196
2197 /* evaluate intra 16x16 modes */
2198 if (u4_valid_modes & (1 << I16x16))
2199 {
2200 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2201 }
2202
2203 /* evaluate intra 8x8 modes */
2204 if (u4_valid_modes & (1 << I8x8))
2205 {
2206 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2207 }
2208
2209 }
2210 }
2211 }
2212
2213 /* is intra */
2214 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2215 {
2216 luma_idx = ps_proc->u4_mb_type;
2217 chroma_idx = 0;
2218 is_intra = 1;
2219
2220 /* evaluate chroma blocks for intra */
2221 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2222 }
2223 else
2224 {
2225 luma_idx = 3;
2226 chroma_idx = 1;
2227 is_intra = 0;
2228 }
2229 ps_proc->u4_is_intra = is_intra;
2230 ps_proc->ps_pu->b1_intra_flag = is_intra;
2231
2232 /* redo MV pred of neighbors in the case intra mb */
2233 /* TODO : currently called unconditionally, needs to be called only in the case of intra
2234 * to modify neighbors */
2235 if (ps_proc->i4_slice_type != ISLICE)
2236 {
2237 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2238 }
2239
2240 /* Perform luma mb core coding */
2241 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2242
2243 /* Perform luma mb core coding */
2244 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2245
2246 /* coded block pattern */
2247 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2248
2249 if (!ps_proc->u4_is_intra)
2250 {
2251 if (ps_proc->i4_slice_type == BSLICE)
2252 {
2253 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2254 {
2255 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2256 }
2257 }
2258 else if(!ps_proc->u4_cbp)
2259 {
2260 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2261 {
2262 ps_proc->u4_mb_type = PSKIP;
2263 }
2264 }
2265 }
2266
2267 UPDATE_MB_INFO:
2268
2269 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2270 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2271
2272 /**********************************************************************/
2273 /* if disable deblock level is '0' this implies enable deblocking for */
2274 /* all edges of all macroblocks with out any restrictions */
2275 /* */
2276 /* if disable deblock level is '1' this implies disable deblocking for*/
2277 /* all edges of all macroblocks with out any restrictions */
2278 /* */
2279 /* if disable deblock level is '2' this implies enable deblocking for */
2280 /* all edges of all macroblocks except edges overlapping with slice */
2281 /* boundaries. This option is not currently supported by the encoder */
2282 /* hence the slice map should be of no significance to perform debloc */
2283 /* king */
2284 /**********************************************************************/
2285
2286 if (ps_proc->u4_compute_recon)
2287 {
2288 /* deblk context */
2289 /* src pointers */
2290 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2291 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2292
2293 /* src indices */
2294 UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2295 UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2296
2297 /* compute blocking strength */
2298 if (ps_proc->u4_disable_deblock_level != 1)
2299 {
2300 ih264e_compute_bs(ps_proc);
2301 }
2302
2303 /* nmb deblocking and hpel and padding */
2304 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2305 pu1_cur_pic_chroma, i4_mb_x,
2306 i4_mb_y);
2307 }
2308
2309 /* update the context after for coding next mb */
2310 error_status = ih264e_update_proc_ctxt(ps_proc);
2311 if(error_status != IH264E_SUCCESS)
2312 {
2313 return error_status;
2314 }
2315 /* Once the last row is processed, mark the buffer status appropriately */
2316 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2317 {
2318 /* Pointer to current picture buffer structure */
2319 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2320
2321 /* Pointer to current picture's mv buffer structure */
2322 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2323
2324 /**********************************************************************/
2325 /* if disable deblock level is '0' this implies enable deblocking for */
2326 /* all edges of all macroblocks with out any restrictions */
2327 /* */
2328 /* if disable deblock level is '1' this implies disable deblocking for*/
2329 /* all edges of all macroblocks with out any restrictions */
2330 /* */
2331 /* if disable deblock level is '2' this implies enable deblocking for */
2332 /* all edges of all macroblocks except edges overlapping with slice */
2333 /* boundaries. This option is not currently supported by the encoder */
2334 /* hence the slice map should be of no significance to perform debloc */
2335 /* king */
2336 /**********************************************************************/
2337 error_status = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
2338 ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2339 if(error_status != IH264E_SUCCESS)
2340 {
2341 return error_status;
2342 }
2343 error_status = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
2344 ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2345 if(error_status != IH264E_SUCCESS)
2346 {
2347 return error_status;
2348 }
2349 if (ps_codec->s_cfg.u4_enable_recon)
2350 {
2351 /* pic cnt */
2352 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2353
2354 /* rec buffers */
2355 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
2356
2357 /* is last? */
2358 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2359
2360 /* frame time stamp */
2361 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2362 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2363 }
2364
2365 }
2366 }
2367
2368 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2369
2370 return error_status;
2371 }
2372
2373 /**
2374 *******************************************************************************
2375 *
2376 * @brief
2377 * Function to update rc context after encoding
2378 *
2379 * @par Description
2380 * This function updates the rate control context after the frame is encoded.
2381 * Number of bits consumed by the current frame, frame distortion, frame cost,
2382 * number of intra/inter mb's, ... are passed on to rate control context for
2383 * updating the rc model.
2384 *
2385 * @param[in] ps_codec
2386 * Handle to codec context
2387 *
2388 * @param[in] ctxt_sel
2389 * frame context selector
2390 *
2391 * @param[in] pic_cnt
2392 * pic count
2393 *
2394 * @returns i4_stuffing_byte
2395 * number of stuffing bytes (if necessary)
2396 *
2397 * @remarks
2398 *
2399 *******************************************************************************
2400 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2401 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2402 {
2403 /* proc set base idx */
2404 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2405
2406 /* proc ctxt */
2407 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2408
2409 /* entropy context */
2410 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
2411
2412 /* Bitstream structure */
2413 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
2414
2415 /* frame qp */
2416 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2417
2418 /* cbr rc return status */
2419 WORD32 i4_stuffing_byte = 0;
2420
2421 /* current frame stats */
2422 frame_info_t s_frame_info;
2423 picture_type_e rc_pic_type;
2424
2425 /* temp var */
2426 WORD32 i, j;
2427
2428 /********************************************************************/
2429 /* BEGIN INIT */
2430 /********************************************************************/
2431
2432 /* init frame info */
2433 irc_init_frame_info(&s_frame_info);
2434
2435 /* get frame info */
2436 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2437 {
2438 /*****************************************************************/
2439 /* One frame can be encoded by max of u4_num_cores threads */
2440 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */
2441 /* u4_num_cores threads */
2442 /*****************************************************************/
2443 for (j = 0; j< MAX_MB_TYPE; j++)
2444 {
2445 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2446
2447 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2448
2449 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2450 }
2451
2452 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2453
2454 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2455
2456 /*****************************************************************/
2457 /* gather number of residue and header bits consumed by the frame*/
2458 /*****************************************************************/
2459 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2460 }
2461
2462 /* get pic type */
2463 switch (ps_codec->pic_type)
2464 {
2465 case PIC_I:
2466 case PIC_IDR:
2467 rc_pic_type = I_PIC;
2468 break;
2469 case PIC_P:
2470 rc_pic_type = P_PIC;
2471 break;
2472 case PIC_B:
2473 rc_pic_type = B_PIC;
2474 break;
2475 default:
2476 assert(0);
2477 break;
2478 }
2479
2480 /* update rc lib with current frame stats */
2481 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2482 &(s_frame_info),
2483 ps_codec->s_rate_control.pps_pd_frm_rate,
2484 ps_codec->s_rate_control.pps_time_stamp,
2485 ps_codec->s_rate_control.pps_frame_time,
2486 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2487 &rc_pic_type,
2488 i4_is_first_frm,
2489 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2490 u1_frame_qp,
2491 &ps_codec->s_rate_control.num_intra_in_prev_frame,
2492 &ps_codec->s_rate_control.i4_avg_activity);
2493
2494 /* cbr rc - house keeping */
2495 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
2496 {
2497 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
2498 // If an IDR frame was skipped, restore frame num and IDR pic id
2499 if (ps_codec->u4_is_idr == 1)
2500 {
2501 ps_codec->i4_frame_num = ps_codec->i4_restore_frame_num;
2502 ps_codec->i4_idr_pic_id--;
2503 }
2504 }
2505 else if (i4_stuffing_byte)
2506 {
2507 /* add filler nal units */
2508 ps_entropy->i4_error_code = ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuffing_byte);
2509 }
2510
2511 /*
2512 * Frame number is to be incremented only if the current frame is a
2513 * reference frame. After each successful frame encode, we increment
2514 * frame number by 1
2515 */
2516 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
2517 && ps_codec->u4_is_curr_frm_ref)
2518 {
2519 ps_codec->i4_frame_num++;
2520 }
2521 /********************************************************************/
2522 /* signal the output */
2523 /********************************************************************/
2524 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
2525 ps_entropy->ps_bitstrm->u4_strm_buf_offset;
2526
2527 return ps_entropy->i4_error_code;
2528 }
2529
2530 /**
2531 *******************************************************************************
2532 *
2533 * @brief
2534 * entry point of a spawned encoder thread
2535 *
2536 * @par Description:
2537 * The encoder thread dequeues a proc/entropy job from the encoder queue and
2538 * calls necessary routines.
2539 *
2540 * @param[in] pv_proc
2541 * Process context corresponding to the thread
2542 *
2543 * @returns error status
2544 *
2545 * @remarks
2546 *
2547 *******************************************************************************
2548 */
ih264e_process_thread(void * pv_proc)2549 WORD32 ih264e_process_thread(void *pv_proc)
2550 {
2551 /* error status */
2552 IH264_ERROR_T ret = IH264_SUCCESS;
2553 WORD32 error_status = IH264_SUCCESS;
2554
2555 /* proc ctxt */
2556 process_ctxt_t *ps_proc = pv_proc;
2557
2558 /* codec ctxt */
2559 codec_t *ps_codec = ps_proc->ps_codec;
2560
2561 /* structure to represent a processing job entry */
2562 job_t s_job;
2563
2564 /* blocking call : entropy dequeue is non-blocking till all
2565 * the proc jobs are processed */
2566 WORD32 is_blocking = 0;
2567
2568 /* codec context selector */
2569 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2570
2571 /* set affinity */
2572 ithread_set_affinity(ps_proc->i4_id);
2573
2574 ps_proc->i4_error_code = IH264_SUCCESS;
2575 while(1)
2576 {
2577 /* dequeue a job from the entropy queue */
2578 {
2579 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2580
2581 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2582
2583 /* have the lock */
2584 if (error == 0)
2585 {
2586 if (*pu4_buf == 0)
2587 {
2588 /* no entropy threads are active, try dequeuing a job from the entropy queue */
2589 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2590 if (IH264_SUCCESS == ret)
2591 {
2592 *pu4_buf = 1;
2593 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2594 goto WORKER;
2595 }
2596 else if(is_blocking)
2597 {
2598 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2599 break;
2600 }
2601 }
2602 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2603 }
2604 }
2605
2606 /* dequeue a job from the process queue */
2607 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2608 if (IH264_SUCCESS != ret)
2609 {
2610 if(ps_proc->i4_id)
2611 break;
2612 else
2613 {
2614 is_blocking = 1;
2615 continue;
2616 }
2617 }
2618
2619 WORKER:
2620 /* choose appropriate proc context based on proc_base_idx */
2621 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2622
2623 switch (s_job.i4_cmd)
2624 {
2625 case CMD_PROCESS:
2626 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2627 ps_proc->i4_mb_x = s_job.i2_mb_x;
2628 ps_proc->i4_mb_y = s_job.i2_mb_y;
2629
2630 /* init process context */
2631 ih264e_init_proc_ctxt(ps_proc);
2632
2633 /* core code all mbs enlisted under the current job */
2634 error_status = ih264e_process(ps_proc);
2635 if(error_status !=IH264_SUCCESS)
2636 {
2637 ps_proc->i4_error_code = error_status;
2638 return ret;
2639 }
2640 break;
2641
2642 case CMD_ENTROPY:
2643 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2644 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2645 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2646
2647 /* init entropy */
2648 ih264e_init_entropy_ctxt(ps_proc);
2649
2650 /* entropy code all mbs enlisted under the current job */
2651 error_status = ih264e_entropy(ps_proc);
2652
2653 /* Dont execute any further instructions until store synchronization took place */
2654 DATA_SYNC();
2655
2656 /* allow threads to dequeue entropy jobs */
2657 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
2658
2659 if (error_status != IH264_SUCCESS)
2660 {
2661 ps_proc->i4_error_code = error_status;
2662 return ret;
2663 }
2664 break;
2665
2666 default:
2667 ps_proc->i4_error_code = IH264_FAIL;
2668 return ret;
2669 }
2670 }
2671
2672 return ret;
2673 }
2674