1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_core_coding.c
25 *
26 * @brief
27 * This file contains routines that perform luma and chroma core coding of
28 * H264 macroblocks
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - ih264e_luma_16x16_resi_trans_dctrans_quant
35 * - ih264e_luma_16x16_idctrans_iquant_itrans_recon
36 * - ih264e_chroma_8x8_resi_trans_dctrans_quant
37 * - ih264e_chroma_8x8_idctrans_iquant_itrans_recon
38 * - ih264e_pack_l_mb_i16
39 * - ih264e_pack_l_mb
40 * - ih264e_pack_c_mb_i8
41 * - ih264e_code_luma_intra_macroblock_16x16
42 * - ih264e_code_luma_intra_macroblock_4x4
43 * - ih264e_code_luma_intra_macroblock_4x4_rdopt_on
44 * - ih264e_code_chroma_intra_macroblock_8x8
45 * - ih264e_code_luma_inter_macroblock_16x16
46 * - ih264e_code_chroma_inter_macroblock_8x8
47 *
48 * @remarks
49 * none
50 *
51 *******************************************************************************
52 */
53
54 /*****************************************************************************/
55 /* File Includes */
56 /*****************************************************************************/
57
58 /* System Include Files */
59 #include <stdio.h>
60 #include <string.h>
61 #include <assert.h>
62
63 /* User Include Files */
64 #include "ih264e_config.h"
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68
69 #include "ih264_macros.h"
70 #include "ih264_defs.h"
71 #include "ih264_mem_fns.h"
72 #include "ih264_padding.h"
73 #include "ih264_structs.h"
74 #include "ih264_trans_quant_itrans_iquant.h"
75 #include "ih264_inter_pred_filters.h"
76 #include "ih264_intra_pred_filters.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_trans_data.h"
79 #include "ih264_cabac_tables.h"
80 #include "ih264_platform_macros.h"
81
82 #include "ime_defs.h"
83 #include "ime_distortion_metrics.h"
84 #include "ime_structs.h"
85
86 #include "irc_cntrl_param.h"
87 #include "irc_frame_info_collector.h"
88
89 #include "ih264e_error.h"
90 #include "ih264e_defs.h"
91 #include "ih264e_globals.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_bitstream.h"
94 #include "ih264e_cabac_structs.h"
95 #include "ih264e_structs.h"
96 #include "ih264e_mc.h"
97 #include "ih264e_core_coding.h"
98
99
100 /*****************************************************************************/
101 /* Function Definitions */
102 /*****************************************************************************/
103
104 /**
105 *******************************************************************************
106 *
107 * @brief
108 * This function performs does the DCT transform then Hadamard transform
109 * and quantization for a macroblock when the mb mode is intra 16x16 mode
110 *
111 * @par Description:
112 * First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
113 * Then hadamard transform is done on the DC coefficients
114 * Quantization is then performed on the 16x16 block, 4x4 wise
115 *
116 * @param[in] pu1_src
117 * Pointer to source sub-block
118 *
119 * @param[in] pu1_pred
120 * Pointer to prediction sub-block
121 *
122 * @param[in] pi2_out
123 * Pointer to residual sub-block
124 * The output will be in linear format
125 * The first 16 continuous locations will contain the values of Dc block
126 * After DC block and a stride 1st AC block will follow
127 * After one more stride next AC block will follow
128 * The blocks will be in raster scan order
129 *
130 * @param[in] src_strd
131 * Source stride
132 *
133 * @param[in] pred_strd
134 * Prediction stride
135 *
136 * @param[in] dst_strd
137 * Destination stride
138 *
139 * @param[in] pu2_scale_matrix
140 * The quantization matrix for 4x4 transform
141 *
142 * @param[in] pu2_threshold_matrix
143 * Threshold matrix
144 *
145 * @param[in] u4_qbits
146 * 15+QP/6
147 *
148 * @param[in] u4_round_factor
149 * Round factor for quant
150 *
151 * @param[out] pu1_nnz
152 * Memory to store the non-zeros after transform
153 * The first byte will be the nnz of DC block
154 * From the next byte the AC nnzs will be stored in raster scan order
155 *
156 * @param u4_dc_flag
157 * Signals if Dc transform is to be done or not
158 * 1 -> Dc transform will be done
159 * 0 -> Dc transform will not be done
160 *
161 * @remarks
162 *
163 *******************************************************************************
164 */
ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t * ps_codec,UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD16 * pi2_out,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 * pu1_nnz,UWORD32 u4_dc_flag)165 void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
166 UWORD8 *pu1_src,
167 UWORD8 *pu1_pred,
168 WORD16 *pi2_out,
169 WORD32 src_strd,
170 WORD32 pred_strd,
171 WORD32 dst_strd,
172 const UWORD16 *pu2_scale_matrix,
173 const UWORD16 *pu2_threshold_matrix,
174 UWORD32 u4_qbits,
175 UWORD32 u4_round_factor,
176 UWORD8 *pu1_nnz,
177 UWORD32 u4_dc_flag)
178
179 {
180 WORD32 blk_cntr;
181 WORD32 i4_offsetx, i4_offsety;
182 UWORD8 *pu1_curr_src, *pu1_curr_pred;
183
184 WORD16 *pi2_dc_str = pi2_out;
185
186 /* Move to the ac addresses */
187 pu1_nnz++;
188 pi2_out += dst_strd;
189
190 for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
191 {
192 IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
193
194 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
195 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
196
197 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
198 pi2_out + blk_cntr * dst_strd,
199 src_strd, pred_strd, pu2_scale_matrix,
200 pu2_threshold_matrix, u4_qbits,
201 u4_round_factor, &pu1_nnz[blk_cntr],
202 &pi2_dc_str[blk_cntr]);
203
204 }
205
206 if (!u4_dc_flag)
207 return;
208
209 /*
210 * In case of i16x16, we need to remove the contribution of dc coeffs into
211 * nnz of each block. We are doing that in the packing function
212 */
213
214 /* Adjust pointers to point to dc values */
215 pi2_out -= dst_strd;
216 pu1_nnz--;
217
218 u4_qbits++;
219 u4_round_factor <<= 1;
220
221 ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
222 pu2_threshold_matrix, u4_qbits,
223 u4_round_factor, &pu1_nnz[0]);
224 }
225
226 /**
227 *******************************************************************************
228 *
229 * @brief
230 * This function performs the intra 16x16 inverse transform process for H264
231 * it includes inverse Dc transform, inverse quant and then inverse transform
232 *
233 * @par Description:
234 *
235 * @param[in] pi2_src
236 * Input data, 16x16 size
237 * First 16 mem locations will have the Dc coffs in raster scan order in linear
238 * fashion after a stride 1st AC clock will be present again in raster can order
239 * Then each AC block of the 16x16 block will follow in raster scan order
240 *
241 * @param[in] pu1_pred
242 * The predicted data, 16x16 size
243 * Block by block form
244 *
245 * @param[in] pu1_out
246 * Output 16x16
247 * In block by block form
248 *
249 * @param[in] src_strd
250 * Source stride
251 *
252 * @param[in] pred_strd
253 * input stride for prediction buffer
254 *
255 * @param[in] out_strd
256 * input stride for output buffer
257 *
258 * @param[in] pu2_iscale_mat
259 * Inverse quantization matrix for 4x4 transform
260 *
261 * @param[in] pu2_weigh_mat
262 * weight matrix of 4x4 transform
263 *
264 * @param[in] qp_div
265 * QP/6
266 *
267 * @param[in] u4_cntrl
268 * Controls the transform path
269 * total Last 17 bits are used
270 * the 16th th bit will correspond to DC block
271 * and 32-17 will correspond to the ac blocks in raster scan order
272 * bit equaling zero indicates that the entire 4x4 block is zero for DC
273 * For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block
274 * is nonzero
275 *
276 * @param[in] u4_dc_trans_flag
277 * Differentiates intra vs inter
278 *
279 * @param[in] pi4_tmp
280 * Input temporary buffer
281 * needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
282 *
283 * @returns
284 * none
285 *
286 * @remarks
287 * The all zero case must be taken care outside
288 *
289 *******************************************************************************
290 */
ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t * ps_codec,WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,UWORD32 u4_cntrl,UWORD32 u4_dc_trans_flag,WORD32 * pi4_tmp)291 void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
292 WORD16 *pi2_src,
293 UWORD8 *pu1_pred,
294 UWORD8 *pu1_out,
295 WORD32 src_strd,
296 WORD32 pred_strd,
297 WORD32 out_strd,
298 const UWORD16 *pu2_iscale_mat,
299 const UWORD16 *pu2_weigh_mat,
300 UWORD32 qp_div,
301 UWORD32 u4_cntrl,
302 UWORD32 u4_dc_trans_flag,
303 WORD32 *pi4_tmp)
304 {
305 /* Start index for inverse quant in a 4x4 block */
306 WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
307
308 /* Cntrl bits for 4x4 transforms
309 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
310 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
311 * : dc block must contain only single dc coefficient
312 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
313 * : ie not (ac or dc)
314 */
315 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
316
317 /* tmp registers for block ids */
318 UWORD32 u4_blk_id;
319
320 /* Subscrripts */
321 WORD32 i4_offset_x, i4_offset_y;
322
323 UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk;
324
325 /* Src and stride for dc coeffs */
326 UWORD32 u4_dc_inc;
327 WORD16 *pi2_dc_src;
328
329 /*
330 * For intra blocks we need to do inverse dc transform
331 * In case if intra blocks, its here that we populate the dc bits in cntrl
332 * as they cannot be populated any earlier
333 */
334 if (u4_dc_trans_flag)
335 {
336 UWORD32 cntr, u4_dc_cntrl;
337 /* Do inv hadamard and place the results at the start of each AC block */
338 ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
339 pu2_weigh_mat, qp_div, pi4_tmp);
340
341 /* Update the cntrl flag */
342 u4_dc_cntrl = 0;
343 for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
344 {
345 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
346 }
347 /* Mark dc bits as 1 if corresponding ac bit is 0 */
348 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
349 /* Combine both ac and dc bits */
350 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
351 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
352 }
353
354 /* Source for dc coeffs
355 * If the block is intra, we have to read dc values from first row of src
356 * then stride for each block is 1, other wise its src stride
357 */
358 pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
359 u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
360
361 /* The AC blocks starts from 2nd row */
362 pi2_src += src_strd;
363
364 /* Get the block bits */
365 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
366 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
367 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
368
369 /* Get first block to process */
370 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
371 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
372 {
373 /* Compute address of src blocks */
374 WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
375
376 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
377
378 /* Compute address of out and pred blocks */
379 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
380 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
381
382 /* Do inv dc transform */
383 ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
384 pu1_cur_prd_blk,
385 pu1_cur_out_blk, pred_strd,
386 out_strd, pu2_iscale_mat,
387 pu2_weigh_mat, qp_div, NULL,
388 iq_start_idx,
389 pi2_dc_src + i4_src_offset);
390 /* Get next DC block to process */
391 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
392 }
393
394 /* now process ac/mixed blocks */
395 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
396 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
397 {
398
399 WORD32 i4_src_offset = src_strd * u4_blk_id;
400
401 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
402
403 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
404 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
405
406 ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
407 pu1_cur_prd_blk, pu1_cur_out_blk,
408 pred_strd, out_strd,
409 pu2_iscale_mat, pu2_weigh_mat,
410 qp_div, (WORD16*) pi4_tmp,
411 iq_start_idx,
412 pi2_dc_src + u4_blk_id);
413
414 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
415 }
416
417 /* Now process empty blocks */
418 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
419 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
420 {
421 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
422
423 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
424 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
425
426 ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
427 pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
428 SIZE_4X4_BLK_VERT, 0, 0);
429
430 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
431 }
432 }
433
434 /**
435 *******************************************************************************
436 *
437 * @brief
438 * This function performs does the DCT transform then Hadamard transform
439 * and quantization for a chroma macroblock
440 *
441 * @par Description:
442 * First cf4 is done on all 16 4x4 blocks of the 8x8input block
443 * Then hadamard transform is done on the DC coefficients
444 * Quantization is then performed on the 8x8 block, 4x4 wise
445 *
446 * @param[in] pu1_src
447 * Pointer to source sub-block
448 * The input is in interleaved format for two chroma planes
449 *
450 * @param[in] pu1_pred
451 * Pointer to prediction sub-block
452 * Prediction is in inter leaved format
453 *
454 * @param[in] pi2_out
455 * Pointer to residual sub-block
456 * The output will be in linear format
457 * The first 4 continuous locations will contain the values of DC block for U
458 * and then next 4 will contain for V.
459 * After DC block and a stride 1st AC block of U plane will follow
460 * After one more stride next AC block of V plane will follow
461 * The blocks will be in raster scan order
462 *
463 * After all the AC blocks of U plane AC blocks of V plane will follow in exact
464 * same way
465 *
466 * @param[in] src_strd
467 * Source stride
468 *
469 * @param[in] pred_strd
470 * Prediction stride
471 *
472 * @param[in] out_strd
473 * Destination stride
474 *
475 * @param[in] pu2_scale_matrix
476 * The quantization matrix for 4x4 transform
477 *
478 * @param[in] pu2_threshold_matrix
479 * Threshold matrix
480 *
481 * @param[in] u4_qbits
482 * 15+QP/6
483 *
484 * @param[in] u4_round_factor
485 * Round factor for quant
486 *
487 * @param[out] pu1_nnz_c
488 * Memory to store the non-zeros after transform
489 * The first byte will be the nnz od DC block for U plane
490 * From the next byte the AC nnzs will be storerd in raster scan order
491 * The fifth byte will be nnz of Dc block of V plane
492 * Then Ac blocks will follow
493 *
494 * @remarks
495 *
496 *******************************************************************************
497 */
ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t * ps_codec,UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD16 * pi2_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 * pu1_nnz_c)498 void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
499 UWORD8 *pu1_src,
500 UWORD8 *pu1_pred,
501 WORD16 *pi2_out,
502 WORD32 src_strd,
503 WORD32 pred_strd,
504 WORD32 out_strd,
505 const UWORD16 *pu2_scale_matrix,
506 const UWORD16 *pu2_threshold_matrix,
507 UWORD32 u4_qbits,
508 UWORD32 u4_round_factor,
509 UWORD8 *pu1_nnz_c)
510 {
511 WORD32 blk_cntr;
512 WORD32 i4_offsetx, i4_offsety;
513 UWORD8 *pu1_curr_src, *pu1_curr_pred;
514
515 WORD16 pi2_dc_str[8];
516 UWORD8 au1_dcnnz[2];
517
518 /* Move to the ac addresses */
519 pu1_nnz_c++;
520 pi2_out += out_strd;
521
522 for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++)
523 {
524 IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety);
525
526 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
527 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
528
529 /* For chroma, v plane nnz is populated from position 5 */
530 ps_codec->pf_resi_trans_quant_chroma_4x4(
531 pu1_curr_src, pu1_curr_pred,
532 pi2_out + blk_cntr * out_strd, src_strd, pred_strd,
533 pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
534 u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)],
535 &pi2_dc_str[blk_cntr]);
536 }
537
538 /* Adjust pointers to point to dc values */
539 pi2_out -= out_strd;
540 pu1_nnz_c--;
541
542 u4_qbits++;
543 u4_round_factor <<= 1;
544
545 ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix,
546 pu2_threshold_matrix, u4_qbits,
547 u4_round_factor, au1_dcnnz);
548
549 /* Copy the dc nnzs */
550 pu1_nnz_c[0] = au1_dcnnz[0];
551 pu1_nnz_c[5] = au1_dcnnz[1];
552
553 }
554
555 /**
556 *******************************************************************************
557 * @brief Does inverse DC transform, inverse quantization inverse transform for
558 * chroma MB
559 *
560 * @par Description:
561 * Does inverse DC transform, inverse quantization inverse transform for
562 * chroma MB
563 *
564 * @param[in] pi2_src
565 * Input data, 16x16 size
566 * The input is in the form of, first 4 locations will contain DC coeffs of
567 * U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
568 * in raster scan order will follow, each block as linear array in raster scan order.
569 * After a stride next AC block will follow. After all AC blocks of U plane
570 * V plane AC blocks will follow in exact same order.
571 *
572 * @param[in] pu1_pred
573 * The predicted data, 8x16 size, U and V interleaved
574 *
575 * @param[in] pu1_out
576 * Output 8x16, U and V interleaved
577 *
578 * @param[in] src_strd
579 * Source stride
580 *
581 * @param[in] pred_strd
582 * input stride for prediction buffer
583 *
584 * @param[in] out_strd
585 * input stride for output buffer
586 *
587 * @param[in] pu2_iscale_mat
588 * Inverse quantization martix for 4x4 transform
589 *
590 * @param[in] pu2_weigh_mat
591 * weight matrix of 4x4 transform
592 *
593 * @param[in] qp_div
594 * QP/6
595 *
596 * @param[in] u4_cntrl
597 * Controls the transform path
598 * the 15 th bit will correspond to DC block of U plane, 14th will indicate the
599 * V plane Dc block. 32-28 bits will indicate AC blocks of U plane in raster
600 * scan order. 27-23 bits will indicate AC blocks of V plane in rater scan order.
601 * The bit 1 implies that there is at least one non zero coeff in a block
602 *
603 * @param[in] pi4_tmp
604 * Input temporary buffer
605 * needs to be at least COFF_CNT_SUB_BLK_4x4 + (Number of Dc coeffs for chroma *
606 * number of planes) in size
607 *
608 * @returns
609 * none
610 *
611 * @remarks
612 *
613 *******************************************************************************
614 */
ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t * ps_codec,WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 src_strd,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,UWORD32 u4_cntrl,WORD32 * pi4_tmp)615 void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
616 WORD16 *pi2_src,
617 UWORD8 *pu1_pred,
618 UWORD8 *pu1_out,
619 WORD32 src_strd,
620 WORD32 pred_strd,
621 WORD32 out_strd,
622 const UWORD16 *pu2_iscale_mat,
623 const UWORD16 *pu2_weigh_mat,
624 UWORD32 qp_div,
625 UWORD32 u4_cntrl,
626 WORD32 *pi4_tmp)
627 {
628 /* Cntrl bits for 4x4 transforms
629 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
630 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
631 * : dc block must contain only single dc coefficient
632 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
633 * : ie not (ac or dc)
634 */
635
636 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
637
638 /* tmp registers for block ids */
639 WORD32 u4_blk_id;
640
641 /* Offsets for pointers */
642 WORD32 i4_offset_x, i4_offset_y;
643
644 /* Pointer to 4x4 blocks */
645 UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk;
646
647 /* Tmp register for pointer to dc coffs */
648 WORD16 *pi2_dc_src;
649
650 WORD16 i2_zero = 0;
651
652 /* Increment for dc block */
653 WORD32 i4_dc_inc;
654
655 /*
656 * Lets do the inverse transform for dc coeffs in chroma
657 */
658 if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
659 {
660 UWORD32 cntr, u4_dc_cntrl;
661
662 /* Do inv hadamard for u an v block */
663 ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
664 pu2_weigh_mat, qp_div, NULL);
665 /*
666 * Update the cntrl flag
667 * Flag is updated as follows bits 15-11 -> u block dc bits
668 */
669 u4_dc_cntrl = 0;
670 for (cntr = 0; cntr < 8; cntr++)
671 {
672 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
673 }
674
675 /* Mark dc bits as 1 if corresponding ac bit is 0 */
676 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
677 /* Combine both ac and dc bits */
678 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
679 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
680
681 /* Since we populated the dc coffs, we have to read them from there */
682 pi2_dc_src = pi2_src;
683 i4_dc_inc = 1;
684 }
685 else
686 {
687 u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
688 pi2_dc_src = &i2_zero;
689 i4_dc_inc = 0;
690 }
691
692 /* Get the block bits */
693 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
694 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
695 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
696
697 /* The AC blocks starts from 2nd row */
698 pi2_src += src_strd;
699
700 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
701 while (u4_blk_id < 8)
702 {
703 WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
704
705 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
706
707 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
708 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
709
710 ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
711 pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
712 pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
713 NULL, pi2_dc_src + dc_src_offset);
714 /* Get next DC block to process */
715 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
716 }
717
718 /* now process ac/mixed blocks */
719 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
720 while (u4_blk_id < 8)
721 {
722 WORD32 i4_src_offset = src_strd * u4_blk_id;
723 WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
724
725 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
726
727 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
728 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
729
730 ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
731 pu1_cur_4x4_prd_blk,
732 pu1_cur_4x4_out_blk,
733 pred_strd, out_strd,
734 pu2_iscale_mat,
735 pu2_weigh_mat, qp_div,
736 (WORD16 *) pi4_tmp,
737 pi2_dc_src + dc_src_offset);
738
739 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
740 }
741
742 /* Now process empty blocks */
743 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
744 while (u4_blk_id < 8)
745 {
746 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
747
748 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
749 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
750
751 ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
752 pred_strd, out_strd, SIZE_4X4_BLK_VERT,
753 SIZE_4X4_BLK_HRZ);
754
755 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
756 }
757 }
758
759 /**
760 ******************************************************************************
761 *
762 * @brief This function packs residue of an i16x16 luma mb for entropy coding
763 *
764 * @par Description
765 * An i16 macro block contains two classes of units, dc 4x4 block and
766 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and
767 * the 16 ac blocks are sent next in scan order. Each and every block is
768 * represented by 3 parameters (nnz, significant coefficient map and the
769 * residue coefficients itself). If a 4x4 unit does not have any coefficients
770 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
771 * sent in scan order.
772 *
773 * The first byte of each block will be nnz of the block, if it is non zero,
774 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
775 * This is repeated for 1 dc + 16 ac blocks.
776 *
777 * @param[in] pi2_res_mb
778 * pointer to residue mb
779 *
780 * @param[in, out] pv_mb_coeff_data
781 * buffer pointing to packed residue coefficients
782 *
783 * @param[in] u4_res_strd
784 * residual block stride
785 *
786 * @param[out] u1_cbp_l
787 * coded block pattern luma
788 *
789 * @param[in] pu1_nnz
790 * number of non zero coefficients in each 4x4 unit
791 *
792 * @param[out]
793 * Control signal for inverse transform of 16x16 blocks
794 *
795 * @return none
796 *
797 * @ remarks
798 *
799 ******************************************************************************
800 */
ih264e_pack_l_mb_i16(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 * pu4_cntrl)801 void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
802 void **pv_mb_coeff_data,
803 WORD32 i4_res_strd,
804 UWORD8 *u1_cbp_l,
805 UWORD8 *pu1_nnz,
806 UWORD32 *pu4_cntrl)
807 {
808 /* pointer to packed sub block buffer space */
809 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
810
811 /* no of non zero coefficients in the current sub block */
812 UWORD32 u4_nnz_cnt;
813
814 /* significant coefficient map */
815 UWORD32 u4_s_map;
816
817 /* pointer to scanning matrix */
818 const UWORD8 *pu1_scan_order;
819
820 /* number of non zeros in sub block */
821 UWORD32 u4_nnz;
822
823 /* coeff scan order */
824 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
825
826 /* temp var */
827 UWORD32 coeff_cnt, mask, b4,u4_cntrl=0;
828
829 /*DC and AC coeff pointers*/
830 WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc;
831
832 /********************************************************/
833 /* pack dc coeff data for entropy coding */
834 /********************************************************/
835
836 pi2_res_mb_dc = pi2_res_mb;
837 pu1_scan_order = gu1_luma_scan_order_dc;
838
839 u4_nnz = *pu1_nnz;
840 u4_cntrl = 0;
841
842 /* write number of non zero coefficients */
843 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
844
845 if (u4_nnz)
846 {
847 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
848 {
849 if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
850 {
851 /* write residue */
852 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
853 u4_s_map |= mask;
854 }
855 mask <<= 1;
856 }
857 /* write significant coeff map */
858 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
859 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
860
861 u4_cntrl = 0x00008000;// Set DC bit in ctrl code
862 }
863 else
864 {
865 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
866 }
867
868 /********************************************************/
869 /* pack ac coeff data for entropy coding */
870 /********************************************************/
871
872 pu1_nnz ++;
873 pu1_scan_order = gu1_luma_scan_order;
874 pi2_res_mb += i4_res_strd; /*Move to AC block*/
875
876 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
877
878 for (b4 = 0; b4 < 16; b4++)
879 {
880 ps_mb_coeff_data = (*pv_mb_coeff_data);
881
882 u4_nnz = pu1_nnz[u1_scan_order[b4]];
883
884 /* Jump according to the scan order */
885 pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
886
887 /*
888 * Since this is a i16x16 block, we should not count dc coeff on indi
889 * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
890 * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
891 * here
892 */
893 u4_nnz -= (pi2_res_mb_ac[0] != 0);
894
895 /* write number of non zero coefficients */
896 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
897
898 if (u4_nnz)
899 {
900 for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
901 {
902 if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
903 {
904 /* write residue */
905 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
906 u4_s_map |= mask;
907 }
908 mask <<= 1;
909 }
910 /* write significant coeff map */
911 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
912 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
913 *u1_cbp_l = 15;
914
915 u4_cntrl |= (1 << (31 - u1_scan_order[b4]));
916 }
917 else
918 {
919 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
920 }
921
922 }
923
924 if (!(*u1_cbp_l))
925 {
926 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
927 }
928
929 /* Store the cntrl signal */
930 (*pu4_cntrl) = u4_cntrl;
931 return;
932 }
933
934 /**
935 ******************************************************************************
936 *
937 * @brief This function packs residue of a p16x16 luma mb for entropy coding
938 *
939 * @par Description
940 * A p16x16 macro block contains two classes of units 16 4x4 ac blocks.
941 * while packing the mb, the dc block is sent first, and
942 * the 16 ac blocks are sent next in scan order. Each and every block is
943 * represented by 3 parameters (nnz, significant coefficient map and the
944 * residue coefficients itself). If a 4x4 unit does not have any coefficients
945 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
946 * sent in scan order.
947 *
948 * The first byte of each block will be nnz of the block, if it is non zero,
949 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
950 * This is repeated for 1 dc + 16 ac blocks.
951 *
952 * @param[in] pi2_res_mb
953 * pointer to residue mb
954 *
955 * @param[in, out] pv_mb_coeff_data
956 * buffer pointing to packed residue coefficients
957 *
958 * @param[in] i4_res_strd
959 * residual block stride
960 *
961 * @param[out] u1_cbp_l
962 * coded block pattern luma
963 *
964 * @param[in] pu1_nnz
965 * number of non zero coefficients in each 4x4 unit
966 *
967 * @param[out] pu4_cntrl
968 * Control signal for inverse transform
969 *
970 * @return none
971 *
972 * @remarks Killing coffs not yet coded
973 *
974 ******************************************************************************
975 */
void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
                      void **pv_mb_coeff_data,
                      WORD32 i4_res_strd,
                      UWORD8 *u1_cbp_l,
                      UWORD8 *pu1_nnz,
                      UWORD32 u4_thres_resi,
                      UWORD32 *pu4_cntrl)
{
    /*
     * Packs the 16 4x4 luma blocks of a macro block into the buffer at
     * *pv_mb_coeff_data, sets the luma cbp bits and builds the control word
     * for the inverse transform. When u4_thres_resi is non zero, 8x8 units
     * and finally the whole macro block are dropped if their coefficient
     * cost does not exceed the skip thresholds.
     */

    /* raster index of each 4x4 block, listed in coding order (8x8 by 8x8) */
    static const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};

    /*
     * Control bits owned by each 8x8 unit. When a full 8x8 unit is reset both
     * its dc and ac bits have to be cleared, hence the symmetric arrangement
     * of bits in the two half words.
     */
    static const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};

    /* packed buffer position: current block, start of 8x8 unit, start of mb */
    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;

    /* number of coefficients written so far for the current sub block */
    UWORD32 u4_nnz_cnt;

    /* significant coefficient map */
    UWORD32 u4_s_map;

    /* coefficient scan order inside a 4x4 block */
    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;

    /* number of non zeros in sub block */
    UWORD32 u4_nnz;

    /* pointer to residual sub block */
    WORD16 *pi2_res_sb;

    /* coeff cost table, indexed by the zero run preceding a +/-1 level */
    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;

    /* temp var */
    UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;

    /* temp var */
    WORD32 i4_res_val, i4_run = -1, dcac_block, i4_blk;

    /* When Hadamard transform is disabled, first row values are dont care, ignore them */
    pi2_res_mb += i4_res_strd;

    /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
    pu1_nnz++;

    ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);

    /********************************************************/
    /*  pack coeff data for entropy coding                  */
    /********************************************************/
    for (b4 = 0; b4 < 16; b4++)
    {
        ps_mb_coeff_data = (*pv_mb_coeff_data);

        b8 = b4 >> 2;

        /* raster index of the block; kept signed so the stride math stays signed */
        i4_blk = u1_scan_order[b4];

        u4_nnz = pu1_nnz[i4_blk];

        /* Jump according to the scan order: one residue row per 4x4 block */
        pi2_res_sb = pi2_res_mb + (i4_res_strd * i4_blk);

        /* write number of non zero coefficients (lower 16 bits of the word) */
        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;

        if (u4_nnz)
        {
            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
            {
                /* number of runs of zero before, this is used to compute coeff cost */
                i4_run++;

                i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];

                if (i4_res_val)
                {
                    /* write residue */
                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
                    u4_s_map |= mask;

                    if (u4_thres_resi)
                    {
                        /* compute coeff cost: +/-1 levels cost by run, others a flat 9 */
                        if (i4_res_val == 1 || i4_res_val == -1)
                        {
                            if (i4_run < 6)
                                u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
                        }
                        else
                            u4_b8_coeff_cost += 9;

                        i4_run = -1;
                    }
                }

                mask <<= 1;
            }

            /* write significant coeff map (upper 16 bits of the word) */
            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);

            /* cbp */
            *u1_cbp_l |= (1 << b8);

            /*
             * Cntrl map for inverse transform computation.
             *
             * NOTE(review): the scan loop above always runs at least once
             * when u4_nnz is non zero, so coeff_cnt is >= 1 here and the
             * dc-only value (16) is never selected; the ac bit
             * (31 - block index) is always the one that gets set. Behaviour
             * is kept as is - confirm the intended dc-only handling.
             */
            dcac_block = (coeff_cnt == 0) ? 16 : 31;

            /* unsigned literal: shifting a signed 1 into bit 31 is undefined */
            u4_cntrl |= (1U << (dcac_block - i4_blk));
        }
        else
        {
            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
        }

        /* Decide if the 8x8 unit has to be sent for entropy coding? */
        if ((b4 + 1) % 4 == 0)
        {
            if (u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
                            (*u1_cbp_l & (1 << b8)))
            {
                /* restore cbp */
                *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));

                /* correct cntrl flag */
                u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);

                /* correct nnz of the four 4x4 blocks of this 8x8 unit */
                pu1_nnz[u1_scan_order[b4 - 3]] = 0;
                pu1_nnz[u1_scan_order[b4 - 2]] = 0;
                pu1_nnz[u1_scan_order[b4 - 1]] = 0;
                pu1_nnz[u1_scan_order[b4]] = 0;

                /* reset blk cost */
                u4_b8_coeff_cost = 0;
            }

            /* an 8x8 unit without cbp contributes nothing: rewind to its start */
            if (!(*u1_cbp_l & (1 << b8)))
            {
                (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
            }

            u4_mb_coeff_cost += u4_b8_coeff_cost;

            u4_b8_coeff_cost = 0;
            i4_run = -1;
            ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
        }
    }

    /* drop the whole macro block if its total cost is within the threshold */
    if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
                    && (*u1_cbp_l))
    {
        (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
        *u1_cbp_l = 0;
        u4_cntrl = 0;
        memset(pu1_nnz, 0, 16);
    }

    /* Store the cntrl signal */
    (*pu4_cntrl) = u4_cntrl;

    return;
}
1149
1150 /**
1151 ******************************************************************************
1152 *
1153 * @brief This function packs residue of an i8x8 chroma mb for entropy coding
1154 *
1155 * @par Description
1156 * An i8 chroma macro block contains two classes of units, dc 2x2 block and
1157 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and
1158 * the 4 ac blocks are sent next in scan order. Each and every block is
1159 * represented by 3 parameters (nnz, significant coefficient map and the
1160 * residue coefficients itself). If a 4x4 unit does not have any coefficients
1161 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
1162 * sent in scan order.
1163 *
1164 * The first byte of each block will be nnz of the block, if it is non zero,
1165 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
1166 * This is repeated for 1 dc + 4 ac blocks.
1167 *
1168 * @param[in] pi2_res_mb
1169 * pointer to residue mb
1170 *
1171 * @param[in, out] pv_mb_coeff_data
1172 * buffer pointing to packed residue coefficients
1173 *
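* @param[in] i4_res_strd
* residual block stride
*
* @param[out] u1_cbp_c
* coded block pattern chroma
*
* @param[in, out] pu1_nnz
* number of non zero coefficients in each 4x4 unit; the ac entries of a plane
* are reset to zero when that plane's ac residue is dropped
*
* @param[in] u4_thres_resi
* when non zero, the ac residue of a plane is dropped if its coefficient cost
* is below the chroma skip threshold
*
* @param[out] pu4_cntrl
* Control signal for inverse transform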
1185 *
1186 * @param[in] u4_swap_uv
1187 * Swaps the order of U and V planes in entropy bitstream
1188 *
1189 * @return none
1190 *
* @remarks none
1192 *
1193 ******************************************************************************
1194 */
void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
                      void **pv_mb_coeff_data,
                      WORD32 i4_res_strd,
                      UWORD8 *u1_cbp_c,
                      UWORD8 *pu1_nnz,
                      UWORD32 u4_thres_resi,
                      UWORD32 *pu4_cntrl,
                      UWORD32 u4_swap_uv)
{
    /*
     * Packs the chroma residue of a macro block into the buffer at
     * *pv_mb_coeff_data: first the two dc blocks, then four ac blocks per
     * plane. Sets the chroma cbp (1: dc only, 2: ac present) and builds the
     * control word for the inverse transform. The planes are visited in
     * U,V order, or V,U order when u4_swap_uv is set.
     *
     * Layout used by the code below:
     *   pu1_nnz    : [0] first plane dc, [1..4] first plane ac,
     *                [5] second plane dc, [6..9] second plane ac
     *   pi2_res_mb : row 0 holds the dc coeffs (second plane at offset 4),
     *                rows 1..4 first plane ac, rows 5..8 second plane ac
     */

    /* pointer to packed sub block buffer space */
    tu_sblk_coeff_data_t *ps_mb_coeff_data;
    tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;

    /* nnz pointer */
    UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;

    /* nnz counter */
    UWORD32 u4_nnz_cnt;

    /* significant coefficient map */
    UWORD32 u4_s_map;

    /* pointer to scanning matrix */
    const UWORD8 *pu1_scan_order;

    /* no of non zero coefficients in the current sub block */
    UWORD32 u4_nnz;

    /* pointer to residual sub block, res val */
    WORD16 *pi2_res_sb, i2_res_val;

    /* temp var */
    UWORD32 coeff_cnt, mask, b4, plane;

    /* temp var */
    UWORD32 u4_coeff_cost;
    WORD32 i4_run;

    /* coeff cost table, indexed by the zero run preceding a +/-1 level */
    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;

    /* start of the current plane's ac data in the packed buffer */
    UWORD32 *pu4_mb_coeff_data = NULL;

    /* ac coded block pattern */
    UWORD8 u1_cbp_ac;

    /* Variable to store the current bit pos in cntrl variable */
    UWORD32 cntrl_pos = 0;

    /********************************************************/
    /*  pack dc coeff data for entropy coding               */
    /********************************************************/
    pu1_scan_order = gu1_chroma_scan_order_dc;
    pi2_res_sb = pi2_res_mb;
    pu1_nnz_dc = pu1_nnz;
    (*pu4_cntrl) = 0;
    cntrl_pos = 15;
    ps_mb_coeff_data_dc = (*pv_mb_coeff_data);

    /* Color space conversion between SP_UV and SP_VU
     * We always assume SP_UV for all the processing
     * Hence to get proper stream output we need to swap U and V channels here
     *
     * For that there are two paths we need to look for
     * One is the path to bitstream, these variables should have the proper
     * input configured UV or VU
     * For the other path the inverse transform variables should have what
     * ever ordering the input had
     */
    if (u4_swap_uv)
    {
        pu1_nnz_dc += 5; /* Move to NNZ of V plane */
        pi2_res_sb += 4; /* Move to DC coeff of V plane */

        cntrl_pos = 14; /* Control bit for V plane */
    }

    for (plane = 0; plane < 2; plane++)
    {
        ps_mb_coeff_data = (*pv_mb_coeff_data);

        u4_nnz = *pu1_nnz_dc;

        /* write number of non zero coefficients U/V */
        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;

        if (u4_nnz)
        {
            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
            {
                i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
                if (i2_res_val)
                {
                    /* write residue U/V */
                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
                    u4_s_map |= mask;
                }
                mask <<= 1;
            }

            /* write significant coeff map U/V */
            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
            *u1_cbp_c = 1;

            (*pu4_cntrl) |= (1U << cntrl_pos);
        }
        else
        {
            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
        }

        if (u4_swap_uv)
        {
            cntrl_pos++;     /* Control bit for U plane */
            pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
            pi2_res_sb -= 4; /* Move to DC coeff of U plane */
        }
        else
        {
            cntrl_pos--;     /* Control bit for V plane */
            pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
            pi2_res_sb += 4; /* Move to DC coeff of V plane */
        }
    }

    /********************************************************/
    /*  pack ac coeff data for entropy coding               */
    /********************************************************/
    pu1_scan_order = gu1_chroma_scan_order;
    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);

    if (u4_swap_uv)
    {
        pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane, ie 1 dc row + 4 ac rows */
        cntrl_pos = 27;                            /* V plane ac bits start at bit 31 - 4 */
        pu1_nnz_ac = pu1_nnz + 6;                  /* V ac nnz: 1 dc + 4 ac + 1 dc */
    }
    else
    {
        pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane, ie 1 dc row */
        cntrl_pos = 31;                        /* U plane ac bits start at bit 31 */
        pu1_nnz_ac = pu1_nnz + 1;              /* U ac nnz: skip 1 dc */
    }

    for (plane = 0; plane < 2; plane++)
    {
        pu4_mb_coeff_data = (*pv_mb_coeff_data);

        u4_coeff_cost = 0;
        i4_run = -1;

        /* get the current cbp, so that it automatically
         * gets reverted in case of zero ac values */
        u1_cbp_ac = *u1_cbp_c;

        for (b4 = 0; b4 < 4; b4++)
        {
            ps_mb_coeff_data = (*pv_mb_coeff_data);

            u4_nnz = *pu1_nnz_ac;

            /*
             * We are scanning only ac coeffs, but the nnz is for the
             * complete 4x4 block. Hence we have to discount the nnz contributed
             * by the dc coefficient
             */
            u4_nnz -= (pi2_res_sb[0] != 0);

            /* write number of non zero coefficients U/V */
            ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;

            if (u4_nnz)
            {
                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
                {
                    i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];

                    i4_run++;

                    if (i2_res_val)
                    {
                        /* write residue U/V */
                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
                        u4_s_map |= mask;

                        /* cost is only tracked until it reaches the threshold */
                        if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
                        {
                            /* compute coeff cost */
                            if (i2_res_val == 1 || i2_res_val == -1)
                            {
                                if (i4_run < 6)
                                    u4_coeff_cost += pu1_coeff_cost[i4_run];
                            }
                            else
                                u4_coeff_cost += 9;

                            i4_run = -1;
                        }
                    }
                    mask <<= 1;
                }

                /* write significant coeff map U/V */
                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
                u1_cbp_ac = 2;

                /* unsigned literal: cntrl_pos can be 31, and shifting a
                 * signed 1 into the sign bit is undefined */
                (*pu4_cntrl) |= 1U << cntrl_pos;
            }
            else
            {
                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
            }

            pu1_nnz_ac++;
            pi2_res_sb += i4_res_strd;
            cntrl_pos--;
        }

        /* reset block */
        if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
        {
            /* replace the plane's ac data by four empty (nnz = 0) blocks */
            pu4_mb_coeff_data[0] = 0;
            pu4_mb_coeff_data[1] = 0;
            pu4_mb_coeff_data[2] = 0;
            pu4_mb_coeff_data[3] = 0;
            (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;

            /* Generate the control signal */
            /* Zero out the current plane's AC bits: 31..28 for U, 27..24 for V */
            (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);

            /* Similarly do for the NNZ also */
            *(pu1_nnz_ac - 4) = 0;
            *(pu1_nnz_ac - 3) = 0;
            *(pu1_nnz_ac - 2) = 0;
            *(pu1_nnz_ac - 1) = 0;
        }
        else
        {
            *u1_cbp_c = u1_cbp_ac;
        }

        /* set up the second plane */
        if (u4_swap_uv)
        {
            pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane, ie 1 dc row */
            cntrl_pos = 31;                        /* U plane ac bits start at bit 31 */
            pu1_nnz_ac = pu1_nnz + 1;              /* U ac nnz: skip 1 dc */
        }
        else
        {
            /* residue pointer and cntrl_pos already point at the V plane */
            pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
        }
    }

    /* restore the ptr basing on cbp */
    if (*u1_cbp_c == 0)
    {
        /* nothing coded: discard dc and ac data */
        (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
    }
    else if (*u1_cbp_c == 1)
    {
        /* dc only: discard the ac data */
        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
    }

    return;
}
1465
1466 /**
1467 *******************************************************************************
1468 *
1469 * @brief performs luma core coding when intra mode is i16x16
1470 *
1471 * @par Description:
1472 * If the current mb is to be coded as intra of mb type i16x16, the mb is first
1473 * predicted using one of i16x16 prediction filters, basing on the intra mode
1474 * chosen. Then, error is computed between the input blk and the estimated blk.
1475 * This error is transformed (hierarchical transform i.e., dct followed by hada-
1476 * -mard), quantized. The quantized coefficients are packed in scan order for
1477 * entropy coding.
1478 *
* @param[in] ps_proc
1480 * pointer to the current macro block context
1481 *
1482 * @returns u1_cbp_l
1483 * coded block pattern luma
1484 *
1485 * @remarks none
1486 *
1487 *******************************************************************************
1488 */
ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t * ps_proc)1489 UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
1490 {
1491 /* Codec Context */
1492 codec_t *ps_codec = ps_proc->ps_codec;
1493
1494 /* pointer to ref macro block */
1495 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1496
1497 /* pointer to src macro block */
1498 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1499
1500 /* pointer to prediction macro block */
1501 UWORD8 *pu1_pred_mb = NULL;
1502
1503 /* pointer to residual macro block */
1504 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1505
1506 /* strides */
1507 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1508 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1509 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1510 WORD32 i4_res_strd = ps_proc->i4_res_strd;
1511
1512 /* intra mode */
1513 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1514
1515 /* coded block pattern */
1516 UWORD8 u1_cbp_l = 0;
1517
1518 /* number of non zero coeffs*/
1519 UWORD32 au4_nnz[5];
1520 UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz;
1521
1522 /*Cntrol signal for itrans*/
1523 UWORD32 u4_cntrl;
1524
1525 /* quantization parameters */
1526 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1527
1528 /* pointer to packed mb coeff data */
1529 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1530
1531 /* init nnz */
1532 au4_nnz[0] = 0;
1533 au4_nnz[1] = 0;
1534 au4_nnz[2] = 0;
1535 au4_nnz[3] = 0;
1536 au4_nnz[4] = 0;
1537
1538 if (u1_intra_mode == PLANE_I16x16)
1539 {
1540 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
1541 }
1542 else
1543 {
1544 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
1545 }
1546
1547 /********************************************************/
1548 /* error estimation, */
1549 /* transform */
1550 /* quantization */
1551 /********************************************************/
1552 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
1553 pu1_pred_mb, pi2_res_mb,
1554 i4_src_strd, i4_pred_strd,
1555 i4_res_strd,
1556 ps_qp_params->pu2_scale_mat,
1557 ps_qp_params->pu2_thres_mat,
1558 ps_qp_params->u1_qbits,
1559 ps_qp_params->u4_dead_zone,
1560 pu1_nnz, ENABLE_DC_TRANSFORM);
1561
1562 /********************************************************/
1563 /* pack coeff data for entropy coding */
1564 /********************************************************/
1565 ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
1566 pu1_nnz, &u4_cntrl);
1567
1568 /********************************************************/
1569 /* ierror estimation, */
1570 /* itransform */
1571 /* iquantization */
1572 /********************************************************/
1573 /*
1574 *if refernce frame is not to be computed
1575 *we only need the right and bottom border 4x4 blocks to predict next intra
1576 *blocks, hence only compute them
1577 */
1578 if (!ps_proc->u4_compute_recon)
1579 {
1580 u4_cntrl &= 0x111F8000;
1581 }
1582
1583 if (u4_cntrl)
1584 {
1585 ih264e_luma_16x16_idctrans_iquant_itrans_recon(
1586 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1587 i4_res_strd, i4_pred_strd, i4_rec_strd,
1588 ps_qp_params->pu2_iscale_mat,
1589 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
1590 u4_cntrl, ENABLE_DC_TRANSFORM,
1591 ps_proc->pv_scratch_buff);
1592 }
1593 else
1594 {
1595 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
1596 i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
1597 0);
1598 }
1599
1600 return (u1_cbp_l);
1601 }
1602
1603
1604 /**
1605 *******************************************************************************
1606 *
1607 * @brief performs luma core coding when intra mode is i4x4
1608 *
1609 * @par Description:
1610 * If the current mb is to be coded as intra of mb type i4x4, the mb is first
1611 * predicted using one of i4x4 prediction filters, basing on the intra mode
1612 * chosen. Then, error is computed between the input blk and the estimated blk.
1613 * This error is dct transformed and quantized. The quantized coefficients are
1614 * packed in scan order for entropy coding.
1615 *
* @param[in] ps_proc
1617 * pointer to the current macro block context
1618 *
1619 * @returns u1_cbp_l
1620 * coded block pattern luma
1621 *
1622 * @remarks
1623 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1624 * mentioned in h.264 specification
1625 *
1626 *******************************************************************************
1627 */
ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t * ps_proc)1628 UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
1629 {
1630 /* Codec Context */
1631 codec_t *ps_codec = ps_proc->ps_codec;
1632
1633 /* pointer to ref macro block */
1634 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1635
1636 /* pointer to src macro block */
1637 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1638
1639 /* pointer to prediction macro block */
1640 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1641
1642 /* pointer to residual macro block */
1643 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1644
1645 /* strides */
1646 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1647 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1648 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1649
1650 /* pointer to neighbors: left, top, top-left */
1651 UWORD8 *pu1_mb_a;
1652 UWORD8 *pu1_mb_b;
1653 UWORD8 *pu1_mb_c;
1654 UWORD8 *pu1_mb_d;
1655
1656 /* intra mode */
1657 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1658
1659 /* neighbor availability */
1660 WORD32 i4_ngbr_avbl;
1661
1662 /* neighbor pels for intra prediction */
1663 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1664
1665 /* coded block pattern */
1666 UWORD8 u1_cbp_l = 0;
1667
1668 /* number of non zero coeffs*/
1669 UWORD8 u1_nnz;
1670
1671 /* quantization parameters */
1672 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1673
1674 /* pointer to packed mb coeff data */
1675 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1676
1677 /* pointer to packed mb coeff data */
1678 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1679
1680 /* no of non zero coefficients in the current sub block */
1681 UWORD32 u4_nnz_cnt;
1682
1683 /* significant coefficient map */
1684 UWORD32 u4_s_map;
1685
1686 /* pointer to scanning matrix */
1687 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1688
1689 /*Dummy variable for 4x4 trans fucntion*/
1690 WORD16 i2_dc_dummy;
1691
1692 /* temp var */
1693 UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
1694
1695 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1696 for (b8 = 0; b8 < 4; b8++)
1697 {
1698 u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
1699 u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
1700
1701 /* if in case cbp for the 8x8 block is zero, send no residue */
1702 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1703
1704 for (b4 = 0; b4 < 4; b4++)
1705 {
1706 /* index of pel in MB */
1707 u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
1708 u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
1709
1710 /* Initialize source and reference pointers */
1711 pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
1712 pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
1713
1714 /* pointer to left of ref macro block */
1715 pu1_mb_a = pu1_ref_mb - 1;
1716 /* pointer to top of ref macro block */
1717 pu1_mb_b = pu1_ref_mb - i4_rec_strd;
1718 /* pointer to topright of ref macro block */
1719 pu1_mb_c = pu1_mb_b + 4;
1720 /* pointer to topleft macro block */
1721 pu1_mb_d = pu1_mb_b - 1;
1722
1723 /* compute neighbor availability */
1724 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1725
1726 /* sub block intra mode */
1727 u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
1728
1729 /********************************************************/
1730 /* gather prediction pels from neighbors for prediction */
1731 /********************************************************/
1732 /* left pels */
1733 if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
1734 {
1735 for (i = 0; i < 4; i++)
1736 pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
1737 }
1738 else
1739 {
1740 memset(pu1_ngbr_pels_i4, 0, 4);
1741 }
1742
1743 /* top pels */
1744 if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1745 {
1746 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1747 }
1748 else
1749 {
1750 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
1751 }
1752 /* top left pels */
1753 if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
1754 {
1755 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1756 }
1757 else
1758 {
1759 pu1_ngbr_pels_i4[4] = 0;
1760 }
1761 /* top right pels */
1762 if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
1763 {
1764 memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
1765 }
1766 else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1767 {
1768 memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
1769 }
1770
1771 /********************************************************/
1772 /* prediction */
1773 /********************************************************/
1774 (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
1775 pu1_pred_mb, 0,
1776 i4_pred_strd,
1777 i4_ngbr_avbl);
1778
1779 /********************************************************/
1780 /* error estimation, */
1781 /* transform */
1782 /* quantization */
1783 /********************************************************/
1784 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
1785 pi2_res_mb, i4_src_strd,
1786 i4_pred_strd,
1787 ps_qp_params->pu2_scale_mat,
1788 ps_qp_params->pu2_thres_mat,
1789 ps_qp_params->u1_qbits,
1790 ps_qp_params->u4_dead_zone,
1791 &u1_nnz, &i2_dc_dummy);
1792
1793 /********************************************************/
1794 /* pack coeff data for entropy coding */
1795 /********************************************************/
1796 ps_mb_coeff_data = *pv_mb_coeff_data;
1797
1798 /* write number of non zero coefficients */
1799 ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
1800
1801 if (u1_nnz)
1802 {
1803 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
1804 {
1805 if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1806 {
1807 /* write residue */
1808 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1809 u4_s_map |= mask;
1810 }
1811 mask <<= 1;
1812 }
1813 /* write significant coeff map */
1814 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1815
1816 /* update ptr to coeff data */
1817 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1818
1819 /* cbp */
1820 u1_cbp_l |= (1 << b8);
1821 }
1822 else
1823 {
1824 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1825 }
1826
1827 /********************************************************/
1828 /* ierror estimation, */
1829 /* itransform */
1830 /* iquantization */
1831 /********************************************************/
1832 if (u1_nnz)
1833 ps_codec->pf_iquant_itrans_recon_4x4(
1834 pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1835 /*No input stride,*/i4_pred_strd,
1836 i4_rec_strd, ps_qp_params->pu2_iscale_mat,
1837 ps_qp_params->pu2_weigh_mat,
1838 ps_qp_params->u1_qp_div,
1839 ps_proc->pv_scratch_buff, 0, 0);
1840 else
1841 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
1842 i4_pred_strd, i4_rec_strd,
1843 BLK_SIZE, BLK_SIZE, NULL,
1844 0);
1845
1846 }
1847
1848 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1849 if (!(u1_cbp_l & (1 << b8)))
1850 {
1851 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1852 }
1853 }
1854
1855 return (u1_cbp_l);
1856 }
1857
1858 /**
1859 *******************************************************************************
1860 *
* @brief packs luma i4x4 residue that is already available in the process
* context and copies the matching reconstruction (rdopt on)
*
* @par Description:
* Unlike ih264e_code_luma_intra_macroblock_4x4, this routine does not predict,
* transform or quantize. It reads the residue coefficients and per block nnz
* stored in the process context (pi2_res_buf_intra_4x4, au4_nnz_intra_4x4),
* packs them in scan order for entropy coding, and copies the stored
* reconstruction (pu1_ref_mb_intra_4x4) into the luma recon buffer.
* Presumably these buffers are filled during i4x4 mode evaluation when rdopt
* is enabled.
*
* @param[in] ps_proc
* pointer to the current macro block context
1872 *
1873 * @returns u1_cbp_l
1874 * coded block pattern luma
1875 *
1876 * @remarks
1877 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1878 * mentioned in h.264 specification
1879 *
1880 *******************************************************************************
1881 */
ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t * ps_proc)1882 UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
1883 {
1884 /* Codec Context */
1885 codec_t *ps_codec = ps_proc->ps_codec;
1886
1887 /* pointer to ref macro block */
1888 UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
1889
1890 /* pointer to recon buffer */
1891 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
1892
1893 /* pointer to residual macro block */
1894 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1895
1896 /* strides */
1897 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1898
1899 /* number of non zero coeffs*/
1900 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1901
1902 /* coded block pattern */
1903 UWORD8 u1_cbp_l = 0;
1904
1905 /* pointer to packed mb coeff data */
1906 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1907
1908 /* pointer to packed mb coeff data */
1909 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1910
1911 /* no of non zero coefficients in the current sub block */
1912 UWORD32 u4_nnz_cnt;
1913
1914 /* significant coefficient map */
1915 UWORD32 u4_s_map;
1916
1917 /* pointer to scanning matrix */
1918 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1919
1920 /* temp var */
1921 UWORD32 b8, b4, coeff_cnt, mask;
1922
1923 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1924 for (b8 = 0; b8 < 4; b8++)
1925 {
1926 /* if in case cbp for the 8x8 block is zero, send no residue */
1927 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1928
1929 for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1930 {
1931 /********************************************************/
1932 /* pack coeff data for entropy coding */
1933 /********************************************************/
1934 ps_mb_coeff_data = *pv_mb_coeff_data;
1935
1936 /* write number of non zero coefficients */
1937 ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
1938
1939 if (*pu1_nnz)
1940 {
1941 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
1942 {
1943 if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1944 {
1945 /* write residue */
1946 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1947 u4_s_map |= mask;
1948 }
1949 mask <<= 1;
1950 }
1951 /* write significant coeff map */
1952 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1953
1954 /* update ptr to coeff data */
1955 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1956
1957 /* cbp */
1958 u1_cbp_l |= (1 << b8);
1959 }
1960 else
1961 {
1962 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1963 }
1964 }
1965
1966 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1967 if (!(u1_cbp_l & (1 << b8)))
1968 {
1969 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1970 }
1971 }
1972
1973 /* memcpy recon */
1974 ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
1975
1976 return (u1_cbp_l);
1977 }
1978
1979
1980 /**
1981 *******************************************************************************
1982 *
1983 * @brief performs chroma core coding for intra macro blocks
1984 *
1985 * @par Description:
1986 * If the current MB is to be intra coded with mb type chroma I8x8, the MB is
1987 * first predicted using intra 8x8 prediction filters. The predicted data is
1988 * compared with the input for error and the error is transformed. The DC
1989 * coefficients of each transformed sub blocks are further transformed using
1990 * Hadamard transform. The resulting coefficients are quantized, packed and sent
1991 * for entropy coding.
1992 *
1993 * @param[in] ps_proc_ctxt
1994 * pointer to the current macro block context
1995 *
1996 * @returns u1_cbp_c
1997 * coded block pattern chroma
1998 *
1999 * @remarks
2000 * The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
2001 * mentioned in h.264 specification
2002 *
2003 *******************************************************************************
2004 */
ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t * ps_proc)2005 UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
2006 {
2007 /* Codec Context */
2008 codec_t *ps_codec = ps_proc->ps_codec;
2009
2010 /* pointer to ref macro block */
2011 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
2012
2013 /* pointer to src macro block */
2014 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2015
2016 /* pointer to prediction macro block */
2017 UWORD8 *pu1_pred_mb = NULL;
2018
2019 /* pointer to residual macro block */
2020 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2021
2022 /* strides */
2023 WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
2024 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2025 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2026 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2027
2028 /* intra mode */
2029 UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
2030
2031 /* coded block pattern */
2032 UWORD8 u1_cbp_c = 0;
2033
2034 /* number of non zero coeffs*/
2035 UWORD8 au1_nnz[18] = {0};
2036
2037 /* quantization parameters */
2038 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2039
2040 /* Control signal for inverse transform */
2041 UWORD32 u4_cntrl;
2042
2043 /* pointer to packed mb coeff data */
2044 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2045
2046 /* See if we need to swap U and V plances for entropy */
2047 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2048
2049 if (PLANE_CH_I8x8 == u1_intra_mode)
2050 {
2051 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
2052 }
2053 else
2054 {
2055 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
2056 }
2057
2058 /********************************************************/
2059 /* error estimation, */
2060 /* transform */
2061 /* quantization */
2062 /********************************************************/
2063 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2064 pu1_pred_mb, pi2_res_mb,
2065 i4_src_strd, i4_pred_strd,
2066 i4_res_strd,
2067 ps_qp_params->pu2_scale_mat,
2068 ps_qp_params->pu2_thres_mat,
2069 ps_qp_params->u1_qbits,
2070 ps_qp_params->u4_dead_zone,
2071 au1_nnz);
2072
2073 /********************************************************/
2074 /* pack coeff data for entropy coding */
2075 /********************************************************/
2076 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2077 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2078
2079 /********************************************************/
2080 /* ierror estimation, */
2081 /* itransform */
2082 /* iquantization */
2083 /********************************************************/
2084 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
2085 pu1_pred_mb, pu1_ref_mb,
2086 i4_res_strd, i4_pred_strd,
2087 i4_rec_strd,
2088 ps_qp_params->pu2_iscale_mat,
2089 ps_qp_params->pu2_weigh_mat,
2090 ps_qp_params->u1_qp_div,
2091 u4_cntrl,
2092 ps_proc->pv_scratch_buff);
2093 return (u1_cbp_c);
2094 }
2095
2096
2097 /**
2098 *******************************************************************************
2099 *
2100 * @brief performs luma core coding when mode is inter
2101 *
2102 * @par Description:
2103 * If the current mb is to be coded as inter the mb is predicted based on the
2104 * sub mb partitions and corresponding motion vectors generated by ME. Then,
2105 * error is computed between the input blk and the estimated blk. This error is
2106 * transformed, quantized. The quantized coefficients are packed in scan order
2107 * for entropy coding
2108 *
2109 * @param[in] ps_proc_ctxt
2110 * pointer to the current macro block context
2111 *
2112 * @returns coded block pattern luma
2113 *
2114 * @remarks none
2115 *
2116 *******************************************************************************
2117 */
ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t * ps_proc)2118 UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
2119 {
2120 /* Codec Context */
2121 codec_t *ps_codec = ps_proc->ps_codec;
2122
2123 /* pointer to ref macro block */
2124 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
2125
2126 /* pointer to src macro block */
2127 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
2128
2129 /* pointer to prediction macro block */
2130 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2131
2132 /* pointer to residual macro block */
2133 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2134
2135 /* strides */
2136 WORD32 i4_src_strd = ps_proc->i4_src_strd;
2137 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2138 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2139 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2140
2141 /* coded block pattern */
2142 UWORD8 u1_cbp_l = 0;
2143
2144 /*Control signal of itrans*/
2145 UWORD32 u4_cntrl;
2146
2147 /* number of non zero coeffs*/
2148 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
2149
2150 /* quantization parameters */
2151 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2152
2153 /* pointer to packed mb coeff data */
2154 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2155
2156 /* pseudo pred buffer */
2157 UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
2158
2159 /* pseudo pred buffer stride */
2160 WORD32 i4_pseudo_pred_strd = i4_pred_strd;
2161
2162 /* init nnz */
2163 ps_proc->au4_nnz[0] = 0;
2164 ps_proc->au4_nnz[1] = 0;
2165 ps_proc->au4_nnz[2] = 0;
2166 ps_proc->au4_nnz[3] = 0;
2167 ps_proc->au4_nnz[4] = 0;
2168
2169 /********************************************************/
2170 /* prediction */
2171 /********************************************************/
2172 ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
2173
2174 /********************************************************/
2175 /* error estimation, */
2176 /* transform */
2177 /* quantization */
2178 /********************************************************/
2179 if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
2180 {
2181 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2182 pu1_pseudo_pred, pi2_res_mb,
2183 i4_src_strd,
2184 i4_pseudo_pred_strd,
2185 i4_res_strd,
2186 ps_qp_params->pu2_scale_mat,
2187 ps_qp_params->pu2_thres_mat,
2188 ps_qp_params->u1_qbits,
2189 ps_qp_params->u4_dead_zone,
2190 pu1_nnz,
2191 DISABLE_DC_TRANSFORM);
2192
2193 /********************************************************/
2194 /* pack coeff data for entropy coding */
2195 /********************************************************/
2196 ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
2197 pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
2198 }
2199 else
2200 {
2201 u1_cbp_l = 0;
2202 u4_cntrl = 0;
2203 }
2204
2205 /********************************************************/
2206 /* ierror estimation, */
2207 /* itransform */
2208 /* iquantization */
2209 /********************************************************/
2210
2211 /*If the frame is not to be used for P frame reference or dumping recon
2212 * we only will use the reocn for only predicting intra Mbs
2213 * THis will need only right and bottom edge 4x4 blocks recon
2214 * Hence we selectively enable them using control signal(including DC)
2215 */
2216 if (ps_proc->u4_compute_recon != 1)
2217 {
2218 u4_cntrl &= 0x111F0000;
2219 }
2220
2221 if (u4_cntrl)
2222 {
2223 ih264e_luma_16x16_idctrans_iquant_itrans_recon(
2224 ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
2225 i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
2226 ps_qp_params->pu2_iscale_mat,
2227 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2228 u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
2229 ps_proc->pv_scratch_buff);
2230 }
2231 else
2232 {
2233 ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
2234 i4_pseudo_pred_strd, i4_rec_strd,
2235 MB_SIZE, MB_SIZE, NULL, 0);
2236 }
2237
2238
2239 return (u1_cbp_l);
2240 }
2241
2242 /**
2243 *******************************************************************************
2244 *
2245 * @brief performs chroma core coding for inter macro blocks
2246 *
2247 * @par Description:
2248 * If the current mb is to be coded as inter predicted mb, based on the sub mb
2249 * partitions and corresponding motion vectors generated by ME, prediction is done.
2250 * Then, error is computed between the input blk and the estimated blk.
2251 * This error is transformed, quantized. The quantized coefficients
2252 * are packed in scan order for entropy coding.
2253 *
2254 * @param[in] ps_proc_ctxt
2255 * pointer to the current macro block context
2256 *
2257 * @returns coded block pattern chroma
2258 *
2259 * @remarks none
2260 *
2261 *******************************************************************************
2262 */
ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t * ps_proc)2263 UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
2264 {
2265 /* Codec Context */
2266 codec_t *ps_codec = ps_proc->ps_codec;
2267
2268 /* pointer to ref macro block */
2269 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
2270
2271 /* pointer to src macro block */
2272 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2273
2274 /* pointer to prediction macro block */
2275 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2276
2277 /* pointer to residual macro block */
2278 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2279
2280 /* strides */
2281 WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
2282 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2283 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2284 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2285
2286 /* coded block pattern */
2287 UWORD8 u1_cbp_c = 0;
2288
2289 /*Control signal for inverse transform*/
2290 UWORD32 u4_cntrl;
2291
2292 /* number of non zero coeffs*/
2293 UWORD8 au1_nnz[10] = {0};
2294
2295 /* quantization parameters */
2296 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2297
2298 /* pointer to packed mb coeff data */
2299 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2300
2301 /*See if we need to swap U and V plances for entropy*/
2302 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2303
2304 /********************************************************/
2305 /* prediction */
2306 /********************************************************/
2307 ih264e_motion_comp_chroma(ps_proc);
2308
2309 /********************************************************/
2310 /* error estimation, */
2311 /* transform */
2312 /* quantization */
2313 /********************************************************/
2314 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2315 pu1_pred_mb, pi2_res_mb,
2316 i4_src_strd, i4_pred_strd,
2317 i4_res_strd,
2318 ps_qp_params->pu2_scale_mat,
2319 ps_qp_params->pu2_thres_mat,
2320 ps_qp_params->u1_qbits,
2321 ps_qp_params->u4_dead_zone,
2322 au1_nnz);
2323
2324 /********************************************************/
2325 /* pack coeff data for entropy coding */
2326 /********************************************************/
2327 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2328 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2329
2330 /********************************************************/
2331 /* ierror estimation, */
2332 /* itransform */
2333 /* iquantization */
2334 /********************************************************/
2335
2336 /* If the frame is not to be used for P frame reference or dumping recon
2337 * we only will use the reocn for only predicting intra Mbs
2338 * THis will need only right and bottom edge 4x4 blocks recon
2339 * Hence we selectively enable them using control signal(including DC)
2340 */
2341 if (!ps_proc->u4_compute_recon)
2342 {
2343 u4_cntrl &= 0x7700C000;
2344 }
2345
2346 if (u4_cntrl)
2347 {
2348 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
2349 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
2350 i4_res_strd, i4_pred_strd, i4_rec_strd,
2351 ps_qp_params->pu2_iscale_mat,
2352 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2353 u4_cntrl, ps_proc->pv_scratch_buff);
2354 }
2355 else
2356 {
2357 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
2358 i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
2359 NULL, 0);
2360 }
2361
2362 return (u1_cbp_c);
2363 }
2364