1 /******************************************************************************
2 *
3 * Copyright (C) 2022 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * isvce_core_coding.c
25 *
26 * @brief
27 * This file contains routines that perform luma and chroma core coding for
28 * intra macroblocks
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - isvce_pack_l_mb_i16()
35 * - isvce_pack_c_mb_i8()
36 * - isvce_code_luma_intra_macroblock_16x16()
37 * - isvce_code_luma_intra_macroblock_4x4()
38 * - isvce_code_chroma_intra_macroblock_8x8()
39 *
40 * @remarks
41 * None
42 *
43 *******************************************************************************
44 */
45
46 /*****************************************************************************/
47 /* File Includes */
48 /*****************************************************************************/
49
50 /* System include files */
51 #include <stdio.h>
52 #include <string.h>
53 #include <assert.h>
54
55 /* User include files */
56 #include "ih264_typedefs.h"
57 #include "ih264_debug.h"
58 #include "ih264_platform_macros.h"
59 #include "iv2.h"
60 #include "ive2.h"
61 #include "isvc_macros.h"
62 #include "isvc_defs.h"
63 #include "ih264e_config.h"
64 #include "isvce_defs.h"
65 #include "ih264_trans_data.h"
66 #include "ih264e_error.h"
67 #include "ih264e_bitstream.h"
68 #include "ime_distortion_metrics.h"
69 #include "ime_defs.h"
70 #include "ime_structs.h"
71 #include "isvc_structs.h"
72 #include "isvc_trans_quant_itrans_iquant.h"
73 #include "isvc_inter_pred_filters.h"
74 #include "isvc_mem_fns.h"
75 #include "ih264_padding.h"
76 #include "ih264_intra_pred_filters.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "isvc_cabac_tables.h"
79 #include "irc_cntrl_param.h"
80 #include "irc_frame_info_collector.h"
81 #include "isvce_rate_control.h"
82 #include "isvce_cabac_structs.h"
83 #include "isvce_structs.h"
84 #include "isvce_globals.h"
85 #include "isvce_core_coding.h"
86 #include "isvce_mc.h"
87 #include "isvce_ibl_eval.h"
88
89 /*****************************************************************************/
90 /* Function Definitions */
91 /*****************************************************************************/
92
93 /**
94 *******************************************************************************
95 *
96 * @brief
97 * This function performs does the DCT transform then Hadamard transform
98 * and quantization for a macroblock when the mb mode is intra 16x16 mode
99 *
100 * @par Description:
101 * First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
102 * Then hadamard transform is done on the DC coefficients
103 * Quantization is then performed on the 16x16 block, 4x4 wise
104 *
105 * @param[in] pu1_src
106 * Pointer to source sub-block
107 *
108 * @param[in] pu1_pred
109 * Pointer to prediction sub-block
110 *
111 * @param[in] pi2_out
112 * Pointer to residual sub-block
113 * The output will be in linear format
114 * The first 16 continuous locations will contain the values of Dc block
115 * After DC block and a stride 1st AC block will follow
116 * After one more stride next AC block will follow
117 * The blocks will be in raster scan order
118 *
119 * @param[in] i4_src_stride
120 * Source stride
121 *
122 * @param[in] i4_pred_stride
123 * Prediction stride
124 *
125 * @param[in] dst_strd
126 * Destination stride
127 *
128 * @param[in] pu2_scale_matrix
129 * The quantization matrix for 4x4 transform
130 *
131 * @param[in] pu2_threshold_matrix
132 * Threshold matrix
133 *
134 * @param[in] u4_qbits
135 * 15+QP/6
136 *
137 * @param[in] u4_round_factor
138 * Round factor for quant
139 *
140 * @param[out] pu1_nnz
141 * Memory to store the non-zeros after transform
142 * The first byte will be the nnz of DC block
143 * From the next byte the AC nnzs will be stored in raster scan order
144 *
145 * @param u4_dc_flag
146 * Signals if Dc transform is to be done or not
147 * 1 -> Dc transform will be done
148 * 0 -> Dc transform will not be done
149 *
150 * @remarks
151 *
152 *******************************************************************************
153 */
isvce_luma_16x16_resi_trans_dctrans_quant(buffer_container_t * ps_src,buffer_container_t * ps_pred,buffer_container_t * ps_quant_coeffs,buffer_container_t * ps_upsampled_res,isa_dependent_fxns_t * ps_isa_dependent_fxns,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD8 * pu1_nnz,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD32 u4_dc_flag,UWORD8 u1_use_upsampled_res)154 void isvce_luma_16x16_resi_trans_dctrans_quant(
155 buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_quant_coeffs,
156 buffer_container_t *ps_upsampled_res, isa_dependent_fxns_t *ps_isa_dependent_fxns,
157 const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD8 *pu1_nnz,
158 UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD32 u4_dc_flag, UWORD8 u1_use_upsampled_res)
159 {
160 WORD32 blk_cntr;
161 WORD32 i4_offsetx, i4_offsety;
162
163 enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
164 buffer_container_t s_src = ps_src[0];
165 buffer_container_t s_pred = ps_pred[0];
166 buffer_container_t s_quant_coeffs = ps_quant_coeffs[0];
167 buffer_container_t s_upsampled_res = {0};
168 resi_trans_quant_constants_t s_resi_trans_quant_constants = {
169 .pu2_scale_matrix = pu2_scale_matrix,
170 .pu2_threshold_matrix = pu2_threshold_matrix,
171 .u4_qbits = u4_qbits,
172 .u4_round_factor = u4_round_factor};
173
174 UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(u1_use_upsampled_res);
175
176 /* Move to the ac addresses */
177 pu1_nnz++;
178 s_quant_coeffs.pv_data = ((WORD16 *) s_quant_coeffs.pv_data) + s_quant_coeffs.i4_data_stride;
179
180 if(u1_use_upsampled_res)
181 {
182 s_upsampled_res = ps_upsampled_res[0];
183 }
184
185 for(blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
186 {
187 IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
188
189 s_src.pv_data =
190 ((UWORD8 *) ps_src[0].pv_data) + i4_offsetx + i4_offsety * ps_src[0].i4_data_stride;
191 s_pred.pv_data =
192 ((UWORD8 *) ps_pred[0].pv_data) + i4_offsetx + i4_offsety * ps_pred[0].i4_data_stride;
193 s_quant_coeffs.pv_data =
194 ((WORD16 *) ps_quant_coeffs[0].pv_data) + blk_cntr * ps_quant_coeffs[0].i4_data_stride;
195
196 if(u1_use_upsampled_res)
197 {
198 s_upsampled_res.pv_data = ((WORD16 *) ps_upsampled_res[0].pv_data) + i4_offsetx +
199 i4_offsety * ps_upsampled_res[0].i4_data_stride;
200 }
201
202 /* Move to the ac addresses */
203 s_quant_coeffs.pv_data =
204 ((WORD16 *) s_quant_coeffs.pv_data) + ps_quant_coeffs[0].i4_data_stride;
205
206 s_quant_coeffs.i4_data_stride = 4;
207
208 ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
209 &s_src, &s_pred, &s_quant_coeffs, &s_upsampled_res, &s_resi_trans_quant_constants,
210 &pu1_nnz[blk_cntr], ((WORD16 *) ps_quant_coeffs->pv_data) + blk_cntr,
211 u1_use_upsampled_res);
212 }
213
214 if(!u4_dc_flag)
215 {
216 return;
217 }
218
219 /*
220 * In case of i16x16, we need to remove the contribution of dc coeffs into
221 * nnz of each block. We are doing that in the packing function
222 */
223
224 /* Adjust pointers to point to dc values */
225 s_quant_coeffs = ps_quant_coeffs[0];
226 pu1_nnz--;
227
228 u4_qbits++;
229 u4_round_factor <<= 1;
230
231 ps_enc_loop_fxns->pf_hadamard_quant_4x4(((WORD16 *) s_quant_coeffs.pv_data),
232 ((WORD16 *) s_quant_coeffs.pv_data),
233 &s_resi_trans_quant_constants, &pu1_nnz[0]);
234 }
235
236 /**
237 *******************************************************************************
238 *
239 * @brief
240 * This function performs the intra 16x16 inverse transform process for H264
241 * it includes inverse Dc transform, inverse quant and then inverse transform
242 *
243 * @par Description:
244 *
245 * @param[in] pi2_src
246 * Input data, 16x16 size
247 * First 16 mem locations will have the Dc coffs in rater scan order in linear
248 *fashion after a stride 1st AC clock will be present again in raster can order
249 * Then each AC block of the 16x16 block will follow in raster scan order
250 *
251 * @param[in] pu1_pred
252 * The predicted data, 16x16 size
253 * Block by block form
254 *
255 * @param[in] pu1_out
256 * Output 16x16
257 * In block by block form
258 *
259 * @param[in] i4_src_stride
260 * Source stride
261 *
262 * @param[in] i4_pred_stride
263 * input stride for prediction buffer
264 *
265 * @param[in] i4_out_stride
266 * input stride for output buffer
267 *
268 * @param[in] pu2_iscale_mat
269 * Inverse quantization matrix for 4x4 transform
270 *
271 * @param[in] pu2_weigh_mat
272 * weight matrix of 4x4 transform
273 *
274 * @param[in] u4_qp_div_6
275 * QP/6
276 *
277 * @param[in] pi4_tmp
278 * Input temporary buffer
279 * needs to be at least 20 in size
280 *
281 * @param[in] pu4_cntrl
282 * Controls the transform path
283 * total Last 17 bits are used
284 * the 16th th bit will correspond to DC block
285 * and 32-17 will correspond to the ac blocks in raster scan order
286 * bit equaling zero indicates that the entire 4x4 block is zero for DC
287 * For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block
288 *is nonzero
289 *
290 * @param[in] pi4_tmp
291 * Input temporary buffer
292 * needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
293 *
294 * @returns
295 * none
296 *
297 * @remarks
298 * The all zero case must be taken care outside
299 *
300 *******************************************************************************
301 */
isvce_luma_16x16_idctrans_iquant_itrans_recon(buffer_container_t * ps_src,buffer_container_t * ps_pred,buffer_container_t * ps_recon,buffer_container_t * ps_res,buffer_container_t * ps_res_pred,iq_it_res_rec_constants_t * ps_iq_it_res_rec_constants,isa_dependent_fxns_t * ps_isa_dependent_fxns,WORD32 * pi4_tmp,UWORD32 u4_cntrl,UWORD32 u4_dc_trans_flag,UWORD8 u1_res_accumulate)302 void isvce_luma_16x16_idctrans_iquant_itrans_recon(
303 buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_recon,
304 buffer_container_t *ps_res, buffer_container_t *ps_res_pred,
305 iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
306 isa_dependent_fxns_t *ps_isa_dependent_fxns, WORD32 *pi4_tmp, UWORD32 u4_cntrl,
307 UWORD32 u4_dc_trans_flag, UWORD8 u1_res_accumulate)
308 {
309 /* Cntrl bits for 4x4 transforms
310 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
311 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
312 * : dc block must contain only single dc coefficient
313 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
314 * : ie not (ac or dc)
315 */
316 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
317 UWORD32 u4_blk_id;
318 WORD32 i4_offset_x, i4_offset_y;
319 UWORD32 u4_dc_inc;
320 WORD16 *pi2_dc_src;
321
322 enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
323 buffer_container_t s_src = ps_src[0];
324 buffer_container_t s_pred = ps_pred[0];
325 buffer_container_t s_recon = ps_recon[0];
326 buffer_container_t s_res = ps_res[0];
327 buffer_container_t s_res_pred = ps_res_pred[0];
328
329 /* Start index for inverse quant in a 4x4 block */
330 WORD32 i4_iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
331 const UWORD16 *pu2_iscale_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
332 const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
333 UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
334 UWORD8 u1_iq_it_recon_fxn_idx =
335 isvc_get_iq_it_recon_variant_idx(!!u4_dc_trans_flag, u1_res_accumulate);
336
337 /*
338 * For intra blocks we need to do inverse dc transform
339 * In case if intra blocks, its here that we populate the dc bits in cntrl
340 * as they cannot be populated any earlier
341 */
342 if(u4_dc_trans_flag)
343 {
344 UWORD32 cntr, u4_dc_cntrl;
345
346 /* Do inv hadamard and place the results at the start of each AC block */
347 ps_enc_loop_fxns->pf_ihadamard_scaling_4x4(ps_src->pv_data, ps_src->pv_data, pu2_iscale_mat,
348 pu2_weigh_mat, u4_qp_div_6, pi4_tmp);
349
350 /* Update the cntrl flag */
351 u4_dc_cntrl = 0;
352
353 for(cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
354 {
355 u4_dc_cntrl |= ((((WORD16 *) ps_src->pv_data)[cntr] != 0) << (15 - cntr));
356 }
357
358 /* Mark dc bits as 1 if corresponding ac bit is 0 */
359 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
360
361 /* Combine both ac and dc bits */
362 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA) | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
363 }
364
365 /* Source for dc coeffs
366 * If the block is intra, we have to read dc values from first row of src
367 * then stride for each block is 1, other wise its src stride
368 */
369 pi2_dc_src = ((WORD16 *) ps_src->pv_data) + (i4_iq_start_idx == 0) * ps_src->i4_data_stride;
370 u4_dc_inc = (i4_iq_start_idx == 0) ? ps_src->i4_data_stride : 1;
371
372 /* Get the block bits */
373 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
374 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
375 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
376
377 /* Get first block to process */
378 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
379
380 while(u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
381 {
382 /* Compute address of src blocks */
383 WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
384
385 /* Tx blk coeffs are stored blk by blk */
386 /* Hence, in order to access rows of each Tx blk, one needs to stride of
387 * TxxSize */
388 s_src.i4_data_stride = 4;
389 s_src.pv_data = pi2_dc_src + i4_src_offset;
390
391 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
392
393 /* Compute address of out and pred blocks */
394 s_pred.pv_data =
395 ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
396 s_recon.pv_data =
397 ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
398 s_res.pv_data =
399 ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
400 s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
401 i4_offset_y * ps_res_pred->i4_data_stride;
402
403 ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4_dc[u1_iq_it_recon_fxn_idx](
404 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants, NULL,
405 pi2_dc_src + i4_src_offset, i4_iq_start_idx, u1_res_accumulate);
406
407 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
408 }
409
410 /* now process ac/mixed blocks */
411 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
412 while(u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
413 {
414 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
415
416 /* Tx blk coeffs are stored blk by blk */
417 /* Hence, in order to access rows of each Tx blk, one needs to stride of
418 * TxxSize */
419 s_src.i4_data_stride = 4;
420 /* The AC blocks starts from 2nd row */
421 s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
422
423 s_pred.pv_data =
424 ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
425 s_recon.pv_data =
426 ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
427 s_res.pv_data =
428 ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
429 s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
430 i4_offset_y * ps_res_pred->i4_data_stride;
431
432 ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
433 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants,
434 (WORD16 *) pi4_tmp, pi2_dc_src + u4_blk_id, i4_iq_start_idx, u1_res_accumulate);
435
436 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
437 }
438
439 /* Now process empty blocks */
440 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
441 while(u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
442 {
443 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
444
445 /* Tx blk coeffs are stored blk by blk */
446 /* Hence, in order to access rows of each Tx blk, one needs to stride of
447 * TxxSize */
448 s_src.i4_data_stride = 4;
449 /* The AC blocks starts from 2nd row */
450 s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
451
452 s_pred.pv_data =
453 ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
454 s_recon.pv_data =
455 ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
456 s_res.pv_data =
457 ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
458 s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
459 i4_offset_y * ps_res_pred->i4_data_stride;
460
461 ps_enc_loop_fxns->pf_zcbf_iquant_itrans_recon_4x4(
462 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants, NULL,
463 pi2_dc_src + u4_blk_id, i4_iq_start_idx, u1_res_accumulate);
464
465 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
466 }
467 }
468
469 /**
470 *******************************************************************************
471 *
472 * @brief
473 * This function performs does the DCT transform then Hadamard transform
474 * and quantization for a chroma macroblock
475 *
476 * @par Description:
477 * First cf4 is done on all 16 4x4 blocks of the 8x8input block
478 * Then hadamard transform is done on the DC coefficients
479 * Quantization is then performed on the 8x8 block, 4x4 wise
480 *
481 * @param[in] pu1_src
482 * Pointer to source sub-block
483 * The input is in interleaved format for two chroma planes
484 *
485 * @param[in] pu1_pred
486 * Pointer to prediction sub-block
487 * Prediction is in inter leaved format
488 *
489 * @param[in] pi2_out
490 * Pointer to residual sub-block
491 * The output will be in linear format
492 * The first 4 continuous locations will contain the values of DC block for U
493 * and then next 4 will contain for V.
494 * After DC block and a stride 1st AC block of U plane will follow
495 * After one more stride next AC block of V plane will follow
496 * The blocks will be in raster scan order
497 *
498 * After all the AC blocks of U plane AC blocks of V plane will follow in exact
499 * same way
500 *
501 * @param[in] i4_src_stride
502 * Source stride
503 *
504 * @param[in] i4_pred_stride
505 * Prediction stride
506 *
507 * @param[in] dst_strd
508 * Destination stride
509 *
510 * @param[in] pu2_scale_matrix
511 * The quantization matrix for 4x4 transform
512 *
513 * @param[in] pu2_threshold_matrix
514 * Threshold matrix
515 *
516 * @param[in] u4_qbits
517 * 15+QP/6
518 *
519 * @param[in] u4_round_factor
520 * Round factor for quant
521 *
522 * @param[out] pu1_nnz
523 * Memory to store the non-zeros after transform
524 * The first byte will be the nnz od DC block for U plane
525 * From the next byte the AC nnzs will be storerd in raster scan order
526 * The fifth byte will be nnz of Dc block of V plane
527 * Then Ac blocks will follow
528 *
529 * @param u4_dc_flag
530 * Signals if Dc transform is to be done or not
531 * 1 -> Dc transform will be done
532 * 0 -> Dc transform will not be done
533 *
534 * @remarks
535 *
536 *******************************************************************************
537 */
isvce_chroma_8x8_resi_trans_dctrans_quant(buffer_container_t * ps_src,buffer_container_t * ps_pred,buffer_container_t * ps_quant_coeffs,buffer_container_t * ps_upsampled_res,isa_dependent_fxns_t * ps_isa_dependent_fxns,const UWORD16 * pu2_scale_matrix,const UWORD16 * pu2_threshold_matrix,UWORD8 * pu1_nnz,UWORD32 u4_qbits,UWORD32 u4_round_factor,UWORD8 u1_use_upsampled_res)538 void isvce_chroma_8x8_resi_trans_dctrans_quant(
539 buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_quant_coeffs,
540 buffer_container_t *ps_upsampled_res, isa_dependent_fxns_t *ps_isa_dependent_fxns,
541 const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD8 *pu1_nnz,
542 UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 u1_use_upsampled_res)
543 {
544 WORD32 blk_cntr;
545 WORD32 i4_offsetx, i4_offsety;
546 UWORD8 au1_dcnnz[2];
547
548 enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
549 buffer_container_t s_src = ps_src[0];
550 buffer_container_t s_pred = ps_pred[0];
551 buffer_container_t s_quant_coeffs = ps_quant_coeffs[0];
552 buffer_container_t s_upsampled_res = {0};
553 resi_trans_quant_constants_t s_resi_trans_quant_constants = {
554 .pu2_scale_matrix = pu2_scale_matrix,
555 .pu2_threshold_matrix = pu2_threshold_matrix,
556 .u4_qbits = u4_qbits,
557 .u4_round_factor = u4_round_factor};
558
559 UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(u1_use_upsampled_res);
560
561 if(u1_use_upsampled_res)
562 {
563 s_upsampled_res = ps_upsampled_res[0];
564 }
565
566 /* Move to the ac addresses */
567 pu1_nnz++;
568
569 for(blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++)
570 {
571 IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety);
572
573 s_src.pv_data =
574 ((UWORD8 *) ps_src[0].pv_data) + i4_offsetx + i4_offsety * ps_src[0].i4_data_stride;
575 s_pred.pv_data =
576 ((UWORD8 *) ps_pred[0].pv_data) + i4_offsetx + i4_offsety * ps_pred[0].i4_data_stride;
577 s_quant_coeffs.pv_data =
578 ((WORD16 *) ps_quant_coeffs[0].pv_data) + blk_cntr * ps_quant_coeffs[0].i4_data_stride;
579
580 if(u1_use_upsampled_res)
581 {
582 s_upsampled_res.pv_data = ((WORD16 *) ps_upsampled_res[0].pv_data) + i4_offsetx +
583 i4_offsety * ps_upsampled_res[0].i4_data_stride;
584 }
585
586 /* Move to the ac addresses */
587 s_quant_coeffs.pv_data =
588 ((WORD16 *) s_quant_coeffs.pv_data) + ps_quant_coeffs[0].i4_data_stride;
589
590 s_quant_coeffs.i4_data_stride = 4;
591
592 /* For chroma, v plane nnz is populated from position 5 */
593 ps_enc_loop_fxns->apf_resi_trans_quant_chroma_4x4[u1_resi_trans_fxn_idx](
594 &s_src, &s_pred, &s_quant_coeffs, &s_upsampled_res, &s_resi_trans_quant_constants,
595 &pu1_nnz[blk_cntr + (blk_cntr > 3)], ((WORD16 *) ps_quant_coeffs->pv_data) + blk_cntr,
596 u1_use_upsampled_res);
597 }
598
599 /* Adjust pointers to point to dc values */
600 s_quant_coeffs = ps_quant_coeffs[0];
601 pu1_nnz--;
602
603 s_resi_trans_quant_constants.u4_qbits++;
604 s_resi_trans_quant_constants.u4_round_factor <<= 1;
605
606 ps_enc_loop_fxns->pf_hadamard_quant_2x2_uv(((WORD16 *) ps_quant_coeffs->pv_data),
607 ((WORD16 *) ps_quant_coeffs->pv_data),
608 &s_resi_trans_quant_constants, au1_dcnnz);
609
610 /* Copy the dc nnzs */
611 pu1_nnz[0] = au1_dcnnz[0];
612 pu1_nnz[5] = au1_dcnnz[1];
613 }
614
615 /**
616 *******************************************************************************
617 * @brief
618 * This function performs the inverse transform with process for chroma MB of
619 *H264
620 *
621 * @par Description:
622 * Does inverse DC transform ,inverse quantization inverse transform
623 *
624 * @param[in] pi2_src
625 * Input data, 16x16 size
626 * The input is in the form of, first 4 locations will contain DC coeffs of
627 * U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
628 * in raster scan order will follow, each block as linear array in raster scan
629 *order. After a stride next AC block will follow. After all AC blocks of U plane
630 * V plane AC blocks will follow in exact same order.
631 *
632 * @param[in] pu1_pred
633 * The predicted data, 8x16 size, U and V interleaved
634 *
635 * @param[in] pu1_out
636 * Output 8x16, U and V interleaved
637 *
638 * @param[in] i4_src_stride
639 * Source stride
640 *
641 * @param[in] i4_pred_stride
642 * input stride for prediction buffer
643 *
644 * @param[in] i4_out_stride
645 * input stride for output buffer
646 *
647 * @param[in] pu2_iscale_mat
648 * Inverse quantization martix for 4x4 transform
649 *
650 * @param[in] pu2_weigh_mat
651 * weight matrix of 4x4 transform
652 *
653 * @param[in] u4_qp_div_6
654 * QP/6
655 *
656 * @param[in] pi4_tmp
657 * Input temporary buffer
658 * needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma *
659 *number of planes in size
660 *
661 * @param[in] pu4_cntrl
662 * Controls the transform path
663 * the 15 th bit will correspond to DC block of U plane , 14th will indicate the
664 *V plane Dc block 32-28 bits will indicate AC blocks of U plane in raster scan
665 *order 27-23 bits will indicate AC blocks of V plane in rater scan order The bit
666 *1 implies that there is at least one non zero coeff in a block
667 *
668 * @returns
669 * none
670 *
671 * @remarks
672 *******************************************************************************
673 */
isvce_chroma_8x8_idctrans_iquant_itrans_recon(buffer_container_t * ps_src,buffer_container_t * ps_pred,buffer_container_t * ps_recon,buffer_container_t * ps_res,buffer_container_t * ps_res_pred,iq_it_res_rec_constants_t * ps_iq_it_res_rec_constants,isa_dependent_fxns_t * ps_isa_dependent_fxns,WORD32 * pi4_tmp,UWORD32 u4_cntrl,UWORD8 u1_res_accumulate)674 void isvce_chroma_8x8_idctrans_iquant_itrans_recon(
675 buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_recon,
676 buffer_container_t *ps_res, buffer_container_t *ps_res_pred,
677 iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
678 isa_dependent_fxns_t *ps_isa_dependent_fxns, WORD32 *pi4_tmp, UWORD32 u4_cntrl,
679 UWORD8 u1_res_accumulate)
680 {
681 /* Cntrl bits for 4x4 transforms
682 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
683 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
684 * : dc block must contain only single dc coefficient
685 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
686 * : ie not (ac or dc)
687 */
688 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
689 WORD32 u4_blk_id;
690 WORD32 i4_offset_x, i4_offset_y;
691 WORD16 *pi2_dc_src;
692 /* Increment for dc block */
693 WORD32 i4_dc_inc;
694
695 enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
696 buffer_container_t s_src = ps_src[0];
697 buffer_container_t s_pred = ps_pred[0];
698 buffer_container_t s_recon = ps_recon[0];
699 buffer_container_t s_res = ps_res[0];
700 buffer_container_t s_res_pred = ps_res_pred[0];
701
702 WORD16 i2_zero = 0;
703 const UWORD16 *pu2_iscale_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
704 const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
705 UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
706 UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(0, u1_res_accumulate);
707
708 /*
709 * Lets do the inverse transform for dc coeffs in chroma
710 */
711 if(u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
712 {
713 UWORD32 cntr, u4_dc_cntrl;
714 /* Do inv hadamard for u an v block */
715
716 ps_enc_loop_fxns->pf_ihadamard_scaling_2x2_uv(s_src.pv_data, s_src.pv_data, pu2_iscale_mat,
717 pu2_weigh_mat, u4_qp_div_6, NULL);
718 /*
719 * Update the cntrl flag
720 * Flag is updated as follows bits 15-11 -> u block dc bits
721 */
722 u4_dc_cntrl = 0;
723 for(cntr = 0; cntr < 8; cntr++)
724 {
725 u4_dc_cntrl |= ((((WORD16 *) ps_src->pv_data)[cntr] != 0) << (15 - cntr));
726 }
727
728 /* Mark dc bits as 1 if corresponding ac bit is 0 */
729 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
730 /* Combine both ac and dc bits */
731 u4_cntrl =
732 (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA) | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
733
734 /* Since we populated the dc coffs, we have to read them from there */
735 pi2_dc_src = ((WORD16 *) ps_src->pv_data);
736 i4_dc_inc = 1;
737 }
738 else
739 {
740 u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
741 pi2_dc_src = &i2_zero;
742 i4_dc_inc = 0;
743 }
744
745 /* Get the block bits */
746 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
747 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
748 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
749
750 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
751
752 while(u4_blk_id < 8)
753 {
754 WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
755
756 /* Tx blk coeffs are stored blk by blk */
757 /* Hence, in order to access rows of each Tx blk, one needs to stride of
758 * TxxSize */
759 s_src.i4_data_stride = 4;
760 s_src.pv_data = pi2_dc_src + dc_src_offset;
761
762 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
763
764 s_pred.pv_data =
765 ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
766 s_recon.pv_data =
767 ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
768 s_res.pv_data =
769 ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
770 s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
771 i4_offset_y * ps_res_pred->i4_data_stride;
772
773 ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4_dc[u1_iq_it_recon_fxn_idx](
774 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants, NULL,
775 s_src.pv_data, 0, u1_res_accumulate);
776
777 /* Get next DC block to process */
778 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
779 }
780
781 /* now process ac/mixed blocks */
782 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
783 while(u4_blk_id < 8)
784 {
785 WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
786
787 /* Tx blk coeffs are stored blk by blk */
788 /* Hence, in order to access rows of each Tx blk, one needs to stride of
789 * TxxSize */
790 s_src.i4_data_stride = 4;
791 /* The AC blocks starts from 2nd row */
792 s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
793
794 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
795
796 s_pred.pv_data =
797 ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
798 s_recon.pv_data =
799 ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
800 s_res.pv_data =
801 ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
802 s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
803 i4_offset_y * ps_res_pred->i4_data_stride;
804
805 ps_enc_loop_fxns->apf_iquant_itrans_recon_chroma_4x4[u1_iq_it_recon_fxn_idx](
806 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants,
807 (WORD16 *) pi4_tmp, pi2_dc_src + dc_src_offset, 0, u1_res_accumulate);
808
809 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
810 }
811
812 /* Now process empty blocks */
813 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
814
815 while(u4_blk_id < 8)
816 {
817 WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
818
819 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
820
821 /* Tx blk coeffs are stored blk by blk */
822 /* Hence, in order to access rows of each Tx blk, one needs to stride of
823 * TxxSize */
824 s_src.i4_data_stride = 4;
825 /* The AC blocks starts from 2nd row */
826 s_src.pv_data = ((WORD16 *) ps_src->pv_data) + (u4_blk_id + 1) * ps_src->i4_data_stride;
827
828 s_pred.pv_data =
829 ((UWORD8 *) ps_pred->pv_data) + i4_offset_x + i4_offset_y * ps_pred->i4_data_stride;
830 s_recon.pv_data =
831 ((UWORD8 *) ps_recon->pv_data) + i4_offset_x + i4_offset_y * ps_recon->i4_data_stride;
832 s_res.pv_data =
833 ((WORD16 *) ps_res->pv_data) + i4_offset_x + i4_offset_y * ps_res->i4_data_stride;
834 s_res_pred.pv_data = ((WORD16 *) ps_res_pred->pv_data) + i4_offset_x +
835 i4_offset_y * ps_res_pred->i4_data_stride;
836
837 ps_enc_loop_fxns->pf_chroma_zcbf_iquant_itrans_recon_4x4(
838 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, ps_iq_it_res_rec_constants,
839 (WORD16 *) pi4_tmp, pi2_dc_src + dc_src_offset, 0, u1_res_accumulate);
840
841 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
842 }
843 }
844
845 /**
846 ******************************************************************************
847 *
848 * @brief This function packs residue of an i16x16 luma mb for entropy coding
849 *
850 * @par Description
851 * An i16 macro block contains two classes of units, dc 4x4 block and
852 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and
853 * the 16 ac blocks are sent next in scan order. Each and every block is
854 * represented by 3 parameters (nnz, significant coefficient map and the
855 * residue coefficients itself). If a 4x4 unit does not have any coefficients
856 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
857 * sent in scan order.
858 *
859 * The first byte of each block will be nnz of the block, if it is non zero,
860 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
861 * This is repeated for 1 dc + 16 ac blocks.
862 *
863 * @param[in] pi2_res_mb
864 * pointer to residue mb
865 *
866 * @param[in, out] pv_mb_coeff_data
867 * buffer pointing to packed residue coefficients
868 *
869 * @param[in] u4_res_strd
870 * residual block stride
871 *
872 * @param[out] u1_cbp_l
873 * coded block pattern luma
874 *
875 * @param[in] pu1_nnz
876 * number of non zero coefficients in each 4x4 unit
877 *
878 * @param[out]
879 * Control signal for inverse transform of 16x16 blocks
880 *
881 * @return none
882 *
883 * @ remarks
884 *
885 ******************************************************************************
886 */
isvce_pack_l_mb_i16(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 * pu4_cntrl)887 void isvce_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, WORD32 i4_res_strd,
888 UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, UWORD32 *pu4_cntrl)
889 {
890 /* pointer to packed sub block buffer space */
891 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
892
893 /* no of non zero coefficients in the current sub block */
894 UWORD32 u4_nnz_cnt;
895
896 /* significant coefficient map */
897 UWORD32 u4_s_map;
898
899 /* pointer to scanning matrix */
900 const UWORD8 *pu1_scan_order;
901
902 /* number of non zeros in sub block */
903 UWORD32 u4_nnz;
904
905 /* coeff scan order */
906 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
907
908 /* temp var */
909 UWORD32 coeff_cnt, mask, b4, u4_cntrl = 0;
910
911 /*DC and AC coeff pointers*/
912 WORD16 *pi2_res_mb_ac, *pi2_res_mb_dc;
913
914 /********************************************************/
915 /* pack dc coeff data for entropy coding */
916 /********************************************************/
917
918 pi2_res_mb_dc = pi2_res_mb;
919 pu1_scan_order = gu1_luma_scan_order_dc;
920
921 u4_nnz = *pu1_nnz;
922 u4_cntrl = 0;
923
924 /* write number of non zero coefficients */
925 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
926
927 if(u4_nnz)
928 {
929 for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
930 {
931 if(pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
932 {
933 /* write residue */
934 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] =
935 pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
936 u4_s_map |= mask;
937 }
938 mask <<= 1;
939 }
940 /* write significant coeff map */
941 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
942 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
943
944 u4_cntrl = 0x00008000; // Set DC bit in ctrl code
945 }
946 else
947 {
948 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
949 }
950
951 /********************************************************/
952 /* pack ac coeff data for entropy coding */
953 /********************************************************/
954
955 pu1_nnz++;
956 pu1_scan_order = gu1_luma_scan_order;
957 pi2_res_mb += i4_res_strd; /*Move to AC block*/
958
959 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
960
961 for(b4 = 0; b4 < 16; b4++)
962 {
963 ps_mb_coeff_data = (*pv_mb_coeff_data);
964
965 u4_nnz = pu1_nnz[u1_scan_order[b4]];
966
967 /* Jump according to the scan order */
968 pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
969
970 /*
971 * Since this is a i16x16 block, we should not count dc coeff on indi
972 * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
973 * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
974 * here
975 */
976 u4_nnz -= (pi2_res_mb_ac[0] != 0);
977
978 /* write number of non zero coefficients */
979 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
980
981 if(u4_nnz)
982 {
983 for(u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz;
984 coeff_cnt++)
985 {
986 if(pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
987 {
988 /* write residue */
989 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] =
990 pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
991 u4_s_map |= mask;
992 }
993 mask <<= 1;
994 }
995 /* write significant coeff map */
996 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
997 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
998 *u1_cbp_l = 15;
999
1000 u4_cntrl |= (1 << (31 - u1_scan_order[b4]));
1001 }
1002 else
1003 {
1004 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1005 }
1006 }
1007
1008 if(!(*u1_cbp_l))
1009 {
1010 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
1011 }
1012
1013 /* Store the cntrl signal */
1014 (*pu4_cntrl) = u4_cntrl;
1015 return;
1016 }
1017
1018 /**
1019 ******************************************************************************
1020 *
1021 * @brief This function packs residue of an p16x16 luma mb for entropy coding
1022 *
1023 * @par Description
1024 * A p16x16 macro block contains two classes of units 16 4x4 ac blocks.
1025 * while packing the mb, the dc block is sent first, and
1026 * the 16 ac blocks are sent next in scan order. Each and every block is
1027 * represented by 3 parameters (nnz, significant coefficient map and the
1028 * residue coefficients itself). If a 4x4 unit does not have any coefficients
1029 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
1030 * sent in scan order.
1031 *
1032 * The first byte of each block will be nnz of the block, if it is non zero,
1033 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
1034 * This is repeated for 1 dc + 16 ac blocks.
1035 *
1036 * @param[in] pi2_res_mb
1037 * pointer to residue mb
1038 *
1039 * @param[in, out] pv_mb_coeff_data
1040 * buffer pointing to packed residue coefficients
1041 *
1042 * @param[in] i4_res_strd
1043 * residual block stride
1044 *
1045 * @param[out] u1_cbp_l
1046 * coded block pattern luma
1047 *
1048 * @param[in] pu1_nnz
1049 * number of non zero coefficients in each 4x4 unit
1050 *
1051 * @param[out] pu4_cntrl
1052 * Control signal for inverse transform
1053 *
1054 * @return none
1055 *
1056 * @remarks Killing coffs not yet coded
1057 *
1058 ******************************************************************************
1059 */
isvce_pack_l_mb(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_l,UWORD8 * pu1_nnz,UWORD32 u4_thres_resi,UWORD32 * pu4_cntrl)1060 void isvce_pack_l_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, WORD32 i4_res_strd,
1061 UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz, UWORD32 u4_thres_resi, UWORD32 *pu4_cntrl)
1062 {
1063 /* pointer to packed sub block buffer space */
1064 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
1065
1066 /* no of non zero coefficients in the current sub block */
1067 UWORD32 u4_nnz_cnt;
1068
1069 /* significant coefficient map */
1070 UWORD32 u4_s_map;
1071
1072 /* pointer to scanning matrix */
1073 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1074
1075 /* number of non zeros in sub block */
1076 UWORD32 u4_nnz;
1077
1078 /* pointer to residual sub block */
1079 WORD16 *pi2_res_sb;
1080
1081 /* coeff scan order */
1082 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
1083
1084 /* coeff cost */
1085 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
1086
1087 /* temp var */
1088 UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
1089
1090 /* temp var */
1091 WORD32 i4_res_val, i4_run = -1, dcac_block;
1092
1093 /* When Hadamard transform is disabled, first row values are dont care, ignore
1094 * them */
1095 pi2_res_mb += i4_res_strd;
1096
1097 /* When Hadamard transform is disabled, first unit value is dont care, ignore
1098 * this */
1099 pu1_nnz++;
1100
1101 ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1102
1103 /********************************************************/
1104 /* pack coeff data for entropy coding */
1105 /********************************************************/
1106
1107 for(b4 = 0; b4 < 16; b4++)
1108 {
1109 ps_mb_coeff_data = (*pv_mb_coeff_data);
1110
1111 b8 = b4 >> 2;
1112
1113 u4_nnz = pu1_nnz[u1_scan_order[b4]];
1114
1115 /* Jump according to the scan order */
1116 pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
1117
1118 /* write number of non zero coefficients */
1119 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1120
1121 if(u4_nnz)
1122 {
1123 for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz;
1124 coeff_cnt++)
1125 {
1126 /* number of runs of zero before, this is used to compute coeff cost */
1127 i4_run++;
1128
1129 i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1130
1131 if(i4_res_val)
1132 {
1133 /* write residue */
1134 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
1135 u4_s_map |= mask;
1136
1137 if(u4_thres_resi)
1138 {
1139 /* compute coeff cost */
1140 if(i4_res_val == 1 || i4_res_val == -1)
1141 {
1142 if(i4_run < 6) u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
1143 }
1144 else
1145 u4_b8_coeff_cost += 9;
1146
1147 i4_run = -1;
1148 }
1149 }
1150
1151 mask <<= 1;
1152 }
1153
1154 /* write significant coeff map */
1155 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1156 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1157
1158 /* cbp */
1159 *u1_cbp_l |= (1 << b8);
1160
1161 /* Cntrl map for inverse transform computation
1162 *
1163 * If coeff_cnt is zero, it means that only nonzero was a dc coeff
1164 * Hence we have to set the 16 - u1_scan_order[b4]) position instead
1165 * of 31 - u1_scan_order[b4]
1166 */
1167 dcac_block = (coeff_cnt == 0) ? 16 : 31;
1168 u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4]));
1169 }
1170 else
1171 {
1172 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1173 }
1174
1175 /* Decide if the 8x8 unit has to be sent for entropy coding? */
1176 if((b4 + 1) % 4 == 0)
1177 {
1178 if(u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
1179 (*u1_cbp_l & (1 << b8)))
1180 {
1181 /*
1182 * When we want to reset the full 8x8 block, we have to reset
1183 * both the dc and ac coeff bits hence we have the symmetric
1184 * arrangement of bits
1185 */
1186 const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
1187
1188 /* restore cbp */
1189 *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
1190
1191 /* correct cntrl flag */
1192 u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
1193
1194 /* correct nnz */
1195 pu1_nnz[u1_scan_order[b4 - 3]] = 0;
1196 pu1_nnz[u1_scan_order[b4 - 2]] = 0;
1197 pu1_nnz[u1_scan_order[b4 - 1]] = 0;
1198 pu1_nnz[u1_scan_order[b4]] = 0;
1199
1200 /* reset blk cost */
1201 u4_b8_coeff_cost = 0;
1202 }
1203
1204 if(!(*u1_cbp_l & (1 << b8)))
1205 {
1206 (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
1207 }
1208
1209 u4_mb_coeff_cost += u4_b8_coeff_cost;
1210
1211 u4_b8_coeff_cost = 0;
1212 i4_run = -1;
1213 ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1214 }
1215 }
1216
1217 if(u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD) && (*u1_cbp_l))
1218 {
1219 (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
1220 *u1_cbp_l = 0;
1221 u4_cntrl = 0;
1222 memset(pu1_nnz, 0, 16);
1223 }
1224
1225 (*pu4_cntrl) = u4_cntrl;
1226
1227 return;
1228 }
1229
1230 /**
1231 ******************************************************************************
1232 *
1233 * @brief This function packs residue of an i8x8 chroma mb for entropy coding
1234 *
1235 * @par Description
1236 * An i8 chroma macro block contains two classes of units, dc 2x2 block and
1237 * 4x4 ac blocks. while packing the mb, the dc block is sent first, and
1238 * the 4 ac blocks are sent next in scan order. Each and every block is
1239 * represented by 3 parameters (nnz, significant coefficient map and the
1240 * residue coefficients itself). If a 4x4 unit does not have any coefficients
1241 * then only nnz is sent. Inside a 4x4 block the individual coefficients are
1242 * sent in scan order.
1243 *
1244 * The first byte of each block will be nnz of the block, if it is non zero,
1245 * a 2 byte significance map is sent. This is followed by nonzero coefficients.
1246 * This is repeated for 1 dc + 4 ac blocks.
1247 *
1248 * @param[in] pi2_res_mb
1249 * pointer to residue mb
1250 *
1251 * @param[in, out] pv_mb_coeff_data
1252 * buffer pointing to packed residue coefficients
1253 *
1254 * @param[in] u4_res_strd
1255 * residual block stride
1256 *
1257 * @param[out] u1_cbp_c
1258 * coded block pattern chroma
1259 *
1260 * @param[in] pu1_nnz
1261 * number of non zero coefficients in each 4x4 unit
1262 *
1263 * @param[out] pu1_nnz
1264 * Control signal for inverse transform
1265 *
1266 * @param[in] u4_swap_uv
1267 * Swaps the order of U and V planes in entropy bitstream
1268 *
1269 * @return none
1270 *
1271 * @ remarks
1272 *
1273 ******************************************************************************
1274 */
isvce_pack_c_mb(WORD16 * pi2_res_mb,void ** pv_mb_coeff_data,WORD32 i4_res_strd,UWORD8 * u1_cbp_c,UWORD8 * pu1_nnz,UWORD32 u4_thres_resi,UWORD32 * pu4_cntrl,UWORD32 u4_swap_uv)1275 void isvce_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data, WORD32 i4_res_strd,
1276 UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz, UWORD32 u4_thres_resi, UWORD32 *pu4_cntrl,
1277 UWORD32 u4_swap_uv)
1278 {
1279 /* pointer to packed sub block buffer space */
1280 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
1281 tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
1282
1283 /* nnz pointer */
1284 UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
1285
1286 /* nnz counter */
1287 UWORD32 u4_nnz_cnt;
1288
1289 /* significant coefficient map */
1290 UWORD32 u4_s_map;
1291
1292 /* pointer to scanning matrix */
1293 const UWORD8 *pu1_scan_order;
1294
1295 /* no of non zero coefficients in the current sub block */
1296 UWORD32 u4_nnz;
1297
1298 /* pointer to residual sub block, res val */
1299 WORD16 *pi2_res_sb, i2_res_val;
1300
1301 /* temp var */
1302 UWORD32 coeff_cnt, mask, b4, plane;
1303
1304 /* temp var */
1305 UWORD32 u4_coeff_cost;
1306 WORD32 i4_run;
1307
1308 /* coeff cost */
1309 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
1310
1311 /* pointer to packed buffer space */
1312 UWORD32 *pu4_mb_coeff_data = NULL;
1313
1314 /* ac coded block pattern */
1315 UWORD8 u1_cbp_ac;
1316
1317 /* Variable to store the current bit pos in cntrl variable*/
1318 UWORD32 cntrl_pos = 0;
1319
1320 /********************************************************/
1321 /* pack dc coeff data for entropy coding */
1322 /********************************************************/
1323 pu1_scan_order = gu1_chroma_scan_order_dc;
1324 pi2_res_sb = pi2_res_mb;
1325 pu1_nnz_dc = pu1_nnz;
1326 (*pu4_cntrl) = 0;
1327 cntrl_pos = 15;
1328 ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
1329
1330 /* Color space conversion between SP_UV and SP_VU
1331 * We always assume SP_UV for all the processing
1332 * Hence to get proper stream output we need to swap U and V channels here
1333 *
1334 * For that there are two paths we need to look for
1335 * One is the path to bitstream , these variables should have the proper input
1336 * configured UV or VU
1337 * For the other path the inverse transform variables should have what ever
1338 * ordering the input had
1339 */
1340
1341 if(u4_swap_uv)
1342 {
1343 pu1_nnz_dc += 5; /* Move to NNZ of V planve */
1344 pi2_res_sb += 4; /* Move to DC coff of V plane */
1345
1346 cntrl_pos = 14; /* Control bit for V plane */
1347 }
1348
1349 for(plane = 0; plane < 2; plane++)
1350 {
1351 ps_mb_coeff_data = (*pv_mb_coeff_data);
1352
1353 u4_nnz = *pu1_nnz_dc;
1354 /* write number of non zero coefficients U/V */
1355 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1356
1357 if(u4_nnz)
1358 {
1359 for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz;
1360 coeff_cnt++)
1361 {
1362 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1363 if(i2_res_val)
1364 {
1365 /* write residue U/V */
1366 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1367 u4_s_map |= mask;
1368 }
1369 mask <<= 1;
1370 }
1371 /* write significant coeff map U/V */
1372 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1373 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1374 *u1_cbp_c = 1;
1375
1376 (*pu4_cntrl) |= (1 << cntrl_pos);
1377 }
1378 else
1379 {
1380 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1381 }
1382
1383 if(u4_swap_uv)
1384 {
1385 cntrl_pos++; /* Control bit for U plane */
1386 pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
1387 pi2_res_sb -= 4; /* Move to DC coff of U plane */
1388 }
1389 else
1390 {
1391 cntrl_pos--; /* Control bit for U plane */
1392 pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
1393 pi2_res_sb += 4; /* Move to DC coff of V plane */
1394 }
1395 }
1396
1397 /********************************************************/
1398 /* pack ac coeff data for entropy coding */
1399 /********************************************************/
1400
1401 pu1_scan_order = gu1_chroma_scan_order;
1402 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
1403
1404 if(u4_swap_uv)
1405 {
1406 pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */
1407 cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */
1408 pu1_nnz_ac = pu1_nnz + 6; /*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1409 }
1410 else
1411 {
1412 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
1413 cntrl_pos = 31;
1414 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */
1415 }
1416
1417 for(plane = 0; plane < 2; plane++)
1418 {
1419 pu4_mb_coeff_data = (*pv_mb_coeff_data);
1420
1421 u4_coeff_cost = 0;
1422 i4_run = -1;
1423
1424 /* get the current cbp, so that it automatically
1425 * gets reverted in case of zero ac values */
1426 u1_cbp_ac = *u1_cbp_c;
1427
1428 for(b4 = 0; b4 < 4; b4++)
1429 {
1430 ps_mb_coeff_data = (*pv_mb_coeff_data);
1431
1432 u4_nnz = *pu1_nnz_ac;
1433
1434 /*
1435 * We are scanning only ac coeffs, but the nnz is for the
1436 * complete 4x4 block. Hence we have to discount the nnz contributed
1437 * by the dc coefficient
1438 */
1439 u4_nnz -= (pi2_res_sb[0] != 0);
1440
1441 /* write number of non zero coefficients U/V */
1442 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1443
1444 if(u4_nnz)
1445 {
1446 for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz;
1447 coeff_cnt++)
1448 {
1449 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1450
1451 i4_run++;
1452
1453 if(i2_res_val)
1454 {
1455 /* write residue U/V */
1456 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1457 u4_s_map |= mask;
1458
1459 if(u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
1460 {
1461 /* compute coeff cost */
1462 if(i2_res_val == 1 || i2_res_val == -1)
1463 {
1464 if(i4_run < 6) u4_coeff_cost += pu1_coeff_cost[i4_run];
1465 }
1466 else
1467 u4_coeff_cost += 9;
1468
1469 i4_run = -1;
1470 }
1471 }
1472 mask <<= 1;
1473 }
1474
1475 /* write significant coeff map U/V */
1476 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1477 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1478 u1_cbp_ac = 2;
1479
1480 (*pu4_cntrl) |= 1 << cntrl_pos;
1481 }
1482 else
1483 {
1484 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1485 }
1486
1487 pu1_nnz_ac++;
1488 pi2_res_sb += i4_res_strd;
1489 cntrl_pos--;
1490 }
1491
1492 /* reset block */
1493 if(u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
1494 {
1495 pu4_mb_coeff_data[0] = 0;
1496 pu4_mb_coeff_data[1] = 0;
1497 pu4_mb_coeff_data[2] = 0;
1498 pu4_mb_coeff_data[3] = 0;
1499 (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
1500
1501 /* Generate the control signal */
1502 /* Zero out the current plane's AC coefficients */
1503 (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
1504
1505 /* Similarly do for the NNZ also */
1506 *(pu1_nnz_ac - 4) = 0;
1507 *(pu1_nnz_ac - 3) = 0;
1508 *(pu1_nnz_ac - 2) = 0;
1509 *(pu1_nnz_ac - 1) = 0;
1510 }
1511 else
1512 {
1513 *u1_cbp_c = u1_cbp_ac;
1514 }
1515
1516 if(u4_swap_uv)
1517 {
1518 pi2_res_sb =
1519 pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */
1520 cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */
1521 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1522
1523 pu1_nnz_ac = pu1_nnz + 1;
1524 }
1525 else
1526 pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
1527 }
1528
1529 /* restore the ptr basing on cbp */
1530 if(*u1_cbp_c == 0)
1531 {
1532 (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
1533 }
1534 else if(*u1_cbp_c == 1)
1535 {
1536 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
1537 }
1538
1539 return;
1540 }
1541
1542 /**
1543 *******************************************************************************
1544 *
1545 * @brief performs luma core coding when intra mode is i16x16
1546 *
1547 * @par Description:
1548 * If the current mb is to be coded as intra of mb type i16x16, the mb is first
1549 * predicted using one of i16x16 prediction filters, basing on the intra mode
1550 * chosen. Then, error is computed between the input blk and the estimated blk.
1551 * This error is transformed (hierarchical transform i.e., dct followed by hada-
1552 * -mard), quantized. The quantized coefficients are packed in scan order for
1553 * entropy coding.
1554 *
1555 * @param[in] ps_proc_ctxt
1556 * pointer to the current macro block context
1557 *
1558 * @returns u1_cbp_l
1559 * coded block pattern luma
1560 *
1561 * @remarks none
1562 *
1563 *******************************************************************************
1564 */
1565
isvce_code_luma_intra_macroblock_16x16(isvce_process_ctxt_t * ps_proc)1566 UWORD8 isvce_code_luma_intra_macroblock_16x16(isvce_process_ctxt_t *ps_proc)
1567 {
1568 buffer_container_t s_src;
1569 buffer_container_t s_pred;
1570 buffer_container_t s_recon;
1571 buffer_container_t s_res;
1572 buffer_container_t s_quant_coeffs;
1573
1574 /*Cntrol signal for itrans*/
1575 UWORD32 u4_cntrl;
1576
1577 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1578 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1579 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1580 inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
1581 iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
1582 .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
1583 .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
1584 .u4_qp_div_6 = ps_qp_params->u1_qp_div};
1585
1586 UWORD8 *pu1_pred_mb = NULL;
1587 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1588 WORD32 i4_pred_stride = ps_proc->i4_pred_strd;
1589 WORD32 i4_res_strd = ps_proc->i4_res_strd;
1590 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1591 UWORD32 au4_nnz[5] = {0};
1592 UWORD8 u1_cbp_l = 0;
1593 UWORD8 *pu1_nnz = (UWORD8 *) au4_nnz;
1594 /* pointer to packed mb coeff data */
1595 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1596
1597 if(u1_intra_mode == PLANE_I16x16)
1598 {
1599 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
1600 }
1601 else
1602 {
1603 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
1604 }
1605
1606 s_src = ps_proc->s_src_buf_props.as_component_bufs[Y];
1607 s_recon = ps_proc->s_rec_buf_props.as_component_bufs[Y];
1608 s_pred.pv_data = pu1_pred_mb;
1609 s_pred.i4_data_stride = i4_pred_stride;
1610 s_quant_coeffs.pv_data = pi2_res_mb;
1611 s_quant_coeffs.i4_data_stride = i4_res_strd;
1612
1613 s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id]
1614 .as_component_bufs[Y];
1615 s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE +
1616 ps_proc->i4_mb_y * MB_SIZE * s_res.i4_data_stride;
1617
1618 /********************************************************/
1619 /* error estimation, */
1620 /* transform */
1621 /* quantization */
1622 /********************************************************/
1623 isvce_luma_16x16_resi_trans_dctrans_quant(
1624 &s_src, &s_pred, &s_quant_coeffs, &ps_proc->ps_mb_res_buf->as_component_bufs[Y],
1625 ps_isa_dependent_fxns, ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, pu1_nnz,
1626 ps_qp_params->u1_qbits, ps_qp_params->u4_dead_zone, ENABLE_DC_TRANSFORM,
1627 ps_proc->ps_mb_info->u1_residual_prediction_flag);
1628
1629 /********************************************************/
1630 /* pack coeff data for entropy coding */
1631 /********************************************************/
1632 isvce_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, pu1_nnz, &u4_cntrl);
1633
1634 /********************************************************/
1635 /* ierror estimation, */
1636 /* itransform */
1637 /* iquantization */
1638 /********************************************************/
1639 /*
1640 *if refernce frame is not to be computed
1641 *we only need the right and bottom border 4x4 blocks to predict next intra
1642 *blocks, hence only compute them
1643 */
1644 if(!ps_proc->u4_compute_recon)
1645 {
1646 u4_cntrl &= 0x111F8000;
1647 }
1648
1649 if(u4_cntrl)
1650 {
1651 isvce_luma_16x16_idctrans_iquant_itrans_recon(
1652 &s_quant_coeffs, &s_pred, &s_recon, &s_res,
1653 &ps_proc->ps_mb_res_buf->as_component_bufs[Y], &s_iq_it_res_rec_constants,
1654 ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, ENABLE_DC_TRANSFORM, 0);
1655 }
1656 else
1657 {
1658 ps_inter_pred_fxns->pf_inter_pred_luma_copy(pu1_pred_mb, (UWORD8 *) s_recon.pv_data,
1659 i4_pred_stride, s_recon.i4_data_stride, MB_SIZE,
1660 MB_SIZE, NULL, 0);
1661 }
1662
1663 return (u1_cbp_l);
1664 }
1665
1666 /**
1667 *******************************************************************************
1668 *
1669 * @brief performs luma core coding when intra mode is i4x4
1670 *
1671 * @par Description:
1672 * If the current mb is to be coded as intra of mb type i4x4, the mb is first
1673 * predicted using one of i4x4 prediction filters, basing on the intra mode
1674 * chosen. Then, error is computed between the input blk and the estimated blk.
1675 * This error is dct transformed and quantized. The quantized coefficients are
1676 * packed in scan order for entropy coding.
1677 *
1678 * @param[in] ps_proc
1679 * pointer to the current macro block context
1680 *
1681 * @returns u1_cbp_l
1682 * coded block pattern luma
1683 *
1684 * @remarks
1685 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan
1686 *order mentioned in h.264 specification
1687 *
1688 *******************************************************************************
1689 */
isvce_code_luma_intra_macroblock_4x4(isvce_process_ctxt_t * ps_proc)1690 UWORD8 isvce_code_luma_intra_macroblock_4x4(isvce_process_ctxt_t *ps_proc)
1691 {
1692 buffer_container_t s_src;
1693 buffer_container_t s_pred;
1694 buffer_container_t s_recon;
1695 buffer_container_t s_res;
1696 buffer_container_t s_res_pred;
1697 buffer_container_t s_quant_coeffs;
1698
1699 /* pointer to neighbors: left, top, top-left */
1700 UWORD8 *pu1_mb_a;
1701 UWORD8 *pu1_mb_b;
1702 UWORD8 *pu1_mb_c;
1703 UWORD8 *pu1_mb_d;
1704 WORD32 i4_ngbr_avbl;
1705 UWORD8 u1_nnz;
1706 UWORD32 u4_nnz_cnt;
1707 /* significant coefficient map */
1708 UWORD32 u4_s_map;
1709 /*Dummy variable for 4x4 trans fucntion*/
1710 WORD16 i2_dc_dummy;
1711 UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
1712
1713 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1714 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1715 /* pointer to packed mb coeff data */
1716 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1717 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1718 enc_loop_fxns_t *ps_enc_loop_fxns = &ps_isa_dependent_fxns->s_enc_loop_fxns;
1719 inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
1720 resi_trans_quant_constants_t s_resi_trans_quant_constants = {
1721 .pu2_scale_matrix = ps_qp_params->pu2_scale_mat,
1722 .pu2_threshold_matrix = ps_qp_params->pu2_thres_mat,
1723 .u4_qbits = ps_qp_params->u1_qbits,
1724 .u4_round_factor = ps_qp_params->u4_dead_zone};
1725 iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
1726 .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
1727 .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
1728 .u4_qp_div_6 = ps_qp_params->u1_qp_div};
1729
1730 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1731 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1732 WORD32 i4_pred_stride = ps_proc->i4_pred_strd;
1733 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1734 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1735 UWORD8 u1_cbp_l = 0;
1736 /* pointer to packed mb coeff data */
1737 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1738 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1739 UWORD8 u1_resi_trans_fxn_idx = isvc_get_resi_trans_quant_variant_idx(0);
1740 UWORD8 u1_iq_it_recon_fxn_idx = isvc_get_iq_it_recon_variant_idx(1, 0);
1741
1742 s_src = ps_proc->s_src_buf_props.as_component_bufs[Y];
1743 s_recon = ps_proc->s_rec_buf_props.as_component_bufs[Y];
1744 s_pred.pv_data = pu1_pred_mb;
1745 s_pred.i4_data_stride = i4_pred_stride;
1746 s_quant_coeffs.pv_data = pi2_res_mb;
1747 s_quant_coeffs.i4_data_stride = 4;
1748
1749 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1750 for(b8 = 0; b8 < 4; b8++)
1751 {
1752 u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
1753 u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
1754
1755 /* if in case cbp for the 8x8 block is zero, send no residue */
1756 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1757
1758 for(b4 = 0; b4 < 4; b4++)
1759 {
1760 /* index of pel in MB */
1761 u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
1762 u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
1763
1764 /* Initialize source and reference pointers */
1765 s_src = ps_proc->s_src_buf_props.as_component_bufs[Y];
1766 s_recon = ps_proc->s_rec_buf_props.as_component_bufs[Y];
1767 s_src.pv_data = ((UWORD8 *) s_src.pv_data) + u1_pix_x + u1_pix_y * s_src.i4_data_stride;
1768 s_recon.pv_data =
1769 ((UWORD8 *) s_recon.pv_data) + u1_pix_x + u1_pix_y * s_recon.i4_data_stride;
1770
1771 s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id]
1772 .as_component_bufs[Y];
1773 s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE +
1774 ps_proc->i4_mb_y * MB_SIZE * s_res.i4_data_stride;
1775 s_res.pv_data = ((WORD16 *) s_res.pv_data) + u1_pix_x + u1_pix_y * s_res.i4_data_stride;
1776
1777 s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
1778 s_res_pred.pv_data =
1779 ((WORD16 *) s_res_pred.pv_data) + u1_pix_x + u1_pix_y * s_res_pred.i4_data_stride;
1780
1781 /* pointer to left of ref macro block */
1782 pu1_mb_a = ((UWORD8 *) s_recon.pv_data) - 1;
1783 /* pointer to top of ref macro block */
1784 pu1_mb_b = ((UWORD8 *) s_recon.pv_data) - s_recon.i4_data_stride;
1785 /* pointer to topright of ref macro block */
1786 pu1_mb_c = pu1_mb_b + 4;
1787 /* pointer to topleft macro block */
1788 pu1_mb_d = pu1_mb_b - 1;
1789
1790 /* compute neighbor availability */
1791 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1792
1793 /* sub block intra mode */
1794 u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
1795
1796 /********************************************************/
1797 /* gather prediction pels from neighbors for prediction */
1798 /********************************************************/
1799 /* left pels */
1800 if(i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
1801 {
1802 for(i = 0; i < 4; i++)
1803 pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * s_recon.i4_data_stride];
1804 }
1805 else
1806 {
1807 memset(pu1_ngbr_pels_i4, 0, 4);
1808 }
1809
1810 /* top pels */
1811 if(i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1812 {
1813 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1814 }
1815 else
1816 {
1817 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
1818 }
1819 /* top left pels */
1820 if(i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
1821 {
1822 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1823 }
1824 else
1825 {
1826 pu1_ngbr_pels_i4[4] = 0;
1827 }
1828 /* top right pels */
1829 if(i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
1830 {
1831 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1832 }
1833 else if(i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1834 {
1835 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1836 }
1837
1838 /********************************************************/
1839 /* prediction */
1840 /********************************************************/
1841 (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4, pu1_pred_mb, 0,
1842 i4_pred_stride, i4_ngbr_avbl);
1843
1844 /********************************************************/
1845 /* error estimation, */
1846 /* transform */
1847 /* quantization */
1848 /********************************************************/
1849 ps_enc_loop_fxns->apf_resi_trans_quant_4x4[u1_resi_trans_fxn_idx](
1850 &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, &s_resi_trans_quant_constants,
1851 &u1_nnz, &i2_dc_dummy, 0);
1852
1853 /********************************************************/
1854 /* pack coeff data for entropy coding */
1855 /********************************************************/
1856 ps_mb_coeff_data = *pv_mb_coeff_data;
1857
1858 /* write number of non zero coefficients */
1859 ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
1860
1861 if(u1_nnz)
1862 {
1863 for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz;
1864 coeff_cnt++)
1865 {
1866 if(pi2_res_mb[pu1_scan_order[coeff_cnt]])
1867 {
1868 /* write residue */
1869 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] =
1870 pi2_res_mb[pu1_scan_order[coeff_cnt]];
1871 u4_s_map |= mask;
1872 }
1873 mask <<= 1;
1874 }
1875 /* write significant coeff map */
1876 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1877
1878 /* update ptr to coeff data */
1879 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
1880
1881 /* cbp */
1882 u1_cbp_l |= (1 << b8);
1883 }
1884 else
1885 {
1886 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1887 }
1888
1889 /********************************************************/
1890 /* ierror estimation, */
1891 /* itransform */
1892 /* iquantization */
1893 /********************************************************/
1894 if(u1_nnz)
1895 {
1896 buffer_container_t s_src = s_quant_coeffs;
1897
1898 /* Tx blk coeffs are stored blk by blk */
1899 /* Hence, in order to access rows of each Tx blk, one needs to stride of
1900 * TxxSize */
1901 s_src.i4_data_stride = 4;
1902
1903 ps_enc_loop_fxns->apf_iquant_itrans_recon_4x4[u1_iq_it_recon_fxn_idx](
1904 &s_src, &s_pred, &s_res_pred, &s_res, &s_recon, &s_iq_it_res_rec_constants,
1905 (WORD16 *) ps_proc->pv_scratch_buff, s_src.pv_data, 0, 0);
1906 }
1907 else
1908 {
1909 ps_inter_pred_fxns->pf_inter_pred_luma_copy(
1910 (UWORD8 *) s_pred.pv_data, (UWORD8 *) s_recon.pv_data, s_pred.i4_data_stride,
1911 s_recon.i4_data_stride, BLK_SIZE, BLK_SIZE, NULL, 0);
1912 }
1913 }
1914
1915 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1916 if(!(u1_cbp_l & (1 << b8)))
1917 {
1918 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1919 }
1920 }
1921
1922 return (u1_cbp_l);
1923 }
1924
1925 /**
1926 *******************************************************************************
1927 *
1928 * @brief performs luma core coding when intra mode is i4x4
1929 *
1930 * @par Description:
1931 * If the current mb is to be coded as intra of mb type i4x4, the mb is first
1932 * predicted using one of i4x4 prediction filters, basing on the intra mode
1933 * chosen. Then, error is computed between the input blk and the estimated blk.
1934 * This error is dct transformed and quantized. The quantized coefficients are
1935 * packed in scan order for entropy coding.
1936 *
1937 * @param[in] ps_proc_ctxt
1938 * pointer to the current macro block context
1939 *
1940 * @returns u1_cbp_l
1941 * coded block pattern luma
1942 *
1943 * @remarks
1944 * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan
1945 *order mentioned in h.264 specification
1946 *
1947 *******************************************************************************
1948 */
isvce_code_luma_intra_macroblock_4x4_rdopt_on(isvce_process_ctxt_t * ps_proc)1949 UWORD8 isvce_code_luma_intra_macroblock_4x4_rdopt_on(isvce_process_ctxt_t *ps_proc)
1950 {
1951 /* pointer to packed mb coeff data */
1952 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1953
1954 UWORD32 u4_nnz_cnt;
1955 /* significant coefficient map */
1956 UWORD32 u4_s_map;
1957 UWORD32 b8, b4, coeff_cnt, mask;
1958
1959 isvce_codec_t *ps_codec = ps_proc->ps_codec;
1960 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1961 inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
1962
1963 UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
1964 UWORD8 *pu1_rec_mb = ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data);
1965 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1966 WORD32 i4_rec_strd = ps_proc->s_rec_buf_props.as_component_bufs[0].i4_data_stride;
1967 UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz_intra_4x4;
1968 UWORD8 u1_cbp_l = 0;
1969 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1970 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1971
1972 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1973 for(b8 = 0; b8 < 4; b8++)
1974 {
1975 /* if in case cbp for the 8x8 block is zero, send no residue */
1976 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1977
1978 for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1979 {
1980 /********************************************************/
1981 /* pack coeff data for entropy coding */
1982 /********************************************************/
1983 ps_mb_coeff_data = *pv_mb_coeff_data;
1984
1985 /* write number of non zero coefficients */
1986 ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
1987
1988 if(*pu1_nnz)
1989 {
1990 for(u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz;
1991 coeff_cnt++)
1992 {
1993 if(pi2_res_mb[pu1_scan_order[coeff_cnt]])
1994 {
1995 /* write residue */
1996 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] =
1997 pi2_res_mb[pu1_scan_order[coeff_cnt]];
1998 u4_s_map |= mask;
1999 }
2000 mask <<= 1;
2001 }
2002 /* write significant coeff map */
2003 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
2004
2005 /* update ptr to coeff data */
2006 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
2007
2008 /* cbp */
2009 u1_cbp_l |= (1 << b8);
2010 }
2011 else
2012 {
2013 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
2014 }
2015 }
2016
2017 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
2018 if(!(u1_cbp_l & (1 << b8)))
2019 {
2020 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
2021 }
2022 }
2023
2024 ps_inter_pred_fxns->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE,
2025 i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
2026
2027 return (u1_cbp_l);
2028 }
2029
2030 /**
2031 *******************************************************************************
2032 *
2033 * @brief performs chroma core coding for intra macro blocks
2034 *
2035 * @par Description:
2036 * If the current MB is to be intra coded with mb type chroma I8x8, the MB is
2037 * first predicted using intra 8x8 prediction filters. The predicted data is
2038 * compared with the input for error and the error is transformed. The DC
2039 * coefficients of each transformed sub blocks are further transformed using
2040 * Hadamard transform. The resulting coefficients are quantized, packed and sent
2041 * for entropy coding.
2042 *
2043 * @param[in] ps_proc_ctxt
2044 * pointer to the current macro block context
2045 *
2046 * @returns u1_cbp_c
2047 * coded block pattern chroma
2048 *
2049 * @remarks
2050 * The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
2051 * mentioned in h.264 specification
2052 *
2053 *******************************************************************************
2054 */
isvce_code_chroma_intra_macroblock_8x8(isvce_process_ctxt_t * ps_proc)2055 UWORD8 isvce_code_chroma_intra_macroblock_8x8(isvce_process_ctxt_t *ps_proc)
2056 {
2057 buffer_container_t s_src;
2058 buffer_container_t s_pred;
2059 buffer_container_t s_recon;
2060 buffer_container_t s_res;
2061 buffer_container_t s_res_pred;
2062 buffer_container_t s_quant_coeffs;
2063
2064 /* Control signal for inverse transform */
2065 UWORD32 u4_cntrl;
2066
2067 isvce_codec_t *ps_codec = ps_proc->ps_codec;
2068 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2069 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2070 iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
2071 .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
2072 .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
2073 .u4_qp_div_6 = ps_qp_params->u1_qp_div};
2074
2075 UWORD8 *pu1_pred_mb = NULL;
2076 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2077 WORD32 i4_pred_stride = ps_proc->i4_pred_strd;
2078 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2079 UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
2080 UWORD8 u1_cbp_c = 0;
2081 UWORD8 au1_nnz[2 * (NUM_4x4_IN_8x8 + 1)] = {0};
2082 /* pointer to packed mb coeff data */
2083 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2084 /* See if we need to swap U and V plances for entropy */
2085 UWORD32 u4_swap_uv = (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU);
2086
2087 if(PLANE_CH_I8x8 == u1_intra_mode)
2088 {
2089 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
2090 }
2091 else
2092 {
2093 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
2094 }
2095
2096 s_src = ps_proc->s_src_buf_props.as_component_bufs[UV];
2097 s_recon = ps_proc->s_rec_buf_props.as_component_bufs[UV];
2098 s_pred.pv_data = pu1_pred_mb;
2099 s_pred.i4_data_stride = i4_pred_stride;
2100 s_quant_coeffs.pv_data = pi2_res_mb;
2101 s_quant_coeffs.i4_data_stride = i4_res_strd;
2102
2103 s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id]
2104 .as_component_bufs[UV];
2105 s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE +
2106 ps_proc->i4_mb_y * (MB_SIZE / 2) * s_res.i4_data_stride;
2107
2108 s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[U];
2109
2110 /********************************************************/
2111 /* error estimation, */
2112 /* transform */
2113 /* quantization */
2114 /********************************************************/
2115 isvce_chroma_8x8_resi_trans_dctrans_quant(
2116 &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, ps_isa_dependent_fxns,
2117 ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, au1_nnz, ps_qp_params->u1_qbits,
2118 ps_qp_params->u4_dead_zone, 0);
2119
2120 /********************************************************/
2121 /* pack coeff data for entropy coding */
2122 /********************************************************/
2123 isvce_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, au1_nnz,
2124 ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2125
2126 /********************************************************/
2127 /* ierror estimation, */
2128 /* itransform */
2129 /* iquantization */
2130 /********************************************************/
2131 isvce_chroma_8x8_idctrans_iquant_itrans_recon(
2132 &s_quant_coeffs, &s_pred, &s_recon, &s_res, &s_res_pred, &s_iq_it_res_rec_constants,
2133 ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, 0);
2134
2135 memcpy(ps_proc->au1_chroma_nnz, au1_nnz, sizeof(ps_proc->au1_chroma_nnz));
2136
2137 return (u1_cbp_c);
2138 }
2139
2140 /**
2141 *******************************************************************************
2142 *
2143 * @brief performs luma core coding when mode is inter
2144 *
2145 * @par Description:
2146 * If the current mb is to be coded as inter the mb is predicted based on the
2147 * sub mb partitions and corresponding motion vectors generated by ME. Then,
2148 * error is computed between the input blk and the estimated blk. This error is
2149 * transformed, quantized. The quantized coefficients are packed in scan order
2150 * for entropy coding
2151 *
2152 * @param[in] ps_proc_ctxt
2153 * pointer to the current macro block context
2154 *
2155 * @returns u1_cbp_l
2156 * coded block pattern luma
2157 *
2158 * @remarks none
2159 *
2160 *******************************************************************************
2161 */
2162
isvce_code_luma_inter_macroblock_16x16(isvce_process_ctxt_t * ps_proc)2163 UWORD8 isvce_code_luma_inter_macroblock_16x16(isvce_process_ctxt_t *ps_proc)
2164 {
2165 buffer_container_t s_src;
2166 buffer_container_t s_pred;
2167 buffer_container_t s_recon;
2168 buffer_container_t s_res;
2169 buffer_container_t s_res_pred;
2170 buffer_container_t s_quant_coeffs;
2171
2172 /*Control signal of itrans*/
2173 UWORD32 u4_cntrl;
2174
2175 isvce_codec_t *ps_codec = ps_proc->ps_codec;
2176 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2177 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2178 iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
2179 .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
2180 .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
2181 .u4_qp_div_6 = ps_qp_params->u1_qp_div};
2182
2183 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
2184 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2185 UWORD8 u1_cbp_l = 0;
2186 UWORD8 *pu1_nnz = (UWORD8 *) ps_proc->au4_nnz;
2187 /* pointer to packed mb coeff data */
2188 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2189
2190 ps_proc->au4_nnz[0] = 0;
2191 ps_proc->au4_nnz[1] = 0;
2192 ps_proc->au4_nnz[2] = 0;
2193 ps_proc->au4_nnz[3] = 0;
2194 ps_proc->au4_nnz[4] = 0;
2195
2196 /********************************************************/
2197 /* prediction */
2198 /********************************************************/
2199 isvce_motion_comp_luma(ps_proc, &s_pred);
2200
2201 s_src = ps_proc->s_src_buf_props.as_component_bufs[0];
2202 s_recon = ps_proc->s_rec_buf_props.as_component_bufs[0];
2203 s_quant_coeffs.pv_data = pi2_res_mb;
2204 s_quant_coeffs.i4_data_stride = i4_res_strd;
2205
2206 s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id]
2207 .as_component_bufs[Y];
2208 s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE +
2209 ps_proc->i4_mb_y * MB_SIZE * s_res.i4_data_stride;
2210
2211 s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[Y];
2212
2213 /********************************************************/
2214 /* error estimation, */
2215 /* transform */
2216 /* quantization */
2217 /********************************************************/
2218 if(ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
2219 {
2220 isvce_luma_16x16_resi_trans_dctrans_quant(
2221 &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, ps_isa_dependent_fxns,
2222 ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, pu1_nnz,
2223 ps_qp_params->u1_qbits, ps_qp_params->u4_dead_zone, DISABLE_DC_TRANSFORM,
2224 ps_proc->ps_mb_info->u1_residual_prediction_flag);
2225
2226 /********************************************************/
2227 /* pack coeff data for entropy coding */
2228 /********************************************************/
2229 isvce_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, pu1_nnz,
2230 ps_codec->u4_thres_resi, &u4_cntrl);
2231 }
2232 else
2233 {
2234 u1_cbp_l = 0;
2235 u4_cntrl = 0;
2236 }
2237
2238 /********************************************************/
2239 /* ierror estimation, */
2240 /* itransform */
2241 /* iquantization */
2242 /********************************************************/
2243
2244 /*If the frame is not to be used for P frame reference or dumping recon
2245 * we only will use the reocn for only predicting intra Mbs
2246 * THis will need only right and bottom edge 4x4 blocks recon
2247 * Hence we selectively enable them using control signal(including DC)
2248 */
2249 if(ps_proc->u4_compute_recon != 1)
2250 {
2251 u4_cntrl &= 0x111F0000;
2252 }
2253
2254 isvce_luma_16x16_idctrans_iquant_itrans_recon(
2255 &s_quant_coeffs, &s_pred, &s_recon, &s_res, &s_res_pred, &s_iq_it_res_rec_constants,
2256 ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl, DISABLE_DC_TRANSFORM,
2257 ps_proc->ps_mb_info->u1_residual_prediction_flag);
2258
2259 return (u1_cbp_l);
2260 }
2261
2262 /**
2263 *******************************************************************************
2264 *
2265 * @brief performs chroma core coding for inter macro blocks
2266 *
2267 * @par Description:
2268 * If the current mb is to be coded as inter predicted mb,based on the sub mb
2269 *partitions and corresponding motion vectors generated by ME ,prediction is
2270 *done. Then, error is computed between the input blk and the estimated blk. This
2271 *error is transformed , quantized. The quantized coefficients are packed in scan
2272 *order for entropy coding.
2273 *
2274 * @param[in] ps_proc_ctxt
2275 * pointer to the current macro block context
2276 *
2277 * @returns u1_cbp_l
2278 * coded block pattern chroma
2279 *
2280 * @remarks none
2281 *
2282 *******************************************************************************
2283 */
isvce_code_chroma_inter_macroblock_8x8(isvce_process_ctxt_t * ps_proc)2284 UWORD8 isvce_code_chroma_inter_macroblock_8x8(isvce_process_ctxt_t *ps_proc)
2285 {
2286 buffer_container_t s_src;
2287 buffer_container_t s_pred;
2288 buffer_container_t s_recon;
2289 buffer_container_t s_res;
2290 buffer_container_t s_res_pred;
2291 buffer_container_t s_quant_coeffs;
2292
2293 /*Control signal for inverse transform*/
2294 UWORD32 u4_cntrl;
2295
2296 isvce_codec_t *ps_codec = ps_proc->ps_codec;
2297 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2298 isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2299 iq_it_res_rec_constants_t s_iq_it_res_rec_constants = {
2300 .pu2_iscal_mat = ps_qp_params->pu2_iscale_mat,
2301 .pu2_weigh_mat = ps_qp_params->pu2_weigh_mat,
2302 .u4_qp_div_6 = ps_qp_params->u1_qp_div};
2303
2304 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2305 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2306 UWORD8 u1_cbp_c = 0;
2307 UWORD8 au1_nnz[2 * (NUM_4x4_IN_8x8 + 1)] = {0};
2308 /* pointer to packed mb coeff data */
2309 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2310 /*See if we need to swap U and V plances for entropy*/
2311 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2312
2313 isvce_motion_comp_chroma(ps_proc, &s_pred);
2314
2315 s_src = ps_proc->s_src_buf_props.as_component_bufs[UV];
2316 s_recon = ps_proc->s_rec_buf_props.as_component_bufs[UV];
2317 s_quant_coeffs.pv_data = pi2_res_mb;
2318 s_quant_coeffs.i4_data_stride = i4_res_strd;
2319
2320 s_res = ps_codec->s_svc_ilp_data.ps_residual_bufs[ps_proc->u1_spatial_layer_id]
2321 .as_component_bufs[UV];
2322 s_res.pv_data = ((WORD16 *) s_res.pv_data) + ps_proc->i4_mb_x * MB_SIZE +
2323 ps_proc->i4_mb_y * (MB_SIZE / 2) * s_res.i4_data_stride;
2324
2325 s_res_pred = ps_proc->ps_mb_res_buf->as_component_bufs[UV];
2326
2327 /********************************************************/
2328 /* error estimation, */
2329 /* transform */
2330 /* quantization */
2331 /********************************************************/
2332 isvce_chroma_8x8_resi_trans_dctrans_quant(
2333 &s_src, &s_pred, &s_quant_coeffs, &s_res_pred, ps_isa_dependent_fxns,
2334 ps_qp_params->pu2_scale_mat, ps_qp_params->pu2_thres_mat, au1_nnz, ps_qp_params->u1_qbits,
2335 ps_qp_params->u4_dead_zone, ps_proc->ps_mb_info->u1_residual_prediction_flag);
2336
2337 /********************************************************/
2338 /* pack coeff data for entropy coding */
2339 /********************************************************/
2340 isvce_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, au1_nnz,
2341 ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2342
2343 /********************************************************/
2344 /* ierror estimation, */
2345 /* itransform */
2346 /* iquantization */
2347 /********************************************************/
2348
2349 /* If the frame is not to be used for P frame reference or dumping recon
2350 * we only will use the reocn for only predicting intra Mbs
2351 * THis will need only right and bottom edge 4x4 blocks recon
2352 * Hence we selectively enable them using control signal(including DC)
2353 */
2354 if(!ps_proc->u4_compute_recon)
2355 {
2356 u4_cntrl &= 0x7700C000;
2357 }
2358
2359 isvce_chroma_8x8_idctrans_iquant_itrans_recon(
2360 &s_quant_coeffs, &s_pred, &s_recon, &s_res, &s_res_pred, &s_iq_it_res_rec_constants,
2361 ps_isa_dependent_fxns, ps_proc->pv_scratch_buff, u4_cntrl,
2362 ps_proc->ps_mb_info->u1_residual_prediction_flag);
2363
2364 memcpy(ps_proc->au1_chroma_nnz, au1_nnz, sizeof(ps_proc->au1_chroma_nnz));
2365
2366 return (u1_cbp_c);
2367 }
2368