/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevc_trans_macros.h
*
* @brief
*  Macros used in the forward transform and inverse transform functions
*
* @author
*  Ittiam
*
* @remarks
*  None
*
*******************************************************************************
*/
#ifndef IHEVC_TRANS_MACROS_H_
#define IHEVC_TRANS_MACROS_H_

/*
 * Notes that apply to every macro in this header:
 *
 *  - Each macro expands to a brace-enclosed compound statement, not to an
 *    expression. Use it as a statement. Because the expansion is a bare
 *    "{ ... }", "if(c) MACRO(...); else ..." does not compile; wrap such a
 *    use in braces.
 *  - The types WORD16 / WORD32 / LWORD64, the helpers CLIP_S16, CLIP3,
 *    SHL_NEG, SHR_NEG, abs(), and the constants MAX_TR_DYNAMIC_RANGE,
 *    QUANT_SHIFT, SCALING_Q_SHIFT, QUANT_ROUND_FACTOR_Q and
 *    FLAT_RESCALE_MAT_Q_SHIFT are not defined here; they must be visible
 *    wherever a macro is expanded.
 *  - Some arguments are expanded more than once (for example "inp" in the
 *    QUANT family, "shift_iq" and "qp_div" in the IQUANT family), so do not
 *    pass expressions with side effects.
 *  - The macros declare locals (tmp, sign, bit_depth, transform_shift,
 *    q_bits, quant_multiplier, add_iq, clip_coeff, coeff_min, coeff_max,
 *    coeff_bit_range). Argument expressions must not refer to caller
 *    variables with those names, or they will bind to the macro's locals.
 *  - Every argument is parenthesized inside the expansion, so compound
 *    expressions such as "a + b" are safe to pass.
 */

/**
 * QUANT - forward quantization of one coefficient, bit depth fixed at 8.
 *
 *   out              lvalue that receives the result, stored as WORD16
 *   inp              coefficient to quantize
 *   quant_coeff      multiplier applied to abs(inp)
 *   qp_div           added to the right-shift amount q_bits
 *   log2_trans_size  subtracted when deriving transform_shift
 *   q_add            rounding term, shifted left by
 *                    (q_bits - QUANT_ROUND_FACTOR_Q) before being added
 *
 * Computes, with a 64-bit intermediate:
 *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
 *   q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4
 *   out = CLIP_S16(sign * ((abs(inp) * quant_coeff
 *                           + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q)))
 *                          >> q_bits))
 * where sign is -1 for negative inp and 1 otherwise.
 *
 * q_bits must not be smaller than QUANT_ROUND_FACTOR_Q, otherwise the
 * rounding term is shifted by a negative count.
 */
#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
    \
    sign = (inp) < 0 ? -1 : 1; \
    \
    /* Quantize the magnitude; the sign is re-applied after the shift */ \
    tmp = (LWORD64)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/**
 * QUANT_HBD - same computation as QUANT, but the bit depth is supplied by
 * the caller through the extra "bit_depth" argument instead of being fixed
 * at 8.
 *
 *   out, inp, quant_coeff, qp_div, log2_trans_size, q_add   as for QUANT
 *   bit_depth        subtracted when deriving transform_shift
 */
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
    \
    sign = (inp) < 0 ? -1 : 1; \
    \
    /* Quantize the magnitude; the sign is re-applied after the shift */ \
    tmp = (LWORD64)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/* added by 100028 */
/**
 * QUANT_NO_WEIGHTMAT - variant of QUANT with two differences visible in the
 * code: q_bits is additionally reduced by FLAT_RESCALE_MAT_Q_SHIFT, and all
 * intermediate arithmetic is done in WORD32 rather than LWORD64.
 *
 * Arguments are the same as for QUANT. The bit depth is fixed at 8.
 *
 * NOTE(review): with a 32-bit intermediate, abs(inp) * quant_coeff plus the
 * rounding term must fit in WORD32; presumably callers only pass flat
 * (unweighted) scale values small enough for that - confirm against callers.
 */
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    \
    sign = (inp) < 0 ? -1 : 1; \
    \
    /* Quantize the magnitude; the sign is re-applied after the shift */ \
    tmp = (WORD32)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/**
 * QUANT_NO_WEIGHTMAT_HBD - same computation as QUANT_NO_WEIGHTMAT, but the
 * bit depth is supplied by the caller through the extra "bit_depth"
 * argument instead of being fixed at 8.
 */
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* because quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* we can reduce the division factor q_bits by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    \
    sign = (inp) < 0 ? -1 : 1; \
    \
    /* Quantize the magnitude; the sign is re-applied after the shift */ \
    tmp = (WORD32)(abs(inp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
    \
    tmp = tmp * sign; \
    (out) = (WORD16) CLIP_S16(tmp); \
}

/* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned */

/* Inverse quantization other than 4x4 */
/* No clipping is needed for "pi2_src"(coefficients) */
/**
 * IQUANT - inverse quantization of one coefficient.
 *
 *   res            lvalue that receives the result
 *   coeff          quantized coefficient
 *                  (original annotation: pi2_src[index*src_strd])
 *   dequant_coeff  multiplier (original annotation:
 *                  pi2_dequant_coeff[index*trans_size] *
 *                  g_ihevc_iquant_scales[qp_rem])
 *   shift_iq       base shift amount
 *   qp_div         subtracted from shift_iq to form the effective shift
 *
 * Computes, in WORD32:
 *   add_iq = SHL_NEG(1, shift_iq - qp_div - 1)
 *   res    = CLIP_S16(SHR_NEG(coeff * dequant_coeff + add_iq,
 *                             shift_iq - qp_div))
 *
 * NOTE(review): SHL_NEG / SHR_NEG are defined elsewhere; presumably they
 * are shifts that tolerate a negative count by shifting the other way -
 * confirm against their definitions.
 */
#define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    WORD32 tmp, add_iq; \
    \
    /* To be moved outside in neon. To be computed once per transform call */ \
    add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
    \
    tmp = (coeff) * (dequant_coeff); \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
    \
    (res) = CLIP_S16(tmp); \
}

/* 4x4 inverse quantization */
/* Options : */
/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div >= shift_iq)" or 16 bits if "(qp_div < shift_iq)"*/
/* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
/**
 * IQUANT_4x4 - inverse quantization of one coefficient with the input
 * clipped before the multiply.
 *
 * Arguments are the same as for IQUANT. The coefficient is first clipped
 * with CLIP3 to a signed range of coeff_bit_range bits: 16 bits by default,
 * 10 bits when qp_div > shift_iq. The clipped value then goes through the
 * same multiply / add_iq / SHR_NEG / CLIP_S16 sequence as IQUANT.
 *
 * NOTE(review): the option list above says "qp_div >= shift_iq" for the
 * 10-bit clip, while the code tests "qp_div > shift_iq" (strictly greater).
 * The code is left as it was; confirm which condition is intended.
 */
#define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    WORD32 clip_coeff, tmp; \
    WORD32 coeff_min, coeff_max; \
    WORD32 coeff_bit_range; \
    WORD32 add_iq; \
    \
    /* To be moved outside in neon. To be computed once per transform call */ \
    add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
    \
    coeff_bit_range = 16; \
    if((qp_div) > (shift_iq)) \
    { \
        coeff_bit_range = 10; \
    } \
    \
    /* Signed range representable in coeff_bit_range bits */ \
    coeff_min = -(1 << (coeff_bit_range - 1)); \
    coeff_max = (1 << (coeff_bit_range - 1)) - 1; \
    \
    clip_coeff = CLIP3((coeff), coeff_min, coeff_max); \
    \
    tmp = clip_coeff * (dequant_coeff); \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
    \
    (res) = CLIP_S16(tmp); \
}

#endif /* IHEVC_TRANS_MACROS_H_ */