xref: /aosp_15_r20/external/libhevc/common/ihevc_trans_macros.h (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar *******************************************************************************
20*c83a76b0SSuyog Pawar * @file
21*c83a76b0SSuyog Pawar *  ihevc_trans_macros.h
22*c83a76b0SSuyog Pawar *
23*c83a76b0SSuyog Pawar * @brief
24*c83a76b0SSuyog Pawar *  Macros used in the forward transform and inverse transform functions
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar * @author
27*c83a76b0SSuyog Pawar *  Ittiam
28*c83a76b0SSuyog Pawar *
29*c83a76b0SSuyog Pawar * @remarks
30*c83a76b0SSuyog Pawar *  None
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar *******************************************************************************
33*c83a76b0SSuyog Pawar */
34*c83a76b0SSuyog Pawar #ifndef IHEVC_TRANS_MACROS_H_
35*c83a76b0SSuyog Pawar #define IHEVC_TRANS_MACROS_H_
36*c83a76b0SSuyog Pawar 
/**
 * @brief
 *  Quantizes a single coefficient assuming a bit depth of 8.
 *
 * @par Description:
 *  Computes
 *      out = CLIP_S16(sign(inp) *
 *                     ((|inp| * quant_coeff + rounding) >> q_bits))
 *  where
 *      transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
 *      q_bits          = QUANT_SHIFT + qp_div + transform_shift
 *                        + SCALING_Q_SHIFT - 4
 *      rounding        = q_add << (q_bits - QUANT_ROUND_FACTOR_Q)
 *  The intermediate is held in 64 bits (LWORD64).
 *
 * @param[out] out             lvalue that receives the WORD16 result
 * @param[in]  inp             coefficient to quantize; expanded exactly once
 * @param[in]  quant_coeff     multiplier applied to |inp|
 * @param[in]  qp_div          term added to the right-shift amount
 * @param[in]  log2_trans_size term subtracted from the right-shift amount
 * @param[in]  q_add           rounding offset, scaled as shown above
 *
 * @remarks
 *  Expands to a brace-enclosed compound statement. The expansion site must
 *  provide abs(), CLIP_S16, WORD16/WORD32/LWORD64 and the constants named
 *  above. Every argument is parenthesized, so compound expressions are safe.
 *  NOTE(review): q_bits must not be smaller than QUANT_ROUND_FACTOR_Q, or the
 *  rounding shift count becomes negative - confirm against callers.
 */
#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
 \
    /* Read the input once so arguments with side effects are safe */ \
    tmp = (inp); \
    sign = (tmp < 0) ? -1 : 1; \
 \
    tmp = (LWORD64)(abs((WORD32)tmp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    /* Restore the sign after the magnitude has been rounded and shifted */ \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
61*c83a76b0SSuyog Pawar 
/**
 * @brief
 *  Quantizes a single coefficient for a caller-supplied bit depth.
 *
 * @par Description:
 *  Computes
 *      out = CLIP_S16(sign(inp) *
 *                     ((|inp| * quant_coeff + rounding) >> q_bits))
 *  where
 *      transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
 *      q_bits          = QUANT_SHIFT + qp_div + transform_shift
 *                        + SCALING_Q_SHIFT - 4
 *      rounding        = q_add << (q_bits - QUANT_ROUND_FACTOR_Q)
 *  The intermediate is held in 64 bits (LWORD64).
 *
 * @param[out] out             lvalue that receives the WORD16 result
 * @param[in]  inp             coefficient to quantize; expanded exactly once
 * @param[in]  quant_coeff     multiplier applied to |inp|
 * @param[in]  qp_div          term added to the right-shift amount
 * @param[in]  log2_trans_size term subtracted from the right-shift amount
 * @param[in]  q_add           rounding offset, scaled as shown above
 * @param[in]  bit_depth       term subtracted from MAX_TR_DYNAMIC_RANGE
 *
 * @remarks
 *  Expands to a brace-enclosed compound statement. The expansion site must
 *  provide abs(), CLIP_S16, WORD16/WORD32/LWORD64 and the constants named
 *  above. Every argument is parenthesized, so compound expressions are safe.
 *  NOTE(review): q_bits must not be smaller than QUANT_ROUND_FACTOR_Q, or the
 *  rounding shift count becomes negative - confirm against callers.
 */
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    LWORD64 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
 \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
 \
    /* Read the input once so arguments with side effects are safe */ \
    tmp = (inp); \
    sign = (tmp < 0) ? -1 : 1; \
 \
    tmp = (LWORD64)(abs((WORD32)tmp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    /* Restore the sign after the magnitude has been rounded and shifted */ \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
86*c83a76b0SSuyog Pawar /* added by 100028 */
/**
 * @brief
 *  Quantizes a single coefficient with 32-bit intermediates, bit depth 8,
 *  and FLAT_RESCALE_MAT_Q_SHIFT removed from the shift.
 *
 * @par Description:
 *  Computes
 *      out = CLIP_S16(sign(inp) *
 *                     ((|inp| * quant_coeff + rounding) >> q_bits))
 *  where
 *      transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
 *      q_bits          = QUANT_SHIFT + qp_div + transform_shift
 *                        + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
 *      rounding        = q_add << (q_bits - QUANT_ROUND_FACTOR_Q)
 *
 * @param[out] out             lvalue that receives the WORD16 result
 * @param[in]  inp             coefficient to quantize; expanded exactly once
 * @param[in]  quant_coeff     multiplier applied to |inp|
 * @param[in]  qp_div          term added to the right-shift amount
 * @param[in]  log2_trans_size term subtracted from the right-shift amount
 * @param[in]  q_add           rounding offset, scaled as shown above
 *
 * @remarks
 *  Expands to a brace-enclosed compound statement. The expansion site must
 *  provide abs(), CLIP_S16, WORD16/WORD32 and the constants named above.
 *  Every argument is parenthesized, so compound expressions are safe.
 *  NOTE(review): the product and rounding sum are accumulated in WORD32;
 *  callers presumably guarantee they fit - confirm. q_bits must also not be
 *  smaller than QUANT_ROUND_FACTOR_Q.
 */
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 bit_depth, transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    bit_depth = 8; \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT \
             - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
 \
    /* Read the input once so arguments with side effects are safe */ \
    tmp = (inp); \
    sign = (tmp < 0) ? -1 : 1; \
 \
    tmp = (WORD32)(abs(tmp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    /* Restore the sign after the magnitude has been rounded and shifted */ \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
111*c83a76b0SSuyog Pawar 
/**
 * @brief
 *  Quantizes a single coefficient with 32-bit intermediates, a
 *  caller-supplied bit depth, and FLAT_RESCALE_MAT_Q_SHIFT removed from the
 *  shift.
 *
 * @par Description:
 *  Computes
 *      out = CLIP_S16(sign(inp) *
 *                     ((|inp| * quant_coeff + rounding) >> q_bits))
 *  where
 *      transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
 *      q_bits          = QUANT_SHIFT + qp_div + transform_shift
 *                        + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
 *      rounding        = q_add << (q_bits - QUANT_ROUND_FACTOR_Q)
 *
 * @param[out] out             lvalue that receives the WORD16 result
 * @param[in]  inp             coefficient to quantize; expanded exactly once
 * @param[in]  quant_coeff     multiplier applied to |inp|
 * @param[in]  qp_div          term added to the right-shift amount
 * @param[in]  log2_trans_size term subtracted from the right-shift amount
 * @param[in]  q_add           rounding offset, scaled as shown above
 * @param[in]  bit_depth       term subtracted from MAX_TR_DYNAMIC_RANGE
 *
 * @remarks
 *  Expands to a brace-enclosed compound statement. The expansion site must
 *  provide abs(), CLIP_S16, WORD16/WORD32 and the constants named above.
 *  Every argument is parenthesized, so compound expressions are safe.
 *  NOTE(review): the product and rounding sum are accumulated in WORD32;
 *  callers presumably guarantee they fit - confirm. q_bits must also not be
 *  smaller than QUANT_ROUND_FACTOR_Q.
 */
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    WORD32 tmp; \
    WORD32 sign; \
    WORD32 transform_shift; \
    WORD32 q_bits, quant_multiplier; \
 \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
 \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_multiplier = 4; \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT \
             - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
 \
    /* Read the input once so arguments with side effects are safe */ \
    tmp = (inp); \
    sign = (tmp < 0) ? -1 : 1; \
 \
    tmp = (WORD32)(abs(tmp)); \
    tmp = tmp * (quant_coeff); \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
    tmp = tmp >> q_bits; \
 \
    /* Restore the sign after the magnitude has been rounded and shifted */ \
    tmp = tmp * sign; \
    out = (WORD16) CLIP_S16(tmp); \
}
136*c83a76b0SSuyog Pawar /* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned  */
137*c83a76b0SSuyog Pawar 
/* Inverse quantization other than 4x4 */
/* No clipping is needed for "pi2_src"(coefficients) */
/**
 * @brief
 *  Inverse-quantizes a single coefficient without clipping the input.
 *
 * @par Description:
 *  Computes
 *      res = CLIP_S16(SHR_NEG(coeff * dequant_coeff + add_iq,
 *                             shift_iq - qp_div))
 *  where
 *      add_iq = SHL_NEG(1, shift_iq - qp_div - 1)
 *  The product and sum are held in WORD32.
 *
 * @param[out] res           lvalue that receives the clipped result
 * @param[in]  coeff         coefficient, e.g. pi2_src[index*src_strd]
 * @param[in]  dequant_coeff scale factor, e.g.
 *                           pi2_dequant_coeff[index*trans_size] *
 *                           g_ihevc_iquant_scales[qp_rem]
 * @param[in]  shift_iq      base shift amount
 * @param[in]  qp_div        amount subtracted from shift_iq
 *
 * @remarks
 *  Expands to a brace-enclosed compound statement. The expansion site must
 *  provide SHL_NEG, SHR_NEG, CLIP_S16 and WORD32. Every argument is
 *  parenthesized, so compound expressions are multiplied and subtracted as a
 *  unit.
 */
#define IQUANT(res, coeff /*pi2_src[index*src_strd]*/, dequant_coeff /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */, shift_iq, qp_div) \
{ \
    WORD32 tmp, add_iq; \
 \
    /* To be moved outside in neon. To be computed once per transform call */ \
    add_iq = SHL_NEG(1 , ((shift_iq) - (qp_div) - 1)); \
 \
    tmp = (coeff) * (dequant_coeff); \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp,((shift_iq) - (qp_div))); \
 \
    res = CLIP_S16(tmp); \
}
152*c83a76b0SSuyog Pawar 
/* 4x4 inverse quantization */
/* Options : */
/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div > shift_iq)" or 16 bits otherwise */
/* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
/* This macro implements option 1. */

/**
 * @brief
 *  Inverse-quantizes a single coefficient, clipping the input first.
 *
 * @par Description:
 *  The coefficient is clipped to a signed 10-bit range when
 *  qp_div > shift_iq and to a signed 16-bit range otherwise, then
 *      res = CLIP_S16(SHR_NEG(clipped * dequant_coeff + add_iq,
 *                             shift_iq - qp_div))
 *  where
 *      add_iq = SHL_NEG(1, shift_iq - qp_div - 1)
 *  The product and sum are held in WORD32.
 *
 * @param[out] res           lvalue that receives the clipped result
 * @param[in]  coeff         coefficient, e.g. pi2_src[index*src_strd];
 *                           expanded exactly once
 * @param[in]  dequant_coeff scale factor, e.g.
 *                           pi2_dequant_coeff[index*trans_size] *
 *                           g_ihevc_iquant_scales[qp_rem]
 * @param[in]  shift_iq      base shift amount
 * @param[in]  qp_div        amount subtracted from shift_iq
 *
 * @remarks
 *  Expands to a brace-enclosed compound statement. The expansion site must
 *  provide SHL_NEG, SHR_NEG, CLIP3, CLIP_S16 and WORD32. Every argument is
 *  parenthesized, so compound expressions are safe.
 */
#define IQUANT_4x4(res, coeff /*pi2_src[index*src_strd]*/, dequant_coeff /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */, shift_iq, qp_div) \
{ \
    WORD32 clip_coeff, tmp; \
    WORD32 coeff_min,coeff_max; \
    WORD32 coeff_bit_range; \
    WORD32 add_iq; \
    /* To be moved outside in neon. To be computed once per transform call */ \
    add_iq = SHL_NEG(1 , ((shift_iq) - (qp_div) - 1)); \
 \
    /* Narrow the accepted input range when the net shift is a left shift */ \
    coeff_bit_range = 16; \
    if((qp_div) > (shift_iq)) \
    { \
        coeff_bit_range = 10; \
    } \
 \
    coeff_min = -(1<<(coeff_bit_range-1)); \
    coeff_max = (1<<(coeff_bit_range-1)) - 1; \
 \
    /* Read the input once; CLIP3 may expand its first argument repeatedly */ \
    clip_coeff = (coeff); \
    clip_coeff = CLIP3(clip_coeff,coeff_min,coeff_max); \
 \
    tmp = clip_coeff * (dequant_coeff); \
    tmp = tmp + add_iq; \
    tmp = SHR_NEG(tmp,((shift_iq) - (qp_div))); \
 \
    res = CLIP_S16(tmp); \
}
181*c83a76b0SSuyog Pawar 
182*c83a76b0SSuyog Pawar #endif /* IHEVC_TRANS_MACROS_H_ */
183