// Copyright 2019 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*09537850SAkhilesh Sanikop
15*09537850SAkhilesh Sanikop #include "src/dsp/inverse_transform.h"
16*09537850SAkhilesh Sanikop
17*09537850SAkhilesh Sanikop #include <algorithm>
18*09537850SAkhilesh Sanikop #include <cassert>
19*09537850SAkhilesh Sanikop #include <cstdint>
20*09537850SAkhilesh Sanikop #include <cstring>
21*09537850SAkhilesh Sanikop #include <type_traits>
22*09537850SAkhilesh Sanikop
23*09537850SAkhilesh Sanikop #include "src/dsp/dsp.h"
24*09537850SAkhilesh Sanikop #include "src/utils/array_2d.h"
25*09537850SAkhilesh Sanikop #include "src/utils/common.h"
26*09537850SAkhilesh Sanikop #include "src/utils/compiler_attributes.h"
27*09537850SAkhilesh Sanikop #include "src/utils/logging.h"
28*09537850SAkhilesh Sanikop
29*09537850SAkhilesh Sanikop #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
30*09537850SAkhilesh Sanikop #undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
31*09537850SAkhilesh Sanikop #endif
32*09537850SAkhilesh Sanikop
33*09537850SAkhilesh Sanikop #if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
34*09537850SAkhilesh Sanikop LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
35*09537850SAkhilesh Sanikop #include <cinttypes>
36*09537850SAkhilesh Sanikop #endif
37*09537850SAkhilesh Sanikop
38*09537850SAkhilesh Sanikop namespace libgav1 {
39*09537850SAkhilesh Sanikop namespace dsp {
40*09537850SAkhilesh Sanikop namespace {
41*09537850SAkhilesh Sanikop
42*09537850SAkhilesh Sanikop // Include the constants and utility functions inside the anonymous namespace.
43*09537850SAkhilesh Sanikop #include "src/dsp/inverse_transform.inc"
44*09537850SAkhilesh Sanikop
// Shift amount (in bits) associated with the column transforms.
// NOTE(review): no use of this constant is visible in this part of the file;
// presumably it is the rounding shift applied to column transform output --
// confirm against the callers.
constexpr uint8_t kTransformColumnShift = 4;
46*09537850SAkhilesh Sanikop
// Returns |value| converted to int32_t.
//
// When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is defined to a non-zero value,
// the function additionally verifies that |value| is representable as a signed
// integer using |range| bits, i.e. that it lies in
// [-2^(range - 1), 2^(range - 1) - 1]. A violation is logged and then trips an
// assert(). In all other builds |range| is ignored and the function is a plain
// static_cast.
//
// |T| must be int32_t or int64_t (enforced only in range-check builds).
template <typename T>
int32_t RangeCheckValue(T value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_assert(
      std::is_same<T, int32_t>::value || std::is_same<T, std::int64_t>::value,
      "");
  // |range - 1| is used as a shift count on a 32-bit value below, so |range|
  // must lie in [1, 32]; a value outside that interval would make the shift
  // undefined instead of producing a meaningful bound.
  assert(range >= 1 && range <= 32);
  const auto min = static_cast<int32_t>(-(uint32_t{1} << (range - 1)));
  const auto max = static_cast<int32_t>((uint32_t{1} << (range - 1)) - 1);
  if (min > value || value > max) {
    LIBGAV1_DLOG(ERROR,
                 "coeff out of bit range, value: %" PRId64 " bit range %d",
                 static_cast<int64_t>(value), range);
    // Always fails here; kept as a condition so the assertion message shows
    // the violated bound.
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  // Silences the unused-parameter warning when range checking is compiled out.
  static_cast<void>(range);
  return static_cast<int32_t>(value);
}
67*09537850SAkhilesh Sanikop
68*09537850SAkhilesh Sanikop template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)69*09537850SAkhilesh Sanikop LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
70*09537850SAkhilesh Sanikop int b, int angle, bool flip,
71*09537850SAkhilesh Sanikop int8_t range) {
72*09537850SAkhilesh Sanikop // Note that we multiply in 32 bits and then add/subtract the products in 64
73*09537850SAkhilesh Sanikop // bits. The 32-bit multiplications do not overflow. Please see the comment
74*09537850SAkhilesh Sanikop // and assert() in Cos128().
75*09537850SAkhilesh Sanikop const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
76*09537850SAkhilesh Sanikop static_cast<int64_t>(dst[b] * Sin128(angle));
77*09537850SAkhilesh Sanikop const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
78*09537850SAkhilesh Sanikop static_cast<int64_t>(dst[b] * Cos128(angle));
79*09537850SAkhilesh Sanikop // Section 7.13.2.1: It is a requirement of bitstream conformance that the
80*09537850SAkhilesh Sanikop // values saved into the array T by this function are representable by a
81*09537850SAkhilesh Sanikop // signed integer using |range| bits of precision.
82*09537850SAkhilesh Sanikop dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
83*09537850SAkhilesh Sanikop dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
84*09537850SAkhilesh Sanikop }
85*09537850SAkhilesh Sanikop
86*09537850SAkhilesh Sanikop template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)87*09537850SAkhilesh Sanikop void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
88*09537850SAkhilesh Sanikop int angle, bool flip, int8_t range) {
89*09537850SAkhilesh Sanikop // Note that we multiply in 32 bits and then add/subtract the products in 64
90*09537850SAkhilesh Sanikop // bits. The 32-bit multiplications do not overflow. Please see the comment
91*09537850SAkhilesh Sanikop // and assert() in Cos128().
92*09537850SAkhilesh Sanikop const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
93*09537850SAkhilesh Sanikop const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
94*09537850SAkhilesh Sanikop // Section 7.13.2.1: It is a requirement of bitstream conformance that the
95*09537850SAkhilesh Sanikop // values saved into the array T by this function are representable by a
96*09537850SAkhilesh Sanikop // signed integer using |range| bits of precision.
97*09537850SAkhilesh Sanikop dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
98*09537850SAkhilesh Sanikop dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
99*09537850SAkhilesh Sanikop }
100*09537850SAkhilesh Sanikop
101*09537850SAkhilesh Sanikop template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)102*09537850SAkhilesh Sanikop void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
103*09537850SAkhilesh Sanikop int angle, bool flip, int8_t range) {
104*09537850SAkhilesh Sanikop // Note that we multiply in 32 bits and then add/subtract the products in 64
105*09537850SAkhilesh Sanikop // bits. The 32-bit multiplications do not overflow. Please see the comment
106*09537850SAkhilesh Sanikop // and assert() in Cos128().
107*09537850SAkhilesh Sanikop const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
108*09537850SAkhilesh Sanikop const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
109*09537850SAkhilesh Sanikop
110*09537850SAkhilesh Sanikop // Section 7.13.2.1: It is a requirement of bitstream conformance that the
111*09537850SAkhilesh Sanikop // values saved into the array T by this function are representable by a
112*09537850SAkhilesh Sanikop // signed integer using |range| bits of precision.
113*09537850SAkhilesh Sanikop dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
114*09537850SAkhilesh Sanikop dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
115*09537850SAkhilesh Sanikop }
116*09537850SAkhilesh Sanikop
117*09537850SAkhilesh Sanikop template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)118*09537850SAkhilesh Sanikop void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
119*09537850SAkhilesh Sanikop int8_t range) {
120*09537850SAkhilesh Sanikop if (flip) std::swap(a, b);
121*09537850SAkhilesh Sanikop --range;
122*09537850SAkhilesh Sanikop // For Adst and Dct, the maximum possible value for range is 20. So min and
123*09537850SAkhilesh Sanikop // max should always fit into int32_t.
124*09537850SAkhilesh Sanikop const int32_t min = -(1 << range);
125*09537850SAkhilesh Sanikop const int32_t max = (1 << range) - 1;
126*09537850SAkhilesh Sanikop const int32_t x = dst[a] + dst[b];
127*09537850SAkhilesh Sanikop const int32_t y = dst[a] - dst[b];
128*09537850SAkhilesh Sanikop dst[a] = Clip3(x, min, max);
129*09537850SAkhilesh Sanikop dst[b] = Clip3(y, min, max);
130*09537850SAkhilesh Sanikop }
131*09537850SAkhilesh Sanikop
// Clips the first |size| entries of |dst| to a signed integer of
// max(bitdepth + 6, 16) bits. Nothing is done when Residual is at most two
// bytes wide.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  // If Residual is int16_t (which implies bitdepth is 8), there is no need to
  // clip to 16 bits.
  if (sizeof(Residual) <= 2) return;

  const Residual upper = (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
  const Residual lower = -upper - 1;
  for (int i = 0; i < size; ++i) {
    dst[i] = Clip3(dst[i], lower, upper);
  }
}
145*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// Discrete Cosine Transforms (DCT).
148*09537850SAkhilesh Sanikop
149*09537850SAkhilesh Sanikop // Value for index (i, j) is computed as bitreverse(j) and interpreting that as
150*09537850SAkhilesh Sanikop // an integer with bit-length i + 2.
151*09537850SAkhilesh Sanikop // For e.g. index (2, 3) will be computed as follows:
152*09537850SAkhilesh Sanikop // * bitreverse(3) = bitreverse(..000011) = 110000...
153*09537850SAkhilesh Sanikop // * interpreting that as an integer with bit-length 2+2 = 4 will be 1100 = 12
154*09537850SAkhilesh Sanikop constexpr uint8_t kBitReverseLookup[kNumTransform1dSizes][64] = {
155*09537850SAkhilesh Sanikop {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
156*09537850SAkhilesh Sanikop 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
157*09537850SAkhilesh Sanikop 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
158*09537850SAkhilesh Sanikop {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
159*09537850SAkhilesh Sanikop 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
160*09537850SAkhilesh Sanikop 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
161*09537850SAkhilesh Sanikop {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
162*09537850SAkhilesh Sanikop 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
163*09537850SAkhilesh Sanikop 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
164*09537850SAkhilesh Sanikop 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
165*09537850SAkhilesh Sanikop {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
166*09537850SAkhilesh Sanikop 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
167*09537850SAkhilesh Sanikop 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
168*09537850SAkhilesh Sanikop 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
169*09537850SAkhilesh Sanikop {0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
170*09537850SAkhilesh Sanikop 2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
171*09537850SAkhilesh Sanikop 1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
172*09537850SAkhilesh Sanikop 3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
173*09537850SAkhilesh Sanikop
174*09537850SAkhilesh Sanikop template <typename Residual, int size_log2>
Dct_C(void * dest,int8_t range)175*09537850SAkhilesh Sanikop void Dct_C(void* dest, int8_t range) {
176*09537850SAkhilesh Sanikop static_assert(size_log2 >= 2 && size_log2 <= 6, "");
177*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
178*09537850SAkhilesh Sanikop // stage 1.
179*09537850SAkhilesh Sanikop const int size = 1 << size_log2;
180*09537850SAkhilesh Sanikop Residual temp[size];
181*09537850SAkhilesh Sanikop memcpy(temp, dst, sizeof(temp));
182*09537850SAkhilesh Sanikop for (int i = 0; i < size; ++i) {
183*09537850SAkhilesh Sanikop dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
184*09537850SAkhilesh Sanikop }
185*09537850SAkhilesh Sanikop // stages 2-32 are dependent on the value of size_log2.
186*09537850SAkhilesh Sanikop // stage 2.
187*09537850SAkhilesh Sanikop if (size_log2 == 6) {
188*09537850SAkhilesh Sanikop for (int i = 0; i < 16; ++i) {
189*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, i + 32, 63 - i,
190*09537850SAkhilesh Sanikop 63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
191*09537850SAkhilesh Sanikop range);
192*09537850SAkhilesh Sanikop }
193*09537850SAkhilesh Sanikop }
194*09537850SAkhilesh Sanikop // stage 3
195*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
196*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
197*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, i + 16, 31 - i,
198*09537850SAkhilesh Sanikop 6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
199*09537850SAkhilesh Sanikop range);
200*09537850SAkhilesh Sanikop }
201*09537850SAkhilesh Sanikop }
202*09537850SAkhilesh Sanikop // stage 4.
203*09537850SAkhilesh Sanikop if (size_log2 == 6) {
204*09537850SAkhilesh Sanikop for (int i = 0; i < 16; ++i) {
205*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
206*09537850SAkhilesh Sanikop static_cast<bool>(i & 1), range);
207*09537850SAkhilesh Sanikop }
208*09537850SAkhilesh Sanikop }
209*09537850SAkhilesh Sanikop // stage 5.
210*09537850SAkhilesh Sanikop if (size_log2 >= 4) {
211*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
212*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, i + 8, 15 - i,
213*09537850SAkhilesh Sanikop 12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
214*09537850SAkhilesh Sanikop range);
215*09537850SAkhilesh Sanikop }
216*09537850SAkhilesh Sanikop }
217*09537850SAkhilesh Sanikop // stage 6.
218*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
219*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
220*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
221*09537850SAkhilesh Sanikop static_cast<bool>(i & 1), range);
222*09537850SAkhilesh Sanikop }
223*09537850SAkhilesh Sanikop }
224*09537850SAkhilesh Sanikop // stage 7.
225*09537850SAkhilesh Sanikop if (size_log2 == 6) {
226*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
227*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
228*09537850SAkhilesh Sanikop ButterflyRotation_C(
229*09537850SAkhilesh Sanikop dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
230*09537850SAkhilesh Sanikop 60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
231*09537850SAkhilesh Sanikop range);
232*09537850SAkhilesh Sanikop }
233*09537850SAkhilesh Sanikop }
234*09537850SAkhilesh Sanikop }
235*09537850SAkhilesh Sanikop // stage 8.
236*09537850SAkhilesh Sanikop if (size_log2 >= 3) {
237*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
238*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
239*09537850SAkhilesh Sanikop }
240*09537850SAkhilesh Sanikop }
241*09537850SAkhilesh Sanikop // stage 9.
242*09537850SAkhilesh Sanikop if (size_log2 >= 4) {
243*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
244*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
245*09537850SAkhilesh Sanikop static_cast<bool>(i & 1), range);
246*09537850SAkhilesh Sanikop }
247*09537850SAkhilesh Sanikop }
248*09537850SAkhilesh Sanikop // stage 10.
249*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
250*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
251*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
252*09537850SAkhilesh Sanikop ButterflyRotation_C(
253*09537850SAkhilesh Sanikop dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
254*09537850SAkhilesh Sanikop 24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
255*09537850SAkhilesh Sanikop }
256*09537850SAkhilesh Sanikop }
257*09537850SAkhilesh Sanikop }
258*09537850SAkhilesh Sanikop // stage 11.
259*09537850SAkhilesh Sanikop if (size_log2 == 6) {
260*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
261*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
262*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
263*09537850SAkhilesh Sanikop MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
264*09537850SAkhilesh Sanikop range);
265*09537850SAkhilesh Sanikop }
266*09537850SAkhilesh Sanikop }
267*09537850SAkhilesh Sanikop }
268*09537850SAkhilesh Sanikop // stage 12.
269*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
270*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
271*09537850SAkhilesh Sanikop i == 0, range);
272*09537850SAkhilesh Sanikop }
273*09537850SAkhilesh Sanikop // stage 13.
274*09537850SAkhilesh Sanikop if (size_log2 >= 3) {
275*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
276*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
277*09537850SAkhilesh Sanikop /*flip=*/i != 0, range);
278*09537850SAkhilesh Sanikop }
279*09537850SAkhilesh Sanikop }
280*09537850SAkhilesh Sanikop // stage 14.
281*09537850SAkhilesh Sanikop if (size_log2 >= 4) {
282*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
283*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
284*09537850SAkhilesh Sanikop }
285*09537850SAkhilesh Sanikop }
286*09537850SAkhilesh Sanikop // stage 15.
287*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
288*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
289*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
290*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
291*09537850SAkhilesh Sanikop MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
292*09537850SAkhilesh Sanikop range);
293*09537850SAkhilesh Sanikop }
294*09537850SAkhilesh Sanikop }
295*09537850SAkhilesh Sanikop }
296*09537850SAkhilesh Sanikop // stage 16.
297*09537850SAkhilesh Sanikop if (size_log2 == 6) {
298*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
299*09537850SAkhilesh Sanikop for (int j = 0; j < 4; ++j) {
300*09537850SAkhilesh Sanikop ButterflyRotation_C(
301*09537850SAkhilesh Sanikop dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
302*09537850SAkhilesh Sanikop 56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
303*09537850SAkhilesh Sanikop }
304*09537850SAkhilesh Sanikop }
305*09537850SAkhilesh Sanikop }
306*09537850SAkhilesh Sanikop // stage 17.
307*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
308*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i, 3 - i, false, range);
309*09537850SAkhilesh Sanikop }
310*09537850SAkhilesh Sanikop // stage 18.
311*09537850SAkhilesh Sanikop if (size_log2 >= 3) {
312*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 6, 5, 32, true, range);
313*09537850SAkhilesh Sanikop }
314*09537850SAkhilesh Sanikop // stage 19.
315*09537850SAkhilesh Sanikop if (size_log2 >= 4) {
316*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
317*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
318*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
319*09537850SAkhilesh Sanikop /*flip=*/i != 0, range);
320*09537850SAkhilesh Sanikop }
321*09537850SAkhilesh Sanikop }
322*09537850SAkhilesh Sanikop }
323*09537850SAkhilesh Sanikop // stage 20.
324*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
325*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
326*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
327*09537850SAkhilesh Sanikop range);
328*09537850SAkhilesh Sanikop }
329*09537850SAkhilesh Sanikop }
330*09537850SAkhilesh Sanikop // stage 21.
331*09537850SAkhilesh Sanikop if (size_log2 == 6) {
332*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
333*09537850SAkhilesh Sanikop for (int j = 0; j < 4; ++j) {
334*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
335*09537850SAkhilesh Sanikop MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
336*09537850SAkhilesh Sanikop range);
337*09537850SAkhilesh Sanikop }
338*09537850SAkhilesh Sanikop }
339*09537850SAkhilesh Sanikop }
340*09537850SAkhilesh Sanikop // stage 22.
341*09537850SAkhilesh Sanikop if (size_log2 >= 3) {
342*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
343*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i, 7 - i, false, range);
344*09537850SAkhilesh Sanikop }
345*09537850SAkhilesh Sanikop }
346*09537850SAkhilesh Sanikop // stage 23.
347*09537850SAkhilesh Sanikop if (size_log2 >= 4) {
348*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
349*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
350*09537850SAkhilesh Sanikop }
351*09537850SAkhilesh Sanikop }
352*09537850SAkhilesh Sanikop // stage 24.
353*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
354*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
355*09537850SAkhilesh Sanikop for (int j = 0; j < 4; ++j) {
356*09537850SAkhilesh Sanikop HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
357*09537850SAkhilesh Sanikop MultiplyBy8(i) - j + 23, i == 1, range);
358*09537850SAkhilesh Sanikop }
359*09537850SAkhilesh Sanikop }
360*09537850SAkhilesh Sanikop }
361*09537850SAkhilesh Sanikop // stage 25.
362*09537850SAkhilesh Sanikop if (size_log2 == 6) {
363*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
364*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
365*09537850SAkhilesh Sanikop }
366*09537850SAkhilesh Sanikop }
367*09537850SAkhilesh Sanikop // stage 26.
368*09537850SAkhilesh Sanikop if (size_log2 >= 4) {
369*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
370*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i, 15 - i, false, range);
371*09537850SAkhilesh Sanikop }
372*09537850SAkhilesh Sanikop }
373*09537850SAkhilesh Sanikop // stage 27.
374*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
375*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
376*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
377*09537850SAkhilesh Sanikop }
378*09537850SAkhilesh Sanikop }
379*09537850SAkhilesh Sanikop // stage 28.
380*09537850SAkhilesh Sanikop if (size_log2 == 6) {
381*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
382*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i + 32, 47 - i, false, range);
383*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i + 48, 63 - i, true, range);
384*09537850SAkhilesh Sanikop }
385*09537850SAkhilesh Sanikop }
386*09537850SAkhilesh Sanikop // stage 29.
387*09537850SAkhilesh Sanikop if (size_log2 >= 5) {
388*09537850SAkhilesh Sanikop for (int i = 0; i < 16; ++i) {
389*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i, 31 - i, false, range);
390*09537850SAkhilesh Sanikop }
391*09537850SAkhilesh Sanikop }
392*09537850SAkhilesh Sanikop // stage 30.
393*09537850SAkhilesh Sanikop if (size_log2 == 6) {
394*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
395*09537850SAkhilesh Sanikop ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
396*09537850SAkhilesh Sanikop }
397*09537850SAkhilesh Sanikop }
398*09537850SAkhilesh Sanikop // stage 31.
399*09537850SAkhilesh Sanikop if (size_log2 == 6) {
400*09537850SAkhilesh Sanikop for (int i = 0; i < 32; ++i) {
401*09537850SAkhilesh Sanikop HadamardRotation_C(dst, i, 63 - i, false, range);
402*09537850SAkhilesh Sanikop }
403*09537850SAkhilesh Sanikop }
404*09537850SAkhilesh Sanikop }
405*09537850SAkhilesh Sanikop
406*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)407*09537850SAkhilesh Sanikop void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
408*09537850SAkhilesh Sanikop bool is_row) {
409*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
410*09537850SAkhilesh Sanikop
411*09537850SAkhilesh Sanikop if (is_row && should_round) {
412*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
413*09537850SAkhilesh Sanikop }
414*09537850SAkhilesh Sanikop
415*09537850SAkhilesh Sanikop ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
416*09537850SAkhilesh Sanikop
417*09537850SAkhilesh Sanikop if (is_row && row_shift > 0) {
418*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(dst[0], row_shift);
419*09537850SAkhilesh Sanikop }
420*09537850SAkhilesh Sanikop
421*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 1);
422*09537850SAkhilesh Sanikop
423*09537850SAkhilesh Sanikop const int size = 1 << size_log2;
424*09537850SAkhilesh Sanikop for (int i = 1; i < size; ++i) {
425*09537850SAkhilesh Sanikop dst[i] = dst[0];
426*09537850SAkhilesh Sanikop }
427*09537850SAkhilesh Sanikop }
428*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// Asymmetric Discrete Sine Transforms (ADST).

/*
 * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
 * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
 */
436*09537850SAkhilesh Sanikop template <typename Residual>
Adst4_C(void * dest,int8_t range)437*09537850SAkhilesh Sanikop void Adst4_C(void* dest, int8_t range) {
438*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
439*09537850SAkhilesh Sanikop if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
440*09537850SAkhilesh Sanikop return;
441*09537850SAkhilesh Sanikop }
442*09537850SAkhilesh Sanikop
443*09537850SAkhilesh Sanikop // stage 1.
444*09537850SAkhilesh Sanikop // Section 7.13.2.6: It is a requirement of bitstream conformance that all
445*09537850SAkhilesh Sanikop // values stored in the s and x arrays by this process are representable by
446*09537850SAkhilesh Sanikop // a signed integer using range + 12 bits of precision.
447*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed INT32_MAX with invalid 12-bit
448*09537850SAkhilesh Sanikop // content. For simplicity in unoptimized code, int64_t is used for both 10 &
449*09537850SAkhilesh Sanikop // 12-bit. SIMD implementations can allow these to rollover on platforms
450*09537850SAkhilesh Sanikop // where this has defined behavior.
451*09537850SAkhilesh Sanikop using Intermediate =
452*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
453*09537850SAkhilesh Sanikop Intermediate s[7];
454*09537850SAkhilesh Sanikop s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
455*09537850SAkhilesh Sanikop s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
456*09537850SAkhilesh Sanikop s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
457*09537850SAkhilesh Sanikop s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
458*09537850SAkhilesh Sanikop s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
459*09537850SAkhilesh Sanikop s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
460*09537850SAkhilesh Sanikop s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
461*09537850SAkhilesh Sanikop // stage 2.
462*09537850SAkhilesh Sanikop // Section 7.13.2.6: It is a requirement of bitstream conformance that
463*09537850SAkhilesh Sanikop // values stored in the variable a7 by this process are representable by a
464*09537850SAkhilesh Sanikop // signed integer using range + 1 bits of precision.
465*09537850SAkhilesh Sanikop const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
466*09537850SAkhilesh Sanikop // Section 7.13.2.6: It is a requirement of bitstream conformance that
467*09537850SAkhilesh Sanikop // values stored in the variable b7 by this process are representable by a
468*09537850SAkhilesh Sanikop // signed integer using |range| bits of precision.
469*09537850SAkhilesh Sanikop const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
470*09537850SAkhilesh Sanikop // stage 3.
471*09537850SAkhilesh Sanikop s[0] = RangeCheckValue(s[0] + s[3], range + 12);
472*09537850SAkhilesh Sanikop s[1] = RangeCheckValue(s[1] - s[4], range + 12);
473*09537850SAkhilesh Sanikop s[3] = s[2];
474*09537850SAkhilesh Sanikop // With range checking enabled b7 would be trapped above. This prevents an
475*09537850SAkhilesh Sanikop // integer sanitizer warning. In SIMD implementations the multiply can be
476*09537850SAkhilesh Sanikop // allowed to rollover on platforms where this has defined behavior.
477*09537850SAkhilesh Sanikop const auto adst2_b7 = static_cast<Intermediate>(kAdst4Multiplier[2]) * b7;
478*09537850SAkhilesh Sanikop s[2] = RangeCheckValue(adst2_b7, range + 12);
479*09537850SAkhilesh Sanikop // stage 4.
480*09537850SAkhilesh Sanikop s[0] = RangeCheckValue(s[0] + s[5], range + 12);
481*09537850SAkhilesh Sanikop s[1] = RangeCheckValue(s[1] - s[6], range + 12);
482*09537850SAkhilesh Sanikop // stages 5 and 6.
483*09537850SAkhilesh Sanikop const Intermediate x0 = RangeCheckValue(s[0] + s[3], range + 12);
484*09537850SAkhilesh Sanikop const Intermediate x1 = RangeCheckValue(s[1] + s[3], range + 12);
485*09537850SAkhilesh Sanikop Intermediate x3 = RangeCheckValue(s[0] + s[1], range + 12);
486*09537850SAkhilesh Sanikop x3 = RangeCheckValue(x3 - s[3], range + 12);
487*09537850SAkhilesh Sanikop auto dst_0 = static_cast<int32_t>(RightShiftWithRounding(x0, 12));
488*09537850SAkhilesh Sanikop auto dst_1 = static_cast<int32_t>(RightShiftWithRounding(x1, 12));
489*09537850SAkhilesh Sanikop auto dst_2 = static_cast<int32_t>(RightShiftWithRounding(s[2], 12));
490*09537850SAkhilesh Sanikop auto dst_3 = static_cast<int32_t>(RightShiftWithRounding(x3, 12));
491*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
492*09537850SAkhilesh Sanikop // If the first argument to RightShiftWithRounding(..., 12) is only
493*09537850SAkhilesh Sanikop // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
494*09537850SAkhilesh Sanikop // in RightShiftWithRounding(..., 12) will cause the function to return
495*09537850SAkhilesh Sanikop // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
496*09537850SAkhilesh Sanikop dst_0 -= (dst_0 == 0x8000);
497*09537850SAkhilesh Sanikop dst_1 -= (dst_1 == 0x8000);
498*09537850SAkhilesh Sanikop dst_3 -= (dst_3 == 0x8000);
499*09537850SAkhilesh Sanikop }
500*09537850SAkhilesh Sanikop dst[0] = dst_0;
501*09537850SAkhilesh Sanikop dst[1] = dst_1;
502*09537850SAkhilesh Sanikop dst[2] = dst_2;
503*09537850SAkhilesh Sanikop dst[3] = dst_3;
504*09537850SAkhilesh Sanikop }
505*09537850SAkhilesh Sanikop
506*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)507*09537850SAkhilesh Sanikop void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
508*09537850SAkhilesh Sanikop bool is_row) {
509*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
510*09537850SAkhilesh Sanikop
511*09537850SAkhilesh Sanikop if (is_row && should_round) {
512*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
513*09537850SAkhilesh Sanikop }
514*09537850SAkhilesh Sanikop
515*09537850SAkhilesh Sanikop // stage 1.
516*09537850SAkhilesh Sanikop // Section 7.13.2.6: It is a requirement of bitstream conformance that all
517*09537850SAkhilesh Sanikop // values stored in the s and x arrays by this process are representable by
518*09537850SAkhilesh Sanikop // a signed integer using range + 12 bits of precision.
519*09537850SAkhilesh Sanikop int32_t s[3];
520*09537850SAkhilesh Sanikop s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
521*09537850SAkhilesh Sanikop s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
522*09537850SAkhilesh Sanikop s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
523*09537850SAkhilesh Sanikop // stage 3.
524*09537850SAkhilesh Sanikop // stage 4.
525*09537850SAkhilesh Sanikop // stages 5 and 6.
526*09537850SAkhilesh Sanikop int32_t dst_0 = RightShiftWithRounding(s[0], 12);
527*09537850SAkhilesh Sanikop int32_t dst_1 = RightShiftWithRounding(s[1], 12);
528*09537850SAkhilesh Sanikop int32_t dst_2 = RightShiftWithRounding(s[2], 12);
529*09537850SAkhilesh Sanikop int32_t dst_3 =
530*09537850SAkhilesh Sanikop RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
531*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
532*09537850SAkhilesh Sanikop // If the first argument to RightShiftWithRounding(..., 12) is only
533*09537850SAkhilesh Sanikop // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
534*09537850SAkhilesh Sanikop // in RightShiftWithRounding(..., 12) will cause the function to return
535*09537850SAkhilesh Sanikop // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
536*09537850SAkhilesh Sanikop dst_0 -= (dst_0 == 0x8000);
537*09537850SAkhilesh Sanikop dst_1 -= (dst_1 == 0x8000);
538*09537850SAkhilesh Sanikop dst_3 -= (dst_3 == 0x8000);
539*09537850SAkhilesh Sanikop }
540*09537850SAkhilesh Sanikop dst[0] = dst_0;
541*09537850SAkhilesh Sanikop dst[1] = dst_1;
542*09537850SAkhilesh Sanikop dst[2] = dst_2;
543*09537850SAkhilesh Sanikop dst[3] = dst_3;
544*09537850SAkhilesh Sanikop
545*09537850SAkhilesh Sanikop const int size = 4;
546*09537850SAkhilesh Sanikop if (is_row && row_shift > 0) {
547*09537850SAkhilesh Sanikop for (int j = 0; j < size; ++j) {
548*09537850SAkhilesh Sanikop dst[j] = RightShiftWithRounding(dst[j], row_shift);
549*09537850SAkhilesh Sanikop }
550*09537850SAkhilesh Sanikop }
551*09537850SAkhilesh Sanikop
552*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 4);
553*09537850SAkhilesh Sanikop }
554*09537850SAkhilesh Sanikop
555*09537850SAkhilesh Sanikop template <typename Residual>
AdstInputPermutation(int32_t * LIBGAV1_RESTRICT const dst,const Residual * LIBGAV1_RESTRICT const src,int n)556*09537850SAkhilesh Sanikop void AdstInputPermutation(int32_t* LIBGAV1_RESTRICT const dst,
557*09537850SAkhilesh Sanikop const Residual* LIBGAV1_RESTRICT const src, int n) {
558*09537850SAkhilesh Sanikop assert(n == 8 || n == 16);
559*09537850SAkhilesh Sanikop for (int i = 0; i < n; ++i) {
560*09537850SAkhilesh Sanikop dst[i] = src[((i & 1) == 0) ? n - i - 1 : i - 1];
561*09537850SAkhilesh Sanikop }
562*09537850SAkhilesh Sanikop }
563*09537850SAkhilesh Sanikop
// Source index for each output position of the ADST output permutation.
// AdstOutputPermutation() uses entry i directly for the 16-point transform and
// the first eight entries shifted right by one for the 8-point transform.
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8,  12, 4,  //
    6, 14, 10, 2,  //
    3, 11, 15, 7,  //
    5, 13, 9,  1};
566*09537850SAkhilesh Sanikop
567*09537850SAkhilesh Sanikop template <typename Residual>
AdstOutputPermutation(Residual * LIBGAV1_RESTRICT const dst,const int32_t * LIBGAV1_RESTRICT const src,int n)568*09537850SAkhilesh Sanikop void AdstOutputPermutation(Residual* LIBGAV1_RESTRICT const dst,
569*09537850SAkhilesh Sanikop const int32_t* LIBGAV1_RESTRICT const src, int n) {
570*09537850SAkhilesh Sanikop assert(n == 8 || n == 16);
571*09537850SAkhilesh Sanikop const auto shift = static_cast<int8_t>(n == 8);
572*09537850SAkhilesh Sanikop for (int i = 0; i < n; ++i) {
573*09537850SAkhilesh Sanikop const int8_t index = kAdstOutputPermutationLookup[i] >> shift;
574*09537850SAkhilesh Sanikop int32_t dst_i = ((i & 1) == 0) ? src[index] : -src[index];
575*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
576*09537850SAkhilesh Sanikop // If i is odd and src[index] is -32768, dst_i will be 32768, which
577*09537850SAkhilesh Sanikop // cannot be represented as an int16_t.
578*09537850SAkhilesh Sanikop dst_i -= (dst_i == 0x8000);
579*09537850SAkhilesh Sanikop }
580*09537850SAkhilesh Sanikop dst[i] = dst_i;
581*09537850SAkhilesh Sanikop }
582*09537850SAkhilesh Sanikop }
583*09537850SAkhilesh Sanikop
584*09537850SAkhilesh Sanikop template <typename Residual>
Adst8_C(void * dest,int8_t range)585*09537850SAkhilesh Sanikop void Adst8_C(void* dest, int8_t range) {
586*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
587*09537850SAkhilesh Sanikop // stage 1.
588*09537850SAkhilesh Sanikop int32_t temp[8];
589*09537850SAkhilesh Sanikop AdstInputPermutation(temp, dst, 8);
590*09537850SAkhilesh Sanikop // stage 2.
591*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
592*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
593*09537850SAkhilesh Sanikop true, range);
594*09537850SAkhilesh Sanikop }
595*09537850SAkhilesh Sanikop // stage 3.
596*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
597*09537850SAkhilesh Sanikop HadamardRotation_C(temp, i, i + 4, false, range);
598*09537850SAkhilesh Sanikop }
599*09537850SAkhilesh Sanikop // stage 4.
600*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
601*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
602*09537850SAkhilesh Sanikop }
603*09537850SAkhilesh Sanikop // stage 5.
604*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
605*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
606*09537850SAkhilesh Sanikop HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
607*09537850SAkhilesh Sanikop false, range);
608*09537850SAkhilesh Sanikop }
609*09537850SAkhilesh Sanikop }
610*09537850SAkhilesh Sanikop // stage 6.
611*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
612*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
613*09537850SAkhilesh Sanikop range);
614*09537850SAkhilesh Sanikop }
615*09537850SAkhilesh Sanikop // stage 7.
616*09537850SAkhilesh Sanikop AdstOutputPermutation(dst, temp, 8);
617*09537850SAkhilesh Sanikop }
618*09537850SAkhilesh Sanikop
619*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)620*09537850SAkhilesh Sanikop void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
621*09537850SAkhilesh Sanikop bool is_row) {
622*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
623*09537850SAkhilesh Sanikop
624*09537850SAkhilesh Sanikop // stage 1.
625*09537850SAkhilesh Sanikop int32_t temp[8];
626*09537850SAkhilesh Sanikop // After the permutation, the dc value is in temp[1]. The remaining are zero.
627*09537850SAkhilesh Sanikop AdstInputPermutation(temp, dst, 8);
628*09537850SAkhilesh Sanikop
629*09537850SAkhilesh Sanikop if (is_row && should_round) {
630*09537850SAkhilesh Sanikop temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
631*09537850SAkhilesh Sanikop }
632*09537850SAkhilesh Sanikop
633*09537850SAkhilesh Sanikop // stage 2.
634*09537850SAkhilesh Sanikop ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
635*09537850SAkhilesh Sanikop
636*09537850SAkhilesh Sanikop // stage 3.
637*09537850SAkhilesh Sanikop temp[4] = temp[0];
638*09537850SAkhilesh Sanikop temp[5] = temp[1];
639*09537850SAkhilesh Sanikop
640*09537850SAkhilesh Sanikop // stage 4.
641*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, 4, 5, 48, true, range);
642*09537850SAkhilesh Sanikop
643*09537850SAkhilesh Sanikop // stage 5.
644*09537850SAkhilesh Sanikop temp[2] = temp[0];
645*09537850SAkhilesh Sanikop temp[3] = temp[1];
646*09537850SAkhilesh Sanikop temp[6] = temp[4];
647*09537850SAkhilesh Sanikop temp[7] = temp[5];
648*09537850SAkhilesh Sanikop
649*09537850SAkhilesh Sanikop // stage 6.
650*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, 2, 3, 32, true, range);
651*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, 6, 7, 32, true, range);
652*09537850SAkhilesh Sanikop
653*09537850SAkhilesh Sanikop // stage 7.
654*09537850SAkhilesh Sanikop AdstOutputPermutation(dst, temp, 8);
655*09537850SAkhilesh Sanikop
656*09537850SAkhilesh Sanikop const int size = 8;
657*09537850SAkhilesh Sanikop if (is_row && row_shift > 0) {
658*09537850SAkhilesh Sanikop for (int j = 0; j < size; ++j) {
659*09537850SAkhilesh Sanikop dst[j] = RightShiftWithRounding(dst[j], row_shift);
660*09537850SAkhilesh Sanikop }
661*09537850SAkhilesh Sanikop }
662*09537850SAkhilesh Sanikop
663*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 8);
664*09537850SAkhilesh Sanikop }
665*09537850SAkhilesh Sanikop
666*09537850SAkhilesh Sanikop template <typename Residual>
Adst16_C(void * dest,int8_t range)667*09537850SAkhilesh Sanikop void Adst16_C(void* dest, int8_t range) {
668*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
669*09537850SAkhilesh Sanikop // stage 1.
670*09537850SAkhilesh Sanikop int32_t temp[16];
671*09537850SAkhilesh Sanikop AdstInputPermutation(temp, dst, 16);
672*09537850SAkhilesh Sanikop // stage 2.
673*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
674*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
675*09537850SAkhilesh Sanikop true, range);
676*09537850SAkhilesh Sanikop }
677*09537850SAkhilesh Sanikop // stage 3.
678*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
679*09537850SAkhilesh Sanikop HadamardRotation_C(temp, i, i + 8, false, range);
680*09537850SAkhilesh Sanikop }
681*09537850SAkhilesh Sanikop // stage 4.
682*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
683*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
684*09537850SAkhilesh Sanikop 56 - 32 * i, true, range);
685*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
686*09537850SAkhilesh Sanikop 8 + 32 * i, true, range);
687*09537850SAkhilesh Sanikop }
688*09537850SAkhilesh Sanikop // stage 5.
689*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
690*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
691*09537850SAkhilesh Sanikop HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
692*09537850SAkhilesh Sanikop false, range);
693*09537850SAkhilesh Sanikop }
694*09537850SAkhilesh Sanikop }
695*09537850SAkhilesh Sanikop // stage 6.
696*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
697*09537850SAkhilesh Sanikop for (int j = 0; j < 2; ++j) {
698*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
699*09537850SAkhilesh Sanikop i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
700*09537850SAkhilesh Sanikop }
701*09537850SAkhilesh Sanikop }
702*09537850SAkhilesh Sanikop // stage 7.
703*09537850SAkhilesh Sanikop for (int i = 0; i < 2; ++i) {
704*09537850SAkhilesh Sanikop for (int j = 0; j < 4; ++j) {
705*09537850SAkhilesh Sanikop HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
706*09537850SAkhilesh Sanikop false, range);
707*09537850SAkhilesh Sanikop }
708*09537850SAkhilesh Sanikop }
709*09537850SAkhilesh Sanikop // stage 8.
710*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
711*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
712*09537850SAkhilesh Sanikop range);
713*09537850SAkhilesh Sanikop }
714*09537850SAkhilesh Sanikop // stage 9.
715*09537850SAkhilesh Sanikop AdstOutputPermutation(dst, temp, 16);
716*09537850SAkhilesh Sanikop }
717*09537850SAkhilesh Sanikop
718*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)719*09537850SAkhilesh Sanikop void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
720*09537850SAkhilesh Sanikop bool is_row) {
721*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
722*09537850SAkhilesh Sanikop
723*09537850SAkhilesh Sanikop // stage 1.
724*09537850SAkhilesh Sanikop int32_t temp[16];
725*09537850SAkhilesh Sanikop // After the permutation, the dc value is in temp[1]. The remaining are zero.
726*09537850SAkhilesh Sanikop AdstInputPermutation(temp, dst, 16);
727*09537850SAkhilesh Sanikop
728*09537850SAkhilesh Sanikop if (is_row && should_round) {
729*09537850SAkhilesh Sanikop temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
730*09537850SAkhilesh Sanikop }
731*09537850SAkhilesh Sanikop
732*09537850SAkhilesh Sanikop // stage 2.
733*09537850SAkhilesh Sanikop ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
734*09537850SAkhilesh Sanikop
735*09537850SAkhilesh Sanikop // stage 3.
736*09537850SAkhilesh Sanikop temp[8] = temp[0];
737*09537850SAkhilesh Sanikop temp[9] = temp[1];
738*09537850SAkhilesh Sanikop
739*09537850SAkhilesh Sanikop // stage 4.
740*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, 8, 9, 56, true, range);
741*09537850SAkhilesh Sanikop
742*09537850SAkhilesh Sanikop // stage 5.
743*09537850SAkhilesh Sanikop temp[4] = temp[0];
744*09537850SAkhilesh Sanikop temp[5] = temp[1];
745*09537850SAkhilesh Sanikop temp[12] = temp[8];
746*09537850SAkhilesh Sanikop temp[13] = temp[9];
747*09537850SAkhilesh Sanikop
748*09537850SAkhilesh Sanikop // stage 6.
749*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, 4, 5, 48, true, range);
750*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, 12, 13, 48, true, range);
751*09537850SAkhilesh Sanikop
752*09537850SAkhilesh Sanikop // stage 7.
753*09537850SAkhilesh Sanikop temp[2] = temp[0];
754*09537850SAkhilesh Sanikop temp[3] = temp[1];
755*09537850SAkhilesh Sanikop temp[10] = temp[8];
756*09537850SAkhilesh Sanikop temp[11] = temp[9];
757*09537850SAkhilesh Sanikop
758*09537850SAkhilesh Sanikop temp[6] = temp[4];
759*09537850SAkhilesh Sanikop temp[7] = temp[5];
760*09537850SAkhilesh Sanikop temp[14] = temp[12];
761*09537850SAkhilesh Sanikop temp[15] = temp[13];
762*09537850SAkhilesh Sanikop
763*09537850SAkhilesh Sanikop // stage 8.
764*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
765*09537850SAkhilesh Sanikop ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
766*09537850SAkhilesh Sanikop range);
767*09537850SAkhilesh Sanikop }
768*09537850SAkhilesh Sanikop
769*09537850SAkhilesh Sanikop // stage 9.
770*09537850SAkhilesh Sanikop AdstOutputPermutation(dst, temp, 16);
771*09537850SAkhilesh Sanikop
772*09537850SAkhilesh Sanikop const int size = 16;
773*09537850SAkhilesh Sanikop if (is_row && row_shift > 0) {
774*09537850SAkhilesh Sanikop for (int j = 0; j < size; ++j) {
775*09537850SAkhilesh Sanikop dst[j] = RightShiftWithRounding(dst[j], row_shift);
776*09537850SAkhilesh Sanikop }
777*09537850SAkhilesh Sanikop }
778*09537850SAkhilesh Sanikop
779*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 16);
780*09537850SAkhilesh Sanikop }
781*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// Identity Transforms.
//
// In the spec, the inverse identity transform is followed by a Round2() call:
//   The row transforms with i = 0..(h-1) are applied as follows:
//     ...
//     * Otherwise, invoke the inverse identity transform process specified in
//       section 7.13.2.15 with the input variable n equal to log2W.
//     * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
//       for j = 0..(w-1).
//   ...
//   The column transforms with j = 0..(w-1) are applied as follows:
//     ...
//     * Otherwise, invoke the inverse identity transform process specified in
//       section 7.13.2.15 with the input variable n equal to log2H.
//     * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
//       for i = 0..(h-1).
//
// Therefore, we define the identity transform functions to perform both the
// inverse identity transform and the Round2() call. This has two advantages:
// 1. The outputs of the inverse identity transform do not need to be stored
//    in the Residual array. They can be stored in int32_t local variables,
//    which have a larger range if Residual is an int16_t array.
// 2. The inverse identity transform and the Round2() call can be jointly
//    optimized.
//
// The identity transform functions have the following prototype:
//   void Identity_C(void* dest, int8_t shift);
//
// The |shift| parameter is the amount of shift for the Round2() call. For row
// transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
// 4. Therefore, an identity transform function can detect whether it is being
// invoked as a row transform or a column transform by checking whether |shift|
// is equal to 4. Note that the C implementations below are split into separate
// *Row_C and *Column_C functions; the column variants ignore their |shift|
// argument and use kTransformColumnShift instead.
//
// Input Range
//
// The inputs of row transforms, stored in the 2D array Dequant, are
// representable by a signed integer using 8 + BitDepth bits of precision:
//   f. Dequant[ i ][ j ] is set equal to
//   Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
//
// The inputs of column transforms are representable by a signed integer using
// Max( BitDepth + 6, 16 ) bits of precision:
//   Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
//   ...
//   Between the row and column transforms, Residual[ i ][ j ] is set equal to
//   Clip3( - ( 1 << ( colClampRange - 1 ) ),
//          ( 1 << (colClampRange - 1 ) ) - 1,
//          Residual[ i ][ j ] )
//   for i = 0..(h-1), for j = 0..(w-1).
//
// Output Range
//
// The outputs of row transforms are representable by a signed integer using
// 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
// of the multiplicative factor of inverse identity transforms minus the
// smallest row shift is an increase of at most one bit.
//
//   Transform | Multiplicative factor | Smallest row | Net increase
//   width     | (in bits)             | shift        | in bits
//   ---------------------------------------------------------------
//   4         | sqrt(2)   (0.5 bits)  | 0            | +0.5
//   8         | 2         (1 bit)     | 0            | +1
//   16        | 2*sqrt(2) (1.5 bits)  | 1            | +0.5
//   32        | 4         (2 bits)    | 1            | +1
//
// If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
// clip the outputs (which have 17 bits of precision) to the range of int16_t
// before storing them in the Residual array. This clipping happens to be the
// same as the required clipping after the row transform (see the spec quoted
// above), so we remain compliant with the spec. (In this case,
// TransformLoop_C() skips clipping the outputs of row transforms to avoid
// duplication of effort.)
//
// The outputs of column transforms are representable by a signed integer using
// Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
// because the multiplicative factor of inverse identity transforms is at most
// 4 (2 bits) and |shift| is always 4.
861*09537850SAkhilesh Sanikop
862*09537850SAkhilesh Sanikop template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)863*09537850SAkhilesh Sanikop void Identity4Row_C(void* dest, int8_t shift) {
864*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
865*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
866*09537850SAkhilesh Sanikop using Intermediate =
867*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
868*09537850SAkhilesh Sanikop assert(shift == 0 || shift == 1);
869*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
870*09537850SAkhilesh Sanikop // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
871*09537850SAkhilesh Sanikop // should be (1 + (1 << 1)) << 11. The following expression works for both
872*09537850SAkhilesh Sanikop // values of |shift|.
873*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (shift << 1)) << 11;
874*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
875*09537850SAkhilesh Sanikop const auto intermediate =
876*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[i]) * kIdentity4Multiplier;
877*09537850SAkhilesh Sanikop int32_t dst_i =
878*09537850SAkhilesh Sanikop static_cast<int32_t>((intermediate + rounding) >> (12 + shift));
879*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
880*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
881*09537850SAkhilesh Sanikop }
882*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>(dst_i);
883*09537850SAkhilesh Sanikop }
884*09537850SAkhilesh Sanikop }
885*09537850SAkhilesh Sanikop
886*09537850SAkhilesh Sanikop template <typename Residual>
Identity4Column_C(void * dest,int8_t)887*09537850SAkhilesh Sanikop void Identity4Column_C(void* dest, int8_t /*shift*/) {
888*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
889*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
890*09537850SAkhilesh Sanikop for (int i = 0; i < 4; ++i) {
891*09537850SAkhilesh Sanikop // The intermediate value here will have to fit into an int32_t for it to be
892*09537850SAkhilesh Sanikop // bitstream conformant. The multiplication is promoted to int32_t by
893*09537850SAkhilesh Sanikop // defining kIdentity4Multiplier as int32_t.
894*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
895*09537850SAkhilesh Sanikop (12 + kTransformColumnShift));
896*09537850SAkhilesh Sanikop }
897*09537850SAkhilesh Sanikop }
898*09537850SAkhilesh Sanikop
899*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)900*09537850SAkhilesh Sanikop void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
901*09537850SAkhilesh Sanikop int row_shift, bool is_row) {
902*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
903*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
904*09537850SAkhilesh Sanikop using Intermediate =
905*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
906*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
907*09537850SAkhilesh Sanikop
908*09537850SAkhilesh Sanikop if (is_row) {
909*09537850SAkhilesh Sanikop if (should_round) {
910*09537850SAkhilesh Sanikop const auto intermediate =
911*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
912*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(intermediate, 12);
913*09537850SAkhilesh Sanikop }
914*09537850SAkhilesh Sanikop
915*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (row_shift << 1)) << 11;
916*09537850SAkhilesh Sanikop const auto intermediate =
917*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[0]) * kIdentity4Multiplier;
918*09537850SAkhilesh Sanikop int32_t dst_i =
919*09537850SAkhilesh Sanikop static_cast<int32_t>((intermediate + rounding) >> (12 + row_shift));
920*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
921*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
922*09537850SAkhilesh Sanikop }
923*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>(dst_i);
924*09537850SAkhilesh Sanikop
925*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 1);
926*09537850SAkhilesh Sanikop return;
927*09537850SAkhilesh Sanikop }
928*09537850SAkhilesh Sanikop
929*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
930*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
931*09537850SAkhilesh Sanikop (12 + kTransformColumnShift));
932*09537850SAkhilesh Sanikop }
933*09537850SAkhilesh Sanikop
934*09537850SAkhilesh Sanikop template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)935*09537850SAkhilesh Sanikop void Identity8Row_C(void* dest, int8_t shift) {
936*09537850SAkhilesh Sanikop assert(shift == 0 || shift == 1 || shift == 2);
937*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
938*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
939*09537850SAkhilesh Sanikop int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
940*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
941*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
942*09537850SAkhilesh Sanikop }
943*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>(dst_i);
944*09537850SAkhilesh Sanikop }
945*09537850SAkhilesh Sanikop }
946*09537850SAkhilesh Sanikop
947*09537850SAkhilesh Sanikop template <typename Residual>
Identity8Column_C(void * dest,int8_t)948*09537850SAkhilesh Sanikop void Identity8Column_C(void* dest, int8_t /*shift*/) {
949*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
950*09537850SAkhilesh Sanikop for (int i = 0; i < 8; ++i) {
951*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>(
952*09537850SAkhilesh Sanikop RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
953*09537850SAkhilesh Sanikop }
954*09537850SAkhilesh Sanikop }
955*09537850SAkhilesh Sanikop
956*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)957*09537850SAkhilesh Sanikop void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
958*09537850SAkhilesh Sanikop int row_shift, bool is_row) {
959*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
960*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
961*09537850SAkhilesh Sanikop using Intermediate =
962*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
963*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
964*09537850SAkhilesh Sanikop
965*09537850SAkhilesh Sanikop if (is_row) {
966*09537850SAkhilesh Sanikop if (should_round) {
967*09537850SAkhilesh Sanikop const auto intermediate =
968*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
969*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(intermediate, 12);
970*09537850SAkhilesh Sanikop }
971*09537850SAkhilesh Sanikop
972*09537850SAkhilesh Sanikop int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
973*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
974*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
975*09537850SAkhilesh Sanikop }
976*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>(dst_i);
977*09537850SAkhilesh Sanikop
978*09537850SAkhilesh Sanikop // If Residual is int16_t (which implies bitdepth is 8), we don't need to
979*09537850SAkhilesh Sanikop // clip residual[i][j] to 16 bits.
980*09537850SAkhilesh Sanikop if (sizeof(Residual) > 2) {
981*09537850SAkhilesh Sanikop const Residual intermediate_clamp_max =
982*09537850SAkhilesh Sanikop (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
983*09537850SAkhilesh Sanikop const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
984*09537850SAkhilesh Sanikop dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
985*09537850SAkhilesh Sanikop }
986*09537850SAkhilesh Sanikop return;
987*09537850SAkhilesh Sanikop }
988*09537850SAkhilesh Sanikop
989*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>(
990*09537850SAkhilesh Sanikop RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
991*09537850SAkhilesh Sanikop }
992*09537850SAkhilesh Sanikop
993*09537850SAkhilesh Sanikop template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)994*09537850SAkhilesh Sanikop void Identity16Row_C(void* dest, int8_t shift) {
995*09537850SAkhilesh Sanikop assert(shift == 1 || shift == 2);
996*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
997*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
998*09537850SAkhilesh Sanikop using Intermediate =
999*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1000*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1001*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (1 << shift)) << 11;
1002*09537850SAkhilesh Sanikop for (int i = 0; i < 16; ++i) {
1003*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
1004*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for all cases.
1005*09537850SAkhilesh Sanikop const auto intermediate =
1006*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[i]) * kIdentity16Multiplier;
1007*09537850SAkhilesh Sanikop int32_t dst_i =
1008*09537850SAkhilesh Sanikop static_cast<int32_t>((intermediate + rounding) >> (12 + shift));
1009*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
1010*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1011*09537850SAkhilesh Sanikop }
1012*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>(dst_i);
1013*09537850SAkhilesh Sanikop }
1014*09537850SAkhilesh Sanikop }
1015*09537850SAkhilesh Sanikop
1016*09537850SAkhilesh Sanikop template <typename Residual>
Identity16Column_C(void * dest,int8_t)1017*09537850SAkhilesh Sanikop void Identity16Column_C(void* dest, int8_t /*shift*/) {
1018*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1019*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1020*09537850SAkhilesh Sanikop for (int i = 0; i < 16; ++i) {
1021*09537850SAkhilesh Sanikop // The intermediate value here will have to fit into an int32_t for it to be
1022*09537850SAkhilesh Sanikop // bitstream conformant. The multiplication is promoted to int32_t by
1023*09537850SAkhilesh Sanikop // defining kIdentity16Multiplier as int32_t.
1024*09537850SAkhilesh Sanikop dst[i] =
1025*09537850SAkhilesh Sanikop static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
1026*09537850SAkhilesh Sanikop (12 + kTransformColumnShift));
1027*09537850SAkhilesh Sanikop }
1028*09537850SAkhilesh Sanikop }
1029*09537850SAkhilesh Sanikop
1030*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1031*09537850SAkhilesh Sanikop void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1032*09537850SAkhilesh Sanikop int row_shift, bool is_row) {
1033*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
1034*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
1035*09537850SAkhilesh Sanikop using Intermediate =
1036*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1037*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1038*09537850SAkhilesh Sanikop
1039*09537850SAkhilesh Sanikop if (is_row) {
1040*09537850SAkhilesh Sanikop if (should_round) {
1041*09537850SAkhilesh Sanikop const auto intermediate =
1042*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
1043*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(intermediate, 12);
1044*09537850SAkhilesh Sanikop }
1045*09537850SAkhilesh Sanikop
1046*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (1 << row_shift)) << 11;
1047*09537850SAkhilesh Sanikop const auto intermediate =
1048*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[0]) * kIdentity16Multiplier;
1049*09537850SAkhilesh Sanikop int32_t dst_i =
1050*09537850SAkhilesh Sanikop static_cast<int32_t>((intermediate + rounding) >> (12 + row_shift));
1051*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
1052*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1053*09537850SAkhilesh Sanikop }
1054*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>(dst_i);
1055*09537850SAkhilesh Sanikop
1056*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 1);
1057*09537850SAkhilesh Sanikop return;
1058*09537850SAkhilesh Sanikop }
1059*09537850SAkhilesh Sanikop
1060*09537850SAkhilesh Sanikop const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1061*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1062*09537850SAkhilesh Sanikop (12 + kTransformColumnShift));
1063*09537850SAkhilesh Sanikop }
1064*09537850SAkhilesh Sanikop
1065*09537850SAkhilesh Sanikop template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1066*09537850SAkhilesh Sanikop void Identity32Row_C(void* dest, int8_t shift) {
1067*09537850SAkhilesh Sanikop assert(shift == 1 || shift == 2);
1068*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1069*09537850SAkhilesh Sanikop for (int i = 0; i < 32; ++i) {
1070*09537850SAkhilesh Sanikop int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1071*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
1072*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1073*09537850SAkhilesh Sanikop }
1074*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>(dst_i);
1075*09537850SAkhilesh Sanikop }
1076*09537850SAkhilesh Sanikop }
1077*09537850SAkhilesh Sanikop
1078*09537850SAkhilesh Sanikop template <typename Residual>
Identity32Column_C(void * dest,int8_t)1079*09537850SAkhilesh Sanikop void Identity32Column_C(void* dest, int8_t /*shift*/) {
1080*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1081*09537850SAkhilesh Sanikop for (int i = 0; i < 32; ++i) {
1082*09537850SAkhilesh Sanikop dst[i] = static_cast<Residual>(
1083*09537850SAkhilesh Sanikop RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1084*09537850SAkhilesh Sanikop }
1085*09537850SAkhilesh Sanikop }
1086*09537850SAkhilesh Sanikop
1087*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1088*09537850SAkhilesh Sanikop void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1089*09537850SAkhilesh Sanikop int row_shift, bool is_row) {
1090*09537850SAkhilesh Sanikop // Note the intermediate value can only exceed 32 bits with 12-bit content.
1091*09537850SAkhilesh Sanikop // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
1092*09537850SAkhilesh Sanikop using Intermediate =
1093*09537850SAkhilesh Sanikop typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1094*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1095*09537850SAkhilesh Sanikop
1096*09537850SAkhilesh Sanikop if (is_row) {
1097*09537850SAkhilesh Sanikop if (should_round) {
1098*09537850SAkhilesh Sanikop const auto intermediate =
1099*09537850SAkhilesh Sanikop static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
1100*09537850SAkhilesh Sanikop dst[0] = RightShiftWithRounding(intermediate, 12);
1101*09537850SAkhilesh Sanikop }
1102*09537850SAkhilesh Sanikop
1103*09537850SAkhilesh Sanikop int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1104*09537850SAkhilesh Sanikop if (sizeof(Residual) == 2) {
1105*09537850SAkhilesh Sanikop dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1106*09537850SAkhilesh Sanikop }
1107*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>(dst_i);
1108*09537850SAkhilesh Sanikop
1109*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 1);
1110*09537850SAkhilesh Sanikop return;
1111*09537850SAkhilesh Sanikop }
1112*09537850SAkhilesh Sanikop
1113*09537850SAkhilesh Sanikop dst[0] = static_cast<Residual>(
1114*09537850SAkhilesh Sanikop RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1115*09537850SAkhilesh Sanikop }
1116*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// Walsh Hadamard Transform.
1119*09537850SAkhilesh Sanikop
// 4-point inverse Walsh Hadamard transform (unoptimized C version).
//
// Rewrites the 4 values at |dest| in place. Every input is first shifted
// right by |shift|; the shifted inputs are then combined with additions,
// subtractions and a single halving step.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  auto* const dst = static_cast<Residual*>(dest);
  Residual temp[4];
  // Note the permuted load order: dst[1] -> temp[2], dst[2] -> temp[3],
  // dst[3] -> temp[1].
  temp[0] = dst[0] >> shift;
  temp[2] = dst[1] >> shift;
  temp[3] = dst[2] >> shift;
  temp[1] = dst[3] >> shift;
  temp[0] += temp[2];
  temp[3] -= temp[1];
  // This signed right shift must be an arithmetic shift.
  const Residual e = (temp[0] - temp[3]) >> 1;
  dst[1] = e - temp[1];
  dst[2] = e - temp[2];
  // dst[0] and dst[3] depend on the freshly written dst[1] and dst[2], so the
  // order of these four stores matters.
  dst[0] = temp[0] - dst[1];
  dst[3] = temp[3] + dst[2];
}
1137*09537850SAkhilesh Sanikop
1138*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1139*09537850SAkhilesh Sanikop void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1140*09537850SAkhilesh Sanikop int /*row_shift*/, bool /*is_row*/) {
1141*09537850SAkhilesh Sanikop auto* const dst = static_cast<Residual*>(dest);
1142*09537850SAkhilesh Sanikop const int shift = range;
1143*09537850SAkhilesh Sanikop
1144*09537850SAkhilesh Sanikop Residual temp = dst[0] >> shift;
1145*09537850SAkhilesh Sanikop // This signed right shift must be an arithmetic shift.
1146*09537850SAkhilesh Sanikop Residual e = temp >> 1;
1147*09537850SAkhilesh Sanikop dst[0] = temp - e;
1148*09537850SAkhilesh Sanikop dst[1] = e;
1149*09537850SAkhilesh Sanikop dst[2] = e;
1150*09537850SAkhilesh Sanikop dst[3] = e;
1151*09537850SAkhilesh Sanikop
1152*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(dst, 4);
1153*09537850SAkhilesh Sanikop }
1154*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// row/column transform loop
1157*09537850SAkhilesh Sanikop
// Signature of a general 1D inverse transform. |dst| is transformed in place.
// TransformLoop_C passes the row/column shift as |range| for identity
// transforms and a clamping range for the other transform kinds.
using InverseTransform1dFunc = void (*)(void* dst, int8_t range);

// Signature of the 1D inverse transform that TransformLoop_C calls when only
// one row needs processing. |should_round| and |row_shift| describe the row
// pass; |is_row| tells the function which pass it is running in.
using InverseTransformDcOnlyFunc = void (*)(void* dest, int8_t range,
                                            bool should_round, int row_shift,
                                            bool is_row);
1162*09537850SAkhilesh Sanikop
1163*09537850SAkhilesh Sanikop template <int bitdepth, typename Residual, typename Pixel,
1164*09537850SAkhilesh Sanikop Transform1d transform1d_type,
1165*09537850SAkhilesh Sanikop InverseTransformDcOnlyFunc dconly_transform1d,
1166*09537850SAkhilesh Sanikop InverseTransform1dFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * LIBGAV1_RESTRICT src_buffer,int start_x,int start_y,void * LIBGAV1_RESTRICT dst_frame)1167*09537850SAkhilesh Sanikop void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1168*09537850SAkhilesh Sanikop int adjusted_tx_height, void* LIBGAV1_RESTRICT src_buffer,
1169*09537850SAkhilesh Sanikop int start_x, int start_y,
1170*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT dst_frame) {
1171*09537850SAkhilesh Sanikop constexpr bool lossless = transform1d_type == kTransform1dWht;
1172*09537850SAkhilesh Sanikop constexpr bool is_identity = transform1d_type == kTransform1dIdentity;
1173*09537850SAkhilesh Sanikop // The transform size of the WHT is always 4x4. Setting tx_width and
1174*09537850SAkhilesh Sanikop // tx_height to the constant 4 for the WHT speeds the code up.
1175*09537850SAkhilesh Sanikop assert(!lossless || tx_size == kTransformSize4x4);
1176*09537850SAkhilesh Sanikop const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1177*09537850SAkhilesh Sanikop const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1178*09537850SAkhilesh Sanikop const int tx_width_log2 = kTransformWidthLog2[tx_size];
1179*09537850SAkhilesh Sanikop const int tx_height_log2 = kTransformHeightLog2[tx_size];
1180*09537850SAkhilesh Sanikop auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1181*09537850SAkhilesh Sanikop
1182*09537850SAkhilesh Sanikop // Initially this points to the dequantized values. After the transforms are
1183*09537850SAkhilesh Sanikop // applied, this buffer contains the residual.
1184*09537850SAkhilesh Sanikop Array2DView<Residual> residual(tx_height, tx_width,
1185*09537850SAkhilesh Sanikop static_cast<Residual*>(src_buffer));
1186*09537850SAkhilesh Sanikop
1187*09537850SAkhilesh Sanikop if (is_row) {
1188*09537850SAkhilesh Sanikop // Row transform.
1189*09537850SAkhilesh Sanikop const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1190*09537850SAkhilesh Sanikop // This is the |range| parameter of the InverseTransform1dFunc. For lossy
1191*09537850SAkhilesh Sanikop // transforms, this will be equal to the clamping range.
1192*09537850SAkhilesh Sanikop const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1193*09537850SAkhilesh Sanikop // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1194*09537850SAkhilesh Sanikop // the input to the row transform by 1 / sqrt(2), which is approximated by
1195*09537850SAkhilesh Sanikop // the fraction 2896 / 2^12.
1196*09537850SAkhilesh Sanikop const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1197*09537850SAkhilesh Sanikop
1198*09537850SAkhilesh Sanikop if (adjusted_tx_height == 1) {
1199*09537850SAkhilesh Sanikop dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1200*09537850SAkhilesh Sanikop true);
1201*09537850SAkhilesh Sanikop return;
1202*09537850SAkhilesh Sanikop }
1203*09537850SAkhilesh Sanikop
1204*09537850SAkhilesh Sanikop // Row transforms need to be done only up to 32 because the rest of the rows
1205*09537850SAkhilesh Sanikop // are always all zero if |tx_height| is 64. Otherwise, only process the
1206*09537850SAkhilesh Sanikop // rows that have a non zero coefficients.
1207*09537850SAkhilesh Sanikop for (int i = 0; i < adjusted_tx_height; ++i) {
1208*09537850SAkhilesh Sanikop // If lossless, the transform size is 4x4, so should_round is false.
1209*09537850SAkhilesh Sanikop if (!lossless && should_round) {
1210*09537850SAkhilesh Sanikop // The last 32 values of every row are always zero if the |tx_width| is
1211*09537850SAkhilesh Sanikop // 64.
1212*09537850SAkhilesh Sanikop for (int j = 0; j < std::min(tx_width, 32); ++j) {
1213*09537850SAkhilesh Sanikop residual[i][j] = RightShiftWithRounding(
1214*09537850SAkhilesh Sanikop residual[i][j] * kTransformRowMultiplier, 12);
1215*09537850SAkhilesh Sanikop }
1216*09537850SAkhilesh Sanikop }
1217*09537850SAkhilesh Sanikop // For identity transform, |transform1d_func| also performs the
1218*09537850SAkhilesh Sanikop // Round2(T[j], rowShift) call in the spec.
1219*09537850SAkhilesh Sanikop transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1220*09537850SAkhilesh Sanikop if (!lossless && !is_identity && row_shift > 0) {
1221*09537850SAkhilesh Sanikop for (int j = 0; j < tx_width; ++j) {
1222*09537850SAkhilesh Sanikop residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1223*09537850SAkhilesh Sanikop }
1224*09537850SAkhilesh Sanikop }
1225*09537850SAkhilesh Sanikop
1226*09537850SAkhilesh Sanikop ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1227*09537850SAkhilesh Sanikop }
1228*09537850SAkhilesh Sanikop return;
1229*09537850SAkhilesh Sanikop }
1230*09537850SAkhilesh Sanikop
1231*09537850SAkhilesh Sanikop assert(!is_row);
1232*09537850SAkhilesh Sanikop constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1233*09537850SAkhilesh Sanikop // This is the |range| parameter of the InverseTransform1dFunc. For lossy
1234*09537850SAkhilesh Sanikop // transforms, this will be equal to the clamping range.
1235*09537850SAkhilesh Sanikop const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1236*09537850SAkhilesh Sanikop const bool flip_rows = transform1d_type == kTransform1dAdst &&
1237*09537850SAkhilesh Sanikop kTransformFlipRowsMask.Contains(tx_type);
1238*09537850SAkhilesh Sanikop const bool flip_columns =
1239*09537850SAkhilesh Sanikop !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1240*09537850SAkhilesh Sanikop const int min_value = 0;
1241*09537850SAkhilesh Sanikop const int max_value = (1 << bitdepth) - 1;
1242*09537850SAkhilesh Sanikop // Note: 64 is the maximum size of a 1D transform buffer (the largest
1243*09537850SAkhilesh Sanikop // transform size is kTransformSize64x64).
1244*09537850SAkhilesh Sanikop Residual tx_buffer[64];
1245*09537850SAkhilesh Sanikop for (int j = 0; j < tx_width; ++j) {
1246*09537850SAkhilesh Sanikop const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1247*09537850SAkhilesh Sanikop int i = 0;
1248*09537850SAkhilesh Sanikop do {
1249*09537850SAkhilesh Sanikop tx_buffer[i] = residual[i][flipped_j];
1250*09537850SAkhilesh Sanikop } while (++i != tx_height);
1251*09537850SAkhilesh Sanikop if (adjusted_tx_height == 1) {
1252*09537850SAkhilesh Sanikop dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1253*09537850SAkhilesh Sanikop } else {
1254*09537850SAkhilesh Sanikop // For identity transform, |transform1d_func| also performs the
1255*09537850SAkhilesh Sanikop // Round2(T[i], colShift) call in the spec.
1256*09537850SAkhilesh Sanikop transform1d_func(tx_buffer,
1257*09537850SAkhilesh Sanikop is_identity ? column_shift : column_clamp_range);
1258*09537850SAkhilesh Sanikop }
1259*09537850SAkhilesh Sanikop const int x = start_x + j;
1260*09537850SAkhilesh Sanikop for (int i = 0; i < tx_height; ++i) {
1261*09537850SAkhilesh Sanikop const int y = start_y + i;
1262*09537850SAkhilesh Sanikop const int index = flip_rows ? tx_height - i - 1 : i;
1263*09537850SAkhilesh Sanikop Residual residual_value = tx_buffer[index];
1264*09537850SAkhilesh Sanikop if (!lossless && !is_identity) {
1265*09537850SAkhilesh Sanikop residual_value = RightShiftWithRounding(residual_value, column_shift);
1266*09537850SAkhilesh Sanikop }
1267*09537850SAkhilesh Sanikop (*frame)[y][x] =
1268*09537850SAkhilesh Sanikop Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1269*09537850SAkhilesh Sanikop }
1270*09537850SAkhilesh Sanikop }
1271*09537850SAkhilesh Sanikop }
1272*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
1274*09537850SAkhilesh Sanikop
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// Fills dsp->inverse_transforms with the C implementations for the given
// |bitdepth|, |Residual| and |Pixel| types. Every populated
// [transform kind][1D size] slot receives both a kRow and a kColumn entry;
// each entry is a TransformLoop_C instantiation bound to the matching 1D
// transform and its DC-only counterpart. Only compiled when
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is set.
template <int bitdepth, typename Residual, typename Pixel>
void InitAll(Dsp* const dsp) {
  // Maximum transform size for Dct is 64. The last template argument of
  // Dct_C / DctDcOnly_C is log2 of the 1D size.
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/false>;

  // Maximum transform size for Adst is 16.
  dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/false>;

  // Maximum transform size for Identity transform is 32. The identity
  // transforms use separate row and column 1D functions.
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Row_C<Residual>, /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Column_C<Residual>, /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Row_C<Residual>, /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Column_C<Residual>, /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Row_C<Residual>, /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Column_C<Residual>, /*is_row=*/false>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Row_C<Residual>, /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Column_C<Residual>, /*is_row=*/false>;

  // Maximum transform size for Wht is 4.
  dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/true>;
  dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/false>;
}
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1391*09537850SAkhilesh Sanikop
Init8bpp()1392*09537850SAkhilesh Sanikop void Init8bpp() {
1393*09537850SAkhilesh Sanikop Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1394*09537850SAkhilesh Sanikop assert(dsp != nullptr);
1395*09537850SAkhilesh Sanikop static_cast<void>(dsp);
1396*09537850SAkhilesh Sanikop #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1397*09537850SAkhilesh Sanikop InitAll<8, int16_t, uint8_t>(dsp);
1398*09537850SAkhilesh Sanikop #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1399*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
1400*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1401*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1402*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1403*09537850SAkhilesh Sanikop /*is_row=*/true>;
1404*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1405*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1406*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1407*09537850SAkhilesh Sanikop /*is_row=*/false>;
1408*09537850SAkhilesh Sanikop #endif
1409*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
1410*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1411*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1412*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1413*09537850SAkhilesh Sanikop /*is_row=*/true>;
1414*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1415*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1416*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1417*09537850SAkhilesh Sanikop /*is_row=*/false>;
1418*09537850SAkhilesh Sanikop #endif
1419*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
1420*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1421*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1422*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1423*09537850SAkhilesh Sanikop /*is_row=*/true>;
1424*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1425*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1426*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1427*09537850SAkhilesh Sanikop /*is_row=*/false>;
1428*09537850SAkhilesh Sanikop #endif
1429*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
1430*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1431*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1432*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1433*09537850SAkhilesh Sanikop /*is_row=*/true>;
1434*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1435*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1436*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1437*09537850SAkhilesh Sanikop /*is_row=*/false>;
1438*09537850SAkhilesh Sanikop #endif
1439*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
1440*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1441*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1442*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1443*09537850SAkhilesh Sanikop /*is_row=*/true>;
1444*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1445*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1446*09537850SAkhilesh Sanikop DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1447*09537850SAkhilesh Sanikop /*is_row=*/false>;
1448*09537850SAkhilesh Sanikop #endif
1449*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
1450*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1451*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1452*09537850SAkhilesh Sanikop Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1453*09537850SAkhilesh Sanikop /*is_row=*/true>;
1454*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1455*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1456*09537850SAkhilesh Sanikop Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1457*09537850SAkhilesh Sanikop /*is_row=*/false>;
1458*09537850SAkhilesh Sanikop #endif
1459*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
1460*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1461*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1462*09537850SAkhilesh Sanikop Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1463*09537850SAkhilesh Sanikop /*is_row=*/true>;
1464*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1465*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1466*09537850SAkhilesh Sanikop Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1467*09537850SAkhilesh Sanikop /*is_row=*/false>;
1468*09537850SAkhilesh Sanikop #endif
1469*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
1470*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1471*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1472*09537850SAkhilesh Sanikop Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1473*09537850SAkhilesh Sanikop /*is_row=*/true>;
1474*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1475*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1476*09537850SAkhilesh Sanikop Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1477*09537850SAkhilesh Sanikop /*is_row=*/false>;
1478*09537850SAkhilesh Sanikop #endif
1479*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
1480*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1481*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1482*09537850SAkhilesh Sanikop Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1483*09537850SAkhilesh Sanikop /*is_row=*/true>;
1484*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1485*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1486*09537850SAkhilesh Sanikop Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1487*09537850SAkhilesh Sanikop /*is_row=*/false>;
1488*09537850SAkhilesh Sanikop #endif
1489*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
1490*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1491*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1492*09537850SAkhilesh Sanikop Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1493*09537850SAkhilesh Sanikop /*is_row=*/true>;
1494*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1495*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1496*09537850SAkhilesh Sanikop Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1497*09537850SAkhilesh Sanikop /*is_row=*/false>;
1498*09537850SAkhilesh Sanikop #endif
1499*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
1500*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1501*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1502*09537850SAkhilesh Sanikop Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1503*09537850SAkhilesh Sanikop /*is_row=*/true>;
1504*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1505*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1506*09537850SAkhilesh Sanikop Identity16DcOnly_C<8, int16_t>,
1507*09537850SAkhilesh Sanikop Identity16Column_C<int16_t>, /*is_row=*/false>;
1508*09537850SAkhilesh Sanikop #endif
1509*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
1510*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1511*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1512*09537850SAkhilesh Sanikop Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1513*09537850SAkhilesh Sanikop /*is_row=*/true>;
1514*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1515*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1516*09537850SAkhilesh Sanikop Identity32DcOnly_C<8, int16_t>,
1517*09537850SAkhilesh Sanikop Identity32Column_C<int16_t>, /*is_row=*/false>;
1518*09537850SAkhilesh Sanikop #endif
1519*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
1520*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1521*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1522*09537850SAkhilesh Sanikop Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1523*09537850SAkhilesh Sanikop /*is_row=*/true>;
1524*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1525*09537850SAkhilesh Sanikop TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1526*09537850SAkhilesh Sanikop Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1527*09537850SAkhilesh Sanikop /*is_row=*/false>;
1528*09537850SAkhilesh Sanikop #endif
1529*09537850SAkhilesh Sanikop #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1530*09537850SAkhilesh Sanikop }
1531*09537850SAkhilesh Sanikop
1532*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
// Fills the inverse transform slots of the writable 10-bit Dsp table with the
// C implementations (TransformLoop_C instantiations) from this file.
//
// When LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS evaluates to nonzero, the work is
// delegated to InitAll<10, int32_t, uint16_t>(). Otherwise each
// (transform type, transform size) pair is handled on its own: its kRow and
// kColumn entries are assigned only if the corresponding
// LIBGAV1_Dsp10bpp_Transform1dSize<N>_Transform1d<Type> macro is not defined.
// NOTE(review): presumably those macros are defined when an optimized
// implementation provides that entry - confirm against the headers that
// define them.
Init10bpp()1533*09537850SAkhilesh Sanikop void Init10bpp() {
1534*09537850SAkhilesh Sanikop Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1535*09537850SAkhilesh Sanikop assert(dsp != nullptr);
// Keeps |dsp| referenced even if every assignment below is removed by the
// preprocessor (presumably to avoid an unused-variable warning).
1536*09537850SAkhilesh Sanikop static_cast<void>(dsp);
1537*09537850SAkhilesh Sanikop #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1538*09537850SAkhilesh Sanikop InitAll<10, int32_t, uint16_t>(dsp);
1539*09537850SAkhilesh Sanikop #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// DCT, sizes 4 through 64. The integer argument passed to DctDcOnly_C and
// Dct_C runs from 2 to 6, i.e. it tracks log2 of the transform size.
1540*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct
1541*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1542*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1543*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1544*09537850SAkhilesh Sanikop /*is_row=*/true>;
1545*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1546*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1547*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1548*09537850SAkhilesh Sanikop /*is_row=*/false>;
1549*09537850SAkhilesh Sanikop #endif
1550*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct
1551*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1552*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1553*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1554*09537850SAkhilesh Sanikop /*is_row=*/true>;
1555*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1556*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1557*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1558*09537850SAkhilesh Sanikop /*is_row=*/false>;
1559*09537850SAkhilesh Sanikop #endif
1560*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct
1561*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1562*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1563*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1564*09537850SAkhilesh Sanikop /*is_row=*/true>;
1565*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1566*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1567*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1568*09537850SAkhilesh Sanikop /*is_row=*/false>;
1569*09537850SAkhilesh Sanikop #endif
1570*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct
1571*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1572*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1573*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1574*09537850SAkhilesh Sanikop /*is_row=*/true>;
1575*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1576*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1577*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1578*09537850SAkhilesh Sanikop /*is_row=*/false>;
1579*09537850SAkhilesh Sanikop #endif
1580*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct
1581*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1582*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1583*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1584*09537850SAkhilesh Sanikop /*is_row=*/true>;
1585*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1586*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1587*09537850SAkhilesh Sanikop DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1588*09537850SAkhilesh Sanikop /*is_row=*/false>;
1589*09537850SAkhilesh Sanikop #endif
// ADST, sizes 4, 8 and 16. Row and column entries use the same kernels and
// differ only in the is_row argument.
1590*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst
1591*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1592*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1593*09537850SAkhilesh Sanikop Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1594*09537850SAkhilesh Sanikop /*is_row=*/true>;
1595*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1596*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1597*09537850SAkhilesh Sanikop Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1598*09537850SAkhilesh Sanikop /*is_row=*/false>;
1599*09537850SAkhilesh Sanikop #endif
1600*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst
1601*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1602*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1603*09537850SAkhilesh Sanikop Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1604*09537850SAkhilesh Sanikop /*is_row=*/true>;
1605*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1606*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1607*09537850SAkhilesh Sanikop Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1608*09537850SAkhilesh Sanikop /*is_row=*/false>;
1609*09537850SAkhilesh Sanikop #endif
1610*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst
1611*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1612*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1613*09537850SAkhilesh Sanikop Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1614*09537850SAkhilesh Sanikop /*is_row=*/true>;
1615*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1616*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1617*09537850SAkhilesh Sanikop Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1618*09537850SAkhilesh Sanikop /*is_row=*/false>;
1619*09537850SAkhilesh Sanikop #endif
// Identity, sizes 4 through 32. Unlike DCT/ADST, rows and columns use
// different kernels (Identity<N>Row_C vs. Identity<N>Column_C) while sharing
// the same DC-only function.
1620*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity
1621*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1622*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1623*09537850SAkhilesh Sanikop Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1624*09537850SAkhilesh Sanikop /*is_row=*/true>;
1625*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1626*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1627*09537850SAkhilesh Sanikop Identity4DcOnly_C<10, int32_t>,
1628*09537850SAkhilesh Sanikop Identity4Column_C<int32_t>, /*is_row=*/false>;
1629*09537850SAkhilesh Sanikop #endif
1630*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity
1631*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1632*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1633*09537850SAkhilesh Sanikop Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1634*09537850SAkhilesh Sanikop /*is_row=*/true>;
1635*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1636*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1637*09537850SAkhilesh Sanikop Identity8DcOnly_C<10, int32_t>,
1638*09537850SAkhilesh Sanikop Identity8Column_C<int32_t>, /*is_row=*/false>;
1639*09537850SAkhilesh Sanikop #endif
1640*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity
1641*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1642*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1643*09537850SAkhilesh Sanikop Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1644*09537850SAkhilesh Sanikop /*is_row=*/true>;
1645*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1646*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1647*09537850SAkhilesh Sanikop Identity16DcOnly_C<10, int32_t>,
1648*09537850SAkhilesh Sanikop Identity16Column_C<int32_t>, /*is_row=*/false>;
1649*09537850SAkhilesh Sanikop #endif
1650*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity
1651*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1652*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1653*09537850SAkhilesh Sanikop Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1654*09537850SAkhilesh Sanikop /*is_row=*/true>;
1655*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1656*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1657*09537850SAkhilesh Sanikop Identity32DcOnly_C<10, int32_t>,
1658*09537850SAkhilesh Sanikop Identity32Column_C<int32_t>, /*is_row=*/false>;
1659*09537850SAkhilesh Sanikop #endif
// WHT is registered for size 4 only.
1660*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht
1661*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1662*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1663*09537850SAkhilesh Sanikop Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1664*09537850SAkhilesh Sanikop /*is_row=*/true>;
1665*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1666*09537850SAkhilesh Sanikop TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1667*09537850SAkhilesh Sanikop Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1668*09537850SAkhilesh Sanikop /*is_row=*/false>;
1669*09537850SAkhilesh Sanikop #endif
1670*09537850SAkhilesh Sanikop #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1671*09537850SAkhilesh Sanikop }
1672*09537850SAkhilesh Sanikop #endif // LIBGAV1_MAX_BITDEPTH >= 10
1673*09537850SAkhilesh Sanikop
1674*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH == 12
// Fills the inverse transform slots of the writable 12-bit Dsp table with the
// C implementations (TransformLoop_C instantiations) from this file.
//
// When LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS evaluates to nonzero, the work is
// delegated to InitAll<12, int32_t, uint16_t>(). Otherwise each
// (transform type, transform size) pair is handled on its own: its kRow and
// kColumn entries are assigned only if the corresponding
// LIBGAV1_Dsp12bpp_Transform1dSize<N>_Transform1d<Type> macro is not defined.
// NOTE(review): presumably those macros are defined when an optimized
// implementation provides that entry - confirm against the headers that
// define them.
Init12bpp()1675*09537850SAkhilesh Sanikop void Init12bpp() {
1676*09537850SAkhilesh Sanikop Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
1677*09537850SAkhilesh Sanikop assert(dsp != nullptr);
// Keeps |dsp| referenced even if every assignment below is removed by the
// preprocessor (presumably to avoid an unused-variable warning).
1678*09537850SAkhilesh Sanikop static_cast<void>(dsp);
1679*09537850SAkhilesh Sanikop #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1680*09537850SAkhilesh Sanikop InitAll<12, int32_t, uint16_t>(dsp);
1681*09537850SAkhilesh Sanikop #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// DCT, sizes 4 through 64. The integer argument passed to DctDcOnly_C and
// Dct_C runs from 2 to 6, i.e. it tracks log2 of the transform size.
1682*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dDct
1683*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1684*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1685*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 2>, Dct_C<int32_t, 2>,
1686*09537850SAkhilesh Sanikop /*is_row=*/true>;
1687*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1688*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1689*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 2>, Dct_C<int32_t, 2>,
1690*09537850SAkhilesh Sanikop /*is_row=*/false>;
1691*09537850SAkhilesh Sanikop #endif
1692*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dDct
1693*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1694*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1695*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 3>, Dct_C<int32_t, 3>,
1696*09537850SAkhilesh Sanikop /*is_row=*/true>;
1697*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1698*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1699*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 3>, Dct_C<int32_t, 3>,
1700*09537850SAkhilesh Sanikop /*is_row=*/false>;
1701*09537850SAkhilesh Sanikop #endif
1702*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dDct
1703*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1704*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1705*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 4>, Dct_C<int32_t, 4>,
1706*09537850SAkhilesh Sanikop /*is_row=*/true>;
1707*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1708*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1709*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 4>, Dct_C<int32_t, 4>,
1710*09537850SAkhilesh Sanikop /*is_row=*/false>;
1711*09537850SAkhilesh Sanikop #endif
1712*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize32_Transform1dDct
1713*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1714*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1715*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 5>, Dct_C<int32_t, 5>,
1716*09537850SAkhilesh Sanikop /*is_row=*/true>;
1717*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1718*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1719*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 5>, Dct_C<int32_t, 5>,
1720*09537850SAkhilesh Sanikop /*is_row=*/false>;
1721*09537850SAkhilesh Sanikop #endif
1722*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize64_Transform1dDct
1723*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1724*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1725*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 6>, Dct_C<int32_t, 6>,
1726*09537850SAkhilesh Sanikop /*is_row=*/true>;
1727*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1728*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1729*09537850SAkhilesh Sanikop DctDcOnly_C<12, int32_t, 6>, Dct_C<int32_t, 6>,
1730*09537850SAkhilesh Sanikop /*is_row=*/false>;
1731*09537850SAkhilesh Sanikop #endif
// ADST, sizes 4, 8 and 16. Row and column entries use the same kernels and
// differ only in the is_row argument.
1732*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dAdst
1733*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1734*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1735*09537850SAkhilesh Sanikop Adst4DcOnly_C<12, int32_t>, Adst4_C<int32_t>,
1736*09537850SAkhilesh Sanikop /*is_row=*/true>;
1737*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1738*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1739*09537850SAkhilesh Sanikop Adst4DcOnly_C<12, int32_t>, Adst4_C<int32_t>,
1740*09537850SAkhilesh Sanikop /*is_row=*/false>;
1741*09537850SAkhilesh Sanikop #endif
1742*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dAdst
1743*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1744*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1745*09537850SAkhilesh Sanikop Adst8DcOnly_C<12, int32_t>, Adst8_C<int32_t>,
1746*09537850SAkhilesh Sanikop /*is_row=*/true>;
1747*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1748*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1749*09537850SAkhilesh Sanikop Adst8DcOnly_C<12, int32_t>, Adst8_C<int32_t>,
1750*09537850SAkhilesh Sanikop /*is_row=*/false>;
1751*09537850SAkhilesh Sanikop #endif
1752*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dAdst
1753*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1754*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1755*09537850SAkhilesh Sanikop Adst16DcOnly_C<12, int32_t>, Adst16_C<int32_t>,
1756*09537850SAkhilesh Sanikop /*is_row=*/true>;
1757*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1758*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1759*09537850SAkhilesh Sanikop Adst16DcOnly_C<12, int32_t>, Adst16_C<int32_t>,
1760*09537850SAkhilesh Sanikop /*is_row=*/false>;
1761*09537850SAkhilesh Sanikop #endif
// Identity, sizes 4 through 32. Unlike DCT/ADST, rows and columns use
// different kernels (Identity<N>Row_C vs. Identity<N>Column_C) while sharing
// the same DC-only function.
1762*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dIdentity
1763*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1764*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1765*09537850SAkhilesh Sanikop Identity4DcOnly_C<12, int32_t>, Identity4Row_C<int32_t>,
1766*09537850SAkhilesh Sanikop /*is_row=*/true>;
1767*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1768*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1769*09537850SAkhilesh Sanikop Identity4DcOnly_C<12, int32_t>,
1770*09537850SAkhilesh Sanikop Identity4Column_C<int32_t>, /*is_row=*/false>;
1771*09537850SAkhilesh Sanikop #endif
1772*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dIdentity
1773*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1774*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1775*09537850SAkhilesh Sanikop Identity8DcOnly_C<12, int32_t>, Identity8Row_C<int32_t>,
1776*09537850SAkhilesh Sanikop /*is_row=*/true>;
1777*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1778*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1779*09537850SAkhilesh Sanikop Identity8DcOnly_C<12, int32_t>,
1780*09537850SAkhilesh Sanikop Identity8Column_C<int32_t>, /*is_row=*/false>;
1781*09537850SAkhilesh Sanikop #endif
1782*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dIdentity
1783*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1784*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1785*09537850SAkhilesh Sanikop Identity16DcOnly_C<12, int32_t>, Identity16Row_C<int32_t>,
1786*09537850SAkhilesh Sanikop /*is_row=*/true>;
1787*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1788*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1789*09537850SAkhilesh Sanikop Identity16DcOnly_C<12, int32_t>,
1790*09537850SAkhilesh Sanikop Identity16Column_C<int32_t>, /*is_row=*/false>;
1791*09537850SAkhilesh Sanikop #endif
1792*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize32_Transform1dIdentity
1793*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1794*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1795*09537850SAkhilesh Sanikop Identity32DcOnly_C<12, int32_t>, Identity32Row_C<int32_t>,
1796*09537850SAkhilesh Sanikop /*is_row=*/true>;
1797*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1798*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1799*09537850SAkhilesh Sanikop Identity32DcOnly_C<12, int32_t>,
1800*09537850SAkhilesh Sanikop Identity32Column_C<int32_t>, /*is_row=*/false>;
1801*09537850SAkhilesh Sanikop #endif
// WHT is registered for size 4 only.
1802*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dWht
1803*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1804*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dWht,
1805*09537850SAkhilesh Sanikop Wht4DcOnly_C<12, int32_t>, Wht4_C<int32_t>,
1806*09537850SAkhilesh Sanikop /*is_row=*/true>;
1807*09537850SAkhilesh Sanikop dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1808*09537850SAkhilesh Sanikop TransformLoop_C<12, int32_t, uint16_t, kTransform1dWht,
1809*09537850SAkhilesh Sanikop Wht4DcOnly_C<12, int32_t>, Wht4_C<int32_t>,
1810*09537850SAkhilesh Sanikop /*is_row=*/false>;
1811*09537850SAkhilesh Sanikop #endif
1812*09537850SAkhilesh Sanikop #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1813*09537850SAkhilesh Sanikop }
1814*09537850SAkhilesh Sanikop #endif // LIBGAV1_MAX_BITDEPTH == 12
1815*09537850SAkhilesh Sanikop
1816*09537850SAkhilesh Sanikop } // namespace
1817*09537850SAkhilesh Sanikop
// Installs the C inverse transform functions for every bitdepth compiled in:
// Init8bpp() always runs, Init10bpp() runs when LIBGAV1_MAX_BITDEPTH >= 10 and
// Init12bpp() runs when LIBGAV1_MAX_BITDEPTH == 12.
InverseTransformInit_C()1818*09537850SAkhilesh Sanikop void InverseTransformInit_C() {
1819*09537850SAkhilesh Sanikop Init8bpp();
1820*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
1821*09537850SAkhilesh Sanikop Init10bpp();
1822*09537850SAkhilesh Sanikop #endif
1823*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH == 12
1824*09537850SAkhilesh Sanikop Init12bpp();
1825*09537850SAkhilesh Sanikop #endif
1826*09537850SAkhilesh Sanikop
1827*09537850SAkhilesh Sanikop // Local functions that may be unused depending on the optimizations
1828*09537850SAkhilesh Sanikop // available.
// NOTE(review): kBitReverseLookup is named like a constant rather than a
// function; the void cast below only references it so that it counts as used.
// Consider rewording "Local functions" above once its declaration is checked.
1829*09537850SAkhilesh Sanikop static_cast<void>(kBitReverseLookup);
1830*09537850SAkhilesh Sanikop }
1831*09537850SAkhilesh Sanikop
1832*09537850SAkhilesh Sanikop } // namespace dsp
1833*09537850SAkhilesh Sanikop } // namespace libgav1
1834