1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/inverse_transform.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21 #include <type_traits>
22
23 #include "src/dsp/dsp.h"
24 #include "src/utils/array_2d.h"
25 #include "src/utils/common.h"
26 #include "src/utils/compiler_attributes.h"
27 #include "src/utils/logging.h"
28
29 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
30 #undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
31 #endif
32
33 #if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
34 LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
35 #include <cinttypes>
36 #endif
37
38 namespace libgav1 {
39 namespace dsp {
40 namespace {
41
42 // Include the constants and utility functions inside the anonymous namespace.
43 #include "src/dsp/inverse_transform.inc"
44
// Number of bits by which column-transform results are right-shifted (with
// rounding) in this file.
constexpr uint8_t kTransformColumnShift = 4;
46
// Narrows |value| to int32_t and returns it.
//
// When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is defined and nonzero, |value| is
// first checked against the limits of a signed integer that is |range| bits
// wide; a value outside those limits is logged and trips an assert(). In every
// other build |range| is ignored.
template <typename T>
int32_t RangeCheckValue(T value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_assert(
      std::is_same<T, int32_t>::value || std::is_same<T, std::int64_t>::value,
      "");
  assert(range <= 32);
  // Limits of a signed |range|-bit integer: [-2^(range-1), 2^(range-1) - 1].
  // The arithmetic is done in uint32_t so that range == 32 does not overflow.
  const uint32_t half_span = uint32_t{1} << (range - 1);
  const int32_t min = static_cast<int32_t>(-half_span);
  const int32_t max = static_cast<int32_t>(half_span - 1);
  if (value < min || value > max) {
    LIBGAV1_DLOG(ERROR,
                 "coeff out of bit range, value: %" PRId64 " bit range %d",
                 static_cast<int64_t>(value), range);
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_cast<void>(range);
  return static_cast<int32_t>(value);
}
67
68 template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)69 LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
70 int b, int angle, bool flip,
71 int8_t range) {
72 // Note that we multiply in 32 bits and then add/subtract the products in 64
73 // bits. The 32-bit multiplications do not overflow. Please see the comment
74 // and assert() in Cos128().
75 const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
76 static_cast<int64_t>(dst[b] * Sin128(angle));
77 const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
78 static_cast<int64_t>(dst[b] * Cos128(angle));
79 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
80 // values saved into the array T by this function are representable by a
81 // signed integer using |range| bits of precision.
82 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
83 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
84 }
85
86 template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)87 void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
88 int angle, bool flip, int8_t range) {
89 // Note that we multiply in 32 bits and then add/subtract the products in 64
90 // bits. The 32-bit multiplications do not overflow. Please see the comment
91 // and assert() in Cos128().
92 const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
93 const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
94 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
95 // values saved into the array T by this function are representable by a
96 // signed integer using |range| bits of precision.
97 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
98 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
99 }
100
101 template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)102 void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
103 int angle, bool flip, int8_t range) {
104 // Note that we multiply in 32 bits and then add/subtract the products in 64
105 // bits. The 32-bit multiplications do not overflow. Please see the comment
106 // and assert() in Cos128().
107 const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
108 const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
109
110 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
111 // values saved into the array T by this function are representable by a
112 // signed integer using |range| bits of precision.
113 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
114 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
115 }
116
117 template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)118 void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
119 int8_t range) {
120 if (flip) std::swap(a, b);
121 --range;
122 // For Adst and Dct, the maximum possible value for range is 20. So min and
123 // max should always fit into int32_t.
124 const int32_t min = -(1 << range);
125 const int32_t max = (1 << range) - 1;
126 const int32_t x = dst[a] + dst[b];
127 const int32_t y = dst[a] - dst[b];
128 dst[a] = Clip3(x, min, max);
129 dst[b] = Clip3(y, min, max);
130 }
131
// Clips the first |size| entries of |dst| to the limits of a signed integer
// that is Max(bitdepth + 6, 16) bits wide. Does nothing when Residual is at
// most two bytes wide.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  // If Residual is int16_t (which implies bitdepth is 8), the values do not
  // need to be clipped to 16 bits.
  if (sizeof(Residual) <= 2) return;

  const Residual upper_limit = (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
  const Residual lower_limit = -upper_limit - 1;
  for (int i = 0; i < size; ++i) {
    Residual& coefficient = dst[i];
    coefficient = Clip3(coefficient, lower_limit, upper_limit);
  }
}
145
146 //------------------------------------------------------------------------------
147 // Discrete Cosine Transforms (DCT).
148
// Bit-reversal permutation tables used by Dct_C(). Row i serves the transform
// of size 1 << (i + 2): entry (i, j) is obtained by taking the low i + 2 bits
// of j and reversing their order. Rows for sizes below 64 repeat with period
// 1 << (i + 2) so that every row has 64 entries.
// For example, entry (2, 3) is computed as follows:
//  * the low 2 + 2 = 4 bits of 3 are 0011
//  * reversing those 4 bits gives 1100 = 12
constexpr uint8_t kBitReverseLookup[kNumTransform1dSizes][64] = {
    {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
     1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
     0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
    {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
     3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
     1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
    {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
    {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
     0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
    {0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
     2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
     1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
     3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
173
// Applies the 1D DCT stage network of this file, in place, to the
// 1 << |size_log2| values (4 to 64) stored at |dest|, which is interpreted as
// a Residual array.
//
// Stage 1 reorders the input through kBitReverseLookup. The later stages are
// sequences of ButterflyRotation_C() and HadamardRotation_C() calls; |range|
// is forwarded unchanged to every one of them. Each guarded stage tests the
// compile-time constant |size_log2|, so an instantiation only runs the stages
// that apply to its transform size. The stages must run in the order written
// because each one consumes the values left in |dst| by the previous ones.
template <typename Residual, int size_log2>
void Dct_C(void* dest, int8_t range) {
  static_assert(size_log2 >= 2 && size_log2 <= 6, "");
  auto* const dst = static_cast<Residual*>(dest);
  // stage 1.
  // |size| is a constant expression (it derives from a template parameter),
  // so |temp| is an ordinary fixed-size array.
  const int size = 1 << size_log2;
  Residual temp[size];
  // Work from a copy so that the permutation can be written back into |dst|.
  memcpy(temp, dst, sizeof(temp));
  for (int i = 0; i < size; ++i) {
    dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
  }
  // stages 2-31 are dependent on the value of size_log2, except for stages 12
  // and 17, which run for every size.
  // stage 2.
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      ButterflyRotation_C(dst, i + 32, 63 - i,
                          63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
                          range);
    }
  }
  // stage 3
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, i + 16, 31 - i,
                          6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
                          range);
    }
  }
  // stage 4.
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 5.
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, i + 8, 15 - i,
                          12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
                          range);
    }
  }
  // stage 6.
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 7.
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
            60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
            range);
      }
    }
  }
  // stage 8.
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
    }
  }
  // stage 9.
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 10.
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
            24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
      }
    }
  }
  // stage 11.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
                           MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 12.
  for (int i = 0; i < 2; ++i) {
    ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
                        i == 0, range);
  }
  // stage 13.
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
                         /*flip=*/i != 0, range);
    }
  }
  // stage 14.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
    }
  }
  // stage 15.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
                           MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 16.
  if (size_log2 == 6) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        ButterflyRotation_C(
            dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
            56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
      }
    }
  }
  // stage 17.
  for (int i = 0; i < 2; ++i) {
    HadamardRotation_C(dst, i, 3 - i, false, range);
  }
  // stage 18.
  if (size_log2 >= 3) {
    ButterflyRotation_C(dst, 6, 5, 32, true, range);
  }
  // stage 19.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
                           /*flip=*/i != 0, range);
      }
    }
  }
  // stage 20.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
                          range);
    }
  }
  // stage 21.
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
                           MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 22.
  if (size_log2 >= 3) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, i, 7 - i, false, range);
    }
  }
  // stage 23.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
    }
  }
  // stage 24.
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
                           MultiplyBy8(i) - j + 23, i == 1, range);
      }
    }
  }
  // stage 25.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
    }
  }
  // stage 26.
  if (size_log2 >= 4) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i, 15 - i, false, range);
    }
  }
  // stage 27.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
    }
  }
  // stage 28.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i + 32, 47 - i, false, range);
      HadamardRotation_C(dst, i + 48, 63 - i, true, range);
    }
  }
  // stage 29.
  if (size_log2 >= 5) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, i, 31 - i, false, range);
    }
  }
  // stage 30.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
    }
  }
  // stage 31.
  if (size_log2 == 6) {
    for (int i = 0; i < 32; ++i) {
      HadamardRotation_C(dst, i, 63 - i, false, range);
    }
  }
}
405
406 template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)407 void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
408 bool is_row) {
409 auto* const dst = static_cast<Residual*>(dest);
410
411 if (is_row && should_round) {
412 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
413 }
414
415 ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
416
417 if (is_row && row_shift > 0) {
418 dst[0] = RightShiftWithRounding(dst[0], row_shift);
419 }
420
421 ClampIntermediate<bitdepth, Residual>(dst, 1);
422
423 const int size = 1 << size_log2;
424 for (int i = 1; i < size; ++i) {
425 dst[i] = dst[0];
426 }
427 }
428
429 //------------------------------------------------------------------------------
430 // Asymmetric Discrete Sine Transforms (ADST).
431
432 /*
433 * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
434 * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
435 */
436 template <typename Residual>
Adst4_C(void * dest,int8_t range)437 void Adst4_C(void* dest, int8_t range) {
438 auto* const dst = static_cast<Residual*>(dest);
439 if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
440 return;
441 }
442
443 // stage 1.
444 // Section 7.13.2.6: It is a requirement of bitstream conformance that all
445 // values stored in the s and x arrays by this process are representable by
446 // a signed integer using range + 12 bits of precision.
447 // Note the intermediate value can only exceed INT32_MAX with invalid 12-bit
448 // content. For simplicity in unoptimized code, int64_t is used for both 10 &
449 // 12-bit. SIMD implementations can allow these to rollover on platforms
450 // where this has defined behavior.
451 using Intermediate =
452 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
453 Intermediate s[7];
454 s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
455 s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
456 s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
457 s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
458 s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
459 s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
460 s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
461 // stage 2.
462 // Section 7.13.2.6: It is a requirement of bitstream conformance that
463 // values stored in the variable a7 by this process are representable by a
464 // signed integer using range + 1 bits of precision.
465 const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
466 // Section 7.13.2.6: It is a requirement of bitstream conformance that
467 // values stored in the variable b7 by this process are representable by a
468 // signed integer using |range| bits of precision.
469 const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
470 // stage 3.
471 s[0] = RangeCheckValue(s[0] + s[3], range + 12);
472 s[1] = RangeCheckValue(s[1] - s[4], range + 12);
473 s[3] = s[2];
474 // With range checking enabled b7 would be trapped above. This prevents an
475 // integer sanitizer warning. In SIMD implementations the multiply can be
476 // allowed to rollover on platforms where this has defined behavior.
477 const auto adst2_b7 = static_cast<Intermediate>(kAdst4Multiplier[2]) * b7;
478 s[2] = RangeCheckValue(adst2_b7, range + 12);
479 // stage 4.
480 s[0] = RangeCheckValue(s[0] + s[5], range + 12);
481 s[1] = RangeCheckValue(s[1] - s[6], range + 12);
482 // stages 5 and 6.
483 const Intermediate x0 = RangeCheckValue(s[0] + s[3], range + 12);
484 const Intermediate x1 = RangeCheckValue(s[1] + s[3], range + 12);
485 Intermediate x3 = RangeCheckValue(s[0] + s[1], range + 12);
486 x3 = RangeCheckValue(x3 - s[3], range + 12);
487 auto dst_0 = static_cast<int32_t>(RightShiftWithRounding(x0, 12));
488 auto dst_1 = static_cast<int32_t>(RightShiftWithRounding(x1, 12));
489 auto dst_2 = static_cast<int32_t>(RightShiftWithRounding(s[2], 12));
490 auto dst_3 = static_cast<int32_t>(RightShiftWithRounding(x3, 12));
491 if (sizeof(Residual) == 2) {
492 // If the first argument to RightShiftWithRounding(..., 12) is only
493 // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
494 // in RightShiftWithRounding(..., 12) will cause the function to return
495 // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
496 dst_0 -= (dst_0 == 0x8000);
497 dst_1 -= (dst_1 == 0x8000);
498 dst_3 -= (dst_3 == 0x8000);
499 }
500 dst[0] = dst_0;
501 dst[1] = dst_1;
502 dst[2] = dst_2;
503 dst[3] = dst_3;
504 }
505
506 template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)507 void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
508 bool is_row) {
509 auto* const dst = static_cast<Residual*>(dest);
510
511 if (is_row && should_round) {
512 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
513 }
514
515 // stage 1.
516 // Section 7.13.2.6: It is a requirement of bitstream conformance that all
517 // values stored in the s and x arrays by this process are representable by
518 // a signed integer using range + 12 bits of precision.
519 int32_t s[3];
520 s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
521 s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
522 s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
523 // stage 3.
524 // stage 4.
525 // stages 5 and 6.
526 int32_t dst_0 = RightShiftWithRounding(s[0], 12);
527 int32_t dst_1 = RightShiftWithRounding(s[1], 12);
528 int32_t dst_2 = RightShiftWithRounding(s[2], 12);
529 int32_t dst_3 =
530 RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
531 if (sizeof(Residual) == 2) {
532 // If the first argument to RightShiftWithRounding(..., 12) is only
533 // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
534 // in RightShiftWithRounding(..., 12) will cause the function to return
535 // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
536 dst_0 -= (dst_0 == 0x8000);
537 dst_1 -= (dst_1 == 0x8000);
538 dst_3 -= (dst_3 == 0x8000);
539 }
540 dst[0] = dst_0;
541 dst[1] = dst_1;
542 dst[2] = dst_2;
543 dst[3] = dst_3;
544
545 const int size = 4;
546 if (is_row && row_shift > 0) {
547 for (int j = 0; j < size; ++j) {
548 dst[j] = RightShiftWithRounding(dst[j], row_shift);
549 }
550 }
551
552 ClampIntermediate<bitdepth, Residual>(dst, 4);
553 }
554
555 template <typename Residual>
AdstInputPermutation(int32_t * LIBGAV1_RESTRICT const dst,const Residual * LIBGAV1_RESTRICT const src,int n)556 void AdstInputPermutation(int32_t* LIBGAV1_RESTRICT const dst,
557 const Residual* LIBGAV1_RESTRICT const src, int n) {
558 assert(n == 8 || n == 16);
559 for (int i = 0; i < n; ++i) {
560 dst[i] = src[((i & 1) == 0) ? n - i - 1 : i - 1];
561 }
562 }
563
// Source index for each output position of the 16-point ADST output
// permutation. AdstOutputPermutation() reuses the first eight entries for the
// 8-point case by shifting each one right by one bit.
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};
566
567 template <typename Residual>
AdstOutputPermutation(Residual * LIBGAV1_RESTRICT const dst,const int32_t * LIBGAV1_RESTRICT const src,int n)568 void AdstOutputPermutation(Residual* LIBGAV1_RESTRICT const dst,
569 const int32_t* LIBGAV1_RESTRICT const src, int n) {
570 assert(n == 8 || n == 16);
571 const auto shift = static_cast<int8_t>(n == 8);
572 for (int i = 0; i < n; ++i) {
573 const int8_t index = kAdstOutputPermutationLookup[i] >> shift;
574 int32_t dst_i = ((i & 1) == 0) ? src[index] : -src[index];
575 if (sizeof(Residual) == 2) {
576 // If i is odd and src[index] is -32768, dst_i will be 32768, which
577 // cannot be represented as an int16_t.
578 dst_i -= (dst_i == 0x8000);
579 }
580 dst[i] = dst_i;
581 }
582 }
583
584 template <typename Residual>
Adst8_C(void * dest,int8_t range)585 void Adst8_C(void* dest, int8_t range) {
586 auto* const dst = static_cast<Residual*>(dest);
587 // stage 1.
588 int32_t temp[8];
589 AdstInputPermutation(temp, dst, 8);
590 // stage 2.
591 for (int i = 0; i < 4; ++i) {
592 ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
593 true, range);
594 }
595 // stage 3.
596 for (int i = 0; i < 4; ++i) {
597 HadamardRotation_C(temp, i, i + 4, false, range);
598 }
599 // stage 4.
600 for (int i = 0; i < 2; ++i) {
601 ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
602 }
603 // stage 5.
604 for (int i = 0; i < 2; ++i) {
605 for (int j = 0; j < 2; ++j) {
606 HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
607 false, range);
608 }
609 }
610 // stage 6.
611 for (int i = 0; i < 2; ++i) {
612 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
613 range);
614 }
615 // stage 7.
616 AdstOutputPermutation(dst, temp, 8);
617 }
618
619 template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)620 void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
621 bool is_row) {
622 auto* const dst = static_cast<Residual*>(dest);
623
624 // stage 1.
625 int32_t temp[8];
626 // After the permutation, the dc value is in temp[1]. The remaining are zero.
627 AdstInputPermutation(temp, dst, 8);
628
629 if (is_row && should_round) {
630 temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
631 }
632
633 // stage 2.
634 ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
635
636 // stage 3.
637 temp[4] = temp[0];
638 temp[5] = temp[1];
639
640 // stage 4.
641 ButterflyRotation_C(temp, 4, 5, 48, true, range);
642
643 // stage 5.
644 temp[2] = temp[0];
645 temp[3] = temp[1];
646 temp[6] = temp[4];
647 temp[7] = temp[5];
648
649 // stage 6.
650 ButterflyRotation_C(temp, 2, 3, 32, true, range);
651 ButterflyRotation_C(temp, 6, 7, 32, true, range);
652
653 // stage 7.
654 AdstOutputPermutation(dst, temp, 8);
655
656 const int size = 8;
657 if (is_row && row_shift > 0) {
658 for (int j = 0; j < size; ++j) {
659 dst[j] = RightShiftWithRounding(dst[j], row_shift);
660 }
661 }
662
663 ClampIntermediate<bitdepth, Residual>(dst, 8);
664 }
665
666 template <typename Residual>
Adst16_C(void * dest,int8_t range)667 void Adst16_C(void* dest, int8_t range) {
668 auto* const dst = static_cast<Residual*>(dest);
669 // stage 1.
670 int32_t temp[16];
671 AdstInputPermutation(temp, dst, 16);
672 // stage 2.
673 for (int i = 0; i < 8; ++i) {
674 ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
675 true, range);
676 }
677 // stage 3.
678 for (int i = 0; i < 8; ++i) {
679 HadamardRotation_C(temp, i, i + 8, false, range);
680 }
681 // stage 4.
682 for (int i = 0; i < 2; ++i) {
683 ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
684 56 - 32 * i, true, range);
685 ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
686 8 + 32 * i, true, range);
687 }
688 // stage 5.
689 for (int i = 0; i < 4; ++i) {
690 for (int j = 0; j < 2; ++j) {
691 HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
692 false, range);
693 }
694 }
695 // stage 6.
696 for (int i = 0; i < 2; ++i) {
697 for (int j = 0; j < 2; ++j) {
698 ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
699 i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
700 }
701 }
702 // stage 7.
703 for (int i = 0; i < 2; ++i) {
704 for (int j = 0; j < 4; ++j) {
705 HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
706 false, range);
707 }
708 }
709 // stage 8.
710 for (int i = 0; i < 4; ++i) {
711 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
712 range);
713 }
714 // stage 9.
715 AdstOutputPermutation(dst, temp, 16);
716 }
717
718 template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)719 void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
720 bool is_row) {
721 auto* const dst = static_cast<Residual*>(dest);
722
723 // stage 1.
724 int32_t temp[16];
725 // After the permutation, the dc value is in temp[1]. The remaining are zero.
726 AdstInputPermutation(temp, dst, 16);
727
728 if (is_row && should_round) {
729 temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
730 }
731
732 // stage 2.
733 ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
734
735 // stage 3.
736 temp[8] = temp[0];
737 temp[9] = temp[1];
738
739 // stage 4.
740 ButterflyRotation_C(temp, 8, 9, 56, true, range);
741
742 // stage 5.
743 temp[4] = temp[0];
744 temp[5] = temp[1];
745 temp[12] = temp[8];
746 temp[13] = temp[9];
747
748 // stage 6.
749 ButterflyRotation_C(temp, 4, 5, 48, true, range);
750 ButterflyRotation_C(temp, 12, 13, 48, true, range);
751
752 // stage 7.
753 temp[2] = temp[0];
754 temp[3] = temp[1];
755 temp[10] = temp[8];
756 temp[11] = temp[9];
757
758 temp[6] = temp[4];
759 temp[7] = temp[5];
760 temp[14] = temp[12];
761 temp[15] = temp[13];
762
763 // stage 8.
764 for (int i = 0; i < 4; ++i) {
765 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
766 range);
767 }
768
769 // stage 9.
770 AdstOutputPermutation(dst, temp, 16);
771
772 const int size = 16;
773 if (is_row && row_shift > 0) {
774 for (int j = 0; j < size; ++j) {
775 dst[j] = RightShiftWithRounding(dst[j], row_shift);
776 }
777 }
778
779 ClampIntermediate<bitdepth, Residual>(dst, 16);
780 }
781
782 //------------------------------------------------------------------------------
783 // Identity Transforms.
784 //
785 // In the spec, the inverse identity transform is followed by a Round2() call:
786 // The row transforms with i = 0..(h-1) are applied as follows:
787 // ...
788 // * Otherwise, invoke the inverse identity transform process specified in
789 // section 7.13.2.15 with the input variable n equal to log2W.
790 // * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
791 // for j = 0..(w-1).
792 // ...
793 // The column transforms with j = 0..(w-1) are applied as follows:
794 // ...
795 // * Otherwise, invoke the inverse identity transform process specified in
796 // section 7.13.2.15 with the input variable n equal to log2H.
797 // * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
798 // for i = 0..(h-1).
799 //
800 // Therefore, we define the identity transform functions to perform both the
801 // inverse identity transform and the Round2() call. This has two advantages:
802 // 1. The outputs of the inverse identity transform do not need to be stored
803 // in the Residual array. They can be stored in int32_t local variables,
804 // which have a larger range if Residual is an int16_t array.
805 // 2. The inverse identity transform and the Round2() call can be jointly
806 // optimized.
807 //
808 // The identity transform functions have the following prototype:
809 // void Identity_C(void* dest, int8_t shift);
810 //
811 // The |shift| parameter is the amount of shift for the Round2() call. For row
812 // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
813 // 4. Therefore, an identity transform function can detect whether it is being
814 // invoked as a row transform or a column transform by checking whether |shift|
815 // is equal to 4.
816 //
817 // Input Range
818 //
819 // The inputs of row transforms, stored in the 2D array Dequant, are
820 // representable by a signed integer using 8 + BitDepth bits of precision:
821 // f. Dequant[ i ][ j ] is set equal to
822 // Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
823 //
824 // The inputs of column transforms are representable by a signed integer using
825 // Max( BitDepth + 6, 16 ) bits of precision:
826 // Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
827 // ...
828 // Between the row and column transforms, Residual[ i ][ j ] is set equal to
829 // Clip3( - ( 1 << ( colClampRange - 1 ) ),
830 // ( 1 << (colClampRange - 1 ) ) - 1,
831 // Residual[ i ][ j ] )
832 // for i = 0..(h-1), for j = 0..(w-1).
833 //
834 // Output Range
835 //
836 // The outputs of row transforms are representable by a signed integer using
837 // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
838 // of the multiplicative factor of inverse identity transforms minus the
839 // smallest row shift is an increase of at most one bit.
840 //
841 // Transform | Multiplicative factor | Smallest row | Net increase
842 // width | (in bits) | shift | in bits
843 // ---------------------------------------------------------------
844 // 4 | sqrt(2) (0.5 bits) | 0 | +0.5
845 // 8 | 2 (1 bit) | 0 | +1
846 // 16 | 2*sqrt(2) (1.5 bits) | 1 | +0.5
847 // 32 | 4 (2 bits) | 1 | +1
848 //
849 // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
850 // clip the outputs (which have 17 bits of precision) to the range of int16_t
851 // before storing them in the Residual array. This clipping happens to be the
852 // same as the required clipping after the row transform (see the spec quoted
853 // above), so we remain compliant with the spec. (In this case,
854 // TransformLoop_C() skips clipping the outputs of row transforms to avoid
855 // duplication of effort.)
856 //
857 // The outputs of column transforms are representable by a signed integer using
858 // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
859 // because the multiplicative factor of inverse identity transforms is at most
860 // 4 (2 bits) and |shift| is always 4.
861
862 template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)863 void Identity4Row_C(void* dest, int8_t shift) {
864 // Note the intermediate value can only exceed 32 bits with 12-bit content.
865 // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
866 using Intermediate =
867 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
868 assert(shift == 0 || shift == 1);
869 auto* const dst = static_cast<Residual*>(dest);
870 // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
871 // should be (1 + (1 << 1)) << 11. The following expression works for both
872 // values of |shift|.
873 const int32_t rounding = (1 + (shift << 1)) << 11;
874 for (int i = 0; i < 4; ++i) {
875 const auto intermediate =
876 static_cast<Intermediate>(dst[i]) * kIdentity4Multiplier;
877 int32_t dst_i =
878 static_cast<int32_t>((intermediate + rounding) >> (12 + shift));
879 if (sizeof(Residual) == 2) {
880 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
881 }
882 dst[i] = static_cast<Residual>(dst_i);
883 }
884 }
885
886 template <typename Residual>
Identity4Column_C(void * dest,int8_t)887 void Identity4Column_C(void* dest, int8_t /*shift*/) {
888 auto* const dst = static_cast<Residual*>(dest);
889 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
890 for (int i = 0; i < 4; ++i) {
891 // The intermediate value here will have to fit into an int32_t for it to be
892 // bitstream conformant. The multiplication is promoted to int32_t by
893 // defining kIdentity4Multiplier as int32_t.
894 dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
895 (12 + kTransformColumnShift));
896 }
897 }
898
899 template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)900 void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
901 int row_shift, bool is_row) {
902 // Note the intermediate value can only exceed 32 bits with 12-bit content.
903 // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
904 using Intermediate =
905 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
906 auto* const dst = static_cast<Residual*>(dest);
907
908 if (is_row) {
909 if (should_round) {
910 const auto intermediate =
911 static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
912 dst[0] = RightShiftWithRounding(intermediate, 12);
913 }
914
915 const int32_t rounding = (1 + (row_shift << 1)) << 11;
916 const auto intermediate =
917 static_cast<Intermediate>(dst[0]) * kIdentity4Multiplier;
918 int32_t dst_i =
919 static_cast<int32_t>((intermediate + rounding) >> (12 + row_shift));
920 if (sizeof(Residual) == 2) {
921 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
922 }
923 dst[0] = static_cast<Residual>(dst_i);
924
925 ClampIntermediate<bitdepth, Residual>(dst, 1);
926 return;
927 }
928
929 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
930 dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
931 (12 + kTransformColumnShift));
932 }
933
934 template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)935 void Identity8Row_C(void* dest, int8_t shift) {
936 assert(shift == 0 || shift == 1 || shift == 2);
937 auto* const dst = static_cast<Residual*>(dest);
938 for (int i = 0; i < 8; ++i) {
939 int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
940 if (sizeof(Residual) == 2) {
941 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
942 }
943 dst[i] = static_cast<Residual>(dst_i);
944 }
945 }
946
947 template <typename Residual>
Identity8Column_C(void * dest,int8_t)948 void Identity8Column_C(void* dest, int8_t /*shift*/) {
949 auto* const dst = static_cast<Residual*>(dest);
950 for (int i = 0; i < 8; ++i) {
951 dst[i] = static_cast<Residual>(
952 RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
953 }
954 }
955
956 template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)957 void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
958 int row_shift, bool is_row) {
959 // Note the intermediate value can only exceed 32 bits with 12-bit content.
960 // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
961 using Intermediate =
962 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
963 auto* const dst = static_cast<Residual*>(dest);
964
965 if (is_row) {
966 if (should_round) {
967 const auto intermediate =
968 static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
969 dst[0] = RightShiftWithRounding(intermediate, 12);
970 }
971
972 int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
973 if (sizeof(Residual) == 2) {
974 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
975 }
976 dst[0] = static_cast<Residual>(dst_i);
977
978 // If Residual is int16_t (which implies bitdepth is 8), we don't need to
979 // clip residual[i][j] to 16 bits.
980 if (sizeof(Residual) > 2) {
981 const Residual intermediate_clamp_max =
982 (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
983 const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
984 dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
985 }
986 return;
987 }
988
989 dst[0] = static_cast<Residual>(
990 RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
991 }
992
993 template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)994 void Identity16Row_C(void* dest, int8_t shift) {
995 assert(shift == 1 || shift == 2);
996 // Note the intermediate value can only exceed 32 bits with 12-bit content.
997 // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
998 using Intermediate =
999 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1000 auto* const dst = static_cast<Residual*>(dest);
1001 const int32_t rounding = (1 + (1 << shift)) << 11;
1002 for (int i = 0; i < 16; ++i) {
1003 // Note the intermediate value can only exceed 32 bits with 12-bit content.
1004 // For simplicity in unoptimized code, int64_t is used for all cases.
1005 const auto intermediate =
1006 static_cast<Intermediate>(dst[i]) * kIdentity16Multiplier;
1007 int32_t dst_i =
1008 static_cast<int32_t>((intermediate + rounding) >> (12 + shift));
1009 if (sizeof(Residual) == 2) {
1010 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1011 }
1012 dst[i] = static_cast<Residual>(dst_i);
1013 }
1014 }
1015
1016 template <typename Residual>
Identity16Column_C(void * dest,int8_t)1017 void Identity16Column_C(void* dest, int8_t /*shift*/) {
1018 auto* const dst = static_cast<Residual*>(dest);
1019 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1020 for (int i = 0; i < 16; ++i) {
1021 // The intermediate value here will have to fit into an int32_t for it to be
1022 // bitstream conformant. The multiplication is promoted to int32_t by
1023 // defining kIdentity16Multiplier as int32_t.
1024 dst[i] =
1025 static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
1026 (12 + kTransformColumnShift));
1027 }
1028 }
1029
1030 template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1031 void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1032 int row_shift, bool is_row) {
1033 // Note the intermediate value can only exceed 32 bits with 12-bit content.
1034 // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
1035 using Intermediate =
1036 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1037 auto* const dst = static_cast<Residual*>(dest);
1038
1039 if (is_row) {
1040 if (should_round) {
1041 const auto intermediate =
1042 static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
1043 dst[0] = RightShiftWithRounding(intermediate, 12);
1044 }
1045
1046 const int32_t rounding = (1 + (1 << row_shift)) << 11;
1047 const auto intermediate =
1048 static_cast<Intermediate>(dst[0]) * kIdentity16Multiplier;
1049 int32_t dst_i =
1050 static_cast<int32_t>((intermediate + rounding) >> (12 + row_shift));
1051 if (sizeof(Residual) == 2) {
1052 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1053 }
1054 dst[0] = static_cast<Residual>(dst_i);
1055
1056 ClampIntermediate<bitdepth, Residual>(dst, 1);
1057 return;
1058 }
1059
1060 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1061 dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1062 (12 + kTransformColumnShift));
1063 }
1064
1065 template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1066 void Identity32Row_C(void* dest, int8_t shift) {
1067 assert(shift == 1 || shift == 2);
1068 auto* const dst = static_cast<Residual*>(dest);
1069 for (int i = 0; i < 32; ++i) {
1070 int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1071 if (sizeof(Residual) == 2) {
1072 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1073 }
1074 dst[i] = static_cast<Residual>(dst_i);
1075 }
1076 }
1077
1078 template <typename Residual>
Identity32Column_C(void * dest,int8_t)1079 void Identity32Column_C(void* dest, int8_t /*shift*/) {
1080 auto* const dst = static_cast<Residual*>(dest);
1081 for (int i = 0; i < 32; ++i) {
1082 dst[i] = static_cast<Residual>(
1083 RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1084 }
1085 }
1086
1087 template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1088 void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1089 int row_shift, bool is_row) {
1090 // Note the intermediate value can only exceed 32 bits with 12-bit content.
1091 // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
1092 using Intermediate =
1093 typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1094 auto* const dst = static_cast<Residual*>(dest);
1095
1096 if (is_row) {
1097 if (should_round) {
1098 const auto intermediate =
1099 static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
1100 dst[0] = RightShiftWithRounding(intermediate, 12);
1101 }
1102
1103 int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1104 if (sizeof(Residual) == 2) {
1105 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1106 }
1107 dst[0] = static_cast<Residual>(dst_i);
1108
1109 ClampIntermediate<bitdepth, Residual>(dst, 1);
1110 return;
1111 }
1112
1113 dst[0] = static_cast<Residual>(
1114 RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1115 }
1116
1117 //------------------------------------------------------------------------------
1118 // Walsh Hadamard Transform.
1119
// 4-point inverse Walsh-Hadamard transform, applied in place to the four
// values at |dest|. Every input is first right shifted by |shift|.
//
// All right shifts of signed values here must be arithmetic shifts.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  auto* const coeffs = static_cast<Residual*>(dest);
  // Note the input permutation: the four inputs feed a, c, d, b in that order.
  Residual a = coeffs[0] >> shift;
  const Residual c = coeffs[1] >> shift;
  Residual d = coeffs[2] >> shift;
  const Residual b = coeffs[3] >> shift;
  a += c;
  d -= b;
  const Residual e = (a - d) >> 1;
  const Residual out1 = e - b;
  const Residual out2 = e - c;
  coeffs[1] = out1;
  coeffs[2] = out2;
  coeffs[0] = a - out1;
  coeffs[3] = d + out2;
}
1137
1138 template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1139 void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1140 int /*row_shift*/, bool /*is_row*/) {
1141 auto* const dst = static_cast<Residual*>(dest);
1142 const int shift = range;
1143
1144 Residual temp = dst[0] >> shift;
1145 // This signed right shift must be an arithmetic shift.
1146 Residual e = temp >> 1;
1147 dst[0] = temp - e;
1148 dst[1] = e;
1149 dst[2] = e;
1150 dst[3] = e;
1151
1152 ClampIntermediate<bitdepth, Residual>(dst, 4);
1153 }
1154
1155 //------------------------------------------------------------------------------
1156 // row/column transform loop
1157
// Signature of a 1D inverse transform that operates in place on |dest|.
// TransformLoop_C() passes either the row/column shift (identity transforms)
// or the clamping range (all other transforms) as |range|.
using InverseTransform1dFunc = void (*)(void* dest, int8_t range);

// Signature of the DC-only counterpart of a 1D inverse transform.
// |should_round| and |row_shift| are only meaningful when |is_row| is true.
using InverseTransformDcOnlyFunc = void (*)(void* dest, int8_t range,
                                            bool should_round, int row_shift,
                                            bool is_row);
1162
1163 template <int bitdepth, typename Residual, typename Pixel,
1164 Transform1d transform1d_type,
1165 InverseTransformDcOnlyFunc dconly_transform1d,
1166 InverseTransform1dFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * LIBGAV1_RESTRICT src_buffer,int start_x,int start_y,void * LIBGAV1_RESTRICT dst_frame)1167 void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1168 int adjusted_tx_height, void* LIBGAV1_RESTRICT src_buffer,
1169 int start_x, int start_y,
1170 void* LIBGAV1_RESTRICT dst_frame) {
1171 constexpr bool lossless = transform1d_type == kTransform1dWht;
1172 constexpr bool is_identity = transform1d_type == kTransform1dIdentity;
1173 // The transform size of the WHT is always 4x4. Setting tx_width and
1174 // tx_height to the constant 4 for the WHT speeds the code up.
1175 assert(!lossless || tx_size == kTransformSize4x4);
1176 const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1177 const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1178 const int tx_width_log2 = kTransformWidthLog2[tx_size];
1179 const int tx_height_log2 = kTransformHeightLog2[tx_size];
1180 auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1181
1182 // Initially this points to the dequantized values. After the transforms are
1183 // applied, this buffer contains the residual.
1184 Array2DView<Residual> residual(tx_height, tx_width,
1185 static_cast<Residual*>(src_buffer));
1186
1187 if (is_row) {
1188 // Row transform.
1189 const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1190 // This is the |range| parameter of the InverseTransform1dFunc. For lossy
1191 // transforms, this will be equal to the clamping range.
1192 const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1193 // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1194 // the input to the row transform by 1 / sqrt(2), which is approximated by
1195 // the fraction 2896 / 2^12.
1196 const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1197
1198 if (adjusted_tx_height == 1) {
1199 dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1200 true);
1201 return;
1202 }
1203
1204 // Row transforms need to be done only up to 32 because the rest of the rows
1205 // are always all zero if |tx_height| is 64. Otherwise, only process the
1206 // rows that have a non zero coefficients.
1207 for (int i = 0; i < adjusted_tx_height; ++i) {
1208 // If lossless, the transform size is 4x4, so should_round is false.
1209 if (!lossless && should_round) {
1210 // The last 32 values of every row are always zero if the |tx_width| is
1211 // 64.
1212 for (int j = 0; j < std::min(tx_width, 32); ++j) {
1213 residual[i][j] = RightShiftWithRounding(
1214 residual[i][j] * kTransformRowMultiplier, 12);
1215 }
1216 }
1217 // For identity transform, |transform1d_func| also performs the
1218 // Round2(T[j], rowShift) call in the spec.
1219 transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1220 if (!lossless && !is_identity && row_shift > 0) {
1221 for (int j = 0; j < tx_width; ++j) {
1222 residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1223 }
1224 }
1225
1226 ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1227 }
1228 return;
1229 }
1230
1231 assert(!is_row);
1232 constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1233 // This is the |range| parameter of the InverseTransform1dFunc. For lossy
1234 // transforms, this will be equal to the clamping range.
1235 const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1236 const bool flip_rows = transform1d_type == kTransform1dAdst &&
1237 kTransformFlipRowsMask.Contains(tx_type);
1238 const bool flip_columns =
1239 !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1240 const int min_value = 0;
1241 const int max_value = (1 << bitdepth) - 1;
1242 // Note: 64 is the maximum size of a 1D transform buffer (the largest
1243 // transform size is kTransformSize64x64).
1244 Residual tx_buffer[64];
1245 for (int j = 0; j < tx_width; ++j) {
1246 const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1247 int i = 0;
1248 do {
1249 tx_buffer[i] = residual[i][flipped_j];
1250 } while (++i != tx_height);
1251 if (adjusted_tx_height == 1) {
1252 dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1253 } else {
1254 // For identity transform, |transform1d_func| also performs the
1255 // Round2(T[i], colShift) call in the spec.
1256 transform1d_func(tx_buffer,
1257 is_identity ? column_shift : column_clamp_range);
1258 }
1259 const int x = start_x + j;
1260 for (int i = 0; i < tx_height; ++i) {
1261 const int y = start_y + i;
1262 const int index = flip_rows ? tx_height - i - 1 : i;
1263 Residual residual_value = tx_buffer[index];
1264 if (!lossless && !is_identity) {
1265 residual_value = RightShiftWithRounding(residual_value, column_shift);
1266 }
1267 (*frame)[y][x] =
1268 Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1269 }
1270 }
1271 }
1272
1273 //------------------------------------------------------------------------------
1274
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// Installs the C row and column implementation of every 1D inverse transform
// type and size into |dsp|->inverse_transforms for the given |bitdepth|,
// |Residual| and |Pixel| types.
template <int bitdepth, typename Residual, typename Pixel>
void InitAll(Dsp* const dsp) {
  // Maximum transform size for Dct is 64.
  auto& dct = dsp->inverse_transforms[kTransform1dDct];
  dct[kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/true>;
  dct[kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/false>;
  dct[kTransform1dSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/true>;
  dct[kTransform1dSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/false>;
  dct[kTransform1dSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/true>;
  dct[kTransform1dSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/false>;
  dct[kTransform1dSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/true>;
  dct[kTransform1dSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/false>;
  dct[kTransform1dSize64][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/true>;
  dct[kTransform1dSize64][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/false>;

  // Maximum transform size for Adst is 16.
  auto& adst = dsp->inverse_transforms[kTransform1dAdst];
  adst[kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/true>;
  adst[kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/false>;
  adst[kTransform1dSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/true>;
  adst[kTransform1dSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/false>;
  adst[kTransform1dSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/true>;
  adst[kTransform1dSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/false>;

  // Maximum transform size for Identity transform is 32.
  auto& identity = dsp->inverse_transforms[kTransform1dIdentity];
  identity[kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Row_C<Residual>, /*is_row=*/true>;
  identity[kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Column_C<Residual>, /*is_row=*/false>;
  identity[kTransform1dSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Row_C<Residual>, /*is_row=*/true>;
  identity[kTransform1dSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Column_C<Residual>, /*is_row=*/false>;
  identity[kTransform1dSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Row_C<Residual>, /*is_row=*/true>;
  identity[kTransform1dSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Column_C<Residual>, /*is_row=*/false>;
  identity[kTransform1dSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Row_C<Residual>, /*is_row=*/true>;
  identity[kTransform1dSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Column_C<Residual>, /*is_row=*/false>;

  // Maximum transform size for Wht is 4.
  auto& wht = dsp->inverse_transforms[kTransform1dWht];
  wht[kTransform1dSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/true>;
  wht[kTransform1dSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/false>;
}
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1391
Init8bpp()1392 void Init8bpp() {
1393 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1394 assert(dsp != nullptr);
1395 static_cast<void>(dsp);
1396 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1397 InitAll<8, int16_t, uint8_t>(dsp);
1398 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1399 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
1400 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1401 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1402 DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1403 /*is_row=*/true>;
1404 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1405 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1406 DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1407 /*is_row=*/false>;
1408 #endif
1409 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
1410 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1411 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1412 DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1413 /*is_row=*/true>;
1414 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1415 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1416 DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1417 /*is_row=*/false>;
1418 #endif
1419 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
1420 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1421 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1422 DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1423 /*is_row=*/true>;
1424 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1425 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1426 DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1427 /*is_row=*/false>;
1428 #endif
1429 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
1430 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1431 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1432 DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1433 /*is_row=*/true>;
1434 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1435 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1436 DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1437 /*is_row=*/false>;
1438 #endif
1439 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
1440 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1441 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1442 DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1443 /*is_row=*/true>;
1444 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1445 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1446 DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1447 /*is_row=*/false>;
1448 #endif
1449 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
1450 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1451 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1452 Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1453 /*is_row=*/true>;
1454 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1455 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1456 Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1457 /*is_row=*/false>;
1458 #endif
1459 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
1460 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1461 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1462 Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1463 /*is_row=*/true>;
1464 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1465 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1466 Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1467 /*is_row=*/false>;
1468 #endif
1469 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
1470 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1471 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1472 Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1473 /*is_row=*/true>;
1474 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1475 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1476 Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1477 /*is_row=*/false>;
1478 #endif
1479 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
1480 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1481 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1482 Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1483 /*is_row=*/true>;
1484 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1485 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1486 Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1487 /*is_row=*/false>;
1488 #endif
1489 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
1490 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1491 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1492 Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1493 /*is_row=*/true>;
1494 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1495 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1496 Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1497 /*is_row=*/false>;
1498 #endif
1499 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
1500 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1501 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1502 Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1503 /*is_row=*/true>;
1504 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1505 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1506 Identity16DcOnly_C<8, int16_t>,
1507 Identity16Column_C<int16_t>, /*is_row=*/false>;
1508 #endif
1509 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
1510 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1511 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1512 Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1513 /*is_row=*/true>;
1514 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1515 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1516 Identity32DcOnly_C<8, int16_t>,
1517 Identity32Column_C<int16_t>, /*is_row=*/false>;
1518 #endif
1519 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
1520 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1521 TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1522 Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1523 /*is_row=*/true>;
1524 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1525 TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1526 Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1527 /*is_row=*/false>;
1528 #endif
1529 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1530 }
1531
1532 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()1533 void Init10bpp() {
1534 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1535 assert(dsp != nullptr);
1536 static_cast<void>(dsp);
1537 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1538 InitAll<10, int32_t, uint16_t>(dsp);
1539 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1540 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct
1541 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1542 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1543 DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1544 /*is_row=*/true>;
1545 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1546 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1547 DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1548 /*is_row=*/false>;
1549 #endif
1550 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct
1551 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1552 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1553 DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1554 /*is_row=*/true>;
1555 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1556 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1557 DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1558 /*is_row=*/false>;
1559 #endif
1560 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct
1561 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1562 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1563 DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1564 /*is_row=*/true>;
1565 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1566 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1567 DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1568 /*is_row=*/false>;
1569 #endif
1570 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct
1571 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1572 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1573 DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1574 /*is_row=*/true>;
1575 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1576 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1577 DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1578 /*is_row=*/false>;
1579 #endif
1580 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct
1581 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1582 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1583 DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1584 /*is_row=*/true>;
1585 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1586 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1587 DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1588 /*is_row=*/false>;
1589 #endif
1590 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst
1591 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1592 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1593 Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1594 /*is_row=*/true>;
1595 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1596 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1597 Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1598 /*is_row=*/false>;
1599 #endif
1600 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst
1601 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1602 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1603 Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1604 /*is_row=*/true>;
1605 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1606 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1607 Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1608 /*is_row=*/false>;
1609 #endif
1610 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst
1611 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1612 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1613 Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1614 /*is_row=*/true>;
1615 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1616 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1617 Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1618 /*is_row=*/false>;
1619 #endif
1620 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity
1621 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1622 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1623 Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1624 /*is_row=*/true>;
1625 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1626 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1627 Identity4DcOnly_C<10, int32_t>,
1628 Identity4Column_C<int32_t>, /*is_row=*/false>;
1629 #endif
1630 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity
1631 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1632 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1633 Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1634 /*is_row=*/true>;
1635 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1636 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1637 Identity8DcOnly_C<10, int32_t>,
1638 Identity8Column_C<int32_t>, /*is_row=*/false>;
1639 #endif
1640 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity
1641 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1642 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1643 Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1644 /*is_row=*/true>;
1645 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1646 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1647 Identity16DcOnly_C<10, int32_t>,
1648 Identity16Column_C<int32_t>, /*is_row=*/false>;
1649 #endif
1650 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity
1651 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1652 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1653 Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1654 /*is_row=*/true>;
1655 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1656 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1657 Identity32DcOnly_C<10, int32_t>,
1658 Identity32Column_C<int32_t>, /*is_row=*/false>;
1659 #endif
1660 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht
1661 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1662 TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1663 Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1664 /*is_row=*/true>;
1665 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1666 TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1667 Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1668 /*is_row=*/false>;
1669 #endif
1670 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1671 }
1672 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1673
1674 #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()1675 void Init12bpp() {
1676 Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
1677 assert(dsp != nullptr);
1678 static_cast<void>(dsp);
1679 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1680 InitAll<12, int32_t, uint16_t>(dsp);
1681 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1682 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dDct
1683 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1684 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1685 DctDcOnly_C<12, int32_t, 2>, Dct_C<int32_t, 2>,
1686 /*is_row=*/true>;
1687 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1688 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1689 DctDcOnly_C<12, int32_t, 2>, Dct_C<int32_t, 2>,
1690 /*is_row=*/false>;
1691 #endif
1692 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dDct
1693 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1694 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1695 DctDcOnly_C<12, int32_t, 3>, Dct_C<int32_t, 3>,
1696 /*is_row=*/true>;
1697 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1698 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1699 DctDcOnly_C<12, int32_t, 3>, Dct_C<int32_t, 3>,
1700 /*is_row=*/false>;
1701 #endif
1702 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dDct
1703 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1704 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1705 DctDcOnly_C<12, int32_t, 4>, Dct_C<int32_t, 4>,
1706 /*is_row=*/true>;
1707 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1708 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1709 DctDcOnly_C<12, int32_t, 4>, Dct_C<int32_t, 4>,
1710 /*is_row=*/false>;
1711 #endif
1712 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize32_Transform1dDct
1713 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1714 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1715 DctDcOnly_C<12, int32_t, 5>, Dct_C<int32_t, 5>,
1716 /*is_row=*/true>;
1717 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1718 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1719 DctDcOnly_C<12, int32_t, 5>, Dct_C<int32_t, 5>,
1720 /*is_row=*/false>;
1721 #endif
1722 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize64_Transform1dDct
1723 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1724 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1725 DctDcOnly_C<12, int32_t, 6>, Dct_C<int32_t, 6>,
1726 /*is_row=*/true>;
1727 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1728 TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1729 DctDcOnly_C<12, int32_t, 6>, Dct_C<int32_t, 6>,
1730 /*is_row=*/false>;
1731 #endif
1732 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dAdst
1733 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1734 TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1735 Adst4DcOnly_C<12, int32_t>, Adst4_C<int32_t>,
1736 /*is_row=*/true>;
1737 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1738 TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1739 Adst4DcOnly_C<12, int32_t>, Adst4_C<int32_t>,
1740 /*is_row=*/false>;
1741 #endif
1742 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dAdst
1743 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1744 TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1745 Adst8DcOnly_C<12, int32_t>, Adst8_C<int32_t>,
1746 /*is_row=*/true>;
1747 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1748 TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1749 Adst8DcOnly_C<12, int32_t>, Adst8_C<int32_t>,
1750 /*is_row=*/false>;
1751 #endif
1752 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dAdst
1753 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1754 TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1755 Adst16DcOnly_C<12, int32_t>, Adst16_C<int32_t>,
1756 /*is_row=*/true>;
1757 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1758 TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1759 Adst16DcOnly_C<12, int32_t>, Adst16_C<int32_t>,
1760 /*is_row=*/false>;
1761 #endif
1762 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dIdentity
1763 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1764 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1765 Identity4DcOnly_C<12, int32_t>, Identity4Row_C<int32_t>,
1766 /*is_row=*/true>;
1767 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1768 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1769 Identity4DcOnly_C<12, int32_t>,
1770 Identity4Column_C<int32_t>, /*is_row=*/false>;
1771 #endif
1772 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dIdentity
1773 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1774 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1775 Identity8DcOnly_C<12, int32_t>, Identity8Row_C<int32_t>,
1776 /*is_row=*/true>;
1777 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1778 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1779 Identity8DcOnly_C<12, int32_t>,
1780 Identity8Column_C<int32_t>, /*is_row=*/false>;
1781 #endif
1782 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dIdentity
1783 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1784 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1785 Identity16DcOnly_C<12, int32_t>, Identity16Row_C<int32_t>,
1786 /*is_row=*/true>;
1787 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1788 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1789 Identity16DcOnly_C<12, int32_t>,
1790 Identity16Column_C<int32_t>, /*is_row=*/false>;
1791 #endif
1792 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize32_Transform1dIdentity
1793 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1794 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1795 Identity32DcOnly_C<12, int32_t>, Identity32Row_C<int32_t>,
1796 /*is_row=*/true>;
1797 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1798 TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1799 Identity32DcOnly_C<12, int32_t>,
1800 Identity32Column_C<int32_t>, /*is_row=*/false>;
1801 #endif
1802 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dWht
1803 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1804 TransformLoop_C<12, int32_t, uint16_t, kTransform1dWht,
1805 Wht4DcOnly_C<12, int32_t>, Wht4_C<int32_t>,
1806 /*is_row=*/true>;
1807 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1808 TransformLoop_C<12, int32_t, uint16_t, kTransform1dWht,
1809 Wht4DcOnly_C<12, int32_t>, Wht4_C<int32_t>,
1810 /*is_row=*/false>;
1811 #endif
1812 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1813 }
1814 #endif // LIBGAV1_MAX_BITDEPTH == 12
1815
1816 } // namespace
1817
InverseTransformInit_C()1818 void InverseTransformInit_C() {
1819 Init8bpp();
1820 #if LIBGAV1_MAX_BITDEPTH >= 10
1821 Init10bpp();
1822 #endif
1823 #if LIBGAV1_MAX_BITDEPTH == 12
1824 Init12bpp();
1825 #endif
1826
1827 // Local functions that may be unused depending on the optimizations
1828 // available.
1829 static_cast<void>(kBitReverseLookup);
1830 }
1831
1832 } // namespace dsp
1833 } // namespace libgav1
1834