xref: /aosp_15_r20/external/libgav1/src/dsp/inverse_transform.cc (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/inverse_transform.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21 #include <type_traits>
22 
23 #include "src/dsp/dsp.h"
24 #include "src/utils/array_2d.h"
25 #include "src/utils/common.h"
26 #include "src/utils/compiler_attributes.h"
27 #include "src/utils/logging.h"
28 
29 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
30 #undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
31 #endif
32 
33 #if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
34     LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
35 #include <cinttypes>
36 #endif
37 
38 namespace libgav1 {
39 namespace dsp {
40 namespace {
41 
42 // Include the constants and utility functions inside the anonymous namespace.
43 #include "src/dsp/inverse_transform.inc"
44 
// Presumably the Round2() shift applied by column transforms (the identity
// transform notes in this file state that the column shift is always 4); the
// constant's users are outside this excerpt.
constexpr uint8_t kTransformColumnShift{4};
46 
// Narrows |value| to int32_t and returns it.
//
// When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is defined and nonzero, this also
// verifies that |value| is representable by a signed integer using |range| bits
// of precision; a violation is logged and then trips an assert(). Without the
// macro, |range| is ignored.
template <typename T>
int32_t RangeCheckValue(T value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_assert(
      std::is_same<T, int32_t>::value || std::is_same<T, std::int64_t>::value,
      "");
  assert(range <= 32);
  // The bounds are formed in unsigned arithmetic so that |range| == 32 does
  // not overflow a signed shift.
  const uint32_t half_span = uint32_t{1} << (range - 1);
  const auto min = static_cast<int32_t>(-half_span);
  const auto max = static_cast<int32_t>(half_span - 1);
  if (value < min || value > max) {
    LIBGAV1_DLOG(ERROR,
                 "coeff out of bit range, value: %" PRId64 " bit range %d",
                 static_cast<int64_t>(value), range);
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_cast<void>(range);
  return static_cast<int32_t>(value);
}
67 
68 template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)69 LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
70                                                int b, int angle, bool flip,
71                                                int8_t range) {
72   // Note that we multiply in 32 bits and then add/subtract the products in 64
73   // bits. The 32-bit multiplications do not overflow. Please see the comment
74   // and assert() in Cos128().
75   const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
76                     static_cast<int64_t>(dst[b] * Sin128(angle));
77   const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
78                     static_cast<int64_t>(dst[b] * Cos128(angle));
79   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
80   // values saved into the array T by this function are representable by a
81   // signed integer using |range| bits of precision.
82   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
83   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
84 }
85 
86 template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)87 void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
88                                     int angle, bool flip, int8_t range) {
89   // Note that we multiply in 32 bits and then add/subtract the products in 64
90   // bits. The 32-bit multiplications do not overflow. Please see the comment
91   // and assert() in Cos128().
92   const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
93   const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
94   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
95   // values saved into the array T by this function are representable by a
96   // signed integer using |range| bits of precision.
97   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
98   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
99 }
100 
101 template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)102 void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
103                                      int angle, bool flip, int8_t range) {
104   // Note that we multiply in 32 bits and then add/subtract the products in 64
105   // bits. The 32-bit multiplications do not overflow. Please see the comment
106   // and assert() in Cos128().
107   const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
108   const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
109 
110   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
111   // values saved into the array T by this function are representable by a
112   // signed integer using |range| bits of precision.
113   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
114   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
115 }
116 
117 template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)118 void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
119                         int8_t range) {
120   if (flip) std::swap(a, b);
121   --range;
122   // For Adst and Dct, the maximum possible value for range is 20. So min and
123   // max should always fit into int32_t.
124   const int32_t min = -(1 << range);
125   const int32_t max = (1 << range) - 1;
126   const int32_t x = dst[a] + dst[b];
127   const int32_t y = dst[a] - dst[b];
128   dst[a] = Clip3(x, min, max);
129   dst[b] = Clip3(y, min, max);
130 }
131 
// Clips the first |size| entries of |dst| to the signed range of
// Max(bitdepth + 6, 16) bits. Nothing is done for residual types of at most
// two bytes.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  // If Residual is int16_t (which implies bitdepth is 8), we don't need to
  // clip residual[i][j] to 16 bits.
  if (sizeof(Residual) <= 2) return;

  const Residual intermediate_clamp_max =
      (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
  const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
  for (int j = 0; j < size; ++j) {
    Residual& entry = dst[j];
    entry = Clip3(entry, intermediate_clamp_min, intermediate_clamp_max);
  }
}
145 
146 //------------------------------------------------------------------------------
147 // Discrete Cosine Transforms (DCT).
148 
149 // Entry (i, j) is bitreverse(j) interpreted as an integer with bit-length
150 // i + 2.
151 // For example, entry (2, 3) is computed as follows:
152 //   * bitreverse(3) = bitreverse(..000011) = 110000...
153 //   * interpreting that as an integer with bit-length 2+2 = 4 gives 1100 = 12
// Each row repeats its 2^(i + 2)-entry pattern so that all 64 columns are
// populated.
154 constexpr uint8_t kBitReverseLookup[kNumTransform1dSizes][64] = {
155     {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
156      1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
157      0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
158     {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
159      3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
160      1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
161     {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
162      0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
163      0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
164      0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
165     {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
166      1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
167      0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
168      1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
169     {0, 32, 16, 48, 8,  40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
170      2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
171      1, 33, 17, 49, 9,  41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
172      3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
173 
// In-place 1D DCT stage pipeline over the 2^size_log2 values at |dest|
// (interpreted as Residual*). |size_log2| must be in [2, 6], i.e. 4 to 64
// points. Stage 1 reorders the input through kBitReverseLookup; the remaining
// stages are sequences of ButterflyRotation_C() and HadamardRotation_C() calls
// that operate in place, so their order must be preserved. |range| is
// forwarded unchanged to every rotation helper.
174 template <typename Residual, int size_log2>
Dct_C(void * dest,int8_t range)175 void Dct_C(void* dest, int8_t range) {
176   static_assert(size_log2 >= 2 && size_log2 <= 6, "");
177   auto* const dst = static_cast<Residual*>(dest);
178   // stage 1.
  // The permutation is not done in place: the input is first copied to |temp|
  // and then written back to |dst| in bit-reversed order.
179   const int size = 1 << size_log2;
180   Residual temp[size];
181   memcpy(temp, dst, sizeof(temp));
182   for (int i = 0; i < size; ++i) {
183     dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
184   }
185   // stages 2-31 are dependent on the value of size_log2.
186   // stage 2.
187   if (size_log2 == 6) {
188     for (int i = 0; i < 16; ++i) {
189       ButterflyRotation_C(dst, i + 32, 63 - i,
190                           63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
191                           range);
192     }
193   }
194   // stage 3
195   if (size_log2 >= 5) {
196     for (int i = 0; i < 8; ++i) {
197       ButterflyRotation_C(dst, i + 16, 31 - i,
198                           6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
199                           range);
200     }
201   }
202   // stage 4.
203   if (size_log2 == 6) {
204     for (int i = 0; i < 16; ++i) {
205       HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
206                          static_cast<bool>(i & 1), range);
207     }
208   }
209   // stage 5.
210   if (size_log2 >= 4) {
211     for (int i = 0; i < 4; ++i) {
212       ButterflyRotation_C(dst, i + 8, 15 - i,
213                           12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
214                           range);
215     }
216   }
217   // stage 6.
218   if (size_log2 >= 5) {
219     for (int i = 0; i < 8; ++i) {
220       HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
221                          static_cast<bool>(i & 1), range);
222     }
223   }
224   // stage 7.
225   if (size_log2 == 6) {
226     for (int i = 0; i < 4; ++i) {
227       for (int j = 0; j < 2; ++j) {
228         ButterflyRotation_C(
229             dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
230             60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
231             range);
232       }
233     }
234   }
235   // stage 8.
236   if (size_log2 >= 3) {
237     for (int i = 0; i < 2; ++i) {
238       ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
239     }
240   }
241   // stage 9.
242   if (size_log2 >= 4) {
243     for (int i = 0; i < 4; ++i) {
244       HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
245                          static_cast<bool>(i & 1), range);
246     }
247   }
248   // stage 10.
249   if (size_log2 >= 5) {
250     for (int i = 0; i < 2; ++i) {
251       for (int j = 0; j < 2; ++j) {
252         ButterflyRotation_C(
253             dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
254             24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
255       }
256     }
257   }
258   // stage 11.
259   if (size_log2 == 6) {
260     for (int i = 0; i < 8; ++i) {
261       for (int j = 0; j < 2; ++j) {
262         HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
263                            MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
264                            range);
265       }
266     }
267   }
268   // stage 12.
  // Not gated on size_log2: it only touches indices 0-3, which exist for every
  // supported size.
269   for (int i = 0; i < 2; ++i) {
270     ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
271                         i == 0, range);
272   }
273   // stage 13.
274   if (size_log2 >= 3) {
275     for (int i = 0; i < 2; ++i) {
276       HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
277                          /*flip=*/i != 0, range);
278     }
279   }
280   // stage 14.
281   if (size_log2 >= 4) {
282     for (int i = 0; i < 2; ++i) {
283       ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
284     }
285   }
286   // stage 15.
287   if (size_log2 >= 5) {
288     for (int i = 0; i < 4; ++i) {
289       for (int j = 0; j < 2; ++j) {
290         HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
291                            MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
292                            range);
293       }
294     }
295   }
296   // stage 16.
297   if (size_log2 == 6) {
298     for (int i = 0; i < 2; ++i) {
299       for (int j = 0; j < 4; ++j) {
300         ButterflyRotation_C(
301             dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
302             56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
303       }
304     }
305   }
306   // stage 17.
  // Also not gated on size_log2: it only touches indices 0-3.
307   for (int i = 0; i < 2; ++i) {
308     HadamardRotation_C(dst, i, 3 - i, false, range);
309   }
310   // stage 18.
311   if (size_log2 >= 3) {
312     ButterflyRotation_C(dst, 6, 5, 32, true, range);
313   }
314   // stage 19.
315   if (size_log2 >= 4) {
316     for (int i = 0; i < 2; ++i) {
317       for (int j = 0; j < 2; ++j) {
318         HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
319                            /*flip=*/i != 0, range);
320       }
321     }
322   }
323   // stage 20.
324   if (size_log2 >= 5) {
325     for (int i = 0; i < 4; ++i) {
326       ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
327                           range);
328     }
329   }
330   // stage 21.
331   if (size_log2 == 6) {
332     for (int i = 0; i < 4; ++i) {
333       for (int j = 0; j < 4; ++j) {
334         HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
335                            MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
336                            range);
337       }
338     }
339   }
340   // stage 22.
341   if (size_log2 >= 3) {
342     for (int i = 0; i < 4; ++i) {
343       HadamardRotation_C(dst, i, 7 - i, false, range);
344     }
345   }
346   // stage 23.
347   if (size_log2 >= 4) {
348     for (int i = 0; i < 2; ++i) {
349       ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
350     }
351   }
352   // stage 24.
353   if (size_log2 >= 5) {
354     for (int i = 0; i < 2; ++i) {
355       for (int j = 0; j < 4; ++j) {
356         HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
357                            MultiplyBy8(i) - j + 23, i == 1, range);
358       }
359     }
360   }
361   // stage 25.
362   if (size_log2 == 6) {
363     for (int i = 0; i < 8; ++i) {
364       ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
365     }
366   }
367   // stage 26.
368   if (size_log2 >= 4) {
369     for (int i = 0; i < 8; ++i) {
370       HadamardRotation_C(dst, i, 15 - i, false, range);
371     }
372   }
373   // stage 27.
374   if (size_log2 >= 5) {
375     for (int i = 0; i < 4; ++i) {
376       ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
377     }
378   }
379   // stage 28.
380   if (size_log2 == 6) {
381     for (int i = 0; i < 8; ++i) {
382       HadamardRotation_C(dst, i + 32, 47 - i, false, range);
383       HadamardRotation_C(dst, i + 48, 63 - i, true, range);
384     }
385   }
386   // stage 29.
387   if (size_log2 >= 5) {
388     for (int i = 0; i < 16; ++i) {
389       HadamardRotation_C(dst, i, 31 - i, false, range);
390     }
391   }
392   // stage 30.
393   if (size_log2 == 6) {
394     for (int i = 0; i < 8; ++i) {
395       ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
396     }
397   }
398   // stage 31.
399   if (size_log2 == 6) {
400     for (int i = 0; i < 32; ++i) {
401       HadamardRotation_C(dst, i, 63 - i, false, range);
402     }
403   }
404 }
405 
406 template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)407 void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
408                  bool is_row) {
409   auto* const dst = static_cast<Residual*>(dest);
410 
411   if (is_row && should_round) {
412     dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
413   }
414 
415   ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
416 
417   if (is_row && row_shift > 0) {
418     dst[0] = RightShiftWithRounding(dst[0], row_shift);
419   }
420 
421   ClampIntermediate<bitdepth, Residual>(dst, 1);
422 
423   const int size = 1 << size_log2;
424   for (int i = 1; i < size; ++i) {
425     dst[i] = dst[0];
426   }
427 }
428 
429 //------------------------------------------------------------------------------
430 // Asymmetric Discrete Sine Transforms (ADST).
431 
432 /*
433  * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
434  * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
435  */
436 template <typename Residual>
Adst4_C(void * dest,int8_t range)437 void Adst4_C(void* dest, int8_t range) {
438   auto* const dst = static_cast<Residual*>(dest);
439   if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
440     return;
441   }
442 
443   // stage 1.
444   // Section 7.13.2.6: It is a requirement of bitstream conformance that all
445   // values stored in the s and x arrays by this process are representable by
446   // a signed integer using range + 12 bits of precision.
447   // Note the intermediate value can only exceed INT32_MAX with invalid 12-bit
448   // content. For simplicity in unoptimized code, int64_t is used for both 10 &
449   // 12-bit. SIMD implementations can allow these to rollover on platforms
450   // where this has defined behavior.
451   using Intermediate =
452       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
453   Intermediate s[7];
454   s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
455   s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
456   s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
457   s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
458   s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
459   s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
460   s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
461   // stage 2.
462   // Section 7.13.2.6: It is a requirement of bitstream conformance that
463   // values stored in the variable a7 by this process are representable by a
464   // signed integer using range + 1 bits of precision.
465   const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
466   // Section 7.13.2.6: It is a requirement of bitstream conformance that
467   // values stored in the variable b7 by this process are representable by a
468   // signed integer using |range| bits of precision.
469   const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
470   // stage 3.
471   s[0] = RangeCheckValue(s[0] + s[3], range + 12);
472   s[1] = RangeCheckValue(s[1] - s[4], range + 12);
473   s[3] = s[2];
474   // With range checking enabled b7 would be trapped above. This prevents an
475   // integer sanitizer warning. In SIMD implementations the multiply can be
476   // allowed to rollover on platforms where this has defined behavior.
477   const auto adst2_b7 = static_cast<Intermediate>(kAdst4Multiplier[2]) * b7;
478   s[2] = RangeCheckValue(adst2_b7, range + 12);
479   // stage 4.
480   s[0] = RangeCheckValue(s[0] + s[5], range + 12);
481   s[1] = RangeCheckValue(s[1] - s[6], range + 12);
482   // stages 5 and 6.
483   const Intermediate x0 = RangeCheckValue(s[0] + s[3], range + 12);
484   const Intermediate x1 = RangeCheckValue(s[1] + s[3], range + 12);
485   Intermediate x3 = RangeCheckValue(s[0] + s[1], range + 12);
486   x3 = RangeCheckValue(x3 - s[3], range + 12);
487   auto dst_0 = static_cast<int32_t>(RightShiftWithRounding(x0, 12));
488   auto dst_1 = static_cast<int32_t>(RightShiftWithRounding(x1, 12));
489   auto dst_2 = static_cast<int32_t>(RightShiftWithRounding(s[2], 12));
490   auto dst_3 = static_cast<int32_t>(RightShiftWithRounding(x3, 12));
491   if (sizeof(Residual) == 2) {
492     // If the first argument to RightShiftWithRounding(..., 12) is only
493     // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
494     // in RightShiftWithRounding(..., 12) will cause the function to return
495     // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
496     dst_0 -= (dst_0 == 0x8000);
497     dst_1 -= (dst_1 == 0x8000);
498     dst_3 -= (dst_3 == 0x8000);
499   }
500   dst[0] = dst_0;
501   dst[1] = dst_1;
502   dst[2] = dst_2;
503   dst[3] = dst_3;
504 }
505 
506 template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)507 void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
508                    bool is_row) {
509   auto* const dst = static_cast<Residual*>(dest);
510 
511   if (is_row && should_round) {
512     dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
513   }
514 
515   // stage 1.
516   // Section 7.13.2.6: It is a requirement of bitstream conformance that all
517   // values stored in the s and x arrays by this process are representable by
518   // a signed integer using range + 12 bits of precision.
519   int32_t s[3];
520   s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
521   s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
522   s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
523   // stage 3.
524   // stage 4.
525   // stages 5 and 6.
526   int32_t dst_0 = RightShiftWithRounding(s[0], 12);
527   int32_t dst_1 = RightShiftWithRounding(s[1], 12);
528   int32_t dst_2 = RightShiftWithRounding(s[2], 12);
529   int32_t dst_3 =
530       RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
531   if (sizeof(Residual) == 2) {
532     // If the first argument to RightShiftWithRounding(..., 12) is only
533     // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
534     // in RightShiftWithRounding(..., 12) will cause the function to return
535     // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
536     dst_0 -= (dst_0 == 0x8000);
537     dst_1 -= (dst_1 == 0x8000);
538     dst_3 -= (dst_3 == 0x8000);
539   }
540   dst[0] = dst_0;
541   dst[1] = dst_1;
542   dst[2] = dst_2;
543   dst[3] = dst_3;
544 
545   const int size = 4;
546   if (is_row && row_shift > 0) {
547     for (int j = 0; j < size; ++j) {
548       dst[j] = RightShiftWithRounding(dst[j], row_shift);
549     }
550   }
551 
552   ClampIntermediate<bitdepth, Residual>(dst, 4);
553 }
554 
555 template <typename Residual>
AdstInputPermutation(int32_t * LIBGAV1_RESTRICT const dst,const Residual * LIBGAV1_RESTRICT const src,int n)556 void AdstInputPermutation(int32_t* LIBGAV1_RESTRICT const dst,
557                           const Residual* LIBGAV1_RESTRICT const src, int n) {
558   assert(n == 8 || n == 16);
559   for (int i = 0; i < n; ++i) {
560     dst[i] = src[((i & 1) == 0) ? n - i - 1 : i - 1];
561   }
562 }
563 
// Source index for each output position of the 16-point ADST output
// permutation. AdstOutputPermutation() halves the first eight entries (a right
// shift by one) to obtain the 8-point permutation.
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8,  12, 4,  //
    6, 14, 10, 2,  //
    3, 11, 15, 7,  //
    5, 13, 9,  1};
566 
567 template <typename Residual>
AdstOutputPermutation(Residual * LIBGAV1_RESTRICT const dst,const int32_t * LIBGAV1_RESTRICT const src,int n)568 void AdstOutputPermutation(Residual* LIBGAV1_RESTRICT const dst,
569                            const int32_t* LIBGAV1_RESTRICT const src, int n) {
570   assert(n == 8 || n == 16);
571   const auto shift = static_cast<int8_t>(n == 8);
572   for (int i = 0; i < n; ++i) {
573     const int8_t index = kAdstOutputPermutationLookup[i] >> shift;
574     int32_t dst_i = ((i & 1) == 0) ? src[index] : -src[index];
575     if (sizeof(Residual) == 2) {
576       // If i is odd and src[index] is -32768, dst_i will be 32768, which
577       // cannot be represented as an int16_t.
578       dst_i -= (dst_i == 0x8000);
579     }
580     dst[i] = dst_i;
581   }
582 }
583 
584 template <typename Residual>
Adst8_C(void * dest,int8_t range)585 void Adst8_C(void* dest, int8_t range) {
586   auto* const dst = static_cast<Residual*>(dest);
587   // stage 1.
588   int32_t temp[8];
589   AdstInputPermutation(temp, dst, 8);
590   // stage 2.
591   for (int i = 0; i < 4; ++i) {
592     ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
593                         true, range);
594   }
595   // stage 3.
596   for (int i = 0; i < 4; ++i) {
597     HadamardRotation_C(temp, i, i + 4, false, range);
598   }
599   // stage 4.
600   for (int i = 0; i < 2; ++i) {
601     ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
602   }
603   // stage 5.
604   for (int i = 0; i < 2; ++i) {
605     for (int j = 0; j < 2; ++j) {
606       HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
607                          false, range);
608     }
609   }
610   // stage 6.
611   for (int i = 0; i < 2; ++i) {
612     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
613                         range);
614   }
615   // stage 7.
616   AdstOutputPermutation(dst, temp, 8);
617 }
618 
619 template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)620 void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
621                    bool is_row) {
622   auto* const dst = static_cast<Residual*>(dest);
623 
624   // stage 1.
625   int32_t temp[8];
626   // After the permutation, the dc value is in temp[1]. The remaining are zero.
627   AdstInputPermutation(temp, dst, 8);
628 
629   if (is_row && should_round) {
630     temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
631   }
632 
633   // stage 2.
634   ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
635 
636   // stage 3.
637   temp[4] = temp[0];
638   temp[5] = temp[1];
639 
640   // stage 4.
641   ButterflyRotation_C(temp, 4, 5, 48, true, range);
642 
643   // stage 5.
644   temp[2] = temp[0];
645   temp[3] = temp[1];
646   temp[6] = temp[4];
647   temp[7] = temp[5];
648 
649   // stage 6.
650   ButterflyRotation_C(temp, 2, 3, 32, true, range);
651   ButterflyRotation_C(temp, 6, 7, 32, true, range);
652 
653   // stage 7.
654   AdstOutputPermutation(dst, temp, 8);
655 
656   const int size = 8;
657   if (is_row && row_shift > 0) {
658     for (int j = 0; j < size; ++j) {
659       dst[j] = RightShiftWithRounding(dst[j], row_shift);
660     }
661   }
662 
663   ClampIntermediate<bitdepth, Residual>(dst, 8);
664 }
665 
666 template <typename Residual>
Adst16_C(void * dest,int8_t range)667 void Adst16_C(void* dest, int8_t range) {
668   auto* const dst = static_cast<Residual*>(dest);
669   // stage 1.
670   int32_t temp[16];
671   AdstInputPermutation(temp, dst, 16);
672   // stage 2.
673   for (int i = 0; i < 8; ++i) {
674     ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
675                         true, range);
676   }
677   // stage 3.
678   for (int i = 0; i < 8; ++i) {
679     HadamardRotation_C(temp, i, i + 8, false, range);
680   }
681   // stage 4.
682   for (int i = 0; i < 2; ++i) {
683     ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
684                         56 - 32 * i, true, range);
685     ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
686                         8 + 32 * i, true, range);
687   }
688   // stage 5.
689   for (int i = 0; i < 4; ++i) {
690     for (int j = 0; j < 2; ++j) {
691       HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
692                          false, range);
693     }
694   }
695   // stage 6.
696   for (int i = 0; i < 2; ++i) {
697     for (int j = 0; j < 2; ++j) {
698       ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
699                           i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
700     }
701   }
702   // stage 7.
703   for (int i = 0; i < 2; ++i) {
704     for (int j = 0; j < 4; ++j) {
705       HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
706                          false, range);
707     }
708   }
709   // stage 8.
710   for (int i = 0; i < 4; ++i) {
711     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
712                         range);
713   }
714   // stage 9.
715   AdstOutputPermutation(dst, temp, 16);
716 }
717 
718 template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)719 void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
720                     bool is_row) {
721   auto* const dst = static_cast<Residual*>(dest);
722 
723   // stage 1.
724   int32_t temp[16];
725   // After the permutation, the dc value is in temp[1].  The remaining are zero.
726   AdstInputPermutation(temp, dst, 16);
727 
728   if (is_row && should_round) {
729     temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
730   }
731 
732   // stage 2.
733   ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
734 
735   // stage 3.
736   temp[8] = temp[0];
737   temp[9] = temp[1];
738 
739   // stage 4.
740   ButterflyRotation_C(temp, 8, 9, 56, true, range);
741 
742   // stage 5.
743   temp[4] = temp[0];
744   temp[5] = temp[1];
745   temp[12] = temp[8];
746   temp[13] = temp[9];
747 
748   // stage 6.
749   ButterflyRotation_C(temp, 4, 5, 48, true, range);
750   ButterflyRotation_C(temp, 12, 13, 48, true, range);
751 
752   // stage 7.
753   temp[2] = temp[0];
754   temp[3] = temp[1];
755   temp[10] = temp[8];
756   temp[11] = temp[9];
757 
758   temp[6] = temp[4];
759   temp[7] = temp[5];
760   temp[14] = temp[12];
761   temp[15] = temp[13];
762 
763   // stage 8.
764   for (int i = 0; i < 4; ++i) {
765     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
766                         range);
767   }
768 
769   // stage 9.
770   AdstOutputPermutation(dst, temp, 16);
771 
772   const int size = 16;
773   if (is_row && row_shift > 0) {
774     for (int j = 0; j < size; ++j) {
775       dst[j] = RightShiftWithRounding(dst[j], row_shift);
776     }
777   }
778 
779   ClampIntermediate<bitdepth, Residual>(dst, 16);
780 }
781 
782 //------------------------------------------------------------------------------
783 // Identity Transforms.
784 //
785 // In the spec, the inverse identity transform is followed by a Round2() call:
786 //   The row transforms with i = 0..(h-1) are applied as follows:
787 //     ...
788 //     * Otherwise, invoke the inverse identity transform process specified in
789 //       section 7.13.2.15 with the input variable n equal to log2W.
790 //     * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
791 //       for j = 0..(w-1).
792 //   ...
793 //   The column transforms with j = 0..(w-1) are applied as follows:
794 //     ...
795 //     * Otherwise, invoke the inverse identity transform process specified in
796 //       section 7.13.2.15 with the input variable n equal to log2H.
797 //     * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
798 //       for i = 0..(h-1).
799 //
800 // Therefore, we define the identity transform functions to perform both the
801 // inverse identity transform and the Round2() call. This has two advantages:
802 // 1. The outputs of the inverse identity transform do not need to be stored
803 //    in the Residual array. They can be stored in int32_t local variables,
804 //    which have a larger range if Residual is an int16_t array.
805 // 2. The inverse identity transform and the Round2() call can be jointly
806 //    optimized.
807 //
808 // The identity transform functions have the following prototype:
809 //   void Identity_C(void* dest, int8_t shift);
810 //
811 // The |shift| parameter is the amount of shift for the Round2() call. For row
812 // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
813 // 4. Therefore, an identity transform function can detect whether it is being
814 // invoked as a row transform or a column transform by checking whether |shift|
815 // is equal to 4.
816 //
817 // Input Range
818 //
819 // The inputs of row transforms, stored in the 2D array Dequant, are
820 // representable by a signed integer using 8 + BitDepth bits of precision:
821 //   f. Dequant[ i ][ j ] is set equal to
822 //   Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
823 //
824 // The inputs of column transforms are representable by a signed integer using
825 // Max( BitDepth + 6, 16 ) bits of precision:
826 //   Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
827 //   ...
828 //   Between the row and column transforms, Residual[ i ][ j ] is set equal to
829 //   Clip3( - ( 1 << ( colClampRange - 1 ) ),
830 //          ( 1 << (colClampRange - 1 ) ) - 1,
831 //          Residual[ i ][ j ] )
832 //   for i = 0..(h-1), for j = 0..(w-1).
833 //
834 // Output Range
835 //
836 // The outputs of row transforms are representable by a signed integer using
837 // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
838 // of the multiplicative factor of inverse identity transforms minus the
839 // smallest row shift is an increase of at most one bit.
840 //
841 // Transform | Multiplicative factor | Smallest row | Net increase
842 // width     | (in bits)             | shift        | in bits
843 // ---------------------------------------------------------------
844 //     4     |  sqrt(2)  (0.5 bits)  |      0       |    +0.5
845 //     8     |     2     (1 bit)     |      0       |    +1
846 //    16     | 2*sqrt(2) (1.5 bits)  |      1       |    +0.5
847 //    32     |     4     (2 bits)    |      1       |    +1
848 //
849 // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
850 // clip the outputs (which have 17 bits of precision) to the range of int16_t
851 // before storing them in the Residual array. This clipping happens to be the
852 // same as the required clipping after the row transform (see the spec quoted
853 // above), so we remain compliant with the spec. (In this case,
854 // TransformLoop_C() skips clipping the outputs of row transforms to avoid
855 // duplication of effort.)
856 //
857 // The outputs of column transforms are representable by a signed integer using
858 // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
859 // because the multiplicative factor of inverse identity transforms is at most
860 // 4 (2 bits) and |shift| is always 4.
861 
862 template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)863 void Identity4Row_C(void* dest, int8_t shift) {
864   // Note the intermediate value can only exceed 32 bits with 12-bit content.
865   // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
866   using Intermediate =
867       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
868   assert(shift == 0 || shift == 1);
869   auto* const dst = static_cast<Residual*>(dest);
870   // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
871   // should be (1 + (1 << 1)) << 11. The following expression works for both
872   // values of |shift|.
873   const int32_t rounding = (1 + (shift << 1)) << 11;
874   for (int i = 0; i < 4; ++i) {
875     const auto intermediate =
876         static_cast<Intermediate>(dst[i]) * kIdentity4Multiplier;
877     int32_t dst_i =
878         static_cast<int32_t>((intermediate + rounding) >> (12 + shift));
879     if (sizeof(Residual) == 2) {
880       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
881     }
882     dst[i] = static_cast<Residual>(dst_i);
883   }
884 }
885 
886 template <typename Residual>
Identity4Column_C(void * dest,int8_t)887 void Identity4Column_C(void* dest, int8_t /*shift*/) {
888   auto* const dst = static_cast<Residual*>(dest);
889   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
890   for (int i = 0; i < 4; ++i) {
891     // The intermediate value here will have to fit into an int32_t for it to be
892     // bitstream conformant. The multiplication is promoted to int32_t by
893     // defining kIdentity4Multiplier as int32_t.
894     dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
895                                    (12 + kTransformColumnShift));
896   }
897 }
898 
899 template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)900 void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
901                        int row_shift, bool is_row) {
902   // Note the intermediate value can only exceed 32 bits with 12-bit content.
903   // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
904   using Intermediate =
905       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
906   auto* const dst = static_cast<Residual*>(dest);
907 
908   if (is_row) {
909     if (should_round) {
910       const auto intermediate =
911           static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
912       dst[0] = RightShiftWithRounding(intermediate, 12);
913     }
914 
915     const int32_t rounding = (1 + (row_shift << 1)) << 11;
916     const auto intermediate =
917         static_cast<Intermediate>(dst[0]) * kIdentity4Multiplier;
918     int32_t dst_i =
919         static_cast<int32_t>((intermediate + rounding) >> (12 + row_shift));
920     if (sizeof(Residual) == 2) {
921       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
922     }
923     dst[0] = static_cast<Residual>(dst_i);
924 
925     ClampIntermediate<bitdepth, Residual>(dst, 1);
926     return;
927   }
928 
929   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
930   dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
931                                  (12 + kTransformColumnShift));
932 }
933 
934 template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)935 void Identity8Row_C(void* dest, int8_t shift) {
936   assert(shift == 0 || shift == 1 || shift == 2);
937   auto* const dst = static_cast<Residual*>(dest);
938   for (int i = 0; i < 8; ++i) {
939     int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
940     if (sizeof(Residual) == 2) {
941       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
942     }
943     dst[i] = static_cast<Residual>(dst_i);
944   }
945 }
946 
947 template <typename Residual>
Identity8Column_C(void * dest,int8_t)948 void Identity8Column_C(void* dest, int8_t /*shift*/) {
949   auto* const dst = static_cast<Residual*>(dest);
950   for (int i = 0; i < 8; ++i) {
951     dst[i] = static_cast<Residual>(
952         RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
953   }
954 }
955 
956 template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)957 void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
958                        int row_shift, bool is_row) {
959   // Note the intermediate value can only exceed 32 bits with 12-bit content.
960   // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
961   using Intermediate =
962       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
963   auto* const dst = static_cast<Residual*>(dest);
964 
965   if (is_row) {
966     if (should_round) {
967       const auto intermediate =
968           static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
969       dst[0] = RightShiftWithRounding(intermediate, 12);
970     }
971 
972     int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
973     if (sizeof(Residual) == 2) {
974       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
975     }
976     dst[0] = static_cast<Residual>(dst_i);
977 
978     // If Residual is int16_t (which implies bitdepth is 8), we don't need to
979     // clip residual[i][j] to 16 bits.
980     if (sizeof(Residual) > 2) {
981       const Residual intermediate_clamp_max =
982           (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
983       const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
984       dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
985     }
986     return;
987   }
988 
989   dst[0] = static_cast<Residual>(
990       RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
991 }
992 
993 template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)994 void Identity16Row_C(void* dest, int8_t shift) {
995   assert(shift == 1 || shift == 2);
996   // Note the intermediate value can only exceed 32 bits with 12-bit content.
997   // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
998   using Intermediate =
999       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1000   auto* const dst = static_cast<Residual*>(dest);
1001   const int32_t rounding = (1 + (1 << shift)) << 11;
1002   for (int i = 0; i < 16; ++i) {
1003     // Note the intermediate value can only exceed 32 bits with 12-bit content.
1004     // For simplicity in unoptimized code, int64_t is used for all cases.
1005     const auto intermediate =
1006         static_cast<Intermediate>(dst[i]) * kIdentity16Multiplier;
1007     int32_t dst_i =
1008         static_cast<int32_t>((intermediate + rounding) >> (12 + shift));
1009     if (sizeof(Residual) == 2) {
1010       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1011     }
1012     dst[i] = static_cast<Residual>(dst_i);
1013   }
1014 }
1015 
1016 template <typename Residual>
Identity16Column_C(void * dest,int8_t)1017 void Identity16Column_C(void* dest, int8_t /*shift*/) {
1018   auto* const dst = static_cast<Residual*>(dest);
1019   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1020   for (int i = 0; i < 16; ++i) {
1021     // The intermediate value here will have to fit into an int32_t for it to be
1022     // bitstream conformant. The multiplication is promoted to int32_t by
1023     // defining kIdentity16Multiplier as int32_t.
1024     dst[i] =
1025         static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
1026                               (12 + kTransformColumnShift));
1027   }
1028 }
1029 
1030 template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1031 void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1032                         int row_shift, bool is_row) {
1033   // Note the intermediate value can only exceed 32 bits with 12-bit content.
1034   // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
1035   using Intermediate =
1036       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1037   auto* const dst = static_cast<Residual*>(dest);
1038 
1039   if (is_row) {
1040     if (should_round) {
1041       const auto intermediate =
1042           static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
1043       dst[0] = RightShiftWithRounding(intermediate, 12);
1044     }
1045 
1046     const int32_t rounding = (1 + (1 << row_shift)) << 11;
1047     const auto intermediate =
1048         static_cast<Intermediate>(dst[0]) * kIdentity16Multiplier;
1049     int32_t dst_i =
1050         static_cast<int32_t>((intermediate + rounding) >> (12 + row_shift));
1051     if (sizeof(Residual) == 2) {
1052       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1053     }
1054     dst[0] = static_cast<Residual>(dst_i);
1055 
1056     ClampIntermediate<bitdepth, Residual>(dst, 1);
1057     return;
1058   }
1059 
1060   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1061   dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1062                                  (12 + kTransformColumnShift));
1063 }
1064 
1065 template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1066 void Identity32Row_C(void* dest, int8_t shift) {
1067   assert(shift == 1 || shift == 2);
1068   auto* const dst = static_cast<Residual*>(dest);
1069   for (int i = 0; i < 32; ++i) {
1070     int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1071     if (sizeof(Residual) == 2) {
1072       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1073     }
1074     dst[i] = static_cast<Residual>(dst_i);
1075   }
1076 }
1077 
1078 template <typename Residual>
Identity32Column_C(void * dest,int8_t)1079 void Identity32Column_C(void* dest, int8_t /*shift*/) {
1080   auto* const dst = static_cast<Residual*>(dest);
1081   for (int i = 0; i < 32; ++i) {
1082     dst[i] = static_cast<Residual>(
1083         RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1084   }
1085 }
1086 
1087 template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1088 void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1089                         int row_shift, bool is_row) {
1090   // Note the intermediate value can only exceed 32 bits with 12-bit content.
1091   // For simplicity in unoptimized code, int64_t is used for both 10 & 12-bit.
1092   using Intermediate =
1093       typename std::conditional<sizeof(Residual) == 2, int32_t, int64_t>::type;
1094   auto* const dst = static_cast<Residual*>(dest);
1095 
1096   if (is_row) {
1097     if (should_round) {
1098       const auto intermediate =
1099           static_cast<Intermediate>(dst[0]) * kTransformRowMultiplier;
1100       dst[0] = RightShiftWithRounding(intermediate, 12);
1101     }
1102 
1103     int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1104     if (sizeof(Residual) == 2) {
1105       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1106     }
1107     dst[0] = static_cast<Residual>(dst_i);
1108 
1109     ClampIntermediate<bitdepth, Residual>(dst, 1);
1110     return;
1111   }
1112 
1113   dst[0] = static_cast<Residual>(
1114       RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1115 }
1116 
1117 //------------------------------------------------------------------------------
1118 // Walsh Hadamard Transform.
1119 
// 4-point inverse Walsh-Hadamard transform.
//
// |dest| points to four Residual coefficients, which are rewritten in place.
// Every input is first shifted right by |shift| (no rounding). All
// intermediate values are stored as Residual, so they wrap exactly as the
// Residual type does.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  Residual* const coeffs = static_cast<Residual*>(dest);
  // Load the inputs in the permuted order used by the butterfly below.
  Residual a = coeffs[0] >> shift;
  const Residual c = coeffs[1] >> shift;
  Residual d = coeffs[2] >> shift;
  const Residual b = coeffs[3] >> shift;

  a = static_cast<Residual>(a + c);
  d = static_cast<Residual>(d - b);
  // This signed right shift must be an arithmetic shift.
  const Residual e = (a - d) >> 1;

  const Residual out1 = e - b;
  const Residual out2 = e - c;
  coeffs[1] = out1;
  coeffs[2] = out2;
  coeffs[0] = a - out1;
  coeffs[3] = d + out2;
}
1137 
1138 template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1139 void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1140                   int /*row_shift*/, bool /*is_row*/) {
1141   auto* const dst = static_cast<Residual*>(dest);
1142   const int shift = range;
1143 
1144   Residual temp = dst[0] >> shift;
1145   // This signed right shift must be an arithmetic shift.
1146   Residual e = temp >> 1;
1147   dst[0] = temp - e;
1148   dst[1] = e;
1149   dst[2] = e;
1150   dst[3] = e;
1151 
1152   ClampIntermediate<bitdepth, Residual>(dst, 4);
1153 }
1154 
1155 //------------------------------------------------------------------------------
1156 // row/column transform loop
1157 
// Signature of a full 1D inverse transform kernel operating in place on
// |dst|. TransformLoop_C() passes the rounding shift as |range| for identity
// transforms and the clamping range (or WHT shift) otherwise.
using InverseTransform1dFunc = void (*)(void* dst,       //
                                        int8_t range);

// Signature of a DC-only 1D inverse transform kernel operating in place on
// |dest|. |should_round| and |row_shift| describe the row pass; |is_row|
// selects between the row and the column pass.
using InverseTransformDcOnlyFunc = void (*)(void* dest,         //
                                            int8_t range,       //
                                            bool should_round,  //
                                            int row_shift,      //
                                            bool is_row);
1162 
1163 template <int bitdepth, typename Residual, typename Pixel,
1164           Transform1d transform1d_type,
1165           InverseTransformDcOnlyFunc dconly_transform1d,
1166           InverseTransform1dFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * LIBGAV1_RESTRICT src_buffer,int start_x,int start_y,void * LIBGAV1_RESTRICT dst_frame)1167 void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1168                      int adjusted_tx_height, void* LIBGAV1_RESTRICT src_buffer,
1169                      int start_x, int start_y,
1170                      void* LIBGAV1_RESTRICT dst_frame) {
1171   constexpr bool lossless = transform1d_type == kTransform1dWht;
1172   constexpr bool is_identity = transform1d_type == kTransform1dIdentity;
1173   // The transform size of the WHT is always 4x4. Setting tx_width and
1174   // tx_height to the constant 4 for the WHT speeds the code up.
1175   assert(!lossless || tx_size == kTransformSize4x4);
1176   const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1177   const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1178   const int tx_width_log2 = kTransformWidthLog2[tx_size];
1179   const int tx_height_log2 = kTransformHeightLog2[tx_size];
1180   auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1181 
1182   // Initially this points to the dequantized values. After the transforms are
1183   // applied, this buffer contains the residual.
1184   Array2DView<Residual> residual(tx_height, tx_width,
1185                                  static_cast<Residual*>(src_buffer));
1186 
1187   if (is_row) {
1188     // Row transform.
1189     const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1190     // This is the |range| parameter of the InverseTransform1dFunc.  For lossy
1191     // transforms, this will be equal to the clamping range.
1192     const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1193     // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1194     // the input to the row transform by 1 / sqrt(2), which is approximated by
1195     // the fraction 2896 / 2^12.
1196     const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1197 
1198     if (adjusted_tx_height == 1) {
1199       dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1200                          true);
1201       return;
1202     }
1203 
1204     // Row transforms need to be done only up to 32 because the rest of the rows
1205     // are always all zero if |tx_height| is 64.  Otherwise, only process the
1206     // rows that have a non zero coefficients.
1207     for (int i = 0; i < adjusted_tx_height; ++i) {
1208       // If lossless, the transform size is 4x4, so should_round is false.
1209       if (!lossless && should_round) {
1210         // The last 32 values of every row are always zero if the |tx_width| is
1211         // 64.
1212         for (int j = 0; j < std::min(tx_width, 32); ++j) {
1213           residual[i][j] = RightShiftWithRounding(
1214               residual[i][j] * kTransformRowMultiplier, 12);
1215         }
1216       }
1217       // For identity transform, |transform1d_func| also performs the
1218       // Round2(T[j], rowShift) call in the spec.
1219       transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1220       if (!lossless && !is_identity && row_shift > 0) {
1221         for (int j = 0; j < tx_width; ++j) {
1222           residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1223         }
1224       }
1225 
1226       ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1227     }
1228     return;
1229   }
1230 
1231   assert(!is_row);
1232   constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1233   // This is the |range| parameter of the InverseTransform1dFunc.  For lossy
1234   // transforms, this will be equal to the clamping range.
1235   const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1236   const bool flip_rows = transform1d_type == kTransform1dAdst &&
1237                          kTransformFlipRowsMask.Contains(tx_type);
1238   const bool flip_columns =
1239       !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1240   const int min_value = 0;
1241   const int max_value = (1 << bitdepth) - 1;
1242   // Note: 64 is the maximum size of a 1D transform buffer (the largest
1243   // transform size is kTransformSize64x64).
1244   Residual tx_buffer[64];
1245   for (int j = 0; j < tx_width; ++j) {
1246     const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1247     int i = 0;
1248     do {
1249       tx_buffer[i] = residual[i][flipped_j];
1250     } while (++i != tx_height);
1251     if (adjusted_tx_height == 1) {
1252       dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1253     } else {
1254       // For identity transform, |transform1d_func| also performs the
1255       // Round2(T[i], colShift) call in the spec.
1256       transform1d_func(tx_buffer,
1257                        is_identity ? column_shift : column_clamp_range);
1258     }
1259     const int x = start_x + j;
1260     for (int i = 0; i < tx_height; ++i) {
1261       const int y = start_y + i;
1262       const int index = flip_rows ? tx_height - i - 1 : i;
1263       Residual residual_value = tx_buffer[index];
1264       if (!lossless && !is_identity) {
1265         residual_value = RightShiftWithRounding(residual_value, column_shift);
1266       }
1267       (*frame)[y][x] =
1268           Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1269     }
1270   }
1271 }
1272 
1273 //------------------------------------------------------------------------------
1274 
1275 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1276 template <int bitdepth, typename Residual, typename Pixel>
InitAll(Dsp * const dsp)1277 void InitAll(Dsp* const dsp) {
1278   // Maximum transform size for Dct is 64.
1279   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1280       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1281                       DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
1282                       /*is_row=*/true>;
1283   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1284       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1285                       DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
1286                       /*is_row=*/false>;
1287   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1288       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1289                       DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
1290                       /*is_row=*/true>;
1291   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1292       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1293                       DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
1294                       /*is_row=*/false>;
1295   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1296       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1297                       DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
1298                       /*is_row=*/true>;
1299   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1300       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1301                       DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
1302                       /*is_row=*/false>;
1303   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1304       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1305                       DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
1306                       /*is_row=*/true>;
1307   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1308       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1309                       DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
1310                       /*is_row=*/false>;
1311   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1312       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1313                       DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
1314                       /*is_row=*/true>;
1315   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1316       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1317                       DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
1318                       /*is_row=*/false>;
1319 
1320   // Maximum transform size for Adst is 16.
1321   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1322       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1323                       Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
1324                       /*is_row=*/true>;
1325   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1326       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1327                       Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
1328                       /*is_row=*/false>;
1329   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1330       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1331                       Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
1332                       /*is_row=*/true>;
1333   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1334       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1335                       Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
1336                       /*is_row=*/false>;
1337   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1338       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1339                       Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
1340                       /*is_row=*/true>;
1341   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1342       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1343                       Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
1344                       /*is_row=*/false>;
1345 
1346   // Maximum transform size for Identity transform is 32.
1347   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1348       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1349                       Identity4DcOnly_C<bitdepth, Residual>,
1350                       Identity4Row_C<Residual>, /*is_row=*/true>;
1351   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1352       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1353                       Identity4DcOnly_C<bitdepth, Residual>,
1354                       Identity4Column_C<Residual>, /*is_row=*/false>;
1355   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1356       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1357                       Identity8DcOnly_C<bitdepth, Residual>,
1358                       Identity8Row_C<Residual>, /*is_row=*/true>;
1359   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1360       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1361                       Identity8DcOnly_C<bitdepth, Residual>,
1362                       Identity8Column_C<Residual>, /*is_row=*/false>;
1363   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1364       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1365                       Identity16DcOnly_C<bitdepth, Residual>,
1366                       Identity16Row_C<Residual>, /*is_row=*/true>;
1367   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1368       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1369                       Identity16DcOnly_C<bitdepth, Residual>,
1370                       Identity16Column_C<Residual>, /*is_row=*/false>;
1371   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1372       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1373                       Identity32DcOnly_C<bitdepth, Residual>,
1374                       Identity32Row_C<Residual>, /*is_row=*/true>;
1375   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1376       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1377                       Identity32DcOnly_C<bitdepth, Residual>,
1378                       Identity32Column_C<Residual>, /*is_row=*/false>;
1379 
1380   // Maximum transform size for Wht is 4.
1381   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1382       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
1383                       Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
1384                       /*is_row=*/true>;
1385   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1386       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
1387                       Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
1388                       /*is_row=*/false>;
1389 }
1390 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1391 
Init8bpp()1392 void Init8bpp() {
1393   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1394   assert(dsp != nullptr);
1395   static_cast<void>(dsp);
1396 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1397   InitAll<8, int16_t, uint8_t>(dsp);
1398 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1399 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
1400   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1401       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1402                       DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1403                       /*is_row=*/true>;
1404   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1405       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1406                       DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1407                       /*is_row=*/false>;
1408 #endif
1409 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
1410   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1411       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1412                       DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1413                       /*is_row=*/true>;
1414   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1415       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1416                       DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1417                       /*is_row=*/false>;
1418 #endif
1419 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
1420   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1421       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1422                       DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1423                       /*is_row=*/true>;
1424   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1425       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1426                       DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1427                       /*is_row=*/false>;
1428 #endif
1429 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
1430   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1431       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1432                       DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1433                       /*is_row=*/true>;
1434   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1435       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1436                       DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1437                       /*is_row=*/false>;
1438 #endif
1439 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
1440   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1441       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1442                       DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1443                       /*is_row=*/true>;
1444   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1445       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1446                       DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1447                       /*is_row=*/false>;
1448 #endif
1449 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
1450   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1451       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1452                       Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1453                       /*is_row=*/true>;
1454   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1455       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1456                       Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1457                       /*is_row=*/false>;
1458 #endif
1459 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
1460   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1461       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1462                       Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1463                       /*is_row=*/true>;
1464   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1465       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1466                       Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1467                       /*is_row=*/false>;
1468 #endif
1469 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
1470   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1471       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1472                       Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1473                       /*is_row=*/true>;
1474   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1475       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1476                       Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1477                       /*is_row=*/false>;
1478 #endif
1479 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
1480   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1481       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1482                       Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1483                       /*is_row=*/true>;
1484   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1485       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1486                       Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1487                       /*is_row=*/false>;
1488 #endif
1489 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
1490   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1491       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1492                       Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1493                       /*is_row=*/true>;
1494   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1495       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1496                       Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1497                       /*is_row=*/false>;
1498 #endif
1499 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
1500   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1501       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1502                       Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1503                       /*is_row=*/true>;
1504   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1505       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1506                       Identity16DcOnly_C<8, int16_t>,
1507                       Identity16Column_C<int16_t>, /*is_row=*/false>;
1508 #endif
1509 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
1510   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1511       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1512                       Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1513                       /*is_row=*/true>;
1514   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1515       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1516                       Identity32DcOnly_C<8, int16_t>,
1517                       Identity32Column_C<int16_t>, /*is_row=*/false>;
1518 #endif
1519 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
1520   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1521       TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1522                       Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1523                       /*is_row=*/true>;
1524   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1525       TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1526                       Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1527                       /*is_row=*/false>;
1528 #endif
1529 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1530 }
1531 
1532 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()1533 void Init10bpp() {
1534   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1535   assert(dsp != nullptr);
1536   static_cast<void>(dsp);
1537 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1538   InitAll<10, int32_t, uint16_t>(dsp);
1539 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1540 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct
1541   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1542       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1543                       DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1544                       /*is_row=*/true>;
1545   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1546       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1547                       DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1548                       /*is_row=*/false>;
1549 #endif
1550 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct
1551   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1552       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1553                       DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1554                       /*is_row=*/true>;
1555   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1556       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1557                       DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1558                       /*is_row=*/false>;
1559 #endif
1560 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct
1561   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1562       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1563                       DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1564                       /*is_row=*/true>;
1565   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1566       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1567                       DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1568                       /*is_row=*/false>;
1569 #endif
1570 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct
1571   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1572       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1573                       DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1574                       /*is_row=*/true>;
1575   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1576       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1577                       DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1578                       /*is_row=*/false>;
1579 #endif
1580 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct
1581   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1582       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1583                       DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1584                       /*is_row=*/true>;
1585   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1586       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1587                       DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1588                       /*is_row=*/false>;
1589 #endif
1590 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst
1591   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1592       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1593                       Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1594                       /*is_row=*/true>;
1595   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1596       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1597                       Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1598                       /*is_row=*/false>;
1599 #endif
1600 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst
1601   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1602       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1603                       Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1604                       /*is_row=*/true>;
1605   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1606       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1607                       Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1608                       /*is_row=*/false>;
1609 #endif
1610 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst
1611   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1612       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1613                       Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1614                       /*is_row=*/true>;
1615   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1616       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1617                       Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1618                       /*is_row=*/false>;
1619 #endif
1620 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity
1621   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1622       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1623                       Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1624                       /*is_row=*/true>;
1625   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1626       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1627                       Identity4DcOnly_C<10, int32_t>,
1628                       Identity4Column_C<int32_t>, /*is_row=*/false>;
1629 #endif
1630 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity
1631   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1632       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1633                       Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1634                       /*is_row=*/true>;
1635   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1636       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1637                       Identity8DcOnly_C<10, int32_t>,
1638                       Identity8Column_C<int32_t>, /*is_row=*/false>;
1639 #endif
1640 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity
1641   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1642       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1643                       Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1644                       /*is_row=*/true>;
1645   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1646       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1647                       Identity16DcOnly_C<10, int32_t>,
1648                       Identity16Column_C<int32_t>, /*is_row=*/false>;
1649 #endif
1650 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity
1651   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1652       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1653                       Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1654                       /*is_row=*/true>;
1655   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1656       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1657                       Identity32DcOnly_C<10, int32_t>,
1658                       Identity32Column_C<int32_t>, /*is_row=*/false>;
1659 #endif
1660 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht
1661   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1662       TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1663                       Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1664                       /*is_row=*/true>;
1665   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1666       TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1667                       Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1668                       /*is_row=*/false>;
1669 #endif
1670 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1671 }
1672 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1673 
1674 #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()1675 void Init12bpp() {
1676   Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
1677   assert(dsp != nullptr);
1678   static_cast<void>(dsp);
1679 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1680   InitAll<12, int32_t, uint16_t>(dsp);
1681 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1682 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dDct
1683   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1684       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1685                       DctDcOnly_C<12, int32_t, 2>, Dct_C<int32_t, 2>,
1686                       /*is_row=*/true>;
1687   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1688       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1689                       DctDcOnly_C<12, int32_t, 2>, Dct_C<int32_t, 2>,
1690                       /*is_row=*/false>;
1691 #endif
1692 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dDct
1693   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1694       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1695                       DctDcOnly_C<12, int32_t, 3>, Dct_C<int32_t, 3>,
1696                       /*is_row=*/true>;
1697   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1698       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1699                       DctDcOnly_C<12, int32_t, 3>, Dct_C<int32_t, 3>,
1700                       /*is_row=*/false>;
1701 #endif
1702 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dDct
1703   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1704       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1705                       DctDcOnly_C<12, int32_t, 4>, Dct_C<int32_t, 4>,
1706                       /*is_row=*/true>;
1707   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1708       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1709                       DctDcOnly_C<12, int32_t, 4>, Dct_C<int32_t, 4>,
1710                       /*is_row=*/false>;
1711 #endif
1712 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize32_Transform1dDct
1713   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1714       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1715                       DctDcOnly_C<12, int32_t, 5>, Dct_C<int32_t, 5>,
1716                       /*is_row=*/true>;
1717   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1718       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1719                       DctDcOnly_C<12, int32_t, 5>, Dct_C<int32_t, 5>,
1720                       /*is_row=*/false>;
1721 #endif
1722 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize64_Transform1dDct
1723   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1724       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1725                       DctDcOnly_C<12, int32_t, 6>, Dct_C<int32_t, 6>,
1726                       /*is_row=*/true>;
1727   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1728       TransformLoop_C<12, int32_t, uint16_t, kTransform1dDct,
1729                       DctDcOnly_C<12, int32_t, 6>, Dct_C<int32_t, 6>,
1730                       /*is_row=*/false>;
1731 #endif
1732 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dAdst
1733   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1734       TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1735                       Adst4DcOnly_C<12, int32_t>, Adst4_C<int32_t>,
1736                       /*is_row=*/true>;
1737   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1738       TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1739                       Adst4DcOnly_C<12, int32_t>, Adst4_C<int32_t>,
1740                       /*is_row=*/false>;
1741 #endif
1742 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dAdst
1743   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1744       TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1745                       Adst8DcOnly_C<12, int32_t>, Adst8_C<int32_t>,
1746                       /*is_row=*/true>;
1747   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1748       TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1749                       Adst8DcOnly_C<12, int32_t>, Adst8_C<int32_t>,
1750                       /*is_row=*/false>;
1751 #endif
1752 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dAdst
1753   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1754       TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1755                       Adst16DcOnly_C<12, int32_t>, Adst16_C<int32_t>,
1756                       /*is_row=*/true>;
1757   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1758       TransformLoop_C<12, int32_t, uint16_t, kTransform1dAdst,
1759                       Adst16DcOnly_C<12, int32_t>, Adst16_C<int32_t>,
1760                       /*is_row=*/false>;
1761 #endif
1762 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dIdentity
1763   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1764       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1765                       Identity4DcOnly_C<12, int32_t>, Identity4Row_C<int32_t>,
1766                       /*is_row=*/true>;
1767   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1768       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1769                       Identity4DcOnly_C<12, int32_t>,
1770                       Identity4Column_C<int32_t>, /*is_row=*/false>;
1771 #endif
1772 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize8_Transform1dIdentity
1773   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1774       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1775                       Identity8DcOnly_C<12, int32_t>, Identity8Row_C<int32_t>,
1776                       /*is_row=*/true>;
1777   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1778       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1779                       Identity8DcOnly_C<12, int32_t>,
1780                       Identity8Column_C<int32_t>, /*is_row=*/false>;
1781 #endif
1782 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize16_Transform1dIdentity
1783   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1784       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1785                       Identity16DcOnly_C<12, int32_t>, Identity16Row_C<int32_t>,
1786                       /*is_row=*/true>;
1787   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1788       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1789                       Identity16DcOnly_C<12, int32_t>,
1790                       Identity16Column_C<int32_t>, /*is_row=*/false>;
1791 #endif
1792 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize32_Transform1dIdentity
1793   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1794       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1795                       Identity32DcOnly_C<12, int32_t>, Identity32Row_C<int32_t>,
1796                       /*is_row=*/true>;
1797   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1798       TransformLoop_C<12, int32_t, uint16_t, kTransform1dIdentity,
1799                       Identity32DcOnly_C<12, int32_t>,
1800                       Identity32Column_C<int32_t>, /*is_row=*/false>;
1801 #endif
1802 #ifndef LIBGAV1_Dsp12bpp_Transform1dSize4_Transform1dWht
1803   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1804       TransformLoop_C<12, int32_t, uint16_t, kTransform1dWht,
1805                       Wht4DcOnly_C<12, int32_t>, Wht4_C<int32_t>,
1806                       /*is_row=*/true>;
1807   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1808       TransformLoop_C<12, int32_t, uint16_t, kTransform1dWht,
1809                       Wht4DcOnly_C<12, int32_t>, Wht4_C<int32_t>,
1810                       /*is_row=*/false>;
1811 #endif
1812 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1813 }
1814 #endif  // LIBGAV1_MAX_BITDEPTH == 12
1815 
1816 }  // namespace
1817 
InverseTransformInit_C()1818 void InverseTransformInit_C() {
1819   Init8bpp();
1820 #if LIBGAV1_MAX_BITDEPTH >= 10
1821   Init10bpp();
1822 #endif
1823 #if LIBGAV1_MAX_BITDEPTH == 12
1824   Init12bpp();
1825 #endif
1826 
1827   // Local functions that may be unused depending on the optimizations
1828   // available.
1829   static_cast<void>(kBitReverseLookup);
1830 }
1831 
1832 }  // namespace dsp
1833 }  // namespace libgav1
1834