xref: /aosp_15_r20/external/libvpx/test/dct32x32_test.cc (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_config.h"
20 #include "./vpx_dsp_rtcd.h"
21 #include "test/acm_random.h"
22 #include "test/bench.h"
23 #include "test/clear_system_state.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
26 #include "vp9/common/vp9_entropy.h"
27 #include "vp9/common/vp9_scan.h"
28 #include "vpx/vpx_codec.h"
29 #include "vpx/vpx_integer.h"
30 #include "vpx_ports/mem.h"
31 #include "vpx_ports/vpx_timer.h"
32 
33 using libvpx_test::ACMRandom;
34 
35 namespace {
36 
// Number of samples / coefficients in one 32x32 block.
constexpr int kNumCoeffs = 1024;
constexpr double kPi = 3.141592653589793238462643383279502884;

// Double-precision reference for a 32-point one-dimensional DCT.
//
// For every output index k the result is
//   sum over n of in[n] * cos(pi * (2n + 1) * k / 64),
// and the k == 0 term is additionally multiplied by 1/sqrt(2).
//
// in:  32 input samples.
// out: receives the 32 transform outputs.
void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int freq = 0; freq < 32; ++freq) {
    out[freq] = 0.0;
    for (int pos = 0; pos < 32; ++pos) {
      const double basis = cos(kPi * (2 * pos + 1) * freq / 64.0);
      out[freq] += in[pos] * basis;
    }
    // Only the first output gets the extra 1/sqrt(2) factor.
    if (freq == 0) out[freq] *= kInvSqrt2;
  }
}
49 
// Double-precision reference for the 32x32 two-dimensional forward DCT.
//
// The 1-D reference transform is applied to every column of `input`, then to
// every row of that intermediate result; the row-pass outputs are divided by
// 4 before being stored.
//
// input:  32x32 block of samples, row-major with a stride of 32.
// output: receives the 32x32 transform result in the same layout.
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
                            double output[kNumCoeffs]) {
  double line_in[32];
  double line_out[32];

  // Pass 1: transform each column.
  for (int col = 0; col < 32; ++col) {
    for (int row = 0; row < 32; ++row) line_in[row] = input[row * 32 + col];
    reference_32x32_dct_1d(line_in, line_out);
    for (int row = 0; row < 32; ++row) output[row * 32 + col] = line_out[row];
  }

  // Pass 2: transform each row of the intermediate result in place.
  for (int row = 0; row < 32; ++row) {
    double *const row_data = output + row * 32;
    for (int col = 0; col < 32; ++col) line_in[col] = row_data[col];
    reference_32x32_dct_1d(line_in, line_out);
    // Scale by some magic number
    for (int col = 0; col < 32; ++col) row_data[col] = line_out[col] / 4;
  }
}
68 
69 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
70 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
71 
72 typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
73     Trans32x32Param;
74 
75 typedef std::tuple<InvTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t, int, int>
76     InvTrans32x32Param;
77 
#if CONFIG_VP9_HIGHBITDEPTH
// Adapters that give vpx_highbd_idct32x32_1024_add_c() the InvTxfmFunc
// signature by fixing its trailing bit-depth argument.

// 10-bit variant.
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  const int kBitDepth = 10;
  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride,
                                  kBitDepth);
}

// 12-bit variant.
void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  const int kBitDepth = 12;
  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride,
                                  kBitDepth);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
87 
88 class Trans32x32Test : public AbstractBench,
89                        public ::testing::TestWithParam<Trans32x32Param> {
90  public:
91   ~Trans32x32Test() override = default;
SetUp()92   void SetUp() override {
93     fwd_txfm_ = GET_PARAM(0);
94     inv_txfm_ = GET_PARAM(1);
95     version_ = GET_PARAM(2);  // 0: high precision forward transform
96                               // 1: low precision version for rd loop
97     bit_depth_ = GET_PARAM(3);
98     mask_ = (1 << bit_depth_) - 1;
99   }
100 
TearDown()101   void TearDown() override { libvpx_test::ClearSystemState(); }
102 
103  protected:
104   int version_;
105   vpx_bit_depth_t bit_depth_;
106   int mask_;
107   FwdTxfmFunc fwd_txfm_;
108   InvTxfmFunc inv_txfm_;
109 
110   int16_t *bench_in_;
111   tran_low_t *bench_out_;
112   void Run() override;
113 };
114 
Run()115 void Trans32x32Test::Run() { fwd_txfm_(bench_in_, bench_out_, 32); }
116 
TEST_P(Trans32x32Test,AccuracyCheck)117 TEST_P(Trans32x32Test, AccuracyCheck) {
118   ACMRandom rnd(ACMRandom::DeterministicSeed());
119   uint32_t max_error = 0;
120   int64_t total_error = 0;
121   const int count_test_block = 10000;
122   DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
123   DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
124   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
125   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
126 #if CONFIG_VP9_HIGHBITDEPTH
127   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
128   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
129 #endif
130 
131   for (int i = 0; i < count_test_block; ++i) {
132     // Initialize a test block with input range [-mask_, mask_].
133     for (int j = 0; j < kNumCoeffs; ++j) {
134       if (bit_depth_ == VPX_BITS_8) {
135         src[j] = rnd.Rand8();
136         dst[j] = rnd.Rand8();
137         test_input_block[j] = src[j] - dst[j];
138 #if CONFIG_VP9_HIGHBITDEPTH
139       } else {
140         src16[j] = rnd.Rand16() & mask_;
141         dst16[j] = rnd.Rand16() & mask_;
142         test_input_block[j] = src16[j] - dst16[j];
143 #endif
144       }
145     }
146 
147     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
148     if (bit_depth_ == VPX_BITS_8) {
149       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
150 #if CONFIG_VP9_HIGHBITDEPTH
151     } else {
152       ASM_REGISTER_STATE_CHECK(
153           inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
154 #endif
155     }
156 
157     for (int j = 0; j < kNumCoeffs; ++j) {
158 #if CONFIG_VP9_HIGHBITDEPTH
159       const int32_t diff =
160           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
161 #else
162       const int32_t diff = dst[j] - src[j];
163 #endif
164       const uint32_t error = diff * diff;
165       if (max_error < error) max_error = error;
166       total_error += error;
167     }
168   }
169 
170   if (version_ == 1) {
171     max_error /= 2;
172     total_error /= 45;
173   }
174 
175   EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
176       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
177 
178   EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
179       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
180 }
181 
// Compares the forward transform under test against vpx_fdct32x32_c() on
// random input.  Version 0 must match exactly; any other version may differ
// by at most 6 per coefficient.
TEST_P(Trans32x32Test, CoeffCheck) {
  const int kBlockCount = 1000;
  const int kStride = 32;
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, expected[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, actual[kNumCoeffs]);

  for (int block = 0; block < kBlockCount; ++block) {
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      input_block[idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
    }

    vpx_fdct32x32_c(input_block, expected, kStride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, actual, kStride));

    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      if (version_ == 0) {
        EXPECT_EQ(actual[idx], expected[idx])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(actual[idx] - expected[idx]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
    }
  }
}
210 
// Feeds extreme-valued blocks (every sample is +mask_ or -mask_) to the
// forward transform, checks it against vpx_fdct32x32_c(), and checks that
// both outputs stay within the expected coefficient range.
TEST_P(Trans32x32Test, MemCheck) {
  const int kBlockCount = 2000;
  const int kStride = 32;
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, expected[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, actual[kNumCoeffs]);

  // Largest coefficient magnitude tolerated for the current bit depth.
  const auto coeff_limit = 4 * DCT_MAX_VALUE << (bit_depth_ - 8);

  for (int block = 0; block < kBlockCount; ++block) {
    // Initialize a test block with input range [-mask_, mask_].
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      input_extreme_block[idx] = rnd.Rand8() & 1 ? mask_ : -mask_;
    }
    // The first two blocks are overwritten with the all-maximum and
    // all-minimum patterns (the random draws above are still consumed).
    if (block == 0 || block == 1) {
      const int fill = (block == 0) ? mask_ : -mask_;
      for (int idx = 0; idx < kNumCoeffs; ++idx) {
        input_extreme_block[idx] = fill;
      }
    }

    vpx_fdct32x32_c(input_extreme_block, expected, kStride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, actual, kStride));

    // The minimum quant value is 4.
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      if (version_ != 0) {
        EXPECT_GE(6, abs(actual[idx] - expected[idx]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      } else {
        EXPECT_EQ(actual[idx], expected[idx])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      }
      EXPECT_GE(coeff_limit, abs(expected[idx]))
          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
      EXPECT_GE(coeff_limit, abs(actual[idx]))
          << "Error: 32x32 FDCT has coefficient larger than "
          << "4*DCT_MAX_VALUE";
    }
  }
}
252 
// Benchmark of the forward transform (disabled by default).
//
// The input block is filled with +/-mask_ samples from the deterministic
// generator before timing starts.  Previously the buffer was handed to the
// transform uninitialized, so the benchmark read indeterminate memory and
// its input varied from run to run.
TEST_P(Trans32x32Test, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  // Same extreme-value pattern that MemCheck feeds to the transform.
  for (int j = 0; j < kNumCoeffs; ++j) {
    input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
  }

  // Run() reads and writes through these pointers.
  bench_in_ = input_extreme_block;
  bench_out_ = output_block;

  RunNTimes(INT16_MAX);
  PrintMedian("32x32");
}
265 
// Checks the inverse transform on its own: coefficients come from the
// double-precision reference DCT (rounded to integers), are run through the
// inverse transform, and every reconstructed sample must be within 1 of the
// source sample.
TEST_P(Trans32x32Test, InverseAccuracy) {
  const int kBlockCount = 1000;
  const int kStride = 32;
  const bool is_8bit = (bit_depth_ == VPX_BITS_8);

  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, int16_t, residual[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

  for (int block = 0; block < kBlockCount; ++block) {
    double ref_coeff[kNumCoeffs];

    // Initialize a test block with input range [-mask_, mask_].
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      if (is_8bit) {
        src[idx] = rnd.Rand8();
        dst[idx] = rnd.Rand8();
        residual[idx] = src[idx] - dst[idx];
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        src16[idx] = rnd.Rand16() & mask_;
        dst16[idx] = rnd.Rand16() & mask_;
        residual[idx] = src16[idx] - dst16[idx];
#endif
      }
    }

    // Reference forward transform, rounded to the integer coefficient type.
    reference_32x32_dct_2d(residual, ref_coeff);
    for (int idx = 0; idx < kNumCoeffs; ++idx) {
      coeff[idx] = static_cast<tran_low_t>(round(ref_coeff[idx]));
    }

    if (is_8bit) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, kStride));
#if CONFIG_VP9_HIGHBITDEPTH
    } else {
      ASM_REGISTER_STATE_CHECK(
          inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), kStride));
#endif
    }

    for (int idx = 0; idx < kNumCoeffs; ++idx) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int diff =
          is_8bit ? dst[idx] - src[idx] : dst16[idx] - src16[idx];
#else
      const int diff = dst[idx] - src[idx];
#endif
      const int error = diff * diff;
      EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
                          << " at index " << idx;
    }
  }
}
320 
321 class InvTrans32x32Test : public ::testing::TestWithParam<InvTrans32x32Param> {
322  public:
323   ~InvTrans32x32Test() override = default;
SetUp()324   void SetUp() override {
325     ref_txfm_ = GET_PARAM(0);
326     inv_txfm_ = GET_PARAM(1);
327     version_ = GET_PARAM(2);  // 0: high precision forward transform
328                               // 1: low precision version for rd loop
329     bit_depth_ = GET_PARAM(3);
330     eob_ = GET_PARAM(4);
331     thresh_ = GET_PARAM(4);
332     mask_ = (1 << bit_depth_) - 1;
333     pitch_ = 32;
334   }
335 
TearDown()336   void TearDown() override { libvpx_test::ClearSystemState(); }
337 
338  protected:
RunRefTxfm(tran_low_t * out,uint8_t * dst,int stride)339   void RunRefTxfm(tran_low_t *out, uint8_t *dst, int stride) {
340     ref_txfm_(out, dst, stride);
341   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)342   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
343     inv_txfm_(out, dst, stride);
344   }
345   int version_;
346   vpx_bit_depth_t bit_depth_;
347   int mask_;
348   int eob_;
349   int thresh_;
350 
351   InvTxfmFunc ref_txfm_;
352   InvTxfmFunc inv_txfm_;
353   int pitch_;
354 
RunInvTrans32x32SpeedTest()355   void RunInvTrans32x32SpeedTest() {
356     ACMRandom rnd(ACMRandom::DeterministicSeed());
357     const int count_test_block = 10000;
358     int64_t c_sum_time = 0;
359     int64_t simd_sum_time = 0;
360     const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
361     DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
362     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
363     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
364 #if CONFIG_VP9_HIGHBITDEPTH
365     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
366     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
367 #endif  // CONFIG_VP9_HIGHBITDEPTH
368 
369     for (int j = 0; j < kNumCoeffs; ++j) {
370       if (j < eob_) {
371         // Random values less than the threshold, either positive or negative
372         coeff[scan[j]] = rnd(thresh_);
373       } else {
374         coeff[scan[j]] = 0;
375       }
376       if (bit_depth_ == VPX_BITS_8) {
377         dst[j] = 0;
378         ref[j] = 0;
379 #if CONFIG_VP9_HIGHBITDEPTH
380       } else {
381         dst16[j] = 0;
382         ref16[j] = 0;
383 #endif  // CONFIG_VP9_HIGHBITDEPTH
384       }
385     }
386 
387     if (bit_depth_ == VPX_BITS_8) {
388       vpx_usec_timer timer_c;
389       vpx_usec_timer_start(&timer_c);
390       for (int i = 0; i < count_test_block; ++i) {
391         RunRefTxfm(coeff, ref, pitch_);
392       }
393       vpx_usec_timer_mark(&timer_c);
394       c_sum_time += vpx_usec_timer_elapsed(&timer_c);
395 
396       vpx_usec_timer timer_mod;
397       vpx_usec_timer_start(&timer_mod);
398       for (int i = 0; i < count_test_block; ++i) {
399         RunInvTxfm(coeff, dst, pitch_);
400       }
401       vpx_usec_timer_mark(&timer_mod);
402       simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
403     } else {
404 #if CONFIG_VP9_HIGHBITDEPTH
405       vpx_usec_timer timer_c;
406       vpx_usec_timer_start(&timer_c);
407       for (int i = 0; i < count_test_block; ++i) {
408         RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
409       }
410       vpx_usec_timer_mark(&timer_c);
411       c_sum_time += vpx_usec_timer_elapsed(&timer_c);
412 
413       vpx_usec_timer timer_mod;
414       vpx_usec_timer_start(&timer_mod);
415       for (int i = 0; i < count_test_block; ++i) {
416         RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
417       }
418       vpx_usec_timer_mark(&timer_mod);
419       simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
420 #endif  // CONFIG_VP9_HIGHBITDEPTH
421     }
422     printf(
423         "c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
424         c_sum_time, simd_sum_time,
425         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
426   }
427 
CompareInvReference32x32()428   void CompareInvReference32x32() {
429     ACMRandom rnd(ACMRandom::DeterministicSeed());
430     const int count_test_block = 10000;
431     const int eob = 31;
432     const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
433     DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
434     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
435     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
436 #if CONFIG_VP9_HIGHBITDEPTH
437     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
438     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
439 #endif  // CONFIG_VP9_HIGHBITDEPTH
440 
441     for (int i = 0; i < count_test_block; ++i) {
442       for (int j = 0; j < kNumCoeffs; ++j) {
443         if (j < eob) {
444           coeff[scan[j]] = rnd.Rand8Extremes();
445         } else {
446           coeff[scan[j]] = 0;
447         }
448         if (bit_depth_ == VPX_BITS_8) {
449           dst[j] = 0;
450           ref[j] = 0;
451 #if CONFIG_VP9_HIGHBITDEPTH
452         } else {
453           dst16[j] = 0;
454           ref16[j] = 0;
455 #endif  // CONFIG_VP9_HIGHBITDEPTH
456         }
457       }
458       if (bit_depth_ == VPX_BITS_8) {
459         RunRefTxfm(coeff, ref, pitch_);
460         RunInvTxfm(coeff, dst, pitch_);
461       } else {
462 #if CONFIG_VP9_HIGHBITDEPTH
463         RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
464         ASM_REGISTER_STATE_CHECK(
465             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
466 #endif  // CONFIG_VP9_HIGHBITDEPTH
467       }
468 
469       for (int j = 0; j < kNumCoeffs; ++j) {
470 #if CONFIG_VP9_HIGHBITDEPTH
471         const uint32_t diff =
472             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
473 #else
474         const uint32_t diff = dst[j] - ref[j];
475 #endif  // CONFIG_VP9_HIGHBITDEPTH
476         const uint32_t error = diff * diff;
477         EXPECT_EQ(0u, error) << "Error: 32x32 IDCT Comparison has error "
478                              << error << " at index " << j;
479       }
480     }
481   }
482 };
483 
// Some build configurations below provide no InvTrans32x32Test
// instantiation; tell GoogleTest that this is acceptable.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans32x32Test);

// Timing comparison of the reference and tested inverse transforms
// (disabled by default).
TEST_P(InvTrans32x32Test, DISABLED_Speed) {
  RunInvTrans32x32SpeedTest();
}

// Output of the tested inverse transform must match the reference.
TEST_P(InvTrans32x32Test, CompareReference) {
  CompareInvReference32x32();
}
488 
489 using std::make_tuple;
490 
491 #if CONFIG_VP9_HIGHBITDEPTH
492 INSTANTIATE_TEST_SUITE_P(
493     C, Trans32x32Test,
494     ::testing::Values(
495         make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10),
496         make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10),
497         make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12),
498         make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12),
499         make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
500         make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
501                    VPX_BITS_8)));
502 #else
503 INSTANTIATE_TEST_SUITE_P(
504     C, Trans32x32Test,
505     ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0,
506                                  VPX_BITS_8),
507                       make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
508                                  1, VPX_BITS_8)));
509 
510 INSTANTIATE_TEST_SUITE_P(
511     C, InvTrans32x32Test,
512     ::testing::Values(
513         (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_c, 0,
514                     VPX_BITS_8, 32, 6225)),
515         make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_c, 0,
516                    VPX_BITS_8, 16, 6255)));
517 #endif  // CONFIG_VP9_HIGHBITDEPTH
518 
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
// NEON forward transforms (high precision and rd) paired with the NEON
// inverse transform, 8-bit only.
INSTANTIATE_TEST_SUITE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_neon, &vpx_idct32x32_1024_add_neon, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_neon, &vpx_idct32x32_1024_add_neon, 1,
                   VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
527 
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 forward transforms paired with the SSE2 inverse transform.
INSTANTIATE_TEST_SUITE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));

// SSE2 inverse transforms checked against their C counterparts.
INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8, 32, 6225),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_sse2, 0,
                   VPX_BITS_8, 16, 6225)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
544 
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// High-bitdepth build: SSE2 forward transforms are paired with C inverse
// transforms (the idct32x32_10 / idct32x32_12 adapters for 10/12 bit).
INSTANTIATE_TEST_SUITE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        // 10-bit
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
        // 12-bit
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
        // 8-bit
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
560 
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// AVX2 forward transforms; the round trip uses the SSE2 inverse transform.
INSTANTIATE_TEST_SUITE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_avx2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_avx2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));

// AVX2 inverse transforms checked against their C counterparts.
INSTANTIATE_TEST_SUITE_P(
    AVX2, InvTrans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_avx2, 0,
                   VPX_BITS_8, 32, 6225),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_avx2, 0,
                   VPX_BITS_8, 16, 6225)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
577 
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// MSA forward transforms paired with the MSA inverse transform.
INSTANTIATE_TEST_SUITE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
586 
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// VSX inverse transform.  The version-0 entry uses the C forward transform;
// only the rd forward transform has a VSX entry here.
INSTANTIATE_TEST_SUITE_P(
    VSX, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_vsx, &vpx_idct32x32_1024_add_vsx, 1,
                   VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
595 
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// LSX forward transforms paired with the LSX inverse transform.
INSTANTIATE_TEST_SUITE_P(
    LSX, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_lsx, &vpx_idct32x32_1024_add_lsx, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_lsx, &vpx_idct32x32_1024_add_lsx, 1,
                   VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
604 }  // namespace
605