xref: /aosp_15_r20/external/libaom/test/av1_highbd_iht_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <tuple>
13 
14 #include "gtest/gtest.h"
15 
16 #include "config/av1_rtcd.h"
17 
18 #include "test/acm_random.h"
19 #include "test/av1_txfm_test.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "av1/common/enums.h"
23 #include "av1/common/scan.h"
24 #include "aom_dsp/aom_dsp_common.h"
25 #include "aom_ports/mem.h"
26 
27 namespace {
28 
29 using libaom_test::ACMRandom;
30 using std::tuple;
31 
32 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
33                           TX_TYPE tx_type, int bd);
34 
35 typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
36                            TX_TYPE tx_type, int bd);
// Printable transform-type names. The table is indexed with a TX_TYPE value
// when a mismatch message is built, so the entry order is significant.
static const char *tx_type_name[] = {
  "DCT_DCT",           "ADST_DCT",      "DCT_ADST",      "ADST_ADST",
  "FLIPADST_DCT",      "DCT_FLIPADST",  "FLIPADST_FLIPADST",
  "ADST_FLIPADST",     "FLIPADST_ADST", "IDTX",
  "V_DCT",             "H_DCT",         "V_ADST",        "H_ADST",
  "V_FLIPADST",        "H_FLIPADST",
};
55 // Test parameter argument list:
56 //   <transform reference function,
57 //    optimized inverse transform function,
58 //    inverse transform reference function,
59 //    num_coeffs,
60 //    tx_type,
61 //    bit_depth>
62 typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
63 
64 class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
65  public:
66   ~AV1HighbdInvHTNxN() override = default;
67 
SetUp()68   void SetUp() override {
69     txfm_ref_ = GET_PARAM(0);
70     inv_txfm_ = GET_PARAM(1);
71     inv_txfm_ref_ = GET_PARAM(2);
72     num_coeffs_ = GET_PARAM(3);
73     tx_type_ = GET_PARAM(4);
74     bit_depth_ = GET_PARAM(5);
75 
76     input_ = reinterpret_cast<int16_t *>(
77         aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
78     ASSERT_NE(input_, nullptr);
79 
80     // Note:
81     // Inverse transform input buffer is 32-byte aligned
82     // Refer to <root>/av1/encoder/context_tree.c, function,
83     // void alloc_mode_context().
84     coeffs_ = reinterpret_cast<int32_t *>(
85         aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
86     ASSERT_NE(coeffs_, nullptr);
87     output_ = reinterpret_cast<uint16_t *>(
88         aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
89     ASSERT_NE(output_, nullptr);
90     output_ref_ = reinterpret_cast<uint16_t *>(
91         aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
92     ASSERT_NE(output_ref_, nullptr);
93   }
94 
TearDown()95   void TearDown() override {
96     aom_free(input_);
97     aom_free(coeffs_);
98     aom_free(output_);
99     aom_free(output_ref_);
100   }
101 
102  protected:
103   void RunBitexactCheck();
104 
105  private:
GetStride() const106   int GetStride() const {
107     if (16 == num_coeffs_) {
108       return 4;
109     } else if (64 == num_coeffs_) {
110       return 8;
111     } else if (256 == num_coeffs_) {
112       return 16;
113     } else if (1024 == num_coeffs_) {
114       return 32;
115     } else if (4096 == num_coeffs_) {
116       return 64;
117     } else {
118       return 0;
119     }
120   }
121 
122   HbdHtFunc txfm_ref_;
123   IHbdHtFunc inv_txfm_;
124   IHbdHtFunc inv_txfm_ref_;
125   int num_coeffs_;
126   TX_TYPE tx_type_;
127   int bit_depth_;
128 
129   int16_t *input_;
130   int32_t *coeffs_;
131   uint16_t *output_;
132   uint16_t *output_ref_;
133 };
134 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvHTNxN);
135 
RunBitexactCheck()136 void AV1HighbdInvHTNxN::RunBitexactCheck() {
137   ACMRandom rnd(ACMRandom::DeterministicSeed());
138   const int stride = GetStride();
139   const int num_tests = 20000;
140   const uint16_t mask = (1 << bit_depth_) - 1;
141 
142   for (int i = 0; i < num_tests; ++i) {
143     for (int j = 0; j < num_coeffs_; ++j) {
144       input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
145       output_ref_[j] = rnd.Rand16() & mask;
146       output_[j] = output_ref_[j];
147     }
148 
149     txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
150     inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
151     API_REGISTER_STATE_CHECK(
152         inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
153 
154     for (int j = 0; j < num_coeffs_; ++j) {
155       EXPECT_EQ(output_ref_[j], output_[j])
156           << "Not bit-exact result at index: " << j << " At test block: " << i;
157     }
158   }
159 }
160 
// The optimized inverse transform must match the reference inverse transform
// exactly for every parameter tuple.
TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) {
  RunBitexactCheck();
}
162 
using std::make_tuple;

#if HAVE_SSE4_1
// First four tuple fields shared by every 4x4 case: forward reference,
// SSE4.1 inverse under test, C inverse reference, and the coefficient count.
#define PARAM_LIST_4X4                                   \
  &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
      &av1_inv_txfm2d_add_4x4_c, 16

// Expands to two entries for one transform type: the 10-bit case followed by
// the 12-bit case.
#define PARAM_4X4_BD10_BD12(tx_type)       \
  make_tuple(PARAM_LIST_4X4, tx_type, 10), \
      make_tuple(PARAM_LIST_4X4, tx_type, 12)

const IHbdHtParam kArrayIhtParam[] = {
  // 4x4
  PARAM_4X4_BD10_BD12(DCT_DCT),
  PARAM_4X4_BD10_BD12(ADST_DCT),
  PARAM_4X4_BD10_BD12(DCT_ADST),
  PARAM_4X4_BD10_BD12(ADST_ADST),
  PARAM_4X4_BD10_BD12(FLIPADST_DCT),
  PARAM_4X4_BD10_BD12(DCT_FLIPADST),
  PARAM_4X4_BD10_BD12(FLIPADST_FLIPADST),
  PARAM_4X4_BD10_BD12(ADST_FLIPADST),
  PARAM_4X4_BD10_BD12(FLIPADST_ADST),
};

INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvHTNxN,
                         ::testing::ValuesIn(kArrayIhtParam));
#endif  // HAVE_SSE4_1
195 
196 typedef void (*HighbdInvTxfm2dFunc)(const int32_t *input, uint8_t *output,
197                                     int stride, const TxfmParam *txfm_param);
198 
199 typedef std::tuple<const HighbdInvTxfm2dFunc> AV1HighbdInvTxfm2dParam;
200 class AV1HighbdInvTxfm2d
201     : public ::testing::TestWithParam<AV1HighbdInvTxfm2dParam> {
202  public:
SetUp()203   void SetUp() override { target_func_ = GET_PARAM(0); }
204   void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times,
205                            int bit_depth, int gt_int16 = 0);
206 
207  private:
208   HighbdInvTxfm2dFunc target_func_;
209 };
210 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvTxfm2d);
211 
RunAV1InvTxfm2dTest(TX_TYPE tx_type_,TX_SIZE tx_size_,int run_times,int bit_depth_,int gt_int16)212 void AV1HighbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type_, TX_SIZE tx_size_,
213                                              int run_times, int bit_depth_,
214                                              int gt_int16) {
215 #if CONFIG_REALTIME_ONLY
216   if (tx_size_ >= TX_4X16) {
217     return;
218   }
219 #endif
220   FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size_];
221   TxfmParam txfm_param;
222   const int BLK_WIDTH = 64;
223   const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
224   DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
225   DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
226   DECLARE_ALIGNED(32, uint16_t, output[BLK_SIZE]) = { 0 };
227   DECLARE_ALIGNED(32, uint16_t, ref_output[BLK_SIZE]) = { 0 };
228   int stride = BLK_WIDTH;
229   int rows = tx_size_high[tx_size_];
230   int cols = tx_size_wide[tx_size_];
231   const int rows_nonezero = AOMMIN(32, rows);
232   const int cols_nonezero = AOMMIN(32, cols);
233   const uint16_t mask = (1 << bit_depth_) - 1;
234   run_times /= (rows * cols);
235   run_times = AOMMAX(1, run_times);
236   const SCAN_ORDER *scan_order = get_default_scan(tx_size_, tx_type_);
237   const int16_t *scan = scan_order->scan;
238   const int16_t eobmax = rows_nonezero * cols_nonezero;
239   ACMRandom rnd(ACMRandom::DeterministicSeed());
240   int randTimes = run_times == 1 ? (eobmax) : 1;
241 
242   txfm_param.tx_type = tx_type_;
243   txfm_param.tx_size = tx_size_;
244   txfm_param.lossless = 0;
245   txfm_param.bd = bit_depth_;
246   txfm_param.is_hbd = 1;
247   txfm_param.tx_set_type = EXT_TX_SET_ALL16;
248 
249   for (int cnt = 0; cnt < randTimes; ++cnt) {
250     for (int r = 0; r < BLK_WIDTH; ++r) {
251       for (int c = 0; c < BLK_WIDTH; ++c) {
252         input[r * cols + c] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
253         output[r * stride + c] = rnd.Rand16() & mask;
254 
255         ref_output[r * stride + c] = output[r * stride + c];
256       }
257     }
258     fwd_func_(input, inv_input, stride, tx_type_, bit_depth_);
259 
260     // produce eob input by setting high freq coeffs to zero
261     const int eob = AOMMIN(cnt + 1, eobmax);
262     for (int i = eob; i < eobmax; i++) {
263       inv_input[scan[i]] = 0;
264     }
265     txfm_param.eob = eob;
266     if (gt_int16) {
267       const uint16_t inv_input_mask =
268           static_cast<uint16_t>((1 << (bit_depth_ + 7)) - 1);
269       for (int i = 0; i < eob; i++) {
270         inv_input[scan[i]] = (rnd.Rand31() & inv_input_mask);
271       }
272     }
273 
274     aom_usec_timer ref_timer, test_timer;
275     aom_usec_timer_start(&ref_timer);
276     for (int i = 0; i < run_times; ++i) {
277       av1_highbd_inv_txfm_add_c(inv_input, CONVERT_TO_BYTEPTR(ref_output),
278                                 stride, &txfm_param);
279     }
280     aom_usec_timer_mark(&ref_timer);
281     const int elapsed_time_c =
282         static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
283 
284     aom_usec_timer_start(&test_timer);
285     for (int i = 0; i < run_times; ++i) {
286       target_func_(inv_input, CONVERT_TO_BYTEPTR(output), stride, &txfm_param);
287     }
288     aom_usec_timer_mark(&test_timer);
289     const int elapsed_time_simd =
290         static_cast<int>(aom_usec_timer_elapsed(&test_timer));
291     if (run_times > 10) {
292       printf(
293           "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
294           "gain=%d \n",
295           tx_size_, tx_type_, elapsed_time_c, elapsed_time_simd,
296           (elapsed_time_c / elapsed_time_simd));
297     } else {
298       for (int r = 0; r < rows; ++r) {
299         for (int c = 0; c < cols; ++c) {
300           ASSERT_EQ(ref_output[r * stride + c], output[r * stride + c])
301               << "[" << r << "," << c << "] " << cnt << " tx_size: " << cols
302               << "x" << rows << " bit_depth_: " << bit_depth_
303               << " tx_type: " << tx_type_name[tx_type_] << " eob " << eob;
304         }
305       }
306     }
307   }
308 }
309 
// Checks every valid (tx_size, tx_type) pair at 8, 10 and 12 bits with a
// run_times budget of 1 and the default gt_int16 of 0.
TEST_P(AV1HighbdInvTxfm2d, match) {
  const int kBitDepths[] = { 8, 10, 12 };
  const int kNumSizes = static_cast<int>(TX_SIZES_ALL);
  const int kNumTypes = static_cast<int>(TX_TYPES);
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < kNumSizes; ++size_idx) {
      for (int type_idx = 0; type_idx < kNumTypes; ++type_idx) {
        const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) continue;
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1, bd);
      }
    }
  }
}
325 
// Same comparison as the match test, but with gt_int16 set so the inverse
// transform input is replaced by random coefficients, and restricted to the
// transform types listed below.
TEST_P(AV1HighbdInvTxfm2d, gt_int16) {
  const int kBitDepths[] = { 8, 10, 12 };
  static const TX_TYPE types[] = {
    DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX, V_DCT, H_DCT, H_ADST, H_FLIPADST
  };
  const int kNumSizes = static_cast<int>(TX_SIZES_ALL);
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < kNumSizes; ++size_idx) {
      const TX_SIZE sz = static_cast<TX_SIZE>(size_idx);
      for (const TX_TYPE tp : types) {
        if (!libaom_test::IsTxSizeTypeValid(sz, tp)) continue;
        RunAV1InvTxfm2dTest(tp, sz, 1, bd, 1);
      }
    }
  }
}
344 
// Timing run (disabled by default): every valid (tx_size, tx_type) pair at 10
// and 12 bits with a run_times budget of 1000000.
TEST_P(AV1HighbdInvTxfm2d, DISABLED_Speed) {
  const int kBitDepths[] = { 10, 12 };
  const int kNumSizes = static_cast<int>(TX_SIZES_ALL);
  const int kNumTypes = static_cast<int>(TX_TYPES);
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < kNumSizes; ++size_idx) {
      for (int type_idx = 0; type_idx < kNumTypes; ++type_idx) {
        const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) continue;
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1000000, bd);
      }
    }
  }
}
360 
// One instantiation of AV1HighbdInvTxfm2d per SIMD implementation that the
// build enables; each supplies the function compared against the C version.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvTxfm2d,
                         ::testing::Values(av1_highbd_inv_txfm_add_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdInvTxfm2d,
                         ::testing::Values(av1_highbd_inv_txfm_add_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdInvTxfm2d,
                         ::testing::Values(av1_highbd_inv_txfm_add_neon));
#endif  // HAVE_NEON
375 
376 }  // namespace
377