1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <tuple>
13
14 #include "gtest/gtest.h"
15
16 #include "config/av1_rtcd.h"
17
18 #include "test/acm_random.h"
19 #include "test/av1_txfm_test.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "av1/common/enums.h"
23 #include "av1/common/scan.h"
24 #include "aom_dsp/aom_dsp_common.h"
25 #include "aom_ports/mem.h"
26
27 namespace {
28
29 using libaom_test::ACMRandom;
30 using std::tuple;
31
32 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
33 TX_TYPE tx_type, int bd);
34
35 typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
36 TX_TYPE tx_type, int bd);
// Printable names used in failure messages; the table is indexed with a
// TX_TYPE value. Presumably the order mirrors the TX_TYPE enumeration in
// av1/common/enums.h -- keep the two in sync when either changes.
// Both the strings and the pointers are const so the table cannot be modified
// by accident.
static const char *const tx_type_name[] = {
  "DCT_DCT",
  "ADST_DCT",
  "DCT_ADST",
  "ADST_ADST",
  "FLIPADST_DCT",
  "DCT_FLIPADST",
  "FLIPADST_FLIPADST",
  "ADST_FLIPADST",
  "FLIPADST_ADST",
  "IDTX",
  "V_DCT",
  "H_DCT",
  "V_ADST",
  "H_ADST",
  "V_FLIPADST",
  "H_FLIPADST",
};
55 // Test parameter argument list:
56 // <transform reference function,
57 // optimized inverse transform function,
58 // inverse transform reference function,
59 // num_coeffs,
60 // tx_type,
61 // bit_depth>
62 typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
63
64 class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
65 public:
66 ~AV1HighbdInvHTNxN() override = default;
67
SetUp()68 void SetUp() override {
69 txfm_ref_ = GET_PARAM(0);
70 inv_txfm_ = GET_PARAM(1);
71 inv_txfm_ref_ = GET_PARAM(2);
72 num_coeffs_ = GET_PARAM(3);
73 tx_type_ = GET_PARAM(4);
74 bit_depth_ = GET_PARAM(5);
75
76 input_ = reinterpret_cast<int16_t *>(
77 aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
78 ASSERT_NE(input_, nullptr);
79
80 // Note:
81 // Inverse transform input buffer is 32-byte aligned
82 // Refer to <root>/av1/encoder/context_tree.c, function,
83 // void alloc_mode_context().
84 coeffs_ = reinterpret_cast<int32_t *>(
85 aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
86 ASSERT_NE(coeffs_, nullptr);
87 output_ = reinterpret_cast<uint16_t *>(
88 aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
89 ASSERT_NE(output_, nullptr);
90 output_ref_ = reinterpret_cast<uint16_t *>(
91 aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
92 ASSERT_NE(output_ref_, nullptr);
93 }
94
TearDown()95 void TearDown() override {
96 aom_free(input_);
97 aom_free(coeffs_);
98 aom_free(output_);
99 aom_free(output_ref_);
100 }
101
102 protected:
103 void RunBitexactCheck();
104
105 private:
GetStride() const106 int GetStride() const {
107 if (16 == num_coeffs_) {
108 return 4;
109 } else if (64 == num_coeffs_) {
110 return 8;
111 } else if (256 == num_coeffs_) {
112 return 16;
113 } else if (1024 == num_coeffs_) {
114 return 32;
115 } else if (4096 == num_coeffs_) {
116 return 64;
117 } else {
118 return 0;
119 }
120 }
121
122 HbdHtFunc txfm_ref_;
123 IHbdHtFunc inv_txfm_;
124 IHbdHtFunc inv_txfm_ref_;
125 int num_coeffs_;
126 TX_TYPE tx_type_;
127 int bit_depth_;
128
129 int16_t *input_;
130 int32_t *coeffs_;
131 uint16_t *output_;
132 uint16_t *output_ref_;
133 };
134 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvHTNxN);
135
RunBitexactCheck()136 void AV1HighbdInvHTNxN::RunBitexactCheck() {
137 ACMRandom rnd(ACMRandom::DeterministicSeed());
138 const int stride = GetStride();
139 const int num_tests = 20000;
140 const uint16_t mask = (1 << bit_depth_) - 1;
141
142 for (int i = 0; i < num_tests; ++i) {
143 for (int j = 0; j < num_coeffs_; ++j) {
144 input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
145 output_ref_[j] = rnd.Rand16() & mask;
146 output_[j] = output_ref_[j];
147 }
148
149 txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
150 inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
151 API_REGISTER_STATE_CHECK(
152 inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
153
154 for (int j = 0; j < num_coeffs_; ++j) {
155 EXPECT_EQ(output_ref_[j], output_[j])
156 << "Not bit-exact result at index: " << j << " At test block: " << i;
157 }
158 }
159 }
160
// Runs the bit-exactness comparison for every instantiated parameter tuple.
TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) {
  RunBitexactCheck();
}
162
using std::make_tuple;

#if HAVE_SSE4_1
// First four IHbdHtParam fields shared by every 4x4 case below: the C forward
// transform, the SSE4.1 inverse transform under test, the C inverse transform
// used as the reference, and the coefficient count (16).
#define PARAM_LIST_4X4                                   \
  &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
      &av1_inv_txfm2d_add_4x4_c, 16

// Parameter table for the SSE4.1 instantiation of AV1HighbdInvHTNxN. Each
// listed transform type appears twice: once at bit depth 10 and once at 12.
const IHbdHtParam kArrayIhtParam[] = {
  // 4x4
  make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
  make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
  make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
  make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
  make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
  make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
  make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
  make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
};

// Registers the table above under the "SSE4_1" prefix.
INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvHTNxN,
                         ::testing::ValuesIn(kArrayIhtParam));
#endif  // HAVE_SSE4_1
195
196 typedef void (*HighbdInvTxfm2dFunc)(const int32_t *input, uint8_t *output,
197 int stride, const TxfmParam *txfm_param);
198
199 typedef std::tuple<const HighbdInvTxfm2dFunc> AV1HighbdInvTxfm2dParam;
200 class AV1HighbdInvTxfm2d
201 : public ::testing::TestWithParam<AV1HighbdInvTxfm2dParam> {
202 public:
SetUp()203 void SetUp() override { target_func_ = GET_PARAM(0); }
204 void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times,
205 int bit_depth, int gt_int16 = 0);
206
207 private:
208 HighbdInvTxfm2dFunc target_func_;
209 };
210 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvTxfm2d);
211
RunAV1InvTxfm2dTest(TX_TYPE tx_type_,TX_SIZE tx_size_,int run_times,int bit_depth_,int gt_int16)212 void AV1HighbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type_, TX_SIZE tx_size_,
213 int run_times, int bit_depth_,
214 int gt_int16) {
215 #if CONFIG_REALTIME_ONLY
216 if (tx_size_ >= TX_4X16) {
217 return;
218 }
219 #endif
220 FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size_];
221 TxfmParam txfm_param;
222 const int BLK_WIDTH = 64;
223 const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
224 DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
225 DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
226 DECLARE_ALIGNED(32, uint16_t, output[BLK_SIZE]) = { 0 };
227 DECLARE_ALIGNED(32, uint16_t, ref_output[BLK_SIZE]) = { 0 };
228 int stride = BLK_WIDTH;
229 int rows = tx_size_high[tx_size_];
230 int cols = tx_size_wide[tx_size_];
231 const int rows_nonezero = AOMMIN(32, rows);
232 const int cols_nonezero = AOMMIN(32, cols);
233 const uint16_t mask = (1 << bit_depth_) - 1;
234 run_times /= (rows * cols);
235 run_times = AOMMAX(1, run_times);
236 const SCAN_ORDER *scan_order = get_default_scan(tx_size_, tx_type_);
237 const int16_t *scan = scan_order->scan;
238 const int16_t eobmax = rows_nonezero * cols_nonezero;
239 ACMRandom rnd(ACMRandom::DeterministicSeed());
240 int randTimes = run_times == 1 ? (eobmax) : 1;
241
242 txfm_param.tx_type = tx_type_;
243 txfm_param.tx_size = tx_size_;
244 txfm_param.lossless = 0;
245 txfm_param.bd = bit_depth_;
246 txfm_param.is_hbd = 1;
247 txfm_param.tx_set_type = EXT_TX_SET_ALL16;
248
249 for (int cnt = 0; cnt < randTimes; ++cnt) {
250 for (int r = 0; r < BLK_WIDTH; ++r) {
251 for (int c = 0; c < BLK_WIDTH; ++c) {
252 input[r * cols + c] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
253 output[r * stride + c] = rnd.Rand16() & mask;
254
255 ref_output[r * stride + c] = output[r * stride + c];
256 }
257 }
258 fwd_func_(input, inv_input, stride, tx_type_, bit_depth_);
259
260 // produce eob input by setting high freq coeffs to zero
261 const int eob = AOMMIN(cnt + 1, eobmax);
262 for (int i = eob; i < eobmax; i++) {
263 inv_input[scan[i]] = 0;
264 }
265 txfm_param.eob = eob;
266 if (gt_int16) {
267 const uint16_t inv_input_mask =
268 static_cast<uint16_t>((1 << (bit_depth_ + 7)) - 1);
269 for (int i = 0; i < eob; i++) {
270 inv_input[scan[i]] = (rnd.Rand31() & inv_input_mask);
271 }
272 }
273
274 aom_usec_timer ref_timer, test_timer;
275 aom_usec_timer_start(&ref_timer);
276 for (int i = 0; i < run_times; ++i) {
277 av1_highbd_inv_txfm_add_c(inv_input, CONVERT_TO_BYTEPTR(ref_output),
278 stride, &txfm_param);
279 }
280 aom_usec_timer_mark(&ref_timer);
281 const int elapsed_time_c =
282 static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
283
284 aom_usec_timer_start(&test_timer);
285 for (int i = 0; i < run_times; ++i) {
286 target_func_(inv_input, CONVERT_TO_BYTEPTR(output), stride, &txfm_param);
287 }
288 aom_usec_timer_mark(&test_timer);
289 const int elapsed_time_simd =
290 static_cast<int>(aom_usec_timer_elapsed(&test_timer));
291 if (run_times > 10) {
292 printf(
293 "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
294 "gain=%d \n",
295 tx_size_, tx_type_, elapsed_time_c, elapsed_time_simd,
296 (elapsed_time_c / elapsed_time_simd));
297 } else {
298 for (int r = 0; r < rows; ++r) {
299 for (int c = 0; c < cols; ++c) {
300 ASSERT_EQ(ref_output[r * stride + c], output[r * stride + c])
301 << "[" << r << "," << c << "] " << cnt << " tx_size: " << cols
302 << "x" << rows << " bit_depth_: " << bit_depth_
303 << " tx_type: " << tx_type_name[tx_type_] << " eob " << eob;
304 }
305 }
306 }
307 }
308 }
309
// Correctness check over bit depths 8, 10 and 12 for every transform
// size/type pair accepted by libaom_test::IsTxSizeTypeValid().
TEST_P(AV1HighbdInvTxfm2d, match) {
  const int kBitDepths[3] = { 8, 10, 12 };
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < static_cast<int>(TX_SIZES_ALL);
         ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (int type_idx = 0; type_idx < static_cast<int>(TX_TYPES);
           ++type_idx) {
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
          continue;
        }
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1, bd);
      }
    }
  }
}
325
// Same comparison as the match test, but with gt_int16 enabled so the
// coefficients are random masked values, and restricted to the transform
// types listed below.
TEST_P(AV1HighbdInvTxfm2d, gt_int16) {
  const int kBitDepths[3] = { 8, 10, 12 };
  static const TX_TYPE kTypes[] = {
    DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX, V_DCT, H_DCT, H_ADST, H_FLIPADST
  };
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < static_cast<int>(TX_SIZES_ALL);
         ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (const TX_TYPE tx_type : kTypes) {
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
          continue;
        }
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1, bd, 1);
      }
    }
  }
}
344
// Timing run (disabled by default) at bit depths 10 and 12. The large
// run_times budget puts RunAV1InvTxfm2dTest() into its timing branch.
TEST_P(AV1HighbdInvTxfm2d, DISABLED_Speed) {
  const int kBitDepths[2] = { 10, 12 };
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < static_cast<int>(TX_SIZES_ALL);
         ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (int type_idx = 0; type_idx < static_cast<int>(TX_TYPES);
           ++type_idx) {
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
          continue;
        }
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1000000, bd);
      }
    }
  }
}
360
// One instantiation of AV1HighbdInvTxfm2d per instruction set enabled at build
// time; each passes that instruction set's av1_highbd_inv_txfm_add_* function
// as the function under test.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvTxfm2d,
                         ::testing::Values(av1_highbd_inv_txfm_add_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdInvTxfm2d,
                         ::testing::Values(av1_highbd_inv_txfm_add_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdInvTxfm2d,
                         ::testing::Values(av1_highbd_inv_txfm_add_neon));
#endif
375
376 } // namespace
377