1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <tuple>
16
17 #include "gtest/gtest.h"
18
19 #include "./vp9_rtcd.h"
20 #include "./vpx_config.h"
21 #include "./vpx_dsp_rtcd.h"
22 #include "test/acm_random.h"
23 #include "test/bench.h"
24 #include "test/buffer.h"
25 #include "test/clear_system_state.h"
26 #include "test/register_state_check.h"
27 #include "test/util.h"
28 #include "vp9/common/vp9_entropy.h"
29 #include "vp9/common/vp9_scan.h"
30 #include "vp9/encoder/vp9_block.h"
31 #include "vpx/vpx_codec.h"
32 #include "vpx/vpx_integer.h"
33 #include "vpx_ports/vpx_timer.h"
34
35 using libvpx_test::ACMRandom;
36 using libvpx_test::Buffer;
37
38 namespace {
39 const int number_of_iterations = 100;
40
41 typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
42 const macroblock_plane *mb_plane,
43 tran_low_t *qcoeff, tran_low_t *dqcoeff,
44 const int16_t *dequant, uint16_t *eob,
45 const struct ScanOrder *const scan_order);
46 typedef std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
47 int /*max_size*/, bool /*is_fp*/>
48 QuantizeParam;
49
50 // Wrapper for 32x32 version which does not use count
51 typedef void (*Quantize32x32Func)(const tran_low_t *coeff,
52 const macroblock_plane *const mb_plane,
53 tran_low_t *qcoeff, tran_low_t *dqcoeff,
54 const int16_t *dequant, uint16_t *eob,
55 const struct ScanOrder *const scan_order);
56
57 template <Quantize32x32Func fn>
Quant32x32Wrapper(const tran_low_t * coeff,intptr_t count,const macroblock_plane * const mb_plane,tran_low_t * qcoeff,tran_low_t * dqcoeff,const int16_t * dequant,uint16_t * eob,const struct ScanOrder * const scan_order)58 void Quant32x32Wrapper(const tran_low_t *coeff, intptr_t count,
59 const macroblock_plane *const mb_plane,
60 tran_low_t *qcoeff, tran_low_t *dqcoeff,
61 const int16_t *dequant, uint16_t *eob,
62 const struct ScanOrder *const scan_order) {
63 (void)count;
64 fn(coeff, mb_plane, qcoeff, dqcoeff, dequant, eob, scan_order);
65 }
66
67 // Wrapper for FP version which does not use zbin or quant_shift.
68 typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count,
69 const macroblock_plane *const mb_plane,
70 tran_low_t *qcoeff, tran_low_t *dqcoeff,
71 const int16_t *dequant, uint16_t *eob,
72 const struct ScanOrder *const scan_order);
73
74 template <QuantizeFPFunc fn>
QuantFPWrapper(const tran_low_t * coeff,intptr_t count,const macroblock_plane * const mb_plane,tran_low_t * qcoeff,tran_low_t * dqcoeff,const int16_t * dequant,uint16_t * eob,const struct ScanOrder * const scan_order)75 void QuantFPWrapper(const tran_low_t *coeff, intptr_t count,
76 const macroblock_plane *const mb_plane, tran_low_t *qcoeff,
77 tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob,
78 const struct ScanOrder *const scan_order) {
79 fn(coeff, count, mb_plane, qcoeff, dqcoeff, dequant, eob, scan_order);
80 }
81
GenerateHelperArrays(ACMRandom * rnd,int16_t * zbin,int16_t * round,int16_t * quant,int16_t * quant_shift,int16_t * dequant,int16_t * round_fp,int16_t * quant_fp)82 void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
83 int16_t *quant, int16_t *quant_shift,
84 int16_t *dequant, int16_t *round_fp,
85 int16_t *quant_fp) {
86 // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
87 constexpr int kMaxQRoundingFactorFp = 64;
88
89 for (int j = 0; j < 2; j++) {
90 // The range is 4 to 1828 in the VP9 tables.
91 const int qlookup = rnd->RandRange(1825) + 4;
92 round_fp[j] = (kMaxQRoundingFactorFp * qlookup) >> 7;
93 quant_fp[j] = (1 << 16) / qlookup;
94
95 // Values determined by deconstructing vp9_init_quantizer().
96 // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
97 // values or U/V values of any bit depth. This is because y_delta is not
98 // factored into the vp9_ac_quant() call.
99 zbin[j] = rnd->RandRange(1200);
100
101 // round may be up to 685 for Y values or 914 for U/V.
102 round[j] = rnd->RandRange(914);
103 // quant ranges from 1 to -32703
104 quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
105 // quant_shift goes up to 1 << 16.
106 quant_shift[j] = rnd->RandRange(16384);
107 // dequant maxes out at 1828 for all cases.
108 dequant[j] = rnd->RandRange(1828);
109 }
110 for (int j = 2; j < 8; j++) {
111 zbin[j] = zbin[1];
112 round_fp[j] = round_fp[1];
113 quant_fp[j] = quant_fp[1];
114 round[j] = round[1];
115 quant[j] = quant[1];
116 quant_shift[j] = quant_shift[1];
117 dequant[j] = dequant[1];
118 }
119 }
120
121 class VP9QuantizeBase : public AbstractBench {
122 public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth,int max_size,bool is_fp)123 VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
124 : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
125 coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
126 qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
127 dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
128 // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
129 #if HAVE_NEON
130 max_value_ = (1 << (7 + bit_depth_)) - 1;
131 #else
132 max_value_ = (1 << bit_depth_) - 1;
133 #endif
134
135 mb_plane_ = reinterpret_cast<macroblock_plane *>(
136 vpx_memalign(16, sizeof(macroblock_plane)));
137
138 zbin_ptr_ = mb_plane_->zbin =
139 reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
140 round_fp_ptr_ = mb_plane_->round_fp = reinterpret_cast<int16_t *>(
141 vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
142 quant_fp_ptr_ = mb_plane_->quant_fp = reinterpret_cast<int16_t *>(
143 vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
144 round_ptr_ = mb_plane_->round =
145 reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
146 quant_ptr_ = mb_plane_->quant =
147 reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*quant_ptr_)));
148 quant_shift_ptr_ = mb_plane_->quant_shift = reinterpret_cast<int16_t *>(
149 vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
150 dequant_ptr_ = reinterpret_cast<int16_t *>(
151 vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
152
153 r_ptr_ = (is_fp_) ? round_fp_ptr_ : round_ptr_;
154 q_ptr_ = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
155 }
156
~VP9QuantizeBase()157 ~VP9QuantizeBase() override {
158 vpx_free(mb_plane_);
159 vpx_free(zbin_ptr_);
160 vpx_free(round_fp_ptr_);
161 vpx_free(quant_fp_ptr_);
162 vpx_free(round_ptr_);
163 vpx_free(quant_ptr_);
164 vpx_free(quant_shift_ptr_);
165 vpx_free(dequant_ptr_);
166 mb_plane_ = nullptr;
167 zbin_ptr_ = nullptr;
168 round_fp_ptr_ = nullptr;
169 quant_fp_ptr_ = nullptr;
170 round_ptr_ = nullptr;
171 quant_ptr_ = nullptr;
172 quant_shift_ptr_ = nullptr;
173 dequant_ptr_ = nullptr;
174 libvpx_test::ClearSystemState();
175 }
176
177 protected:
178 macroblock_plane *mb_plane_;
179 int16_t *zbin_ptr_;
180 int16_t *quant_fp_ptr_;
181 int16_t *round_fp_ptr_;
182 int16_t *round_ptr_;
183 int16_t *quant_ptr_;
184 int16_t *quant_shift_ptr_;
185 int16_t *dequant_ptr_;
186 const vpx_bit_depth_t bit_depth_;
187 int max_value_;
188 const int max_size_;
189 const bool is_fp_;
190 Buffer<tran_low_t> coeff_;
191 Buffer<tran_low_t> qcoeff_;
192 Buffer<tran_low_t> dqcoeff_;
193 int16_t *r_ptr_;
194 int16_t *q_ptr_;
195 int count_;
196 const ScanOrder *scan_;
197 uint16_t eob_;
198 };
199
// Parameterized fixture that pairs a quantizer under test with a reference
// implementation. The QuantizeParam tuple is unpacked by position:
// (0) function under test, (1) reference function, (2) bit depth,
// (3) max_size, (4) is_fp.
class VP9QuantizeTest : public VP9QuantizeBase,
                        public ::testing::TestWithParam<QuantizeParam> {
 public:
  VP9QuantizeTest()
      : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
        quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}

 protected:
  // Calls quantize_op_ once on the fixture's buffers and tables.
  void Run() override;
  // Prints timing for quantize_op_. When |is_median| is true the
  // RunNTimes()/PrintMedian() path is used; otherwise quantize_op_ is timed
  // side by side with ref_quantize_op_.
  void Speed(bool is_median);
  const QuantizeFunc quantize_op_;      // Implementation under test.
  const QuantizeFunc ref_quantize_op_;  // Reference implementation.
};
213
Run()214 void VP9QuantizeTest::Run() {
215 quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
216 dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_);
217 }
218
Speed(bool is_median)219 void VP9QuantizeTest::Speed(bool is_median) {
220 ACMRandom rnd(ACMRandom::DeterministicSeed());
221 ASSERT_TRUE(coeff_.Init());
222 ASSERT_TRUE(qcoeff_.Init());
223 ASSERT_TRUE(dqcoeff_.Init());
224 TX_SIZE starting_sz, ending_sz;
225
226 if (max_size_ == 16) {
227 starting_sz = TX_4X4;
228 ending_sz = TX_16X16;
229 } else {
230 starting_sz = TX_32X32;
231 ending_sz = TX_32X32;
232 }
233
234 for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
235 // zbin > coeff, zbin < coeff.
236 for (int i = 0; i < 2; ++i) {
237 // TX_TYPE defines the scan order. That is not relevant to the speed test.
238 // Pick the first one.
239 const TX_TYPE tx_type = DCT_DCT;
240 count_ = (4 << sz) * (4 << sz);
241 scan_ = &vp9_scan_orders[sz][tx_type];
242
243 GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
244 quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
245 quant_fp_ptr_);
246
247 if (i == 0) {
248 // When |coeff values| are less than zbin the results are 0.
249 int threshold = 100;
250 if (max_size_ == 32) {
251 // For 32x32, the threshold is halved. Double it to keep the values
252 // from clearing it.
253 threshold = 200;
254 }
255 for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
256 coeff_.Set(&rnd, -99, 99);
257 } else if (i == 1) {
258 for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
259 coeff_.Set(&rnd, -500, 500);
260 }
261
262 const char *type =
263 (i == 0) ? "Bypass calculations " : "Full calculations ";
264 char block_size[16];
265 snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
266 char title[100];
267 snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
268
269 if (is_median) {
270 RunNTimes(10000000 / count_);
271 PrintMedian(title);
272 } else {
273 Buffer<tran_low_t> ref_qcoeff =
274 Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
275 ASSERT_TRUE(ref_qcoeff.Init());
276 Buffer<tran_low_t> ref_dqcoeff =
277 Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
278 ASSERT_TRUE(ref_dqcoeff.Init());
279 uint16_t ref_eob = 0;
280
281 const int kNumTests = 5000000;
282 vpx_usec_timer timer, simd_timer;
283
284 vpx_usec_timer_start(&timer);
285 for (int n = 0; n < kNumTests; ++n) {
286 ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
287 ref_qcoeff.TopLeftPixel(),
288 ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
289 scan_);
290 }
291 vpx_usec_timer_mark(&timer);
292
293 vpx_usec_timer_start(&simd_timer);
294 for (int n = 0; n < kNumTests; ++n) {
295 quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
296 qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(),
297 dequant_ptr_, &eob_, scan_);
298 }
299 vpx_usec_timer_mark(&simd_timer);
300
301 const int elapsed_time =
302 static_cast<int>(vpx_usec_timer_elapsed(&timer));
303 const int simd_elapsed_time =
304 static_cast<int>(vpx_usec_timer_elapsed(&simd_timer));
305 printf("%s c_time = %d \t simd_time = %d \t Gain = %f \n", title,
306 elapsed_time, simd_elapsed_time,
307 ((float)elapsed_time / simd_elapsed_time));
308 }
309 }
310 }
311 }
312
313 // This quantizer compares the AC coefficients to the quantization step size to
314 // determine if further multiplication operations are needed.
315 // Based on vp9_quantize_fp_sse2().
quant_fp_nz(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const struct macroblock_plane * const mb_plane,tran_low_t * qcoeff_ptr,tran_low_t * dqcoeff_ptr,const int16_t * dequant_ptr,uint16_t * eob_ptr,const struct ScanOrder * const scan_order,int is_32x32)316 inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
317 const struct macroblock_plane *const mb_plane,
318 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
319 const int16_t *dequant_ptr, uint16_t *eob_ptr,
320 const struct ScanOrder *const scan_order,
321 int is_32x32) {
322 int i, eob = -1;
323 const int thr = dequant_ptr[1] >> (1 + is_32x32);
324 const int16_t *round_ptr = mb_plane->round_fp;
325 const int16_t *quant_ptr = mb_plane->quant_fp;
326 const int16_t *scan = scan_order->scan;
327
328 // Quantization pass: All coefficients with index >= zero_flag are
329 // skippable. Note: zero_flag can be zero.
330 for (i = 0; i < n_coeffs; i += 16) {
331 int y;
332 int nzflag_cnt = 0;
333 int abs_coeff[16];
334 int coeff_sign[16];
335
336 // count nzflag for each row (16 tran_low_t)
337 for (y = 0; y < 16; ++y) {
338 const int rc = i + y;
339 const int coeff = coeff_ptr[rc];
340 coeff_sign[y] = (coeff >> 31);
341 abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
342 // The first 16 are skipped in the sse2 code. Do the same here to match.
343 if (i >= 16 && (abs_coeff[y] <= thr)) {
344 nzflag_cnt++;
345 }
346 }
347
348 for (y = 0; y < 16; ++y) {
349 const int rc = i + y;
350 // If all of the AC coeffs in a row has magnitude less than the
351 // quantization step_size/2, quantize to zero.
352 if (nzflag_cnt < 16) {
353 int tmp;
354 int _round;
355
356 if (is_32x32) {
357 _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
358 } else {
359 _round = round_ptr[rc != 0];
360 }
361 tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
362 tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
363 qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
364 dqcoeff_ptr[rc] =
365 static_cast<tran_low_t>(qcoeff_ptr[rc] * dequant_ptr[rc != 0]);
366
367 if (is_32x32) {
368 dqcoeff_ptr[rc] = static_cast<tran_low_t>(qcoeff_ptr[rc] *
369 dequant_ptr[rc != 0] / 2);
370 } else {
371 dqcoeff_ptr[rc] =
372 static_cast<tran_low_t>(qcoeff_ptr[rc] * dequant_ptr[rc != 0]);
373 }
374 } else {
375 qcoeff_ptr[rc] = 0;
376 dqcoeff_ptr[rc] = 0;
377 }
378 }
379 }
380
381 // Scan for eob.
382 for (i = 0; i < n_coeffs; i++) {
383 // Use the scan order to find the correct eob.
384 const int rc = scan[i];
385 if (qcoeff_ptr[rc]) {
386 eob = i;
387 }
388 }
389 *eob_ptr = eob + 1;
390 }
391
quantize_fp_nz_c(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const struct macroblock_plane * mb_plane,tran_low_t * qcoeff_ptr,tran_low_t * dqcoeff_ptr,const int16_t * dequant_ptr,uint16_t * eob_ptr,const struct ScanOrder * const scan_order)392 void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
393 const struct macroblock_plane *mb_plane,
394 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
395 const int16_t *dequant_ptr, uint16_t *eob_ptr,
396 const struct ScanOrder *const scan_order) {
397 quant_fp_nz(coeff_ptr, n_coeffs, mb_plane, qcoeff_ptr, dqcoeff_ptr,
398 dequant_ptr, eob_ptr, scan_order, 0);
399 }
400
quantize_fp_32x32_nz_c(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const struct macroblock_plane * mb_plane,tran_low_t * qcoeff_ptr,tran_low_t * dqcoeff_ptr,const int16_t * dequant_ptr,uint16_t * eob_ptr,const struct ScanOrder * const scan_order)401 void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
402 const struct macroblock_plane *mb_plane,
403 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
404 const int16_t *dequant_ptr, uint16_t *eob_ptr,
405 const struct ScanOrder *const scan_order) {
406 quant_fp_nz(coeff_ptr, n_coeffs, mb_plane, qcoeff_ptr, dqcoeff_ptr,
407 dequant_ptr, eob_ptr, scan_order, 1);
408 }
409
// Feeds fully random coefficient blocks and random quantizer tables to both
// the reference and the tested function and requires identical qcoeff,
// dqcoeff and eob results on every iteration.
TEST_P(VP9QuantizeTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  ASSERT_TRUE(coeff_.Init());
  ASSERT_TRUE(qcoeff_.Init());
  ASSERT_TRUE(dqcoeff_.Init());
  Buffer<tran_low_t> ref_qcoeff(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_qcoeff.Init());
  Buffer<tran_low_t> ref_dqcoeff(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_dqcoeff.Init());
  uint16_t ref_eob = 0;
  eob_ = 0;

  for (int iter = 0; iter < number_of_iterations; ++iter) {
    // 16x16 fixtures cycle through TX_4X4, TX_8X8 and TX_16X16.
    const TX_SIZE sz =
        (max_size_ == 16) ? static_cast<TX_SIZE>(iter % 3) : TX_32X32;
    const TX_TYPE tx_type = static_cast<TX_TYPE>((iter >> 2) % 3);
    const int dim = 4 << sz;
    scan_ = &vp9_scan_orders[sz][tx_type];
    count_ = dim * dim;

    coeff_.Set(&rnd, -max_value_, max_value_);
    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                         quant_fp_ptr_);

    ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
                     dequant_ptr_, &ref_eob, scan_);

    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
        dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_));

    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));
    EXPECT_EQ(eob_, ref_eob);

    if (!HasFailure()) continue;

    // Report the first failing iteration and stop.
    printf("Failure on iteration %d.\n", iter);
    qcoeff_.PrintDifference(ref_qcoeff);
    dqcoeff_.PrintDifference(ref_dqcoeff);
    return;
  }
}
459
// Same comparison as OperationCheck, but on blocks that are zero except for
// (at most) two randomly placed coefficients, which stresses the end-of-block
// computation.
TEST_P(VP9QuantizeTest, EOBCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  ASSERT_TRUE(coeff_.Init());
  ASSERT_TRUE(qcoeff_.Init());
  ASSERT_TRUE(dqcoeff_.Init());
  Buffer<tran_low_t> ref_qcoeff(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_qcoeff.Init());
  Buffer<tran_low_t> ref_dqcoeff(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_dqcoeff.Init());
  uint16_t ref_eob = 0;
  eob_ = 0;
  const uint32_t max_index = max_size_ * max_size_ - 1;

  for (int iter = 0; iter < number_of_iterations; ++iter) {
    // 16x16 fixtures cycle through TX_4X4, TX_8X8 and TX_16X16.
    const TX_SIZE sz =
        (max_size_ == 16) ? static_cast<TX_SIZE>(iter % 3) : TX_32X32;
    const TX_TYPE tx_type = static_cast<TX_TYPE>((iter >> 2) % 3);
    const int dim = 4 << sz;
    scan_ = &vp9_scan_orders[sz][tx_type];
    count_ = dim * dim;

    // Two random entries. Each assignment is kept as a single expression so
    // the relative order of its two RandRange() draws is unchanged.
    coeff_.Set(0);
    coeff_.TopLeftPixel()[rnd.RandRange(count_) & max_index] =
        static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
    coeff_.TopLeftPixel()[rnd.RandRange(count_) & max_index] =
        static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;

    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                         quant_fp_ptr_);

    ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
                     dequant_ptr_, &ref_eob, scan_);

    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
        dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_));

    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));
    EXPECT_EQ(eob_, ref_eob);

    if (!HasFailure()) continue;

    // Report the first failing iteration and stop.
    printf("Failure on iteration %d.\n", iter);
    qcoeff_.PrintDifference(ref_qcoeff);
    dqcoeff_.PrintDifference(ref_dqcoeff);
    return;
  }
}
515
// Disabled by default: times the reference and the tested function back to
// back and prints the ratio.
TEST_P(VP9QuantizeTest, DISABLED_Speed) {
  const bool use_median = false;
  Speed(use_median);
}
517
// Disabled by default: benchmarks the tested function through the
// RunNTimes()/PrintMedian() harness.
TEST_P(VP9QuantizeTest, DISABLED_SpeedMedian) {
  const bool use_median = true;
  Speed(use_median);
}
519
using std::make_tuple;

// SSE2 suite. Each tuple is (function under test, reference function,
// bit depth, max_size, is_fp).
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    SSE2, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_8, 16, false),
        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_10, 16, false),
        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_12, 16, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_8, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_10, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_12, 32, false)));

#else
INSTANTIATE_TEST_SUITE_P(
    SSE2, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                 16, true)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_SSE2
557
// SSSE3 suite. Each tuple is (function under test, reference function,
// bit depth, max_size, is_fp).
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_ssse3>,
                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                                 VPX_BITS_8, 32, false),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                 16, true),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,
                                 VPX_BITS_8, 32, true)));
#endif  // HAVE_SSSE3
573
// AVX suite. Each tuple is (function under test, reference function,
// bit depth, max_size, is_fp).
#if HAVE_AVX
INSTANTIATE_TEST_SUITE_P(
    AVX, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx>,
                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                                 VPX_BITS_8, 32, false)));
#endif  // HAVE_AVX
583
// AVX2 suite (x86-64 only). Each tuple is (function under test, reference
// function, bit depth, max_size, is_fp).
#if VPX_ARCH_X86_64 && HAVE_AVX2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    AVX2, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_avx2>,
                   &QuantFPWrapper<vp9_highbd_quantize_fp_c>, VPX_BITS_12, 16,
                   true),
        make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_32x32_avx2>,
                   &QuantFPWrapper<vp9_highbd_quantize_fp_32x32_c>, VPX_BITS_12,
                   32, true),
        make_tuple(&vpx_quantize_b_avx2, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_8, 16, false),
        make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_10, 16, false),
        make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_12, 16, false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_8, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_10, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_12, 32, false)));
#else
INSTANTIATE_TEST_SUITE_P(
    AVX2, VP9QuantizeTest,
    ::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
                                 16, true),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_avx2>,
                                 &QuantFPWrapper<quantize_fp_32x32_nz_c>,
                                 VPX_BITS_8, 32, true),
                      make_tuple(&vpx_quantize_b_avx2, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                                 VPX_BITS_8, 32, false)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // VPX_ARCH_X86_64 && HAVE_AVX2
633
// NEON suite. Each tuple is (function under test, reference function,
// bit depth, max_size, is_fp).
#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    NEON, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
                   VPX_BITS_8, 16, false),
        make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
                   VPX_BITS_10, 16, false),
        make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
                   VPX_BITS_12, 16, false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_8, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_10, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_12, 32, false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                   true)));
#else
INSTANTIATE_TEST_SUITE_P(
    NEON, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                                 VPX_BITS_8, 32, false),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
                                 16, true),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                                 VPX_BITS_8, 32, true)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON
680
// VSX suite. Each tuple is (function under test, reference function,
// bit depth, max_size, is_fp).
//
// The 32x32 quantizers take no coefficient count, so they must go through
// Quant32x32Wrapper to match QuantizeFunc, as in every other suite in this
// file. Passing them unwrapped yields a tuple that cannot be converted to
// QuantizeParam.
// NOTE(review): assumes vpx_quantize_b_32x32_vsx shares the count-less
// prototype of vpx_quantize_b_32x32_c - confirm against the VSX declaration.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    VSX, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_vsx, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_vsx>,
                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                                 VPX_BITS_8, 32, false),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_vsx>,
                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
                                 16, true),
                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_vsx>,
                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                                 VPX_BITS_8, 32, true)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
696
// LSX suite, built only without high bit depth support. Each tuple is
// (function under test, reference function, bit depth, max_size, is_fp).
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    LSX, VP9QuantizeTest,
    ::testing::Values(make_tuple(&vpx_quantize_b_lsx, &vpx_quantize_b_c,
                                 VPX_BITS_8, 16, false),
                      make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_lsx>,
                                 &Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                                 VPX_BITS_8, 32, false)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
706
// Only useful to compare "Speed" test results.
// Every tuple pairs a C implementation with itself, so the correctness checks
// compare a function against its own output; the suite name carries the
// DISABLED_ prefix. Tuple layout: (function under test, reference function,
// bit depth, max_size, is_fp).
INSTANTIATE_TEST_SUITE_P(
    DISABLED_C, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
                   &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
                   true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                   true)));
725 } // namespace
726