xref: /aosp_15_r20/external/libvpx/test/vp9_quantize_test.cc (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <tuple>
16 
17 #include "gtest/gtest.h"
18 
19 #include "./vp9_rtcd.h"
20 #include "./vpx_config.h"
21 #include "./vpx_dsp_rtcd.h"
22 #include "test/acm_random.h"
23 #include "test/bench.h"
24 #include "test/buffer.h"
25 #include "test/clear_system_state.h"
26 #include "test/register_state_check.h"
27 #include "test/util.h"
28 #include "vp9/common/vp9_entropy.h"
29 #include "vp9/common/vp9_scan.h"
30 #include "vp9/encoder/vp9_block.h"
31 #include "vpx/vpx_codec.h"
32 #include "vpx/vpx_integer.h"
33 #include "vpx_ports/vpx_timer.h"
34 
35 using libvpx_test::ACMRandom;
36 using libvpx_test::Buffer;
37 
38 namespace {
39 const int number_of_iterations = 100;
40 
41 typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
42                              const macroblock_plane *mb_plane,
43                              tran_low_t *qcoeff, tran_low_t *dqcoeff,
44                              const int16_t *dequant, uint16_t *eob,
45                              const struct ScanOrder *const scan_order);
46 typedef std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
47                    int /*max_size*/, bool /*is_fp*/>
48     QuantizeParam;
49 
50 // Wrapper for 32x32 version which does not use count
51 typedef void (*Quantize32x32Func)(const tran_low_t *coeff,
52                                   const macroblock_plane *const mb_plane,
53                                   tran_low_t *qcoeff, tran_low_t *dqcoeff,
54                                   const int16_t *dequant, uint16_t *eob,
55                                   const struct ScanOrder *const scan_order);
56 
57 template <Quantize32x32Func fn>
Quant32x32Wrapper(const tran_low_t * coeff,intptr_t count,const macroblock_plane * const mb_plane,tran_low_t * qcoeff,tran_low_t * dqcoeff,const int16_t * dequant,uint16_t * eob,const struct ScanOrder * const scan_order)58 void Quant32x32Wrapper(const tran_low_t *coeff, intptr_t count,
59                        const macroblock_plane *const mb_plane,
60                        tran_low_t *qcoeff, tran_low_t *dqcoeff,
61                        const int16_t *dequant, uint16_t *eob,
62                        const struct ScanOrder *const scan_order) {
63   (void)count;
64   fn(coeff, mb_plane, qcoeff, dqcoeff, dequant, eob, scan_order);
65 }
66 
67 // Wrapper for FP version which does not use zbin or quant_shift.
68 typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count,
69                                const macroblock_plane *const mb_plane,
70                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
71                                const int16_t *dequant, uint16_t *eob,
72                                const struct ScanOrder *const scan_order);
73 
74 template <QuantizeFPFunc fn>
QuantFPWrapper(const tran_low_t * coeff,intptr_t count,const macroblock_plane * const mb_plane,tran_low_t * qcoeff,tran_low_t * dqcoeff,const int16_t * dequant,uint16_t * eob,const struct ScanOrder * const scan_order)75 void QuantFPWrapper(const tran_low_t *coeff, intptr_t count,
76                     const macroblock_plane *const mb_plane, tran_low_t *qcoeff,
77                     tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob,
78                     const struct ScanOrder *const scan_order) {
79   fn(coeff, count, mb_plane, qcoeff, dqcoeff, dequant, eob, scan_order);
80 }
81 
GenerateHelperArrays(ACMRandom * rnd,int16_t * zbin,int16_t * round,int16_t * quant,int16_t * quant_shift,int16_t * dequant,int16_t * round_fp,int16_t * quant_fp)82 void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
83                           int16_t *quant, int16_t *quant_shift,
84                           int16_t *dequant, int16_t *round_fp,
85                           int16_t *quant_fp) {
86   // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
87   constexpr int kMaxQRoundingFactorFp = 64;
88 
89   for (int j = 0; j < 2; j++) {
90     // The range is 4 to 1828 in the VP9 tables.
91     const int qlookup = rnd->RandRange(1825) + 4;
92     round_fp[j] = (kMaxQRoundingFactorFp * qlookup) >> 7;
93     quant_fp[j] = (1 << 16) / qlookup;
94 
95     // Values determined by deconstructing vp9_init_quantizer().
96     // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
97     // values or U/V values of any bit depth. This is because y_delta is not
98     // factored into the vp9_ac_quant() call.
99     zbin[j] = rnd->RandRange(1200);
100 
101     // round may be up to 685 for Y values or 914 for U/V.
102     round[j] = rnd->RandRange(914);
103     // quant ranges from 1 to -32703
104     quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
105     // quant_shift goes up to 1 << 16.
106     quant_shift[j] = rnd->RandRange(16384);
107     // dequant maxes out at 1828 for all cases.
108     dequant[j] = rnd->RandRange(1828);
109   }
110   for (int j = 2; j < 8; j++) {
111     zbin[j] = zbin[1];
112     round_fp[j] = round_fp[1];
113     quant_fp[j] = quant_fp[1];
114     round[j] = round[1];
115     quant[j] = quant[1];
116     quant_shift[j] = quant_shift[1];
117     dequant[j] = dequant[1];
118   }
119 }
120 
121 class VP9QuantizeBase : public AbstractBench {
122  public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth,int max_size,bool is_fp)123   VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
124       : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp),
125         coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
126         qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
127         dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
128     // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
129 #if HAVE_NEON
130     max_value_ = (1 << (7 + bit_depth_)) - 1;
131 #else
132     max_value_ = (1 << bit_depth_) - 1;
133 #endif
134 
135     mb_plane_ = reinterpret_cast<macroblock_plane *>(
136         vpx_memalign(16, sizeof(macroblock_plane)));
137 
138     zbin_ptr_ = mb_plane_->zbin =
139         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
140     round_fp_ptr_ = mb_plane_->round_fp = reinterpret_cast<int16_t *>(
141         vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
142     quant_fp_ptr_ = mb_plane_->quant_fp = reinterpret_cast<int16_t *>(
143         vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
144     round_ptr_ = mb_plane_->round =
145         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
146     quant_ptr_ = mb_plane_->quant =
147         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*quant_ptr_)));
148     quant_shift_ptr_ = mb_plane_->quant_shift = reinterpret_cast<int16_t *>(
149         vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
150     dequant_ptr_ = reinterpret_cast<int16_t *>(
151         vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
152 
153     r_ptr_ = (is_fp_) ? round_fp_ptr_ : round_ptr_;
154     q_ptr_ = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
155   }
156 
~VP9QuantizeBase()157   ~VP9QuantizeBase() override {
158     vpx_free(mb_plane_);
159     vpx_free(zbin_ptr_);
160     vpx_free(round_fp_ptr_);
161     vpx_free(quant_fp_ptr_);
162     vpx_free(round_ptr_);
163     vpx_free(quant_ptr_);
164     vpx_free(quant_shift_ptr_);
165     vpx_free(dequant_ptr_);
166     mb_plane_ = nullptr;
167     zbin_ptr_ = nullptr;
168     round_fp_ptr_ = nullptr;
169     quant_fp_ptr_ = nullptr;
170     round_ptr_ = nullptr;
171     quant_ptr_ = nullptr;
172     quant_shift_ptr_ = nullptr;
173     dequant_ptr_ = nullptr;
174     libvpx_test::ClearSystemState();
175   }
176 
177  protected:
178   macroblock_plane *mb_plane_;
179   int16_t *zbin_ptr_;
180   int16_t *quant_fp_ptr_;
181   int16_t *round_fp_ptr_;
182   int16_t *round_ptr_;
183   int16_t *quant_ptr_;
184   int16_t *quant_shift_ptr_;
185   int16_t *dequant_ptr_;
186   const vpx_bit_depth_t bit_depth_;
187   int max_value_;
188   const int max_size_;
189   const bool is_fp_;
190   Buffer<tran_low_t> coeff_;
191   Buffer<tran_low_t> qcoeff_;
192   Buffer<tran_low_t> dqcoeff_;
193   int16_t *r_ptr_;
194   int16_t *q_ptr_;
195   int count_;
196   const ScanOrder *scan_;
197   uint16_t eob_;
198 };
199 
200 class VP9QuantizeTest : public VP9QuantizeBase,
201                         public ::testing::TestWithParam<QuantizeParam> {
202  public:
VP9QuantizeTest()203   VP9QuantizeTest()
204       : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
205         quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
206 
207  protected:
208   void Run() override;
209   void Speed(bool is_median);
210   const QuantizeFunc quantize_op_;
211   const QuantizeFunc ref_quantize_op_;
212 };
213 
Run()214 void VP9QuantizeTest::Run() {
215   quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
216                dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_);
217 }
218 
Speed(bool is_median)219 void VP9QuantizeTest::Speed(bool is_median) {
220   ACMRandom rnd(ACMRandom::DeterministicSeed());
221   ASSERT_TRUE(coeff_.Init());
222   ASSERT_TRUE(qcoeff_.Init());
223   ASSERT_TRUE(dqcoeff_.Init());
224   TX_SIZE starting_sz, ending_sz;
225 
226   if (max_size_ == 16) {
227     starting_sz = TX_4X4;
228     ending_sz = TX_16X16;
229   } else {
230     starting_sz = TX_32X32;
231     ending_sz = TX_32X32;
232   }
233 
234   for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
235     // zbin > coeff, zbin < coeff.
236     for (int i = 0; i < 2; ++i) {
237       // TX_TYPE defines the scan order. That is not relevant to the speed test.
238       // Pick the first one.
239       const TX_TYPE tx_type = DCT_DCT;
240       count_ = (4 << sz) * (4 << sz);
241       scan_ = &vp9_scan_orders[sz][tx_type];
242 
243       GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
244                            quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
245                            quant_fp_ptr_);
246 
247       if (i == 0) {
248         // When |coeff values| are less than zbin the results are 0.
249         int threshold = 100;
250         if (max_size_ == 32) {
251           // For 32x32, the threshold is halved. Double it to keep the values
252           // from clearing it.
253           threshold = 200;
254         }
255         for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
256         coeff_.Set(&rnd, -99, 99);
257       } else if (i == 1) {
258         for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
259         coeff_.Set(&rnd, -500, 500);
260       }
261 
262       const char *type =
263           (i == 0) ? "Bypass calculations " : "Full calculations ";
264       char block_size[16];
265       snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
266       char title[100];
267       snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
268 
269       if (is_median) {
270         RunNTimes(10000000 / count_);
271         PrintMedian(title);
272       } else {
273         Buffer<tran_low_t> ref_qcoeff =
274             Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
275         ASSERT_TRUE(ref_qcoeff.Init());
276         Buffer<tran_low_t> ref_dqcoeff =
277             Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
278         ASSERT_TRUE(ref_dqcoeff.Init());
279         uint16_t ref_eob = 0;
280 
281         const int kNumTests = 5000000;
282         vpx_usec_timer timer, simd_timer;
283 
284         vpx_usec_timer_start(&timer);
285         for (int n = 0; n < kNumTests; ++n) {
286           ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
287                            ref_qcoeff.TopLeftPixel(),
288                            ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
289                            scan_);
290         }
291         vpx_usec_timer_mark(&timer);
292 
293         vpx_usec_timer_start(&simd_timer);
294         for (int n = 0; n < kNumTests; ++n) {
295           quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
296                        qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(),
297                        dequant_ptr_, &eob_, scan_);
298         }
299         vpx_usec_timer_mark(&simd_timer);
300 
301         const int elapsed_time =
302             static_cast<int>(vpx_usec_timer_elapsed(&timer));
303         const int simd_elapsed_time =
304             static_cast<int>(vpx_usec_timer_elapsed(&simd_timer));
305         printf("%s c_time = %d \t simd_time = %d \t Gain = %f \n", title,
306                elapsed_time, simd_elapsed_time,
307                ((float)elapsed_time / simd_elapsed_time));
308       }
309     }
310   }
311 }
312 
313 // This quantizer compares the AC coefficients to the quantization step size to
314 // determine if further multiplication operations are needed.
315 // Based on vp9_quantize_fp_sse2().
quant_fp_nz(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const struct macroblock_plane * const mb_plane,tran_low_t * qcoeff_ptr,tran_low_t * dqcoeff_ptr,const int16_t * dequant_ptr,uint16_t * eob_ptr,const struct ScanOrder * const scan_order,int is_32x32)316 inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
317                         const struct macroblock_plane *const mb_plane,
318                         tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
319                         const int16_t *dequant_ptr, uint16_t *eob_ptr,
320                         const struct ScanOrder *const scan_order,
321                         int is_32x32) {
322   int i, eob = -1;
323   const int thr = dequant_ptr[1] >> (1 + is_32x32);
324   const int16_t *round_ptr = mb_plane->round_fp;
325   const int16_t *quant_ptr = mb_plane->quant_fp;
326   const int16_t *scan = scan_order->scan;
327 
328   // Quantization pass: All coefficients with index >= zero_flag are
329   // skippable. Note: zero_flag can be zero.
330   for (i = 0; i < n_coeffs; i += 16) {
331     int y;
332     int nzflag_cnt = 0;
333     int abs_coeff[16];
334     int coeff_sign[16];
335 
336     // count nzflag for each row (16 tran_low_t)
337     for (y = 0; y < 16; ++y) {
338       const int rc = i + y;
339       const int coeff = coeff_ptr[rc];
340       coeff_sign[y] = (coeff >> 31);
341       abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
342       // The first 16 are skipped in the sse2 code.  Do the same here to match.
343       if (i >= 16 && (abs_coeff[y] <= thr)) {
344         nzflag_cnt++;
345       }
346     }
347 
348     for (y = 0; y < 16; ++y) {
349       const int rc = i + y;
350       // If all of the AC coeffs in a row has magnitude less than the
351       // quantization step_size/2, quantize to zero.
352       if (nzflag_cnt < 16) {
353         int tmp;
354         int _round;
355 
356         if (is_32x32) {
357           _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
358         } else {
359           _round = round_ptr[rc != 0];
360         }
361         tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX);
362         tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32);
363         qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
364         dqcoeff_ptr[rc] =
365             static_cast<tran_low_t>(qcoeff_ptr[rc] * dequant_ptr[rc != 0]);
366 
367         if (is_32x32) {
368           dqcoeff_ptr[rc] = static_cast<tran_low_t>(qcoeff_ptr[rc] *
369                                                     dequant_ptr[rc != 0] / 2);
370         } else {
371           dqcoeff_ptr[rc] =
372               static_cast<tran_low_t>(qcoeff_ptr[rc] * dequant_ptr[rc != 0]);
373         }
374       } else {
375         qcoeff_ptr[rc] = 0;
376         dqcoeff_ptr[rc] = 0;
377       }
378     }
379   }
380 
381   // Scan for eob.
382   for (i = 0; i < n_coeffs; i++) {
383     // Use the scan order to find the correct eob.
384     const int rc = scan[i];
385     if (qcoeff_ptr[rc]) {
386       eob = i;
387     }
388   }
389   *eob_ptr = eob + 1;
390 }
391 
quantize_fp_nz_c(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const struct macroblock_plane * mb_plane,tran_low_t * qcoeff_ptr,tran_low_t * dqcoeff_ptr,const int16_t * dequant_ptr,uint16_t * eob_ptr,const struct ScanOrder * const scan_order)392 void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
393                       const struct macroblock_plane *mb_plane,
394                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
395                       const int16_t *dequant_ptr, uint16_t *eob_ptr,
396                       const struct ScanOrder *const scan_order) {
397   quant_fp_nz(coeff_ptr, n_coeffs, mb_plane, qcoeff_ptr, dqcoeff_ptr,
398               dequant_ptr, eob_ptr, scan_order, 0);
399 }
400 
quantize_fp_32x32_nz_c(const tran_low_t * coeff_ptr,intptr_t n_coeffs,const struct macroblock_plane * mb_plane,tran_low_t * qcoeff_ptr,tran_low_t * dqcoeff_ptr,const int16_t * dequant_ptr,uint16_t * eob_ptr,const struct ScanOrder * const scan_order)401 void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
402                             const struct macroblock_plane *mb_plane,
403                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
404                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
405                             const struct ScanOrder *const scan_order) {
406   quant_fp_nz(coeff_ptr, n_coeffs, mb_plane, qcoeff_ptr, dqcoeff_ptr,
407               dequant_ptr, eob_ptr, scan_order, 1);
408 }
409 
// Fills the whole block with random coefficients and checks that the function
// under test produces the same qcoeff, dqcoeff and eob as the reference, over
// number_of_iterations rounds with varying transform size and scan order.
TEST_P(VP9QuantizeTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  ASSERT_TRUE(coeff_.Init());
  ASSERT_TRUE(qcoeff_.Init());
  ASSERT_TRUE(dqcoeff_.Init());
  Buffer<tran_low_t> ref_qcoeff(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_qcoeff.Init());
  Buffer<tran_low_t> ref_dqcoeff(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_dqcoeff.Init());
  uint16_t ref_eob = 0;
  eob_ = 0;

  for (int iter = 0; iter < number_of_iterations; ++iter) {
    // A 16-wide fixture cycles through TX_4X4, TX_8X8 and TX_16X16; any other
    // fixture always uses TX_32X32.
    TX_SIZE sz = TX_32X32;
    if (max_size_ == 16) {
      sz = static_cast<TX_SIZE>(iter % 3);
    }
    const TX_TYPE tx_type = static_cast<TX_TYPE>((iter >> 2) % 3);
    const int side = 4 << sz;
    scan_ = &vp9_scan_orders[sz][tx_type];
    count_ = side * side;

    // Inputs: coefficients first, then the helper tables, so the random
    // stream is consumed in a fixed order.
    coeff_.Set(&rnd, -max_value_, max_value_);
    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                         quant_fp_ptr_);

    ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
                     dequant_ptr_, &ref_eob, scan_);

    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
        dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_));

    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));

    EXPECT_EQ(eob_, ref_eob);

    // Stop at the first failing round and dump the differing values.
    if (HasFailure()) {
      printf("Failure on iteration %d.\n", iter);
      qcoeff_.PrintDifference(ref_qcoeff);
      dqcoeff_.PrintDifference(ref_dqcoeff);
      return;
    }
  }
}
459 
// Checks end-of-block handling on sparse input: the block is zeroed, two
// randomly placed coefficients are set to random values, and the function
// under test must match the reference for qcoeff, dqcoeff and eob.
TEST_P(VP9QuantizeTest, EOBCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  ASSERT_TRUE(coeff_.Init());
  ASSERT_TRUE(qcoeff_.Init());
  ASSERT_TRUE(dqcoeff_.Init());
  Buffer<tran_low_t> ref_qcoeff =
      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_qcoeff.Init());
  Buffer<tran_low_t> ref_dqcoeff =
      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
  ASSERT_TRUE(ref_dqcoeff.Init());
  uint16_t ref_eob = 0;
  eob_ = 0;
  // Mask that keeps a random position inside the max_size_ x max_size_ block.
  const uint32_t max_index = max_size_ * max_size_ - 1;

  for (int i = 0; i < number_of_iterations; ++i) {
    TX_SIZE sz;
    if (max_size_ == 16) {
      sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
    } else {
      sz = TX_32X32;
    }
    const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
    scan_ = &vp9_scan_orders[sz][tx_type];
    count_ = (4 << sz) * (4 << sz);

    // Two random entries.
    // The value and the position are drawn in separate statements so that the
    // random stream is consumed in the same order on every compiler. Writing
    // both draws inside one assignment leaves their relative order
    // unspecified before C++17; the order used here (value, then position) is
    // the one C++17 defines for that expression.
    coeff_.Set(0);
    for (int entry = 0; entry < 2; ++entry) {
      const int value =
          static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
      const uint32_t position = rnd.RandRange(count_) & max_index;
      coeff_.TopLeftPixel()[position] = value;
    }

    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
                         quant_fp_ptr_);
    ref_quantize_op_(coeff_.TopLeftPixel(), count_, mb_plane_,
                     ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(),
                     dequant_ptr_, &ref_eob, scan_);

    ASM_REGISTER_STATE_CHECK(quantize_op_(
        coeff_.TopLeftPixel(), count_, mb_plane_, qcoeff_.TopLeftPixel(),
        dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_));

    EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff));
    EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff));

    EXPECT_EQ(eob_, ref_eob);

    // Stop at the first failing round and dump the differing values.
    if (HasFailure()) {
      printf("Failure on iteration %d.\n", i);
      qcoeff_.PrintDifference(ref_qcoeff);
      dqcoeff_.PrintDifference(ref_dqcoeff);
      return;
    }
  }
}
515 
// Disabled by default: times the reference against the function under test.
TEST_P(VP9QuantizeTest, DISABLED_Speed) {
  const bool kUseMedian = false;
  Speed(kUseMedian);
}
517 
// Disabled by default: reports the median run time of the function under
// test.
TEST_P(VP9QuantizeTest, DISABLED_SpeedMedian) {
  const bool kUseMedian = true;
  Speed(kUseMedian);
}
519 
520 using std::make_tuple;
521 
// SSE2 implementations, each paired with its C reference.
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    SSE2, VP9QuantizeTest,
    ::testing::Values(
        // Regular and fp quantizers, up to 16x16.
        make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        // High bit depth quantizer, up to 16x16, at 8/10/12 bits.
        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_8, 16, false),
        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_10, 16, false),
        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_12, 16, false),
        // High bit depth 32x32 quantizer at 8/10/12 bits.
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_8, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_10, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_sse2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_12, 32, false)));

#else
INSTANTIATE_TEST_SUITE_P(
    SSE2, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_SSE2
557 
// SSSE3 implementations, each paired with its C reference.
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, VP9QuantizeTest,
    ::testing::Values(
        // Regular quantizers.
        make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_ssse3>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        // fp quantizers.
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
                   &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
                   true)));
#endif  // HAVE_SSSE3
573 
// AVX implementations, each paired with its C reference.
#if HAVE_AVX
INSTANTIATE_TEST_SUITE_P(
    AVX, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false)));
#endif  // HAVE_AVX
583 
// AVX2 implementations (x86-64 only), each paired with its C reference.
#if VPX_ARCH_X86_64 && HAVE_AVX2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    AVX2, VP9QuantizeTest,
    ::testing::Values(
        // fp quantizers.
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_avx2>,
                   &QuantFPWrapper<vp9_highbd_quantize_fp_c>, VPX_BITS_12, 16,
                   true),
        make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_32x32_avx2>,
                   &QuantFPWrapper<vp9_highbd_quantize_fp_32x32_c>, VPX_BITS_12,
                   32, true),
        // Regular quantizers, up to 16x16.
        make_tuple(&vpx_quantize_b_avx2, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_8, 16, false),
        make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_10, 16, false),
        make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
                   VPX_BITS_12, 16, false),
        // Regular quantizers, 32x32.
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_8, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_10, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_12, 32, false)));
#else
INSTANTIATE_TEST_SUITE_P(
    AVX2, VP9QuantizeTest,
    ::testing::Values(
        // fp quantizers.
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_avx2>,
                   &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
                   true),
        // Regular quantizers.
        make_tuple(&vpx_quantize_b_avx2, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_avx2>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // VPX_ARCH_X86_64 && HAVE_AVX2
633 
// NEON implementations, each paired with its C reference.
#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    NEON, VP9QuantizeTest,
    ::testing::Values(
        // Regular quantizers, up to 16x16.
        make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
                   VPX_BITS_8, 16, false),
        make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
                   VPX_BITS_10, 16, false),
        make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
                   VPX_BITS_12, 16, false),
        // Regular quantizers, 32x32.
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_8, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_10, 32, false),
        make_tuple(&Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_highbd_quantize_b_32x32_c>,
                   VPX_BITS_12, 32, false),
        // fp quantizers.
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                   true)));
#else
INSTANTIATE_TEST_SUITE_P(
    NEON, VP9QuantizeTest,
    ::testing::Values(
        // Regular quantizers.
        make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_neon>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        // fp quantizers.
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                   true)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON
680 
// VSX implementations, each paired with its C reference.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    VSX, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_vsx, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        // The 32x32 quantizers take no count argument, so they must go
        // through Quant32x32Wrapper to match QuantizeFunc; passing them
        // directly yields a tuple that does not convert to QuantizeParam.
        // NOTE(review): assumes vpx_quantize_b_32x32_vsx has the same
        // signature as vpx_quantize_b_32x32_c - confirm against the rtcd
        // header.
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_vsx>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_vsx>,
                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_vsx>,
                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
                   true)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
696 
// LSX implementations, each paired with its C reference.
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    LSX, VP9QuantizeTest,
    ::testing::Values(
        make_tuple(&vpx_quantize_b_lsx, &vpx_quantize_b_c, VPX_BITS_8, 16,
                   false),
        make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_lsx>,
                   &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
                   false)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
706 
707 // Only useful to compare "Speed" test results.
708 INSTANTIATE_TEST_SUITE_P(
709     DISABLED_C, VP9QuantizeTest,
710     ::testing::Values(
711         make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
712         make_tuple(&Quant32x32Wrapper<vpx_quantize_b_32x32_c>,
713                    &Quant32x32Wrapper<vpx_quantize_b_32x32_c>, VPX_BITS_8, 32,
714                    false),
715         make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
716                    &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
717         make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
718                    &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
719         make_tuple(&QuantFPWrapper<quantize_fp_32x32_nz_c>,
720                    &QuantFPWrapper<quantize_fp_32x32_nz_c>, VPX_BITS_8, 32,
721                    true),
722         make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
723                    &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
724                    true)));
725 }  // namespace
726