1 // Copyright 2021 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/warp.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cmath>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ostream>
25 #include <string>
26 #include <type_traits>
27
28 #include "absl/base/macros.h"
29 #include "absl/strings/match.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/string_view.h"
32 #include "absl/time/clock.h"
33 #include "absl/time/time.h"
34 #include "gtest/gtest.h"
35 #include "src/dsp/constants.h"
36 #include "src/dsp/dsp.h"
37 #include "src/post_filter.h"
38 #include "src/utils/common.h"
39 #include "src/utils/constants.h"
40 #include "src/utils/cpu.h"
41 #include "src/utils/memory.h"
42 #include "tests/block_utils.h"
43 #include "tests/third_party/libvpx/acm_random.h"
44 #include "tests/utils.h"
45
46 namespace libgav1 {
47 namespace dsp {
48 namespace {
49
// Number of border pixels kept on each horizontal side of the source block.
// SetInputData() passes this value twice to PostFilter::ExtendFrame().
constexpr int kSourceBorderHorizontal = 16;
// Number of border rows kept on each vertical side of the source block.
// SetInputData() passes this value twice to PostFilter::ExtendFrame().
constexpr int kSourceBorderVertical = 13;

// Dimensions of the source buffer: the largest superblock plus the source
// border on both sides. kMaxSourceBlockWidth is also used as the row pitch (in
// pixels) of the source buffer.
constexpr int kMaxSourceBlockWidth =
    kMaxSuperBlockSizeInPixels + kSourceBorderHorizontal * 2;
constexpr int kMaxSourceBlockHeight =
    kMaxSuperBlockSizeInPixels + kSourceBorderVertical * 2;
// Dimensions of each destination buffer: the largest superblock plus
// kConvolveBorderLeftTop on both sides. kMaxDestBlockWidth is also used as the
// row pitch (in elements) of the destination buffers.
constexpr int kMaxDestBlockWidth =
    kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2;
constexpr int kMaxDestBlockHeight =
    kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2;
61
// Lookup table read by GenerateApproximateDivisor(), which indexes it with the
// computed |entry| and stores the (possibly negated) value as the division
// factor. The 257 entries decrease from 16384 at index 0 to 8192 at index 256.
// NOTE(review): the values look like round(2^22 / (256 + index)) -- confirm
// against the table in warp_prediction.cc.
constexpr uint16_t kDivisorLookup[257] = {
    16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
    15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
    15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
    14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
    13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
    13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
    13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
    12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
    12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
    11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
    11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
    11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
    10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
    10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
    10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
    9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
    9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
    9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
    9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
    9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
    8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
    8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
    8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
    8240, 8224, 8208, 8192};
87
// Returns the expected MD5 digest (as a hex string) of the 8bpp warp output for
// the block size identified by |id|. |is_compound| selects the compound table
// instead of the regular one.
//
// Returns nullptr when |id| does not index the tables. The check is done at
// runtime (rather than with assert() only) so that builds with NDEBUG defined
// cannot read past the end of the tables; callers are expected to test the
// result against nullptr.
template <bool is_compound>
const char* GetDigest8bpp(int id) {
  static const char* const kDigest[] = {
      "77ba358a0f5e19a8e69fa0a95712578e", "141b23d13a04e0b84d26d514de76d6b0",
      "b0265858454b979852ffadae323f0fb7", "9cf38e3579265b656f1f2100ba15b0e9",
      "ab51d05cc255ef8e37921182df1d89b1", "e3e96f90a4b07ca733e40f057dc01c41",
      "4eee8c1a52a62a266db9b1c9338e124c", "901a87d8f88f6324dbc0960a6de861ac",
      "da9cb6faf6adaeeae12b6784f39186c5", "14450ab05536cdb0d2f499716ccb559d",
      "566b396cbf008bbb869b364fdc81860d", "681a872baf2de4e58d73ea9ab8643a72",
      "7f17d290d513a7416761b3a01f10fd2f",
  };
  static const char* const kCompoundDigest[] = {
      "7e9339d265b7beac7bbe32fe7bb0fccb", "f747d663b427bb38a3ff36b0815a394c",
      "858cf54d2253281a919fbdb48fe91c53", "4721dd97a212c6068bd488f400259afc",
      "36878c7906492bc740112abdea77616f", "89deb68aa35764bbf3024b501a6bed50",
      "8ac5b08f9b2afd38143c357646af0f82", "bf6e2a64835ea0c9d7467394253d0eb2",
      "7b0a539acd2a27eff398dd084abad933", "61c8d81b397c1cf727ff8a9fabab90af",
      "4d412349a25a832c1fb3fb29e3f0e2b3", "2c6dd2a9a4ede9fa00adb567ba646f30",
      "b2a0ce68db3cadd207299f73112bed74",
  };
  constexpr size_t kNumDigests = sizeof(kDigest) / sizeof(kDigest[0]);
  // A single bound is used for both tables, so they must stay the same length.
  static_assert(
      sizeof(kCompoundDigest) / sizeof(kCompoundDigest[0]) == kNumDigests,
      "kDigest and kCompoundDigest must have the same number of entries.");
  if (id < 0 || static_cast<size_t>(id) >= kNumDigests) return nullptr;
  return is_compound ? kCompoundDigest[id] : kDigest[id];
}
112
113 #if LIBGAV1_MAX_BITDEPTH >= 10
// Returns the expected MD5 digest (as a hex string) of the 10bpp warp output
// for the block size identified by |id|. |is_compound| selects the compound
// table instead of the regular one.
//
// Returns nullptr when |id| does not index the tables. The check is done at
// runtime (rather than with assert() only) so that builds with NDEBUG defined
// cannot read past the end of the tables; callers are expected to test the
// result against nullptr.
template <bool is_compound>
const char* GetDigest10bpp(int id) {
  static const char* const kDigest[] = {
      "1fef54f56a0bafccf7f8da1ac3b18b76", "8a65c72f171feafa2f393d31d6b7fe1b",
      "808019346f2f1f45f8cf2e9fc9a49320", "c28e2f2c6c830a29bcc2452166cba521",
      "f040674d6f54e8910d655f0d11fd8cdd", "473af9bb1c6023965c2284b716feef97",
      "e4f6d7babd0813d5afb0f575ebfa8166", "58f96ef8a880963a213624bb0d06d47c",
      "1ec0995fa4490628b679d03683233388", "9526fb102fde7dc1a7e160e65af6da33",
      "f0457427d0c0e31d82ea4f612f7f86f1", "ddc82ae298cccebad493ba9de0f69fbd",
      "5ed615091e2f62df26de7e91a985cb81",
  };
  static const char* const kCompoundDigest[] = {
      "8e6986ae143260e0b8b4887f15a141a1", "0a7f0db8316b8c3569f08834dd0c6f50",
      "90705b2e7dbe083e8a1f70f29d6f257e", "e428a75bea77d769d21f3f7a1d2b0b38",
      "a570b13d790c085c4ab50d71dd085d56", "e5d043c6cd6ff6dbab6e38a8877e93bd",
      "12ea96991e46e3e9aa78ab812ffa0525", "84293a94a53f1cf814fa25e793c3fe27",
      "b98a7502c84ac8437266f702dcc0a92e", "d8db5d52e9b0a5be0ad2d517d5bd16e9",
      "f3be504bbb609ce4cc71c5539252638a", "fcde83b54e14e9de23460644f244b047",
      "42eb66e752e9ef289b47053b5c73fdd6",
  };
  constexpr size_t kNumDigests = sizeof(kDigest) / sizeof(kDigest[0]);
  // A single bound is used for both tables, so they must stay the same length.
  static_assert(
      sizeof(kCompoundDigest) / sizeof(kCompoundDigest[0]) == kNumDigests,
      "kDigest and kCompoundDigest must have the same number of entries.");
  if (id < 0 || static_cast<size_t>(id) >= kNumDigests) return nullptr;
  return is_compound ? kCompoundDigest[id] : kDigest[id];
}
138 #endif // LIBGAV1_MAX_BITDEPTH >= 10
139
140 #if LIBGAV1_MAX_BITDEPTH == 12
// Returns the expected MD5 digest (as a hex string) of the 12bpp warp output
// for the block size identified by |id|. |is_compound| selects the compound
// table instead of the regular one.
//
// Returns nullptr when |id| does not index the tables. The check is done at
// runtime (rather than with assert() only) so that builds with NDEBUG defined
// cannot read past the end of the tables; callers are expected to test the
// result against nullptr.
template <bool is_compound>
const char* GetDigest12bpp(int id) {
  static const char* const kDigest[] = {
      "cd5d5e2102b8917ad70778f523d24bdf", "374a5f1b53a3fdf2eefa741eb71e6889",
      "311636841770ec2427084891df96bee5", "c40c537917b1f0d1d84c99dfcecd8219",
      "a1d9bb920e6c3d20c0cf84adc18e1f15", "13b5659acdb39b717526cb358c6f4026",
      "f81ea4f6fd1f4ebed1262e3fae37b5bb", "c1452fefcd9b9562fe3a0b7f9302809c",
      "8fed8a3159dc7b6b59a39ab2be6bee13", "b46458bc0e5cf1cee92aac4f0f608749",
      "2e6a1039ab111add89f5b44b13565f40", "9c666691860bdc89b03f601b40126196",
      "418a47157d992b94c302ca2e2f6ee07e",
  };
  // NOTE(review): these entries are identical to the 10bpp compound digests.
  // No compound fixture is instantiated in this file, so they are not
  // exercised here -- confirm before enabling compound coverage for 12bpp.
  static const char* const kCompoundDigest[] = {
      "8e6986ae143260e0b8b4887f15a141a1", "0a7f0db8316b8c3569f08834dd0c6f50",
      "90705b2e7dbe083e8a1f70f29d6f257e", "e428a75bea77d769d21f3f7a1d2b0b38",
      "a570b13d790c085c4ab50d71dd085d56", "e5d043c6cd6ff6dbab6e38a8877e93bd",
      "12ea96991e46e3e9aa78ab812ffa0525", "84293a94a53f1cf814fa25e793c3fe27",
      "b98a7502c84ac8437266f702dcc0a92e", "d8db5d52e9b0a5be0ad2d517d5bd16e9",
      "f3be504bbb609ce4cc71c5539252638a", "fcde83b54e14e9de23460644f244b047",
      "42eb66e752e9ef289b47053b5c73fdd6",
  };
  constexpr size_t kNumDigests = sizeof(kDigest) / sizeof(kDigest[0]);
  // A single bound is used for both tables, so they must stay the same length.
  static_assert(
      sizeof(kCompoundDigest) / sizeof(kCompoundDigest[0]) == kNumDigests,
      "kDigest and kCompoundDigest must have the same number of entries.");
  if (id < 0 || static_cast<size_t>(id) >= kNumDigests) return nullptr;
  return is_compound ? kCompoundDigest[id] : kDigest[id];
}
165 #endif // LIBGAV1_MAX_BITDEPTH == 12
166
RandomWarpedParam(int seed_offset,int bits)167 int RandomWarpedParam(int seed_offset, int bits) {
168 libvpx_test::ACMRandom rnd(seed_offset +
169 libvpx_test::ACMRandom::DeterministicSeed());
170 // 1 in 8 chance of generating zero (arbitrary).
171 const bool zero = (rnd.Rand16() & 7) == 0;
172 if (zero) return 0;
173 // Generate uniform values in the range [-(1 << bits), 1] U [1, 1 <<
174 // bits].
175 const int mask = (1 << bits) - 1;
176 const int value = 1 + (rnd.RandRange(1u << 31) & mask);
177 const bool sign = (rnd.Rand16() & 1) != 0;
178 return sign ? value : -value;
179 }
180
181 // This function is a copy from warp_prediction.cc.
182 template <typename T>
GenerateApproximateDivisor(T value,int16_t * division_factor,int16_t * division_shift)183 void GenerateApproximateDivisor(T value, int16_t* division_factor,
184 int16_t* division_shift) {
185 const int n = FloorLog2(std::abs(value));
186 const T e = std::abs(value) - (static_cast<T>(1) << n);
187 const int entry = (n > kDivisorLookupBits)
188 ? RightShiftWithRounding(e, n - kDivisorLookupBits)
189 : static_cast<int>(e << (kDivisorLookupBits - n));
190 *division_shift = n + kDivisorLookupPrecisionBits;
191 *division_factor =
192 (value < 0) ? -kDivisorLookup[entry] : kDivisorLookup[entry];
193 }
194
195 // This function is a copy from warp_prediction.cc.
GetShearParameter(int value)196 int16_t GetShearParameter(int value) {
197 return static_cast<int16_t>(
198 LeftShift(RightShiftWithRoundingSigned(value, kWarpParamRoundingBits),
199 kWarpParamRoundingBits));
200 }
201
202 // This function is a copy from warp_prediction.cc.
203 // This function is used here to help generate valid warp parameters.
SetupShear(const int * params,int16_t * alpha,int16_t * beta,int16_t * gamma,int16_t * delta)204 bool SetupShear(const int* params, int16_t* alpha, int16_t* beta,
205 int16_t* gamma, int16_t* delta) {
206 int16_t division_shift;
207 int16_t division_factor;
208 GenerateApproximateDivisor<int32_t>(params[2], &division_factor,
209 &division_shift);
210 const int alpha0 =
211 Clip3(params[2] - (1 << kWarpedModelPrecisionBits), INT16_MIN, INT16_MAX);
212 const int beta0 = Clip3(params[3], INT16_MIN, INT16_MAX);
213 const int64_t v = LeftShift(params[4], kWarpedModelPrecisionBits);
214 const int gamma0 =
215 Clip3(RightShiftWithRoundingSigned(v * division_factor, division_shift),
216 INT16_MIN, INT16_MAX);
217 const int64_t w = static_cast<int64_t>(params[3]) * params[4];
218 const int delta0 = Clip3(
219 params[5] -
220 RightShiftWithRoundingSigned(w * division_factor, division_shift) -
221 (1 << kWarpedModelPrecisionBits),
222 INT16_MIN, INT16_MAX);
223
224 *alpha = GetShearParameter(alpha0);
225 *beta = GetShearParameter(beta0);
226 *gamma = GetShearParameter(gamma0);
227 *delta = GetShearParameter(delta0);
228 if ((4 * std::abs(*alpha) + 7 * std::abs(*beta) >=
229 (1 << kWarpedModelPrecisionBits)) ||
230 (4 * std::abs(*gamma) + 4 * std::abs(*delta) >=
231 (1 << kWarpedModelPrecisionBits))) {
232 return false; // NOLINT (easier condition to understand).
233 }
234
235 return true;
236 }
237
GenerateWarpedModel(int * params,int16_t * alpha,int16_t * beta,int16_t * gamma,int16_t * delta,int seed)238 void GenerateWarpedModel(int* params, int16_t* alpha, int16_t* beta,
239 int16_t* gamma, int16_t* delta, int seed) {
240 do {
241 params[0] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
242 params[1] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
243 params[2] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
244 (1 << kWarpedModelPrecisionBits);
245 params[3] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
246 params[4] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
247 params[5] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
248 (1 << kWarpedModelPrecisionBits);
249 ++seed;
250 } while (params[2] == 0 || !SetupShear(params, alpha, beta, gamma, delta));
251 }
252
// Test parameter: the dimensions of the block to warp.
struct WarpTestParam {
  WarpTestParam(int block_width, int block_height)
      : width(block_width), height(block_height) {}

  int width;   // Block width.
  int height;  // Block height.
};
258
259 template <bool is_compound, int bitdepth, typename Pixel>
260 class WarpTest : public testing::TestWithParam<WarpTestParam> {
261 public:
262 static_assert(bitdepth >= kBitdepth8 && bitdepth <= LIBGAV1_MAX_BITDEPTH, "");
263 WarpTest() = default;
264 ~WarpTest() override = default;
265
SetUp()266 void SetUp() override {
267 test_utils::ResetDspTable(bitdepth);
268 WarpInit_C();
269 const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
270 ASSERT_NE(dsp, nullptr);
271 const testing::TestInfo* const test_info =
272 testing::UnitTest::GetInstance()->current_test_info();
273 const absl::string_view test_case = test_info->test_suite_name();
274 if (absl::StartsWith(test_case, "C/")) {
275 } else if (absl::StartsWith(test_case, "NEON/")) {
276 WarpInit_NEON();
277 } else if (absl::StartsWith(test_case, "SSE41/")) {
278 if ((GetCpuInfo() & kSSE4_1) == 0) GTEST_SKIP() << "No SSE4.1 support!";
279 WarpInit_SSE4_1();
280 } else {
281 FAIL() << "Unrecognized architecture prefix in test case name: "
282 << test_case;
283 }
284 func_ = is_compound ? dsp->warp_compound : dsp->warp;
285 }
286
287 protected:
288 using DestType =
289 typename std::conditional<is_compound, uint16_t, Pixel>::type;
290
291 void SetInputData(bool use_fixed_values, int value);
292 void Test(bool use_fixed_values, int value, int num_runs = 1);
293 void TestFixedValues();
294 void TestRandomValues();
295 void TestSpeed();
296
297 const WarpTestParam param_ = GetParam();
298
299 private:
300 int warp_params_[8];
301 dsp::WarpFunc func_;
302 // Warp filters are 7-tap, which needs 3 pixels (kConvolveBorderLeftTop)
303 // padding. Destination buffer indices are based on subsampling values (x+y):
304 // 0: (4:4:4), 1:(4:2:2), 2: (4:2:0).
305 Pixel source_[kMaxSourceBlockHeight * kMaxSourceBlockWidth] = {};
306 DestType dest_[3][kMaxDestBlockHeight * kMaxDestBlockWidth] = {};
307 };
308
309 template <bool is_compound, int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)310 void WarpTest<is_compound, bitdepth, Pixel>::SetInputData(bool use_fixed_values,
311 int value) {
312 if (use_fixed_values) {
313 for (int y = 0; y < param_.height; ++y) {
314 const int row = kSourceBorderVertical + y;
315 Memset(source_ + row * kMaxSourceBlockWidth + kSourceBorderHorizontal,
316 value, param_.width);
317 }
318 } else {
319 const int mask = (1 << bitdepth) - 1;
320 libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
321 for (int y = 0; y < param_.height; ++y) {
322 const int row = kSourceBorderVertical + y;
323 for (int x = 0; x < param_.width; ++x) {
324 const int column = kSourceBorderHorizontal + x;
325 source_[row * kMaxSourceBlockWidth + column] = rnd.Rand16() & mask;
326 }
327 }
328 }
329 PostFilter::ExtendFrame<Pixel>(
330 &source_[kSourceBorderVertical * kMaxSourceBlockWidth +
331 kSourceBorderHorizontal],
332 param_.width, param_.height, kMaxSourceBlockWidth,
333 kSourceBorderHorizontal, kSourceBorderHorizontal, kSourceBorderVertical,
334 kSourceBorderVertical);
335 }
336
// Runs the function under test on the current block size for each supported
// subsampling combination and validates the output.
//
// |use_fixed_values|: when true, the source is filled with |value| and the
//   output of every destination buffer is compared against the source. When
//   false, the source is pseudo-random and the MD5 digest of all destination
//   buffers is compared against the expected digest for the block size.
// |value|: fill value used when |use_fixed_values| is true.
// |num_runs|: number of times the function is invoked per subsampling
//   combination; the accumulated time is passed to CheckMd5Digest().
//
// Does nothing when |func_| is null.
template <bool is_compound, int bitdepth, typename Pixel>
void WarpTest<is_compound, bitdepth, Pixel>::Test(bool use_fixed_values,
                                                  int value,
                                                  int num_runs /*= 1*/) {
  if (func_ == nullptr) return;
  SetInputData(use_fixed_values, value);
  libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
  // Offsets of the block's top-left element within the padded buffers.
  const int source_offset =
      kSourceBorderVertical * kMaxSourceBlockWidth + kSourceBorderHorizontal;
  const int dest_offset =
      kConvolveBorderLeftTop * kMaxDestBlockWidth + kConvolveBorderLeftTop;
  const Pixel* const src = source_ + source_offset;
  const ptrdiff_t src_stride = kMaxSourceBlockWidth * sizeof(Pixel);
  // The compound stride is expressed in elements, the non-compound one in
  // bytes.
  const ptrdiff_t dst_stride =
      is_compound ? kMaxDestBlockWidth : kMaxDestBlockWidth * sizeof(Pixel);

  absl::Duration elapsed_time;
  for (int subsampling_x = 0; subsampling_x <= 1; ++subsampling_x) {
    for (int subsampling_y = 0; subsampling_y <= 1; ++subsampling_y) {
      if (subsampling_x == 0 && subsampling_y == 1) {
        // When both are 0: 4:4:4
        // When both are 1: 4:2:0
        // When only |subsampling_x| is 1: 4:2:2
        // Having only |subsampling_y| == 1 is unsupported.
        continue;
      }
      int params[8];
      int16_t alpha;
      int16_t beta;
      int16_t gamma;
      int16_t delta;
      // A fresh model is generated per subsampling combination; the seed comes
      // from the deterministic generator above.
      GenerateWarpedModel(params, &alpha, &beta, &gamma, &delta, rnd.Rand8());

      // Each subsampling combination writes to its own destination buffer.
      const int dest_id = subsampling_x + subsampling_y;
      DestType* const dst = dest_[dest_id] + dest_offset;
      const absl::Time start = absl::Now();
      for (int n = 0; n < num_runs; ++n) {
        func_(src, src_stride, param_.width, param_.height, params,
              subsampling_x, subsampling_y, 0, 0, param_.width, param_.height,
              alpha, beta, gamma, delta, dst, dst_stride);
      }
      elapsed_time += absl::Now() - start;
    }
  }

  if (use_fixed_values) {
    // For fixed values, input and output are identical.
    for (size_t i = 0; i < ABSL_ARRAYSIZE(dest_); ++i) {
      // |is_compound| holds a few more bits of precision and an offset value.
      Pixel compensated_dest[kMaxDestBlockWidth * kMaxDestBlockHeight];
      const int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
      if (is_compound) {
        // Convert the compound output back to pixel values: remove the offset,
        // drop the extra precision bits and clip to the pixel range.
        for (int y = 0; y < param_.height; ++y) {
          for (int x = 0; x < param_.width; ++x) {
            const int compound_value =
                dest_[i][dest_offset + y * kMaxDestBlockWidth + x];
            const int remove_offset = compound_value - compound_offset;
            const int full_shift =
                remove_offset >>
                (kInterRoundBitsVertical - kInterRoundBitsCompoundVertical);
            compensated_dest[y * kMaxDestBlockWidth + x] =
                Clip3(full_shift, 0, (1 << bitdepth) - 1);
          }
        }
      }
      Pixel* pixel_dest =
          is_compound ? compensated_dest
                      : reinterpret_cast<Pixel*>(dest_[i] + dest_offset);
      const bool success = test_utils::CompareBlocks(
          src, pixel_dest, param_.width, param_.height, kMaxSourceBlockWidth,
          kMaxDestBlockWidth, false);
      EXPECT_TRUE(success) << "subsampling_x + subsampling_y: " << i;
    }
  } else {
    // (width, height):
    // (8, 8), id = 0. (8, 16), id = 1. (16, 8), id = 2.
    // (16, 16), id = 3. (16, 32), id = 4. (32, 16), id = 5.
    // ...
    // (128, 128), id = 12.
    int id;
    if (param_.width == param_.height) {
      id = 3 * static_cast<int>(FloorLog2(param_.width) - 3);
    } else if (param_.width < param_.height) {
      id = 1 + 3 * static_cast<int>(FloorLog2(param_.width) - 3);
    } else {
      id = 2 + 3 * static_cast<int>(FloorLog2(param_.height) - 3);
    }

    const char* expected_digest = nullptr;
    switch (bitdepth) {
      case 8:
        expected_digest = GetDigest8bpp<is_compound>(id);
        break;
#if LIBGAV1_MAX_BITDEPTH >= 10
      case 10:
        expected_digest = GetDigest10bpp<is_compound>(id);
        break;
#endif
#if LIBGAV1_MAX_BITDEPTH == 12
      case 12:
        expected_digest = GetDigest12bpp<is_compound>(id);
        break;
#endif
    }
    ASSERT_NE(expected_digest, nullptr);
    // The digest covers all three destination buffers, padding included.
    test_utils::CheckMd5Digest(
        "Warp", absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
        expected_digest, dest_, sizeof(dest_), elapsed_time);
  }
}
447
448 template <bool is_compound, int bitdepth, typename Pixel>
TestFixedValues()449 void WarpTest<is_compound, bitdepth, Pixel>::TestFixedValues() {
450 Test(true, 0);
451 Test(true, 1);
452 Test(true, 128);
453 Test(true, (1 << bitdepth) - 1);
454 }
455
456 template <bool is_compound, int bitdepth, typename Pixel>
TestRandomValues()457 void WarpTest<is_compound, bitdepth, Pixel>::TestRandomValues() {
458 Test(false, 0);
459 }
460
461 template <bool is_compound, int bitdepth, typename Pixel>
TestSpeed()462 void WarpTest<is_compound, bitdepth, Pixel>::TestSpeed() {
463 const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
464 Test(false, 0, num_runs);
465 }
466
// Computes the smallest and largest sums that |filter| can produce when every
// input sample lies in [|min_input|, |max_input|]. Positive taps contribute
// |min_input| to the minimum and |max_input| to the maximum; all other taps
// contribute the other way round. The results are stored in |*min_output| and
// |*max_output|.
void ApplyFilterToSignedInput(const int min_input, const int max_input,
                              const int8_t filter[kSubPixelTaps],
                              int* min_output, int* max_output) {
  int lowest_sum = 0;
  int highest_sum = 0;
  for (int i = 0; i < kSubPixelTaps; ++i) {
    const int tap = filter[i];
    const int scaled_min_input = min_input * tap;
    const int scaled_max_input = max_input * tap;
    if (tap <= 0) {
      lowest_sum += scaled_max_input;
      highest_sum += scaled_min_input;
    } else {
      lowest_sum += scaled_min_input;
      highest_sum += scaled_max_input;
    }
  }
  *min_output = lowest_sum;
  *max_output = highest_sum;
}
484
// Same as ApplyFilterToSignedInput() with the input range [0, |max_input|].
void ApplyFilterToUnsignedInput(const int max_input,
                                const int8_t filter[kSubPixelTaps],
                                int* min_output, int* max_output) {
  const int min_input = 0;
  ApplyFilterToSignedInput(min_input, max_input, filter, min_output,
                           max_output);
}
490
// Validate the maximum ranges for different parts of the Warp process.
//
// For the given |bitdepth|, prints the value range after each stage of a
// two-pass filter that uses kWarpedFilters8[93] in both passes, and checks
// with assert() that each range fits the integer type named in the
// accompanying comment. The range checks are assert()-based, so they are only
// active when assertions are enabled.
template <int bitdepth>
void ShowRange() {
  // Rounding shifts applied after the first and second pass respectively.
  constexpr int horizontal_bits = (bitdepth == kBitdepth12)
                                      ? kInterRoundBitsHorizontal12bpp
                                      : kInterRoundBitsHorizontal;
  constexpr int vertical_bits = (bitdepth == kBitdepth12)
                                    ? kInterRoundBitsVertical12bpp
                                    : kInterRoundBitsVertical;
  constexpr int compound_vertical_bits = kInterRoundBitsCompoundVertical;

  constexpr int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;

  constexpr int max_input = (1 << bitdepth) - 1;

  const int8_t* worst_warp_filter = kWarpedFilters8[93];

  // First pass.
  printf("Bitdepth: %2d Input range: [%8d, %8d]\n", bitdepth, 0,
         max_input);

  int min = 0, max = 0;
  ApplyFilterToUnsignedInput(max_input, worst_warp_filter, &min, &max);

  // Only assigned and read in the 8bpp branches.
  int first_pass_offset;
  if (bitdepth == 8) {
    // Derive an offset for 8 bit.
    // The loop leaves the smallest power of two that is >= -min.
    for (first_pass_offset = 1; -first_pass_offset > min;
         first_pass_offset <<= 1) {
    }
    printf(" 8bpp intermediate offset: %d.\n", first_pass_offset);
    min += first_pass_offset;
    max += first_pass_offset;
    assert(min > 0);
    assert(max < UINT16_MAX);
  } else {
    // 10bpp and 12bpp require int32_t for the intermediate values. Adding an
    // offset is not required.
    assert(min > INT32_MIN);
    assert(max > INT16_MAX && max < INT32_MAX);
  }

  printf(" intermediate range: [%8d, %8d]\n", min, max);

  const int first_pass_min = RightShiftWithRounding(min, horizontal_bits);
  const int first_pass_max = RightShiftWithRounding(max, horizontal_bits);

  printf(" first pass output range: [%8d, %8d]\n", first_pass_min,
         first_pass_max);

  // Second pass.
  if (bitdepth == 8) {
    ApplyFilterToUnsignedInput(first_pass_max, worst_warp_filter, &min, &max);
  } else {
    ApplyFilterToSignedInput(first_pass_min, first_pass_max, worst_warp_filter,
                             &min, &max);
  }

  if (bitdepth == 8) {
    // Remove the offset that was applied in the first pass since we must use
    // int32_t for this phase anyway. 128 is the sum of the filter taps.
    const int offset_removal = (first_pass_offset >> horizontal_bits) * 128;
    printf(" 8bpp intermediate offset removal: %d.\n", offset_removal);
    max -= offset_removal;
    min -= offset_removal;
    assert(min < INT16_MIN && min > INT32_MIN);
    assert(max > INT16_MAX && max < INT32_MAX);
  } else {
    // 10bpp and 12bpp require int32_t for the intermediate values. Adding an
    // offset is not required.
    assert(min > INT32_MIN);
    assert(max > INT16_MAX && max < INT32_MAX);
  }

  printf(" intermediate range: [%8d, %8d]\n", min, max);

  // Second pass non-compound output is clipped to Pixel values.
  const int second_pass_min =
      Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input);
  const int second_pass_max =
      Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input);
  printf(" second pass output range: [%8d, %8d]\n", second_pass_min,
         second_pass_max);

  // Output is Pixel so matches Pixel values.
  assert(second_pass_min == 0);
  assert(second_pass_max == max_input);

  // Compound output uses its own rounding shift and is not clipped; the
  // compound offset (zero for 8bpp) is added afterwards.
  const int compound_second_pass_min =
      RightShiftWithRounding(min, compound_vertical_bits) + compound_offset;
  const int compound_second_pass_max =
      RightShiftWithRounding(max, compound_vertical_bits) + compound_offset;

  printf(" compound second pass output range: [%8d, %8d]\n",
         compound_second_pass_min, compound_second_pass_max);

  if (bitdepth == 8) {
    // 8bpp output is int16_t without an offset.
    assert(compound_second_pass_min > INT16_MIN);
    assert(compound_second_pass_max < INT16_MAX);
  } else {
    // 10bpp and 12bpp use the offset to fit inside uint16_t.
    assert(compound_second_pass_min > 0);
    assert(compound_second_pass_max < UINT16_MAX);
  }

  printf("\n");
}
599
// Prints and checks the intermediate value ranges for 8, 10 and 12 bits per
// pixel. This is a plain TEST: it does not use the parameterized fixture.
TEST(WarpTest, ShowRange) {
  ShowRange<kBitdepth8>();
  ShowRange<kBitdepth10>();
  ShowRange<kBitdepth12>();
}
605
// Non-compound 8bpp instantiation of the WarpTest fixture (Pixel is uint8_t).
using WarpTest8bpp = WarpTest</*is_compound=*/false, 8, uint8_t>;
// TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via
// WarpCompoundTest.
// using WarpCompoundTest8bpp = WarpTest</*is_compound=*/true, 8, uint8_t>;
610
611 // Verifies the sum of the warped filter coefficients is 128 for every filter.
612 //
613 // Verifies the properties used in the calculation of ranges of variables in
614 // the block warp process:
615 // * The maximum sum of the positive warped filter coefficients is 175.
616 // * The minimum (i.e., most negative) sum of the negative warped filter
617 // coefficients is -47.
618 //
619 // NOTE: This test is independent of the bitdepth and the implementation of the
620 // block warp function, so it just needs to be a test in the WarpTest8bpp class
621 // and does not need to be defined with TEST_P.
TEST(WarpTest8bpp,WarpedFilterCoefficientSums)622 TEST(WarpTest8bpp, WarpedFilterCoefficientSums) {
623 int max_positive_sum = 0;
624 int min_negative_sum = 0;
625 for (const auto& filter : kWarpedFilters) {
626 int sum = 0;
627 int positive_sum = 0;
628 int negative_sum = 0;
629 for (const auto coefficient : filter) {
630 sum += coefficient;
631 if (coefficient > 0) {
632 positive_sum += coefficient;
633 } else {
634 negative_sum += coefficient;
635 }
636 }
637 EXPECT_EQ(sum, 128);
638 max_positive_sum = std::max(positive_sum, max_positive_sum);
639 min_negative_sum = std::min(negative_sum, min_negative_sum);
640 }
641 EXPECT_EQ(max_positive_sum, 175);
642 EXPECT_EQ(min_negative_sum, -47);
643 }
644
// 8bpp parameterized cases; each one runs once per block size and per
// instantiated implementation prefix.
TEST_P(WarpTest8bpp, FixedValues) { TestFixedValues(); }

TEST_P(WarpTest8bpp, RandomValues) { TestRandomValues(); }

// Disabled by default (DISABLED_ prefix); intended for timing runs.
TEST_P(WarpTest8bpp, DISABLED_Speed) { TestSpeed(); }
650 const WarpTestParam warp_test_param[] = {
651 WarpTestParam(8, 8), WarpTestParam(8, 16), WarpTestParam(16, 8),
652 WarpTestParam(16, 16), WarpTestParam(16, 32), WarpTestParam(32, 16),
653 WarpTestParam(32, 32), WarpTestParam(32, 64), WarpTestParam(64, 32),
654 WarpTestParam(64, 64), WarpTestParam(64, 128), WarpTestParam(128, 64),
655 WarpTestParam(128, 128),
656 };
657
// The instantiation prefix (C, NEON, SSE41) becomes the start of the test
// suite name, which WarpTest::SetUp() inspects to choose the implementation.
INSTANTIATE_TEST_SUITE_P(C, WarpTest8bpp, testing::ValuesIn(warp_test_param));

#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, WarpTest8bpp,
                         testing::ValuesIn(warp_test_param));
#endif

#if LIBGAV1_ENABLE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE41, WarpTest8bpp,
                         testing::ValuesIn(warp_test_param));
#endif
669
#if LIBGAV1_MAX_BITDEPTH >= 10
// Non-compound 10bpp instantiation of the WarpTest fixture (Pixel is
// uint16_t).
using WarpTest10bpp = WarpTest</*is_compound=*/false, 10, uint16_t>;
// TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via
// WarpCompoundTest.
// using WarpCompoundTest10bpp = WarpTest</*is_compound=*/true, 10, uint16_t>;

TEST_P(WarpTest10bpp, FixedValues) { TestFixedValues(); }

TEST_P(WarpTest10bpp, RandomValues) { TestRandomValues(); }

// Disabled by default (DISABLED_ prefix); intended for timing runs.
TEST_P(WarpTest10bpp, DISABLED_Speed) { TestSpeed(); }

// Only C and NEON instantiations exist for 10bpp in this file.
INSTANTIATE_TEST_SUITE_P(C, WarpTest10bpp, testing::ValuesIn(warp_test_param));

#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, WarpTest10bpp,
                         testing::ValuesIn(warp_test_param));
#endif
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
689
#if LIBGAV1_MAX_BITDEPTH == 12
// Non-compound 12bpp instantiation of the WarpTest fixture (Pixel is
// uint16_t).
using WarpTest12bpp = WarpTest</*is_compound=*/false, 12, uint16_t>;
// TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via
// WarpCompoundTest.
// using WarpCompoundTest12bpp = WarpTest</*is_compound=*/true, 12, uint16_t>;

TEST_P(WarpTest12bpp, FixedValues) { TestFixedValues(); }

TEST_P(WarpTest12bpp, RandomValues) { TestRandomValues(); }

// Disabled by default (DISABLED_ prefix); intended for timing runs.
TEST_P(WarpTest12bpp, DISABLED_Speed) { TestSpeed(); }

// Only the C instantiation exists for 12bpp in this file.
INSTANTIATE_TEST_SUITE_P(C, WarpTest12bpp, testing::ValuesIn(warp_test_param));
#endif  // LIBGAV1_MAX_BITDEPTH == 12
704
operator <<(std::ostream & os,const WarpTestParam & warp_param)705 std::ostream& operator<<(std::ostream& os, const WarpTestParam& warp_param) {
706 return os << "BlockSize" << warp_param.width << "x" << warp_param.height;
707 }
708
709 } // namespace
710 } // namespace dsp
711 } // namespace libgav1
712