/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "tests/checkasm/checkasm.h"

#include <math.h>

#include "src/itx.h"
#include "src/levels.h"
#include "src/scan.h"
#include "src/tables.h"

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.707106781186547524401
#endif

enum Tx1D { DCT, ADST, FLIPADST, IDENTITY, WHT };

static const uint8_t itx_1d_types[N_TX_TYPES_PLUS_LL][2] = {
    [DCT_DCT]           = { DCT,      DCT      },
    [ADST_DCT]          = { DCT,      ADST     },
    [DCT_ADST]          = { ADST,     DCT      },
    [ADST_ADST]         = { ADST,     ADST     },
    [FLIPADST_DCT]      = { DCT,      FLIPADST },
    [DCT_FLIPADST]      = { FLIPADST, DCT      },
    [FLIPADST_FLIPADST] = { FLIPADST, FLIPADST },
    [ADST_FLIPADST]     = { FLIPADST, ADST     },
    [FLIPADST_ADST]     = { ADST,     FLIPADST },
    [IDTX]              = { IDENTITY, IDENTITY },
    [V_DCT]             = { IDENTITY, DCT      },
    [H_DCT]             = { DCT,      IDENTITY },
    [V_ADST]            = { IDENTITY, ADST     },
    [H_ADST]            = { ADST,     IDENTITY },
    [V_FLIPADST]        = { IDENTITY, FLIPADST },
    [H_FLIPADST]        = { FLIPADST, IDENTITY },
    [WHT_WHT]           = { WHT,      WHT      },
};

static const char *const itx_1d_names[5] = {
    [DCT]      = "dct",
    [ADST]     = "adst",
    [FLIPADST] = "flipadst",
    [IDENTITY] = "identity",
    [WHT]      = "wht"
};

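/* Per-size scaling applied to the reference forward transform below so that
 * the generated coefficients land roughly in the range the AV1 inverse
 * transforms expect; indexed by ctz(w * h) - 4, i.e. by log2 of the block's
 * pixel count. */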
static const double scaling_factors[9] = {
    4.0000,             /* 4x4               */
    4.0000 * M_SQRT1_2, /* 4x8   8x4         */
    2.0000,             /* 4x16  8x8   16x4  */
    2.0000 * M_SQRT1_2, /* 8x16  16x8        */
    1.0000,             /* 8x32  16x16 32x8  */
    0.5000 * M_SQRT1_2, /* 16x32 32x16       */
    0.2500,             /* 16x64 32x32 64x16 */
    0.1250 * M_SQRT1_2, /* 32x64 64x32       */
    0.0625,             /* 64x64             */
};

/* FIXME: Ensure that these forward transforms are similar to the real AV1
 * transforms. The FLIPADST currently uses the ADST forward transform, for
 * example, which is obviously "incorrect", but we're using it for now since
 * it at least produces coefficients in the correct range. */

/* DCT-II */
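/* Reference 1-D DCT-II basis as implemented below:
 *   out[i] = sum_j in[j] * cos(pi * (2*j + 1) * i / (2*sz)),
 * with out[0] additionally scaled by 1/sqrt(2). */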
static void fdct_1d(double *const out, const double *const in, const int sz) {
    for (int i = 0; i < sz; i++) {
        out[i] = 0.0;
        for (int j = 0; j < sz; j++)
            out[i] += in[j] * cos(M_PI * (2 * j + 1) * i / (sz * 2.0));
    }
    out[0] *= M_SQRT1_2;
}

/* See "Towards jointly optimal spatial prediction and adaptive transform in
 * video/image coding", by J. Han, A. Saxena, and K. Rose
 * IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
 * and "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
 * by Jingning Han, Yaowu Xu, and Debargha Mukherjee
 * http://research.google.com/pubs/archive/41418.pdf
 */
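/* For sz == 4 this uses the sine basis sin(pi * (j + 1) * (2*i + 1) / 9) from
 * the papers above; for larger sizes a DST-style basis
 * sin(pi * (2*j + 1) * (2*i + 1) / (4*sz)) is used instead. */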
static void fadst_1d(double *const out, const double *const in, const int sz) {
    for (int i = 0; i < sz; i++) {
        out[i] = 0.0;
        for (int j = 0; j < sz; j++)
            out[i] += in[j] * sin(M_PI *
            (sz == 4 ? ( j + 1) * (2 * i + 1) / (8.0 + 1.0) :
                       (2 * j + 1) * (2 * i + 1) / (sz * 4.0)));
    }
}

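/* Lifting-based 4-point Walsh-Hadamard transform, used here for the
 * WHT_WHT (lossless) transform type. */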
static void fwht4_1d(double *const out, const double *const in)
{
    const double t0 = in[0] + in[1];
    const double t3 = in[3] - in[2];
    const double t4 = (t0 - t3) * 0.5;
    const double t1 = t4 - in[1];
    const double t2 = t4 - in[2];
    out[0] = t0 - t2;
    out[1] = t2;
    out[2] = t3 + t1;
    out[3] = t1;
}

static int copy_subcoefs(coef *coeff,
                         const enum RectTxfmSize tx, const enum TxfmType txtp,
                         const int sw, const int sh, const int subsh,
                         int *const max_eob)
{
    /* Copy the top-left coefficients such that the return value (the
     * coefficient scantable index for the eob token) guarantees that only
     * the top-left $sub out of $sz (where $sz >= $sub) coefficients in both
     * dimensions are non-zero. This leads to branching into specific optimized
     * SIMD versions (e.g. dc-only) so that we get full asm coverage in this
     * test. */

    const enum TxClass tx_class = dav1d_tx_type_class[txtp];
    const uint16_t *const scan = dav1d_scans[tx];
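    /* Each subsh step widens the allowed non-zero region by one 8-coefficient
     * band: sub_high is the largest row/column index that may hold a non-zero
     * coefficient, while sub_low is the index the eob must lie beyond, so
     * successive subsh values exercise distinct eob ranges (and SIMD paths). */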
    const int sub_high = subsh > 0 ? subsh * 8 - 1 : 0;
    const int sub_low  = subsh > 1 ? sub_high - 8 : 0;
    int n, eob;

    for (n = 0, eob = 0; n < sw * sh; n++) {
        int rc, rcx, rcy;
        if (tx_class == TX_CLASS_2D)
            rc = scan[n], rcx = rc % sh, rcy = rc / sh;
        else if (tx_class == TX_CLASS_H)
            rcx = n % sh, rcy = n / sh, rc = n;
        else /* tx_class == TX_CLASS_V */
            rcx = n / sw, rcy = n % sw, rc = rcy * sh + rcx;

        /* Pick a random eob within this sub-itx */
        if (rcx > sub_high || rcy > sub_high) {
            break; /* upper boundary */
        } else if (!eob && (rcx > sub_low || rcy > sub_low))
            eob = n; /* lower boundary */
    }
    *max_eob = n - 1;

    if (eob)
        eob += rnd() % (n - eob - 1);
    if (tx_class == TX_CLASS_2D)
        for (n = eob + 1; n < sw * sh; n++)
            coeff[scan[n]] = 0;
    else if (tx_class == TX_CLASS_H)
        for (n = eob + 1; n < sw * sh; n++)
            coeff[n] = 0;
    else /* tx_class == TX_CLASS_V */ {
        for (int rcx = eob / sw, rcy = eob % sw; rcx < sh; rcx++, rcy = -1)
            while (++rcy < sw)
                coeff[rcy * sh + rcx] = 0;
        n = sw * sh;
    }
    for (; n < 32 * 32; n++)
        coeff[n] = rnd();
    return eob;
}

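/* Build an approximate 2-D forward transform of a random residual block: a
 * horizontal pass over each row followed by a vertical pass over each column,
 * scaled and rounded to integer coefficients, then trimmed by copy_subcoefs()
 * to a randomly chosen eob. */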
static int ftx(coef *const buf, const enum RectTxfmSize tx,
               const enum TxfmType txtp, const int w, const int h,
               const int subsh, int *const max_eob, const int bitdepth_max)
{
    double out[64 * 64], temp[64 * 64];
    const double scale = scaling_factors[ctz(w * h) - 4];
    const int sw = imin(w, 32), sh = imin(h, 32);

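    /* Horizontal (row) pass over random input residuals. */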
    for (int i = 0; i < h; i++) {
        double in[64], temp_out[64];

        for (int i = 0; i < w; i++)
            in[i] = (rnd() & (2 * bitdepth_max + 1)) - bitdepth_max;

        switch (itx_1d_types[txtp][0]) {
        case DCT:
            fdct_1d(temp_out, in, w);
            break;
        case ADST:
        case FLIPADST:
            fadst_1d(temp_out, in, w);
            break;
        case WHT:
            fwht4_1d(temp_out, in);
            break;
        case IDENTITY:
            memcpy(temp_out, in, w * sizeof(*temp_out));
            break;
        }

        for (int j = 0; j < w; j++)
            temp[j * h + i] = temp_out[j] * scale;
    }

    /* Vertical (column) pass, using the second 1-D type of the pair. */
    for (int i = 0; i < w; i++) {
        switch (itx_1d_types[txtp][1]) {
        case DCT:
            fdct_1d(&out[i * h], &temp[i * h], h);
            break;
        case ADST:
        case FLIPADST:
            fadst_1d(&out[i * h], &temp[i * h], h);
            break;
        case WHT:
            fwht4_1d(&out[i * h], &temp[i * h]);
            break;
        case IDENTITY:
            memcpy(&out[i * h], &temp[i * h], h * sizeof(*out));
            break;
        }
    }

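    /* Round to integer coefficients; transforms larger than 32x32 only keep
     * the top-left 32x32 block of coefficients. */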
    for (int y = 0; y < sh; y++)
        for (int x = 0; x < sw; x++)
            buf[y * sw + x] = (coef) (out[y * w + x] + 0.5);

    return copy_subcoefs(buf, tx, txtp, sw, sh, subsh, max_eob);
}

static void check_itxfm_add(Dav1dInvTxfmDSPContext *const c,
                            const enum RectTxfmSize tx)
{
    ALIGN_STK_64(coef, coeff, 2, [32 * 32]);
    PIXEL_RECT(c_dst, 64, 64);
    PIXEL_RECT(a_dst, 64, 64);

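    /* Number of eob sub-ranges to exercise, indexed by the larger log2
     * transform dimension. */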
    static const uint8_t subsh_iters[5] = { 2, 2, 3, 5, 5 };

    const int w = dav1d_txfm_dimensions[tx].w * 4;
    const int h = dav1d_txfm_dimensions[tx].h * 4;
    const int subsh_max = subsh_iters[imax(dav1d_txfm_dimensions[tx].lw,
                                           dav1d_txfm_dimensions[tx].lh)];
#if BITDEPTH == 16
    const int bpc_min = 10, bpc_max = 12;
#else
    const int bpc_min = 8, bpc_max = 8;
#endif

    declare_func(void, pixel *dst, ptrdiff_t dst_stride, coef *coeff,
                 int eob HIGHBD_DECL_SUFFIX);

    for (int bpc = bpc_min; bpc <= bpc_max; bpc += 2) {
        bitfn(dav1d_itx_dsp_init)(c, bpc);
        for (enum TxfmType txtp = 0; txtp < N_TX_TYPES_PLUS_LL; txtp++)
            for (int subsh = 0; subsh < subsh_max; subsh++)
                if (check_func(c->itxfm_add[tx][txtp],
                               "inv_txfm_add_%dx%d_%s_%s_%d_%dbpc",
                               w, h, itx_1d_names[itx_1d_types[txtp][0]],
                               itx_1d_names[itx_1d_types[txtp][1]], subsh,
                               bpc))
                {
                    const int bitdepth_max = (1 << bpc) - 1;
                    int max_eob;
                    const int eob = ftx(coeff[0], tx, txtp, w, h, subsh,
                                        &max_eob, bitdepth_max);
                    memcpy(coeff[1], coeff[0], sizeof(*coeff));

                    CLEAR_PIXEL_RECT(c_dst);
                    CLEAR_PIXEL_RECT(a_dst);

                    for (int y = 0; y < h; y++)
                        for (int x = 0; x < w; x++)
                            c_dst[y*PXSTRIDE(c_dst_stride) + x] =
                            a_dst[y*PXSTRIDE(a_dst_stride) + x] = rnd() & bitdepth_max;

                    call_ref(c_dst, c_dst_stride, coeff[0], eob
                             HIGHBD_TAIL_SUFFIX);
                    call_new(a_dst, a_dst_stride, coeff[1], eob
                             HIGHBD_TAIL_SUFFIX);

                    checkasm_check_pixel_padded(c_dst, c_dst_stride,
                                                a_dst, a_dst_stride,
                                                w, h, "dst");
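                    /* Both versions must leave the coefficient buffer in an
                     * identical state (the itx functions are expected to zero
                     * the coefficients they consume). */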
                    if (memcmp(coeff[0], coeff[1], sizeof(*coeff)))
                        fail();

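                    /* Benchmark with max_eob, the largest eob possible for
                     * this subsh range, to time the heaviest code path. */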
                    bench_new(alternate(c_dst, a_dst), a_dst_stride,
                              alternate(coeff[0], coeff[1]), max_eob HIGHBD_TAIL_SUFFIX);
                }
    }
    report("add_%dx%d", w, h);
}

void bitfn(checkasm_check_itx)(void) {
    static const uint8_t txfm_size_order[N_RECT_TX_SIZES] = {
         TX_4X4,   RTX_4X8,   RTX_4X16,
        RTX_8X4,    TX_8X8,   RTX_8X16,  RTX_8X32,
        RTX_16X4,  RTX_16X8,   TX_16X16, RTX_16X32, RTX_16X64,
        RTX_32X8,  RTX_32X16,  TX_32X32, RTX_32X64,
        RTX_64X16, RTX_64X32,  TX_64X64
    };

    /* Zero unused function pointer elements. */
    Dav1dInvTxfmDSPContext c = { { { 0 } } };

    for (int i = 0; i < N_RECT_TX_SIZES; i++)
        check_itxfm_add(&c, txfm_size_order[i]);
}