1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <stdlib.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16
17 #include "aom/aom_integer.h"
18 #include "aom_ports/mem.h"
19 #include "aom_dsp/blend.h"
20
/*
 * Sum of absolute differences (SAD) between two 8-bit sample blocks.
 *
 *   a, b               first sample of each block.
 *   a_stride, b_stride amount added to a / b after each row, in samples.
 *   width, height      number of columns / rows compared.
 *
 * Returns the sum of |a[x] - b[x]| over every row and column. The result is 0
 * when width or height is not positive, because the loops never execute.
 */
static inline unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b,
                               int b_stride, int width, int height) {
  /* Named "total" so the accumulator does not shadow this function's name. */
  unsigned int total = 0;

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      /* uint8_t operands promote to int, so the difference cannot wrap. */
      total += (unsigned int)abs(a[x] - b[x]);
    }

    a += a_stride;
    b += b_stride;
  }
  return total;
}
37
/*
 * SADMXN(m, n) emits the C SAD kernels for an m-wide by n-high 8-bit block:
 *
 *   aom_sad<m>x<n>_c               SAD of src against ref.
 *   aom_sad<m>x<n>_avg_c           SAD of src against the m x n buffer that
 *                                  aom_comp_avg_pred() fills from second_pred
 *                                  and ref.
 *   aom_dist_wtd_sad<m>x<n>_avg_c  Same, but the buffer is filled by
 *                                  aom_dist_wtd_comp_avg_pred_c() using
 *                                  jcp_param.
 *   aom_sad_skip_<m>x<n>_c         SAD over n / 2 rows stepped with doubled
 *                                  strides, with the result multiplied by 2.
 *
 * m and n are pasted into the function names and size the local prediction
 * buffers, so they are expected to be integer constants.
 */
#define SADMXN(m, n) \
  unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                    const uint8_t *ref, int ref_stride) { \
    const unsigned int block_sad = \
        sad(src, src_stride, ref, ref_stride, (m), (n)); \
    return block_sad; \
  } \
  unsigned int aom_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
                                        const uint8_t *ref, int ref_stride, \
                                        const uint8_t *second_pred) { \
    uint8_t avg_pred[(m) * (n)]; \
    aom_comp_avg_pred(avg_pred, second_pred, (m), (n), ref, ref_stride); \
    return sad(src, src_stride, avg_pred, (m), (m), (n)); \
  } \
  unsigned int aom_dist_wtd_sad##m##x##n##_avg_c( \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) { \
    uint8_t wtd_pred[(m) * (n)]; \
    aom_dist_wtd_comp_avg_pred_c(wtd_pred, second_pred, (m), (n), ref, \
                                 ref_stride, jcp_param); \
    return sad(src, src_stride, wtd_pred, (m), (m), (n)); \
  } \
  unsigned int aom_sad_skip_##m##x##n##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, \
                                          int ref_stride) { \
    const unsigned int half_sad = \
        sad(src, 2 * src_stride, ref, 2 * ref_stride, (m), (n) / 2); \
    return 2 * half_sad; \
  }
63
/*
 * SAD_MXNX4D(m, n) emits the four-reference SAD kernels for an m x n block:
 *
 *   aom_sad<m>x<n>x4d_c        sad_array[i] = aom_sad<m>x<n>_c(src, ref_array[i])
 *                              for i = 0..3.
 *   aom_sad_skip_<m>x<n>x4d_c  sad_array[i] = 2 * SAD over n / 2 rows stepped
 *                              with doubled strides, for i = 0..3.
 *
 * All four references share ref_stride.
 */
#define SAD_MXNX4D(m, n) \
  void aom_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                               const uint8_t *const ref_array[4], \
                               int ref_stride, uint32_t sad_array[4]) { \
    for (int k = 0; k < 4; ++k) { \
      const uint8_t *const ref = ref_array[k]; \
      sad_array[k] = aom_sad##m##x##n##_c(src, src_stride, ref, ref_stride); \
    } \
  } \
  void aom_sad_skip_##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                                     const uint8_t *const ref_array[4], \
                                     int ref_stride, uint32_t sad_array[4]) { \
    for (int k = 0; k < 4; ++k) { \
      const unsigned int half_sad = sad(src, 2 * src_stride, ref_array[k], \
                                        2 * ref_stride, (m), (n) / 2); \
      sad_array[k] = 2 * half_sad; \
    } \
  }
/*
 * SAD_MXNX3D(m, n) emits aom_sad<m>x<n>x3d_c, which has no computation of its
 * own: it forwards every argument unchanged to aom_sad<m>x<n>x4d (the name
 * without the _c suffix), so a SIMD x4d version is used when no dedicated x3d
 * version is available.
 * NOTE(review): the forwarded call is presumably the run-time-dispatched entry
 * declared in config/aom_dsp_rtcd.h - confirm. If it behaves like the C x4d
 * kernel, all four entries of ref_array are read and all four entries of
 * sad_array are written.
 */
#define SAD_MXNX3D(m, n) \
  void aom_sad##m##x##n##x3d_c( \
      const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \
      int ref_stride, uint32_t sad_array[4]) { \
    aom_sad##m##x##n##x4d(src, src_stride, ref_array, ref_stride, sad_array); \
  }
91
92 // 128x128
93 SADMXN(128, 128)
94 SAD_MXNX4D(128, 128)
95 SAD_MXNX3D(128, 128)
96
97 // 128x64
98 SADMXN(128, 64)
99 SAD_MXNX4D(128, 64)
100 SAD_MXNX3D(128, 64)
101
102 // 64x128
103 SADMXN(64, 128)
104 SAD_MXNX4D(64, 128)
105 SAD_MXNX3D(64, 128)
106
107 // 64x64
108 SADMXN(64, 64)
109 SAD_MXNX4D(64, 64)
110 SAD_MXNX3D(64, 64)
111
112 // 64x32
113 SADMXN(64, 32)
114 SAD_MXNX4D(64, 32)
115 SAD_MXNX3D(64, 32)
116
117 // 32x64
118 SADMXN(32, 64)
119 SAD_MXNX4D(32, 64)
120 SAD_MXNX3D(32, 64)
121
122 // 32x32
123 SADMXN(32, 32)
124 SAD_MXNX4D(32, 32)
125 SAD_MXNX3D(32, 32)
126
127 // 32x16
128 SADMXN(32, 16)
129 SAD_MXNX4D(32, 16)
130 SAD_MXNX3D(32, 16)
131
132 // 16x32
133 SADMXN(16, 32)
134 SAD_MXNX4D(16, 32)
135 SAD_MXNX3D(16, 32)
136
137 // 16x16
138 SADMXN(16, 16)
139 SAD_MXNX4D(16, 16)
140 SAD_MXNX3D(16, 16)
141
142 // 16x8
143 SADMXN(16, 8)
144 SAD_MXNX4D(16, 8)
145 SAD_MXNX3D(16, 8)
146
147 // 8x16
148 SADMXN(8, 16)
149 SAD_MXNX4D(8, 16)
150 SAD_MXNX3D(8, 16)
151
152 // 8x8
153 SADMXN(8, 8)
154 SAD_MXNX4D(8, 8)
155 SAD_MXNX3D(8, 8)
156
157 // 8x4
158 SADMXN(8, 4)
159 SAD_MXNX4D(8, 4)
160 SAD_MXNX3D(8, 4)
161
162 // 4x8
163 SADMXN(4, 8)
164 SAD_MXNX4D(4, 8)
165 SAD_MXNX3D(4, 8)
166
167 // 4x4
168 SADMXN(4, 4)
169 SAD_MXNX4D(4, 4)
170 SAD_MXNX3D(4, 4)
171
172 #if !CONFIG_REALTIME_ONLY
173 SADMXN(4, 16)
174 SAD_MXNX4D(4, 16)
175 SADMXN(16, 4)
176 SAD_MXNX4D(16, 4)
177 SADMXN(8, 32)
178 SAD_MXNX4D(8, 32)
179 SADMXN(32, 8)
180 SAD_MXNX4D(32, 8)
181 SADMXN(16, 64)
182 SAD_MXNX4D(16, 64)
183 SADMXN(64, 16)
184 SAD_MXNX4D(64, 16)
185 SAD_MXNX3D(4, 16)
186 SAD_MXNX3D(16, 4)
187 SAD_MXNX3D(8, 32)
188 SAD_MXNX3D(32, 8)
189 SAD_MXNX3D(16, 64)
190 SAD_MXNX3D(64, 16)
191 #endif // !CONFIG_REALTIME_ONLY
192
193 #if CONFIG_AV1_HIGHBITDEPTH
/*
 * Sum of absolute differences between two high-bitdepth sample blocks.
 *
 * a8 and b8 arrive as uint8_t pointers and are mapped to uint16_t sample
 * pointers with CONVERT_TO_SHORTPTR(). a_stride and b_stride are added to
 * those uint16_t pointers after each row, so they count 16-bit samples.
 *
 * Returns the sum of |a[x] - b[x]| over width columns and height rows; 0 when
 * width or height is not positive.
 */
static inline unsigned int highbd_sad(const uint8_t *a8, int a_stride,
                                      const uint8_t *b8, int b_stride,
                                      int width, int height) {
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  /* Named "total" so the accumulator does not shadow the 8-bit sad(). */
  unsigned int total = 0;

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      /* uint16_t operands promote to int before the subtraction. */
      total += (unsigned int)abs(a[x] - b[x]);
    }

    a += a_stride;
    b += b_stride;
  }
  return total;
}
211
/*
 * Sum of absolute differences between two high-bitdepth sample blocks.
 *
 * Performs the same computation as highbd_sad(): a8 and b8 are mapped to
 * uint16_t sample pointers with CONVERT_TO_SHORTPTR(), and a_stride / b_stride
 * are added to those pointers after each row (so they count 16-bit samples).
 * In this file it is called by the compound-average kernels, with b8 pointing
 * at their locally built prediction buffer.
 *
 * Returns the sum of |a[x] - b[x]| over width columns and height rows; 0 when
 * width or height is not positive.
 */
static inline unsigned int highbd_sadb(const uint8_t *a8, int a_stride,
                                       const uint8_t *b8, int b_stride,
                                       int width, int height) {
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  /* Named "total" so the accumulator does not shadow the 8-bit sad(). */
  unsigned int total = 0;

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      /* uint16_t operands promote to int before the subtraction. */
      total += (unsigned int)abs(a[x] - b[x]);
    }

    a += a_stride;
    b += b_stride;
  }
  return total;
}
229
/*
 * HIGHBD_SADMXN(m, n) emits the C high-bitdepth SAD kernels for an m-wide by
 * n-high block:
 *
 *   aom_highbd_sad<m>x<n>_c               SAD of src against ref.
 *   aom_highbd_sad<m>x<n>_avg_c           SAD of src against the m x n
 *                                         uint16_t buffer filled by
 *                                         aom_highbd_comp_avg_pred() from
 *                                         second_pred and ref.
 *   aom_highbd_dist_wtd_sad<m>x<n>_avg_c  Same, but the buffer is filled by
 *                                         aom_highbd_dist_wtd_comp_avg_pred()
 *                                         using jcp_param.
 *   aom_highbd_sad_skip_<m>x<n>_c         SAD over n / 2 rows stepped with
 *                                         doubled strides, multiplied by 2.
 *
 * The local uint16_t buffers are handed on through CONVERT_TO_BYTEPTR() so
 * they match the uint8_t pointer parameters of the helpers. m and n are
 * expected to be integer constants.
 */
#define HIGHBD_SADMXN(m, n) \
  unsigned int aom_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                           const uint8_t *ref, \
                                           int ref_stride) { \
    const unsigned int block_sad = \
        highbd_sad(src, src_stride, ref, ref_stride, (m), (n)); \
    return block_sad; \
  } \
  unsigned int aom_highbd_sad##m##x##n##_avg_c( \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) { \
    uint16_t avg_pred[(m) * (n)]; \
    uint8_t *const avg_pred8 = CONVERT_TO_BYTEPTR(avg_pred); \
    aom_highbd_comp_avg_pred(avg_pred8, second_pred, (m), (n), ref, \
                             ref_stride); \
    return highbd_sadb(src, src_stride, avg_pred8, (m), (m), (n)); \
  } \
  unsigned int aom_highbd_dist_wtd_sad##m##x##n##_avg_c( \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) { \
    uint16_t wtd_pred[(m) * (n)]; \
    uint8_t *const wtd_pred8 = CONVERT_TO_BYTEPTR(wtd_pred); \
    aom_highbd_dist_wtd_comp_avg_pred(wtd_pred8, second_pred, (m), (n), ref, \
                                      ref_stride, jcp_param); \
    return highbd_sadb(src, src_stride, wtd_pred8, (m), (m), (n)); \
  } \
  unsigned int aom_highbd_sad_skip_##m##x##n##_c( \
      const uint8_t *src, int src_stride, const uint8_t *ref, \
      int ref_stride) { \
    const unsigned int half_sad = \
        highbd_sad(src, 2 * src_stride, ref, 2 * ref_stride, (m), (n) / 2); \
    return 2 * half_sad; \
  }
259
/*
 * HIGHBD_SAD_MXNX4D(m, n) emits the four-reference high-bitdepth SAD kernels
 * for an m x n block:
 *
 *   aom_highbd_sad<m>x<n>x4d_c        sad_array[i] =
 *                                     aom_highbd_sad<m>x<n>_c(src, ref_array[i])
 *                                     for i = 0..3.
 *   aom_highbd_sad_skip_<m>x<n>x4d_c  sad_array[i] = 2 * SAD over n / 2 rows
 *                                     stepped with doubled strides, for
 *                                     i = 0..3.
 *
 * All four references share ref_stride.
 */
#define HIGHBD_SAD_MXNX4D(m, n) \
  void aom_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                                      const uint8_t *const ref_array[4], \
                                      int ref_stride, uint32_t sad_array[4]) { \
    for (int k = 0; k < 4; ++k) { \
      const uint8_t *const ref = ref_array[k]; \
      sad_array[k] = \
          aom_highbd_sad##m##x##n##_c(src, src_stride, ref, ref_stride); \
    } \
  } \
  void aom_highbd_sad_skip_##m##x##n##x4d_c( \
      const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \
      int ref_stride, uint32_t sad_array[4]) { \
    for (int k = 0; k < 4; ++k) { \
      const unsigned int half_sad = \
          highbd_sad(src, 2 * src_stride, ref_array[k], 2 * ref_stride, (m), \
                     (n) / 2); \
      sad_array[k] = 2 * half_sad; \
    } \
  }
/*
 * HIGHBD_SAD_MXNX3D(m, n) emits aom_highbd_sad<m>x<n>x3d_c, which has no
 * computation of its own: it forwards every argument unchanged to
 * aom_highbd_sad<m>x<n>x4d (the name without the _c suffix), so a SIMD x4d
 * version is used when no dedicated x3d version is available.
 * NOTE(review): the forwarded call is presumably the run-time-dispatched entry
 * declared in config/aom_dsp_rtcd.h - confirm. If it behaves like the C x4d
 * kernel, all four entries of ref_array are read and all four entries of
 * sad_array are written.
 */
#define HIGHBD_SAD_MXNX3D(m, n) \
  void aom_highbd_sad##m##x##n##x3d_c( \
      const uint8_t *src, int src_stride, const uint8_t *const ref_array[4], \
      int ref_stride, uint32_t sad_array[4]) { \
    aom_highbd_sad##m##x##n##x4d(src, src_stride, ref_array, ref_stride, \
                                 sad_array); \
  }
287
288 // 128x128
289 HIGHBD_SADMXN(128, 128)
290 HIGHBD_SAD_MXNX4D(128, 128)
291 HIGHBD_SAD_MXNX3D(128, 128)
292
293 // 128x64
294 HIGHBD_SADMXN(128, 64)
295 HIGHBD_SAD_MXNX4D(128, 64)
296 HIGHBD_SAD_MXNX3D(128, 64)
297
298 // 64x128
299 HIGHBD_SADMXN(64, 128)
300 HIGHBD_SAD_MXNX4D(64, 128)
301 HIGHBD_SAD_MXNX3D(64, 128)
302
303 // 64x64
304 HIGHBD_SADMXN(64, 64)
305 HIGHBD_SAD_MXNX4D(64, 64)
306 HIGHBD_SAD_MXNX3D(64, 64)
307
308 // 64x32
309 HIGHBD_SADMXN(64, 32)
310 HIGHBD_SAD_MXNX4D(64, 32)
311 HIGHBD_SAD_MXNX3D(64, 32)
312
313 // 32x64
314 HIGHBD_SADMXN(32, 64)
315 HIGHBD_SAD_MXNX4D(32, 64)
316 HIGHBD_SAD_MXNX3D(32, 64)
317
318 // 32x32
319 HIGHBD_SADMXN(32, 32)
320 HIGHBD_SAD_MXNX4D(32, 32)
321 HIGHBD_SAD_MXNX3D(32, 32)
322
323 // 32x16
324 HIGHBD_SADMXN(32, 16)
325 HIGHBD_SAD_MXNX4D(32, 16)
326 HIGHBD_SAD_MXNX3D(32, 16)
327
328 // 16x32
329 HIGHBD_SADMXN(16, 32)
330 HIGHBD_SAD_MXNX4D(16, 32)
331 HIGHBD_SAD_MXNX3D(16, 32)
332
333 // 16x16
334 HIGHBD_SADMXN(16, 16)
335 HIGHBD_SAD_MXNX4D(16, 16)
336 HIGHBD_SAD_MXNX3D(16, 16)
337
338 // 16x8
339 HIGHBD_SADMXN(16, 8)
340 HIGHBD_SAD_MXNX4D(16, 8)
341 HIGHBD_SAD_MXNX3D(16, 8)
342
343 // 8x16
344 HIGHBD_SADMXN(8, 16)
345 HIGHBD_SAD_MXNX4D(8, 16)
346 HIGHBD_SAD_MXNX3D(8, 16)
347
348 // 8x8
349 HIGHBD_SADMXN(8, 8)
350 HIGHBD_SAD_MXNX4D(8, 8)
351 HIGHBD_SAD_MXNX3D(8, 8)
352
353 // 8x4
354 HIGHBD_SADMXN(8, 4)
355 HIGHBD_SAD_MXNX4D(8, 4)
356 HIGHBD_SAD_MXNX3D(8, 4)
357
358 // 4x8
359 HIGHBD_SADMXN(4, 8)
360 HIGHBD_SAD_MXNX4D(4, 8)
361 HIGHBD_SAD_MXNX3D(4, 8)
362
363 // 4x4
364 HIGHBD_SADMXN(4, 4)
365 HIGHBD_SAD_MXNX4D(4, 4)
366 HIGHBD_SAD_MXNX3D(4, 4)
367
368 #if !CONFIG_REALTIME_ONLY
369 HIGHBD_SADMXN(4, 16)
370 HIGHBD_SAD_MXNX4D(4, 16)
371 HIGHBD_SADMXN(16, 4)
372 HIGHBD_SAD_MXNX4D(16, 4)
373 HIGHBD_SADMXN(8, 32)
374 HIGHBD_SAD_MXNX4D(8, 32)
375 HIGHBD_SADMXN(32, 8)
376 HIGHBD_SAD_MXNX4D(32, 8)
377 HIGHBD_SADMXN(16, 64)
378 HIGHBD_SAD_MXNX4D(16, 64)
379 HIGHBD_SADMXN(64, 16)
380 HIGHBD_SAD_MXNX4D(64, 16)
381
382 HIGHBD_SAD_MXNX3D(4, 16)
383 HIGHBD_SAD_MXNX3D(16, 4)
384 HIGHBD_SAD_MXNX3D(8, 32)
385 HIGHBD_SAD_MXNX3D(32, 8)
386 HIGHBD_SAD_MXNX3D(16, 64)
387 HIGHBD_SAD_MXNX3D(64, 16)
388 #endif // !CONFIG_REALTIME_ONLY
389 #endif // CONFIG_AV1_HIGHBITDEPTH
390