1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/cdef.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
32 #include "src/dsp/cdef.inc"
33
34 // Silence unused function warnings when CdefDirection_C is obviated.
35 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
36 !defined(LIBGAV1_Dsp8bpp_CdefDirection) || \
37 (LIBGAV1_MAX_BITDEPTH >= 10 && \
38 !defined(LIBGAV1_Dsp10bpp_CdefDirection)) || \
39 (LIBGAV1_MAX_BITDEPTH == 12 && !defined(LIBGAV1_Dsp12bpp_CdefDirection))
// Weights applied to the squared line sums in CdefDirection_C.
// kDivisionTable[i] == 840 / (i + 1).
constexpr int16_t kDivisionTable[] = {840, 420, 280, 210, 168, 140, 120, 105};
41
// Returns |x| multiplied by itself.
int32_t Square(int32_t x) {
  const int32_t squared = x * x;
  return squared;
}
43
44 template <int bitdepth, typename Pixel>
CdefDirection_C(const void * LIBGAV1_RESTRICT const source,ptrdiff_t stride,uint8_t * LIBGAV1_RESTRICT const direction,int * LIBGAV1_RESTRICT const variance)45 void CdefDirection_C(const void* LIBGAV1_RESTRICT const source,
46 ptrdiff_t stride,
47 uint8_t* LIBGAV1_RESTRICT const direction,
48 int* LIBGAV1_RESTRICT const variance) {
49 assert(direction != nullptr);
50 assert(variance != nullptr);
51 const auto* src = static_cast<const Pixel*>(source);
52 stride /= sizeof(Pixel);
53 int32_t cost[8] = {};
54 // |partial| does not have to be int32_t for 8bpp. int16_t will suffice. We
55 // use int32_t to keep it simple since |cost| will have to be int32_t.
56 int32_t partial[8][15] = {};
57 for (int i = 0; i < 8; ++i) {
58 for (int j = 0; j < 8; ++j) {
59 const int x = (src[j] >> (bitdepth - 8)) - 128;
60 partial[0][i + j] += x;
61 partial[1][i + j / 2] += x;
62 partial[2][i] += x;
63 partial[3][3 + i - j / 2] += x;
64 partial[4][7 + i - j] += x;
65 partial[5][3 - i / 2 + j] += x;
66 partial[6][j] += x;
67 partial[7][i / 2 + j] += x;
68 }
69 src += stride;
70 }
71 for (int i = 0; i < 8; ++i) {
72 cost[2] += Square(partial[2][i]);
73 cost[6] += Square(partial[6][i]);
74 }
75 cost[2] *= kDivisionTable[7];
76 cost[6] *= kDivisionTable[7];
77 for (int i = 0; i < 7; ++i) {
78 cost[0] += (Square(partial[0][i]) + Square(partial[0][14 - i])) *
79 kDivisionTable[i];
80 cost[4] += (Square(partial[4][i]) + Square(partial[4][14 - i])) *
81 kDivisionTable[i];
82 }
83 cost[0] += Square(partial[0][7]) * kDivisionTable[7];
84 cost[4] += Square(partial[4][7]) * kDivisionTable[7];
85 for (int i = 1; i < 8; i += 2) {
86 for (int j = 0; j < 5; ++j) {
87 cost[i] += Square(partial[i][3 + j]);
88 }
89 cost[i] *= kDivisionTable[7];
90 for (int j = 0; j < 3; ++j) {
91 cost[i] += (Square(partial[i][j]) + Square(partial[i][10 - j])) *
92 kDivisionTable[2 * j + 1];
93 }
94 }
95 int32_t best_cost = 0;
96 *direction = 0;
97 for (int i = 0; i < 8; ++i) {
98 if (cost[i] > best_cost) {
99 best_cost = cost[i];
100 *direction = i;
101 }
102 }
103 *variance = (best_cost - cost[(*direction + 4) & 7]) >> 10;
104 }
105 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
106 // !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||
107 // (LIBGAV1_MAX_BITDEPTH >= 10 &&
// !defined(LIBGAV1_Dsp10bpp_CdefDirection)) ||
109 // (LIBGAV1_MAX_BITDEPTH == 12 &&
110 // !defined(LIBGAV1_Dsp12bpp_CdefDirection))
111
112 // Silence unused function warnings when CdefFilter_C is obviated.
113 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
114 !defined(LIBGAV1_Dsp8bpp_CdefFilters) || \
115 (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefFilters)) || \
116 (LIBGAV1_MAX_BITDEPTH == 12 && !defined(LIBGAV1_Dsp12bpp_CdefFilters))
117
Constrain(int diff,int threshold,int damping)118 int Constrain(int diff, int threshold, int damping) {
119 assert(threshold != 0);
120 damping = std::max(0, damping - FloorLog2(threshold));
121 const int sign = (diff < 0) ? -1 : 1;
122 return sign *
123 Clip3(threshold - (std::abs(diff) >> damping), 0, std::abs(diff));
124 }
125
126 // Filters the source block. It doesn't check whether the candidate pixel is
127 // inside the frame. However it requires the source input to be padded with a
128 // constant large value (kCdefLargeValue) if at the boundary.
129 template <int block_width, int bitdepth, typename Pixel,
130 bool enable_primary = true, bool enable_secondary = true>
CdefFilter_C(const uint16_t * LIBGAV1_RESTRICT src,const ptrdiff_t src_stride,const int block_height,const int primary_strength,const int secondary_strength,const int damping,const int direction,void * LIBGAV1_RESTRICT const dest,const ptrdiff_t dest_stride)131 void CdefFilter_C(const uint16_t* LIBGAV1_RESTRICT src,
132 const ptrdiff_t src_stride, const int block_height,
133 const int primary_strength, const int secondary_strength,
134 const int damping, const int direction,
135 void* LIBGAV1_RESTRICT const dest,
136 const ptrdiff_t dest_stride) {
137 static_assert(block_width == 4 || block_width == 8, "Invalid CDEF width.");
138 static_assert(enable_primary || enable_secondary, "");
139 assert(block_height == 4 || block_height == 8);
140 assert(direction >= 0 && direction <= 7);
141 constexpr int coeff_shift = bitdepth - 8;
142 // Section 5.9.19. CDEF params syntax.
143 assert(primary_strength >= 0 && primary_strength <= 15 << coeff_shift);
144 assert(secondary_strength >= 0 && secondary_strength <= 4 << coeff_shift &&
145 secondary_strength != 3 << coeff_shift);
146 assert(primary_strength != 0 || secondary_strength != 0);
147 // damping is decreased by 1 for chroma.
148 assert((damping >= 3 && damping <= 6 + coeff_shift) ||
149 (damping >= 2 && damping <= 5 + coeff_shift));
150 // When only primary_strength or secondary_strength are non-zero the number
151 // of pixels inspected (4 for primary_strength, 8 for secondary_strength) and
152 // the taps used don't exceed the amount the sum is
153 // descaled by (16) so we can skip tracking and clipping to the minimum and
154 // maximum value observed.
155 constexpr bool clipping_required = enable_primary && enable_secondary;
156 static constexpr int kCdefSecondaryTaps[2] = {kCdefSecondaryTap0,
157 kCdefSecondaryTap1};
158 auto* dst = static_cast<Pixel*>(dest);
159 const ptrdiff_t dst_stride = dest_stride / sizeof(Pixel);
160 int y = block_height;
161 do {
162 int x = 0;
163 do {
164 int16_t sum = 0;
165 const uint16_t pixel_value = src[x];
166 uint16_t max_value = pixel_value;
167 uint16_t min_value = pixel_value;
168 for (int k = 0; k < 2; ++k) {
169 static constexpr int signs[] = {-1, 1};
170 for (const int& sign : signs) {
171 if (enable_primary) {
172 const int dy = sign * kCdefDirections[direction][k][0];
173 const int dx = sign * kCdefDirections[direction][k][1];
174 const uint16_t value = src[dy * src_stride + dx + x];
175 // Note: the summation can ignore the condition check in SIMD
176 // implementation, because Constrain() will return 0 when
177 // value == kCdefLargeValue.
178 if (value != kCdefLargeValue) {
179 sum += Constrain(value - pixel_value, primary_strength, damping) *
180 kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][k];
181 if (clipping_required) {
182 max_value = std::max(value, max_value);
183 min_value = std::min(value, min_value);
184 }
185 }
186 }
187
188 if (enable_secondary) {
189 static constexpr int offsets[] = {-2, 2};
190 for (const int& offset : offsets) {
191 const int dy = sign * kCdefDirections[direction + offset][k][0];
192 const int dx = sign * kCdefDirections[direction + offset][k][1];
193 const uint16_t value = src[dy * src_stride + dx + x];
194 // Note: the summation can ignore the condition check in SIMD
195 // implementation.
196 if (value != kCdefLargeValue) {
197 sum += Constrain(value - pixel_value, secondary_strength,
198 damping) *
199 kCdefSecondaryTaps[k];
200 if (clipping_required) {
201 max_value = std::max(value, max_value);
202 min_value = std::min(value, min_value);
203 }
204 }
205 }
206 }
207 }
208 }
209
210 const int offset = (8 + sum - (sum < 0)) >> 4;
211 if (clipping_required) {
212 dst[x] = static_cast<Pixel>(
213 Clip3(pixel_value + offset, min_value, max_value));
214 } else {
215 dst[x] = static_cast<Pixel>(pixel_value + offset);
216 }
217 } while (++x < block_width);
218
219 src += src_stride;
220 dst += dst_stride;
221 } while (--y != 0);
222 }
223 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
224 // !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||
225 // (LIBGAV1_MAX_BITDEPTH >= 10 &&
// !defined(LIBGAV1_Dsp10bpp_CdefFilters)) ||
227 // (LIBGAV1_MAX_BITDEPTH == 12 &&
228 // !defined(LIBGAV1_Dsp12bpp_CdefFilters))
229
Init8bpp()230 void Init8bpp() {
231 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
232 assert(dsp != nullptr);
233 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
234 dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
235 dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
236 dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
237 /*enable_secondary=*/false>;
238 dsp->cdef_filters[0][2] =
239 CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
240 dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
241 dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
242 /*enable_secondary=*/false>;
243 dsp->cdef_filters[1][2] =
244 CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
245 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
246 static_cast<void>(dsp);
247 #ifndef LIBGAV1_Dsp8bpp_CdefDirection
248 dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
249 #endif
250 #ifndef LIBGAV1_Dsp8bpp_CdefFilters
251 dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
252 dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
253 /*enable_secondary=*/false>;
254 dsp->cdef_filters[0][2] =
255 CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
256 dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
257 dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
258 /*enable_secondary=*/false>;
259 dsp->cdef_filters[1][2] =
260 CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
261 #endif
262 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
263 }
264
265 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()266 void Init10bpp() {
267 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
268 assert(dsp != nullptr);
269 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
270 dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
271 dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
272 dsp->cdef_filters[0][1] =
273 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
274 /*enable_secondary=*/false>;
275 dsp->cdef_filters[0][2] =
276 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
277 dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
278 dsp->cdef_filters[1][1] =
279 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
280 /*enable_secondary=*/false>;
281 dsp->cdef_filters[1][2] =
282 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
283 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
284 static_cast<void>(dsp);
285 #ifndef LIBGAV1_Dsp10bpp_CdefDirection
286 dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
287 #endif
288 #ifndef LIBGAV1_Dsp10bpp_CdefFilters
289 dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
290 dsp->cdef_filters[0][1] =
291 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
292 /*enable_secondary=*/false>;
293 dsp->cdef_filters[0][2] =
294 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
295 dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
296 dsp->cdef_filters[1][1] =
297 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
298 /*enable_secondary=*/false>;
299 dsp->cdef_filters[1][2] =
300 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
301 #endif
302 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
303 }
304 #endif // LIBGAV1_MAX_BITDEPTH >= 10
305
306 #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()307 void Init12bpp() {
308 Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
309 assert(dsp != nullptr);
310 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
311 dsp->cdef_direction = CdefDirection_C<12, uint16_t>;
312 dsp->cdef_filters[0][0] = CdefFilter_C<4, 12, uint16_t>;
313 dsp->cdef_filters[0][1] =
314 CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/true,
315 /*enable_secondary=*/false>;
316 dsp->cdef_filters[0][2] =
317 CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/false>;
318 dsp->cdef_filters[1][0] = CdefFilter_C<8, 12, uint16_t>;
319 dsp->cdef_filters[1][1] =
320 CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/true,
321 /*enable_secondary=*/false>;
322 dsp->cdef_filters[1][2] =
323 CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/false>;
324 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
325 static_cast<void>(dsp);
326 #ifndef LIBGAV1_Dsp12bpp_CdefDirection
327 dsp->cdef_direction = CdefDirection_C<12, uint16_t>;
328 #endif
329 #ifndef LIBGAV1_Dsp12bpp_CdefFilters
330 dsp->cdef_filters[0][0] = CdefFilter_C<4, 12, uint16_t>;
331 dsp->cdef_filters[0][1] =
332 CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/true,
333 /*enable_secondary=*/false>;
334 dsp->cdef_filters[0][2] =
335 CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/false>;
336 dsp->cdef_filters[1][0] = CdefFilter_C<8, 12, uint16_t>;
337 dsp->cdef_filters[1][1] =
338 CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/true,
339 /*enable_secondary=*/false>;
340 dsp->cdef_filters[1][2] =
341 CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/false>;
342 #endif
343 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
344 }
345 #endif // LIBGAV1_MAX_BITDEPTH == 12
346
347 } // namespace
348
CdefInit_C()349 void CdefInit_C() {
350 Init8bpp();
351 #if LIBGAV1_MAX_BITDEPTH >= 10
352 Init10bpp();
353 #endif
354 #if LIBGAV1_MAX_BITDEPTH == 12
355 Init12bpp();
356 #endif
357 }
358
359 } // namespace dsp
360 } // namespace libgav1
361