xref: /aosp_15_r20/external/libgav1/src/dsp/cdef.cc (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/cdef.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27 
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31 
32 #include "src/dsp/cdef.inc"
33 
34 // Silence unused function warnings when CdefDirection_C is obviated.
35 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||           \
36     !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||    \
37     (LIBGAV1_MAX_BITDEPTH >= 10 &&                \
38      !defined(LIBGAV1_Dsp10bpp_CdefDirection)) || \
39     (LIBGAV1_MAX_BITDEPTH == 12 && !defined(LIBGAV1_Dsp12bpp_CdefDirection))
// Weights applied to the squared partial sums in CdefDirection_C(). Entry |i|
// holds 840 / (i + 1).
constexpr int16_t kDivisionTable[8] = {
    840, 420, 280, 210, 168, 140, 120, 105,
};
41 
// Returns the square of |x|. The multiplication is not widened, so the caller
// is responsible for keeping |x| small enough that the product fits.
int32_t Square(int32_t x) {
  const int32_t squared = x * x;
  return squared;
}
43 
44 template <int bitdepth, typename Pixel>
CdefDirection_C(const void * LIBGAV1_RESTRICT const source,ptrdiff_t stride,uint8_t * LIBGAV1_RESTRICT const direction,int * LIBGAV1_RESTRICT const variance)45 void CdefDirection_C(const void* LIBGAV1_RESTRICT const source,
46                      ptrdiff_t stride,
47                      uint8_t* LIBGAV1_RESTRICT const direction,
48                      int* LIBGAV1_RESTRICT const variance) {
49   assert(direction != nullptr);
50   assert(variance != nullptr);
51   const auto* src = static_cast<const Pixel*>(source);
52   stride /= sizeof(Pixel);
53   int32_t cost[8] = {};
54   // |partial| does not have to be int32_t for 8bpp. int16_t will suffice. We
55   // use int32_t to keep it simple since |cost| will have to be int32_t.
56   int32_t partial[8][15] = {};
57   for (int i = 0; i < 8; ++i) {
58     for (int j = 0; j < 8; ++j) {
59       const int x = (src[j] >> (bitdepth - 8)) - 128;
60       partial[0][i + j] += x;
61       partial[1][i + j / 2] += x;
62       partial[2][i] += x;
63       partial[3][3 + i - j / 2] += x;
64       partial[4][7 + i - j] += x;
65       partial[5][3 - i / 2 + j] += x;
66       partial[6][j] += x;
67       partial[7][i / 2 + j] += x;
68     }
69     src += stride;
70   }
71   for (int i = 0; i < 8; ++i) {
72     cost[2] += Square(partial[2][i]);
73     cost[6] += Square(partial[6][i]);
74   }
75   cost[2] *= kDivisionTable[7];
76   cost[6] *= kDivisionTable[7];
77   for (int i = 0; i < 7; ++i) {
78     cost[0] += (Square(partial[0][i]) + Square(partial[0][14 - i])) *
79                kDivisionTable[i];
80     cost[4] += (Square(partial[4][i]) + Square(partial[4][14 - i])) *
81                kDivisionTable[i];
82   }
83   cost[0] += Square(partial[0][7]) * kDivisionTable[7];
84   cost[4] += Square(partial[4][7]) * kDivisionTable[7];
85   for (int i = 1; i < 8; i += 2) {
86     for (int j = 0; j < 5; ++j) {
87       cost[i] += Square(partial[i][3 + j]);
88     }
89     cost[i] *= kDivisionTable[7];
90     for (int j = 0; j < 3; ++j) {
91       cost[i] += (Square(partial[i][j]) + Square(partial[i][10 - j])) *
92                  kDivisionTable[2 * j + 1];
93     }
94   }
95   int32_t best_cost = 0;
96   *direction = 0;
97   for (int i = 0; i < 8; ++i) {
98     if (cost[i] > best_cost) {
99       best_cost = cost[i];
100       *direction = i;
101     }
102   }
103   *variance = (best_cost - cost[(*direction + 4) & 7]) >> 10;
104 }
105 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
106         // !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||
107         // (LIBGAV1_MAX_BITDEPTH >= 10 &&
108         //  !defined(LIBGAV1_Dsp10bpp_CdefDirection))
109         // (LIBGAV1_MAX_BITDEPTH == 12 &&
110         //  !defined(LIBGAV1_Dsp12bpp_CdefDirection))
111 
112 // Silence unused function warnings when CdefFilter_C is obviated.
113 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                                       \
114     !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||                                  \
115     (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefFilters)) || \
116     (LIBGAV1_MAX_BITDEPTH == 12 && !defined(LIBGAV1_Dsp12bpp_CdefFilters))
117 
Constrain(int diff,int threshold,int damping)118 int Constrain(int diff, int threshold, int damping) {
119   assert(threshold != 0);
120   damping = std::max(0, damping - FloorLog2(threshold));
121   const int sign = (diff < 0) ? -1 : 1;
122   return sign *
123          Clip3(threshold - (std::abs(diff) >> damping), 0, std::abs(diff));
124 }
125 
126 // Filters the source block. It doesn't check whether the candidate pixel is
127 // inside the frame. However it requires the source input to be padded with a
128 // constant large value (kCdefLargeValue) if at the boundary.
129 template <int block_width, int bitdepth, typename Pixel,
130           bool enable_primary = true, bool enable_secondary = true>
CdefFilter_C(const uint16_t * LIBGAV1_RESTRICT src,const ptrdiff_t src_stride,const int block_height,const int primary_strength,const int secondary_strength,const int damping,const int direction,void * LIBGAV1_RESTRICT const dest,const ptrdiff_t dest_stride)131 void CdefFilter_C(const uint16_t* LIBGAV1_RESTRICT src,
132                   const ptrdiff_t src_stride, const int block_height,
133                   const int primary_strength, const int secondary_strength,
134                   const int damping, const int direction,
135                   void* LIBGAV1_RESTRICT const dest,
136                   const ptrdiff_t dest_stride) {
137   static_assert(block_width == 4 || block_width == 8, "Invalid CDEF width.");
138   static_assert(enable_primary || enable_secondary, "");
139   assert(block_height == 4 || block_height == 8);
140   assert(direction >= 0 && direction <= 7);
141   constexpr int coeff_shift = bitdepth - 8;
142   // Section 5.9.19. CDEF params syntax.
143   assert(primary_strength >= 0 && primary_strength <= 15 << coeff_shift);
144   assert(secondary_strength >= 0 && secondary_strength <= 4 << coeff_shift &&
145          secondary_strength != 3 << coeff_shift);
146   assert(primary_strength != 0 || secondary_strength != 0);
147   // damping is decreased by 1 for chroma.
148   assert((damping >= 3 && damping <= 6 + coeff_shift) ||
149          (damping >= 2 && damping <= 5 + coeff_shift));
150   // When only primary_strength or secondary_strength are non-zero the number
151   // of pixels inspected (4 for primary_strength, 8 for secondary_strength) and
152   // the taps used don't exceed the amount the sum is
153   // descaled by (16) so we can skip tracking and clipping to the minimum and
154   // maximum value observed.
155   constexpr bool clipping_required = enable_primary && enable_secondary;
156   static constexpr int kCdefSecondaryTaps[2] = {kCdefSecondaryTap0,
157                                                 kCdefSecondaryTap1};
158   auto* dst = static_cast<Pixel*>(dest);
159   const ptrdiff_t dst_stride = dest_stride / sizeof(Pixel);
160   int y = block_height;
161   do {
162     int x = 0;
163     do {
164       int16_t sum = 0;
165       const uint16_t pixel_value = src[x];
166       uint16_t max_value = pixel_value;
167       uint16_t min_value = pixel_value;
168       for (int k = 0; k < 2; ++k) {
169         static constexpr int signs[] = {-1, 1};
170         for (const int& sign : signs) {
171           if (enable_primary) {
172             const int dy = sign * kCdefDirections[direction][k][0];
173             const int dx = sign * kCdefDirections[direction][k][1];
174             const uint16_t value = src[dy * src_stride + dx + x];
175             // Note: the summation can ignore the condition check in SIMD
176             // implementation, because Constrain() will return 0 when
177             // value == kCdefLargeValue.
178             if (value != kCdefLargeValue) {
179               sum += Constrain(value - pixel_value, primary_strength, damping) *
180                      kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][k];
181               if (clipping_required) {
182                 max_value = std::max(value, max_value);
183                 min_value = std::min(value, min_value);
184               }
185             }
186           }
187 
188           if (enable_secondary) {
189             static constexpr int offsets[] = {-2, 2};
190             for (const int& offset : offsets) {
191               const int dy = sign * kCdefDirections[direction + offset][k][0];
192               const int dx = sign * kCdefDirections[direction + offset][k][1];
193               const uint16_t value = src[dy * src_stride + dx + x];
194               // Note: the summation can ignore the condition check in SIMD
195               // implementation.
196               if (value != kCdefLargeValue) {
197                 sum += Constrain(value - pixel_value, secondary_strength,
198                                  damping) *
199                        kCdefSecondaryTaps[k];
200                 if (clipping_required) {
201                   max_value = std::max(value, max_value);
202                   min_value = std::min(value, min_value);
203                 }
204               }
205             }
206           }
207         }
208       }
209 
210       const int offset = (8 + sum - (sum < 0)) >> 4;
211       if (clipping_required) {
212         dst[x] = static_cast<Pixel>(
213             Clip3(pixel_value + offset, min_value, max_value));
214       } else {
215         dst[x] = static_cast<Pixel>(pixel_value + offset);
216       }
217     } while (++x < block_width);
218 
219     src += src_stride;
220     dst += dst_stride;
221   } while (--y != 0);
222 }
223 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
224         // !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||
225         // (LIBGAV1_MAX_BITDEPTH >= 10 &&
226         //  !defined(LIBGAV1_Dsp10bpp_CdefFilters))
227         // (LIBGAV1_MAX_BITDEPTH == 12 &&
228         //  !defined(LIBGAV1_Dsp12bpp_CdefFilters))
229 
Init8bpp()230 void Init8bpp() {
231   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
232   assert(dsp != nullptr);
233 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
234   dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
235   dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
236   dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
237                                          /*enable_secondary=*/false>;
238   dsp->cdef_filters[0][2] =
239       CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
240   dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
241   dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
242                                          /*enable_secondary=*/false>;
243   dsp->cdef_filters[1][2] =
244       CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
245 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
246   static_cast<void>(dsp);
247 #ifndef LIBGAV1_Dsp8bpp_CdefDirection
248   dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
249 #endif
250 #ifndef LIBGAV1_Dsp8bpp_CdefFilters
251   dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
252   dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
253                                          /*enable_secondary=*/false>;
254   dsp->cdef_filters[0][2] =
255       CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
256   dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
257   dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
258                                          /*enable_secondary=*/false>;
259   dsp->cdef_filters[1][2] =
260       CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
261 #endif
262 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
263 }
264 
265 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()266 void Init10bpp() {
267   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
268   assert(dsp != nullptr);
269 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
270   dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
271   dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
272   dsp->cdef_filters[0][1] =
273       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
274                    /*enable_secondary=*/false>;
275   dsp->cdef_filters[0][2] =
276       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
277   dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
278   dsp->cdef_filters[1][1] =
279       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
280                    /*enable_secondary=*/false>;
281   dsp->cdef_filters[1][2] =
282       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
283 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
284   static_cast<void>(dsp);
285 #ifndef LIBGAV1_Dsp10bpp_CdefDirection
286   dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
287 #endif
288 #ifndef LIBGAV1_Dsp10bpp_CdefFilters
289   dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
290   dsp->cdef_filters[0][1] =
291       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
292                    /*enable_secondary=*/false>;
293   dsp->cdef_filters[0][2] =
294       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
295   dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
296   dsp->cdef_filters[1][1] =
297       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
298                    /*enable_secondary=*/false>;
299   dsp->cdef_filters[1][2] =
300       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
301 #endif
302 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
303 }
304 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
305 
306 #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()307 void Init12bpp() {
308   Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
309   assert(dsp != nullptr);
310 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
311   dsp->cdef_direction = CdefDirection_C<12, uint16_t>;
312   dsp->cdef_filters[0][0] = CdefFilter_C<4, 12, uint16_t>;
313   dsp->cdef_filters[0][1] =
314       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/true,
315                    /*enable_secondary=*/false>;
316   dsp->cdef_filters[0][2] =
317       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/false>;
318   dsp->cdef_filters[1][0] = CdefFilter_C<8, 12, uint16_t>;
319   dsp->cdef_filters[1][1] =
320       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/true,
321                    /*enable_secondary=*/false>;
322   dsp->cdef_filters[1][2] =
323       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/false>;
324 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
325   static_cast<void>(dsp);
326 #ifndef LIBGAV1_Dsp12bpp_CdefDirection
327   dsp->cdef_direction = CdefDirection_C<12, uint16_t>;
328 #endif
329 #ifndef LIBGAV1_Dsp12bpp_CdefFilters
330   dsp->cdef_filters[0][0] = CdefFilter_C<4, 12, uint16_t>;
331   dsp->cdef_filters[0][1] =
332       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/true,
333                    /*enable_secondary=*/false>;
334   dsp->cdef_filters[0][2] =
335       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/false>;
336   dsp->cdef_filters[1][0] = CdefFilter_C<8, 12, uint16_t>;
337   dsp->cdef_filters[1][1] =
338       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/true,
339                    /*enable_secondary=*/false>;
340   dsp->cdef_filters[1][2] =
341       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/false>;
342 #endif
343 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
344 }
345 #endif  // LIBGAV1_MAX_BITDEPTH == 12
346 
347 }  // namespace
348 
CdefInit_C()349 void CdefInit_C() {
350   Init8bpp();
351 #if LIBGAV1_MAX_BITDEPTH >= 10
352   Init10bpp();
353 #endif
354 #if LIBGAV1_MAX_BITDEPTH == 12
355   Init12bpp();
356 #endif
357 }
358 
359 }  // namespace dsp
360 }  // namespace libgav1
361