1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/post_filter.h"
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstring>
23
24 #include "src/dsp/constants.h"
25 #include "src/dsp/dsp.h"
26 #include "src/utils/array_2d.h"
27 #include "src/utils/blocking_counter.h"
28 #include "src/utils/common.h"
29 #include "src/utils/compiler_attributes.h"
30 #include "src/utils/constants.h"
31 #include "src/utils/memory.h"
32 #include "src/utils/types.h"
33
34 namespace libgav1 {
35 namespace {
36
37 // Import all the constants in the anonymous namespace.
38 #include "src/post_filter/deblock_thresholds.inc"
39
// Row indices of loop restoration border. This is used to populate the
// |loop_restoration_border_| when either cdef is on or multithreading is
// enabled. The table is indexed by subsampling_y. Each entry is the offset, in
// rows of the plane, from the row addressed by a row4x4 position to the first
// of the four consecutive rows that are saved in |loop_restoration_border_|.
constexpr int kLoopRestorationBorderRows[2] = {54, 26};
44
45 } // namespace
46
// Sets up the post filter state for one frame.
//
// The constructor caches values derived from |frame_header| and
// |sequence_header|, binds the buffers owned by |frame_scratch_buffer|,
// precomputes the SuperRes parameters (only when SuperRes is enabled) and
// computes the per-plane start pointers used by the individual filter stages.
// |dsp| is dereferenced unconditionally and therefore must not be null.
PostFilter::PostFilter(const ObuFrameHeader& frame_header,
                       const ObuSequenceHeader& sequence_header,
                       FrameScratchBuffer* const frame_scratch_buffer,
                       YuvBuffer* const frame_buffer, const dsp::Dsp* dsp,
                       int do_post_filter_mask)
    : frame_header_(frame_header),
      loop_restoration_(frame_header.loop_restoration),
      dsp_(*dsp),
      bitdepth_(sequence_header.color_config.bitdepth),
      // The luma plane is never subsampled; both chroma planes share the
      // subsampling of the sequence.
      subsampling_x_{0, sequence_header.color_config.subsampling_x,
                     sequence_header.color_config.subsampling_x},
      subsampling_y_{0, sequence_header.color_config.subsampling_y,
                     sequence_header.color_config.subsampling_y},
      planes_(sequence_header.color_config.is_monochrome ? kMaxPlanesMonochrome
                                                         : kMaxPlanes),
      // 0 when a pixel is one byte (8-bit), 1 when it is two bytes.
      pixel_size_log2_(static_cast<int>((bitdepth_ == 8) ? sizeof(uint8_t)
                                                         : sizeof(uint16_t)) -
                       1),
      inner_thresh_(kInnerThresh[frame_header.loop_filter.sharpness]),
      outer_thresh_(kOuterThresh[frame_header.loop_filter.sharpness]),
      needs_chroma_deblock_(frame_header.loop_filter.level[kPlaneU + 1] != 0 ||
                            frame_header.loop_filter.level[kPlaneV + 1] != 0),
      do_cdef_(DoCdef(frame_header, do_post_filter_mask)),
      do_deblock_(DoDeblock(frame_header, do_post_filter_mask)),
      do_restoration_(
          DoRestoration(loop_restoration_, do_post_filter_mask, planes_)),
      do_superres_(DoSuperRes(frame_header, do_post_filter_mask)),
      cdef_index_(frame_scratch_buffer->cdef_index),
      cdef_skip_(frame_scratch_buffer->cdef_skip),
      inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
      restoration_info_(&frame_scratch_buffer->loop_restoration_info),
      // The second entry reuses the luma coefficient buffer when the frame is
      // monochrome or chroma is not horizontally subsampled.
      superres_coefficients_{
          frame_scratch_buffer->superres_coefficients[kPlaneTypeY].get(),
          frame_scratch_buffer
              ->superres_coefficients
                  [(sequence_header.color_config.is_monochrome ||
                    sequence_header.color_config.subsampling_x == 0)
                       ? kPlaneTypeY
                       : kPlaneTypeUV]
              .get()},
      superres_line_buffer_(frame_scratch_buffer->superres_line_buffer),
      block_parameters_(frame_scratch_buffer->block_parameters_holder),
      frame_buffer_(*frame_buffer),
      cdef_border_(frame_scratch_buffer->cdef_border),
      loop_restoration_border_(frame_scratch_buffer->loop_restoration_border),
      thread_pool_(
          frame_scratch_buffer->threading_strategy.post_filter_thread_pool()) {
  // Compute the deblock filter levels with all loop filter deltas set to zero.
  const int8_t zero_delta_lf[kFrameLfCount] = {};
  ComputeDeblockFilterLevels(zero_delta_lf, deblock_filter_levels_);
  if (DoSuperRes()) {
    int plane = kPlaneY;
    const int width = frame_header_.width;
    const int upscaled_width_fh = frame_header_.upscaled_width;
    do {
      const int downscaled_width =
          SubsampledValue(width, subsampling_x_[plane]);
      const int upscaled_width =
          SubsampledValue(upscaled_width_fh, subsampling_x_[plane]);
      const int superres_width = downscaled_width << kSuperResScaleBits;
      // Ratio of the downscaled width to the upscaled width, in fixed point
      // with kSuperResScaleBits fractional bits, rounded to nearest.
      super_res_info_[plane].step =
          (superres_width + upscaled_width / 2) / upscaled_width;
      // Rounding error of |step| accumulated over the whole upscaled row.
      const int error =
          super_res_info_[plane].step * upscaled_width - superres_width;
      super_res_info_[plane].initial_subpixel_x =
          ((-((upscaled_width - downscaled_width) << (kSuperResScaleBits - 1)) +
            DivideBy2(upscaled_width)) /
               upscaled_width +
           (1 << (kSuperResExtraBits - 1)) - error / 2) &
          kSuperResScaleMask;
      super_res_info_[plane].upscaled_width = upscaled_width;
    } while (++plane < planes_);
    // The coefficient precomputation hook is optional in the dsp table.
    if (dsp->super_res_coefficients != nullptr) {
      int plane = kPlaneY;
      // When both entries of |superres_coefficients_| point to the same
      // buffer, the buffer only needs to be filled once.
      const int number_loops = (superres_coefficients_[kPlaneTypeY] ==
                                superres_coefficients_[kPlaneTypeUV])
                                   ? kMaxPlanesMonochrome
                                   : static_cast<int>(kNumPlaneTypes);
      do {
        dsp->super_res_coefficients(super_res_info_[plane].upscaled_width,
                                    super_res_info_[plane].initial_subpixel_x,
                                    super_res_info_[plane].step,
                                    superres_coefficients_[plane]);
      } while (++plane < number_loops);
    }
  }
  // By default every stage reads and writes at the start of the frame buffer.
  int plane = kPlaneY;
  do {
    loop_restoration_buffer_[plane] = frame_buffer_.data(plane);
    cdef_buffer_[plane] = frame_buffer_.data(plane);
    superres_buffer_[plane] = frame_buffer_.data(plane);
    source_buffer_[plane] = frame_buffer_.data(plane);
  } while (++plane < planes_);
  // When any of cdef, loop restoration or SuperRes is enabled, the start
  // pointers of the earlier stages are moved by a cumulative (horizontal,
  // vertical) shift. |loop_restoration_buffer_| is never shifted. The asserts
  // below check that the total shift fits in the right and bottom borders.
  if (DoCdef() || DoRestoration() || DoSuperRes()) {
    plane = kPlaneY;
    const int pixel_size_log2 = pixel_size_log2_;
    do {
      int horizontal_shift = 0;
      int vertical_shift = 0;
      if (DoRestoration() &&
          loop_restoration_.type[plane] != kLoopRestorationTypeNone) {
        horizontal_shift += frame_buffer_.alignment();
        // The vertical shift is only applied when cdef is off and there is no
        // post filter thread pool.
        if (!DoCdef() && thread_pool_ == nullptr) {
          vertical_shift += kRestorationVerticalBorder;
        }
        superres_buffer_[plane] +=
            vertical_shift * frame_buffer_.stride(plane) +
            (horizontal_shift << pixel_size_log2);
      }
      if (DoSuperRes()) {
        vertical_shift += kSuperResVerticalBorder;
      }
      cdef_buffer_[plane] += vertical_shift * frame_buffer_.stride(plane) +
                             (horizontal_shift << pixel_size_log2);
      if (DoCdef() && thread_pool_ == nullptr) {
        horizontal_shift += frame_buffer_.alignment();
        vertical_shift += kCdefBorder;
      }
      assert(horizontal_shift <= frame_buffer_.right_border(plane));
      assert(vertical_shift <= frame_buffer_.bottom_border(plane));
      source_buffer_[plane] += vertical_shift * frame_buffer_.stride(plane) +
                               (horizontal_shift << pixel_size_log2);
    } while (++plane < planes_);
  }
}
171
172 // The following example illustrates how ExtendFrame() extends a frame.
173 // Suppose the frame width is 8 and height is 4, and left, right, top, and
174 // bottom are all equal to 3.
175 //
176 // Before:
177 //
178 // ABCDEFGH
179 // IJKLMNOP
180 // QRSTUVWX
181 // YZabcdef
182 //
183 // After:
184 //
185 // AAA|ABCDEFGH|HHH [3]
186 // AAA|ABCDEFGH|HHH
187 // AAA|ABCDEFGH|HHH
188 // ---+--------+---
189 // AAA|ABCDEFGH|HHH [1]
190 // III|IJKLMNOP|PPP
191 // QQQ|QRSTUVWX|XXX
192 // YYY|YZabcdef|fff
193 // ---+--------+---
194 // YYY|YZabcdef|fff [2]
195 // YYY|YZabcdef|fff
196 // YYY|YZabcdef|fff
197 //
198 // ExtendFrame() first extends the rows to the left and to the right[1]. Then
199 // it copies the extended last row to the bottom borders[2]. Finally it copies
200 // the extended first row to the top borders[3].
201 // static
202 template <typename Pixel>
ExtendFrame(Pixel * const frame_start,const int width,const int height,const ptrdiff_t stride,const int left,const int right,const int top,const int bottom)203 void PostFilter::ExtendFrame(Pixel* const frame_start, const int width,
204 const int height, const ptrdiff_t stride,
205 const int left, const int right, const int top,
206 const int bottom) {
207 Pixel* src = frame_start;
208 // Copy to left and right borders.
209 int y = height;
210 do {
211 ExtendLine<Pixel>(src, width, left, right);
212 src += stride;
213 } while (--y != 0);
214 // Copy to bottom borders. For performance we copy |stride| pixels
215 // (including some padding pixels potentially) in each row, ending at the
216 // bottom right border pixel. In the diagram the asterisks indicate padding
217 // pixels.
218 //
219 // |<--- stride --->|
220 // **YYY|YZabcdef|fff <-- Copy from the extended last row.
221 // -----+--------+---
222 // **YYY|YZabcdef|fff
223 // **YYY|YZabcdef|fff
224 // **YYY|YZabcdef|fff <-- bottom right border pixel
225 assert(src == frame_start + height * stride);
226 Pixel* dst = src - left;
227 src = dst - stride;
228 for (int y = 0; y < bottom; ++y) {
229 memcpy(dst, src, sizeof(Pixel) * stride);
230 dst += stride;
231 }
232 // Copy to top borders. For performance we copy |stride| pixels (including
233 // some padding pixels potentially) in each row, starting from the top left
234 // border pixel. In the diagram the asterisks indicate padding pixels.
235 //
236 // +-- top left border pixel
237 // |
238 // v
239 // AAA|ABCDEFGH|HHH**
240 // AAA|ABCDEFGH|HHH**
241 // AAA|ABCDEFGH|HHH**
242 // ---+--------+-----
243 // AAA|ABCDEFGH|HHH** <-- Copy from the extended first row.
244 // |<--- stride --->|
245 src = frame_start - left;
246 dst = frame_start - left - top * stride;
247 for (int y = 0; y < top; ++y) {
248 memcpy(dst, src, sizeof(Pixel) * stride);
249 dst += stride;
250 }
251 }
252
// Explicit instantiations of ExtendFrame() for the two pixel types that
// ExtendFrameBoundary() dispatches to. The uint16_t instantiation is only
// emitted when LIBGAV1_MAX_BITDEPTH is at least 10.
template void PostFilter::ExtendFrame<uint8_t>(uint8_t* const frame_start,
                                               const int width,
                                               const int height,
                                               const ptrdiff_t stride,
                                               const int left, const int right,
                                               const int top, const int bottom);

#if LIBGAV1_MAX_BITDEPTH >= 10
template void PostFilter::ExtendFrame<uint16_t>(
    uint16_t* const frame_start, const int width, const int height,
    const ptrdiff_t stride, const int left, const int right, const int top,
    const int bottom);
#endif
266
ExtendFrameBoundary(uint8_t * const frame_start,const int width,const int height,const ptrdiff_t stride,const int left,const int right,const int top,const int bottom) const267 void PostFilter::ExtendFrameBoundary(uint8_t* const frame_start,
268 const int width, const int height,
269 const ptrdiff_t stride, const int left,
270 const int right, const int top,
271 const int bottom) const {
272 #if LIBGAV1_MAX_BITDEPTH >= 10
273 if (bitdepth_ >= 10) {
274 ExtendFrame<uint16_t>(reinterpret_cast<uint16_t*>(frame_start), width,
275 height, stride >> 1, left, right, top, bottom);
276 return;
277 }
278 #endif
279 ExtendFrame<uint8_t>(frame_start, width, height, stride, left, right, top,
280 bottom);
281 }
282
ExtendBordersForReferenceFrame()283 void PostFilter::ExtendBordersForReferenceFrame() {
284 if (frame_header_.refresh_frame_flags == 0) return;
285 const int upscaled_width = frame_header_.upscaled_width;
286 const int height = frame_header_.height;
287 int plane = kPlaneY;
288 do {
289 const int plane_width =
290 SubsampledValue(upscaled_width, subsampling_x_[plane]);
291 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
292 assert(frame_buffer_.left_border(plane) >= kMinLeftBorderPixels &&
293 frame_buffer_.right_border(plane) >= kMinRightBorderPixels &&
294 frame_buffer_.top_border(plane) >= kMinTopBorderPixels &&
295 frame_buffer_.bottom_border(plane) >= kMinBottomBorderPixels);
296 // plane subsampling_x_ left_border
297 // Y N/A 64, 48
298 // U,V 0 64, 48
299 // U,V 1 32, 16
300 assert(frame_buffer_.left_border(plane) >= 16);
301 // The |left| argument to ExtendFrameBoundary() must be at least
302 // kMinLeftBorderPixels (13) for warp.
303 static_assert(16 >= kMinLeftBorderPixels, "");
304 ExtendFrameBoundary(
305 frame_buffer_.data(plane), plane_width, plane_height,
306 frame_buffer_.stride(plane), frame_buffer_.left_border(plane),
307 frame_buffer_.right_border(plane), frame_buffer_.top_border(plane),
308 frame_buffer_.bottom_border(plane));
309 } while (++plane < planes_);
310 }
311
CopyDeblockedPixels(Plane plane,int row4x4)312 void PostFilter::CopyDeblockedPixels(Plane plane, int row4x4) {
313 const ptrdiff_t src_stride = frame_buffer_.stride(plane);
314 const uint8_t* const src = GetSourceBuffer(plane, row4x4, 0);
315 const int row_offset = DivideBy4(row4x4);
316 const ptrdiff_t dst_stride = loop_restoration_border_.stride(plane);
317 uint8_t* dst = loop_restoration_border_.data(plane) + row_offset * dst_stride;
318 const int num_pixels = SubsampledValue(MultiplyBy4(frame_header_.columns4x4),
319 subsampling_x_[plane]);
320 const int row_width = num_pixels << pixel_size_log2_;
321 int last_valid_row = -1;
322 const int plane_height =
323 SubsampledValue(frame_header_.height, subsampling_y_[plane]);
324 int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
325 const int absolute_row = (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
326 for (int i = 0; i < 4; ++i, ++row) {
327 if (absolute_row + i >= plane_height) {
328 if (last_valid_row == -1) break;
329 // If we run out of rows, copy the last valid row (mimics the bottom
330 // border extension).
331 row = last_valid_row;
332 }
333 memcpy(dst, src + row * src_stride, row_width);
334 last_valid_row = row;
335 dst += dst_stride;
336 }
337 }
338
// Extends the borders of the rows that belong to one superblock row, for
// every plane.
//
// |row4x4| and |sb4x4| give the start and the height of the superblock row in
// units of 4x4 blocks. The rows that are actually processed lag 8 luma rows
// behind that position (see |row_offset| and |height_offset|).
//
// When |for_loop_restoration| is true, the rows are taken from
// |superres_buffer_| and the loop restoration border sizes are used. When it
// is false, the rows are taken from |frame_buffer_| with its full border
// sizes, and |progress_row_| is updated from the Y plane.
void PostFilter::CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
                                                bool for_loop_restoration) {
  // Number of rows to be subtracted from the start position described by
  // row4x4. We always lag by 8 rows (to account for in-loop post filters).
  const int row_offset = (row4x4 == 0) ? 0 : 8;
  // Number of rows to be subtracted from the height described by sb4x4.
  const int height_offset = (row4x4 == 0) ? 8 : 0;
  // If cdef is off and post filter multithreading is off, then loop restoration
  // needs 2 extra rows for the bottom border in each plane.
  const int extra_rows =
      (for_loop_restoration && thread_pool_ == nullptr && !DoCdef()) ? 2 : 0;
  const int upscaled_width = frame_header_.upscaled_width;
  const int height = frame_header_.height;
  int plane = kPlaneY;
  do {
    const int plane_width =
        SubsampledValue(upscaled_width, subsampling_x_[plane]);
    const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
    const int row = (MultiplyBy4(row4x4) - row_offset) >> subsampling_y_[plane];
    assert(row >= 0);
    // Note that this leaves the loop, so the remaining planes are skipped too.
    if (row >= plane_height) break;
    // Clamp the number of rows so that it does not run past the bottom of the
    // plane.
    const int num_rows =
        std::min(SubsampledValue(MultiplyBy4(sb4x4) - height_offset,
                                 subsampling_y_[plane]) +
                     extra_rows,
                 plane_height - row);
    // We only need to track the progress of the Y plane since the progress of
    // the U and V planes will be inferred from the progress of the Y plane.
    if (!for_loop_restoration && plane == kPlaneY) {
      progress_row_ = row + num_rows;
    }
    // The bottom border is only filled when these rows reach the last row of
    // the plane.
    const bool copy_bottom = row + num_rows == plane_height;
    const ptrdiff_t stride = frame_buffer_.stride(plane);
    uint8_t* const start = (for_loop_restoration ? superres_buffer_[plane]
                                                 : frame_buffer_.data(plane)) +
                           row * stride;
#if LIBGAV1_MSAN
    // Number of pixels in each row beyond the right border (the stride is
    // converted from bytes to pixels first).
    const int right_padding =
        (frame_buffer_.stride(plane) >> static_cast<int>(bitdepth_ > 8)) -
        ((frame_buffer_.left_border(plane) + frame_buffer_.width(plane) +
          frame_buffer_.right_border(plane)));
    const int padded_right_border_size =
        frame_buffer_.right_border(plane) + right_padding;
    // The optimized loop restoration code may read into the next row's left
    // border depending on the start of the last superblock and the size of the
    // right border. This is safe as the post filter is applied after
    // reconstruction is complete and the threaded implementations do not read
    // from the left border.
    const int left_border_overread =
        (for_loop_restoration && padded_right_border_size < 64)
            ? 63 - padded_right_border_size
            : 0;
    assert(!for_loop_restoration || left_border_overread == 0 ||
           (frame_buffer_.bottom_border(plane) > 0 &&
            left_border_overread <= frame_buffer_.left_border(plane)));
    const int left_border = (for_loop_restoration && left_border_overread == 0)
                                ? kRestorationHorizontalBorder
                                : frame_buffer_.left_border(plane);
    // The optimized loop restoration code will overread the visible frame
    // buffer into the right border. Extend the right boundary further to
    // prevent msan warnings.
    const int right_border = for_loop_restoration
                                 ? std::min(padded_right_border_size, 63)
                                 : frame_buffer_.right_border(plane);
#else
    const int left_border = for_loop_restoration
                                ? kRestorationHorizontalBorder
                                : frame_buffer_.left_border(plane);
    const int right_border = for_loop_restoration
                                 ? kRestorationHorizontalBorder
                                 : frame_buffer_.right_border(plane);
#endif
    // The top border is only filled for the rows that start at the top of the
    // plane.
    const int top_border =
        (row == 0) ? (for_loop_restoration ? kRestorationVerticalBorder
                                           : frame_buffer_.top_border(plane))
                   : 0;
    const int bottom_border =
        copy_bottom
            ? (for_loop_restoration ? kRestorationVerticalBorder
                                    : frame_buffer_.bottom_border(plane))
            : 0;
    ExtendFrameBoundary(start, plane_width, num_rows, stride, left_border,
                        right_border, top_border, bottom_border);
  } while (++plane < planes_);
}
424
SetupLoopRestorationBorder(const int row4x4)425 void PostFilter::SetupLoopRestorationBorder(const int row4x4) {
426 assert(row4x4 >= 0);
427 assert(!DoCdef());
428 assert(DoRestoration());
429 const int upscaled_width = frame_header_.upscaled_width;
430 const int height = frame_header_.height;
431 int plane = kPlaneY;
432 do {
433 if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
434 continue;
435 }
436 const int row_offset = DivideBy4(row4x4);
437 const int num_pixels =
438 SubsampledValue(upscaled_width, subsampling_x_[plane]);
439 const int row_width = num_pixels << pixel_size_log2_;
440 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
441 const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
442 const int absolute_row =
443 (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
444 const ptrdiff_t src_stride = frame_buffer_.stride(plane);
445 const uint8_t* src =
446 GetSuperResBuffer(static_cast<Plane>(plane), row4x4, 0) +
447 row * src_stride;
448 const ptrdiff_t dst_stride = loop_restoration_border_.stride(plane);
449 uint8_t* dst =
450 loop_restoration_border_.data(plane) + row_offset * dst_stride;
451 for (int i = 0; i < 4; ++i) {
452 memcpy(dst, src, row_width);
453 #if LIBGAV1_MAX_BITDEPTH >= 10
454 if (bitdepth_ >= 10) {
455 ExtendLine<uint16_t>(dst, num_pixels, kRestorationHorizontalBorder,
456 kRestorationHorizontalBorder);
457 } else // NOLINT.
458 #endif
459 ExtendLine<uint8_t>(dst, num_pixels, kRestorationHorizontalBorder,
460 kRestorationHorizontalBorder);
461 // If we run out of rows, copy the last valid row (mimics the bottom
462 // border extension).
463 if (absolute_row + i < plane_height - 1) src += src_stride;
464 dst += dst_stride;
465 }
466 } while (++plane < planes_);
467 }
468
// Populates |loop_restoration_border_| for the rows covered by
// [row4x4_start, row4x4_start + sb4x4), in steps of 16 4x4 rows. This overload
// is for the case where both cdef and loop restoration are on (see the
// asserts).
//
// For each step, four rows per plane are written to the border buffer, either
// by running SuperRes directly into it or by copying the deblocked pixels.
// The rows of planes that use loop restoration are then extended to the left
// and right by kRestorationHorizontalBorder pixels.
void PostFilter::SetupLoopRestorationBorder(int row4x4_start, int sb4x4) {
  assert(row4x4_start >= 0);
  assert(DoCdef());
  assert(DoRestoration());
  for (int sb_y = 0; sb_y < sb4x4; sb_y += 16) {
    const int row4x4 = row4x4_start + sb_y;
    const int row_offset_start = DivideBy4(row4x4);
    // Destination of the first of the four rows, for each plane.
    const std::array<uint8_t*, kMaxPlanes> dst = {
        loop_restoration_border_.data(kPlaneY) +
            row_offset_start * static_cast<ptrdiff_t>(
                                   loop_restoration_border_.stride(kPlaneY)),
        loop_restoration_border_.data(kPlaneU) +
            row_offset_start * static_cast<ptrdiff_t>(
                                   loop_restoration_border_.stride(kPlaneU)),
        loop_restoration_border_.data(kPlaneV) +
            row_offset_start * static_cast<ptrdiff_t>(
                                   loop_restoration_border_.stride(kPlaneV))};
    // If SuperRes is enabled, then we apply SuperRes for the rows to be copied
    // directly with |loop_restoration_border_| as the destination. Otherwise,
    // we simply copy the rows.
    if (DoSuperRes()) {
      // Only the entries of planes that use loop restoration are filled in
      // |src|. The other planes get a row count of zero in |rows|.
      std::array<uint8_t*, kMaxPlanes> src;
      std::array<int, kMaxPlanes> rows;
      const int height = frame_header_.height;
      int plane = kPlaneY;
      do {
        if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
          rows[plane] = 0;
          continue;
        }
        const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
        const int row = kLoopRestorationBorderRows[subsampling_y_[plane]];
        const int absolute_row =
            (MultiplyBy4(row4x4) >> subsampling_y_[plane]) + row;
        src[plane] = GetSourceBuffer(static_cast<Plane>(plane), row4x4, 0) +
                     row * static_cast<ptrdiff_t>(frame_buffer_.stride(plane));
        // Number of the four rows that are inside the plane.
        rows[plane] = Clip3(plane_height - absolute_row, 0, 4);
      } while (++plane < planes_);
      ApplySuperRes(src, rows, /*line_buffer_row=*/-1, dst,
                    /*dst_is_loop_restoration_border=*/true);
      // If we run out of rows, copy the last valid row (mimics the bottom
      // border extension).
      plane = kPlaneY;
      do {
        // Nothing to replicate when no row or all four rows were produced.
        if (rows[plane] == 0 || rows[plane] >= 4) continue;
        const ptrdiff_t stride = loop_restoration_border_.stride(plane);
        uint8_t* dst_line = dst[plane] + rows[plane] * stride;
        const uint8_t* const src_line = dst_line - stride;
        // Width of one upscaled row in bytes.
        const int upscaled_width = super_res_info_[plane].upscaled_width
                                   << pixel_size_log2_;
        for (int i = rows[plane]; i < 4; ++i) {
          memcpy(dst_line, src_line, upscaled_width);
          dst_line += stride;
        }
      } while (++plane < planes_);
    } else {
      // Without SuperRes the rows are copied for every plane, regardless of
      // the plane's loop restoration type.
      int plane = kPlaneY;
      do {
        CopyDeblockedPixels(static_cast<Plane>(plane), row4x4);
      } while (++plane < planes_);
    }
    // Extend the left and right boundaries needed for loop restoration.
    const int upscaled_width = frame_header_.upscaled_width;
    int plane = kPlaneY;
    do {
      if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
        continue;
      }
      uint8_t* dst_line = dst[plane];
      const int plane_width =
          SubsampledValue(upscaled_width, subsampling_x_[plane]);
      for (int i = 0; i < 4; ++i) {
#if LIBGAV1_MAX_BITDEPTH >= 10
        if (bitdepth_ >= 10) {
          ExtendLine<uint16_t>(dst_line, plane_width,
                               kRestorationHorizontalBorder,
                               kRestorationHorizontalBorder);
        } else  // NOLINT.
#endif
        {
          ExtendLine<uint8_t>(dst_line, plane_width,
                              kRestorationHorizontalBorder,
                              kRestorationHorizontalBorder);
        }
        dst_line += loop_restoration_border_.stride(plane);
      }
    } while (++plane < planes_);
  }
}
558
RunJobs(WorkerFunction worker)559 void PostFilter::RunJobs(WorkerFunction worker) {
560 std::atomic<int> row4x4(0);
561 const int num_workers = thread_pool_->num_threads();
562 BlockingCounter pending_workers(num_workers);
563 for (int i = 0; i < num_workers; ++i) {
564 thread_pool_->Schedule([this, &row4x4, &pending_workers, worker]() {
565 (this->*worker)(&row4x4);
566 pending_workers.Decrement();
567 });
568 }
569 // Run the jobs on the current thread.
570 (this->*worker)(&row4x4);
571 // Wait for the threadpool jobs to finish.
572 pending_workers.Wait();
573 }
574
ApplyFilteringThreaded()575 void PostFilter::ApplyFilteringThreaded() {
576 if (DoDeblock()) {
577 RunJobs(&PostFilter::DeblockFilterWorker<kLoopFilterTypeVertical>);
578 RunJobs(&PostFilter::DeblockFilterWorker<kLoopFilterTypeHorizontal>);
579 }
580 if (DoCdef() && DoRestoration()) {
581 for (int row4x4 = 0; row4x4 < frame_header_.rows4x4;
582 row4x4 += kNum4x4InLoopFilterUnit) {
583 SetupLoopRestorationBorder(row4x4, kNum4x4InLoopFilterUnit);
584 }
585 }
586 if (DoCdef()) {
587 for (int row4x4 = 0; row4x4 < frame_header_.rows4x4;
588 row4x4 += kNum4x4InLoopFilterUnit) {
589 SetupCdefBorder(row4x4);
590 }
591 RunJobs(&PostFilter::ApplyCdefWorker);
592 }
593 if (DoSuperRes()) ApplySuperResThreaded();
594 if (DoRestoration()) {
595 if (!DoCdef()) {
596 int row4x4 = 0;
597 do {
598 SetupLoopRestorationBorder(row4x4);
599 row4x4 += kNum4x4InLoopFilterUnit;
600 } while (row4x4 < frame_header_.rows4x4);
601 }
602 RunJobs(&PostFilter::ApplyLoopRestorationWorker);
603 }
604 ExtendBordersForReferenceFrame();
605 }
606
ApplyFilteringForOneSuperBlockRow(int row4x4,int sb4x4,bool is_last_row,bool do_deblock)607 int PostFilter::ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4,
608 bool is_last_row,
609 bool do_deblock) {
610 if (row4x4 < 0) return -1;
611 if (DoDeblock() && do_deblock) {
612 VerticalDeblockFilter(row4x4, row4x4 + sb4x4, 0, frame_header_.columns4x4);
613 HorizontalDeblockFilter(row4x4, row4x4 + sb4x4, 0,
614 frame_header_.columns4x4);
615 }
616 if (DoRestoration() && DoCdef()) {
617 SetupLoopRestorationBorder(row4x4, sb4x4);
618 }
619 if (DoCdef()) {
620 ApplyCdefForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
621 }
622 if (DoSuperRes()) {
623 ApplySuperResForOneSuperBlockRow(row4x4, sb4x4, is_last_row);
624 }
625 if (DoRestoration()) {
626 CopyBordersForOneSuperBlockRow(row4x4, sb4x4, true);
627 ApplyLoopRestoration(row4x4, sb4x4);
628 if (is_last_row) {
629 // Loop restoration operates with a lag of 8 rows. So make sure to cover
630 // all the rows of the last superblock row.
631 CopyBordersForOneSuperBlockRow(row4x4 + sb4x4, 16, true);
632 ApplyLoopRestoration(row4x4 + sb4x4, 16);
633 }
634 }
635 if (frame_header_.refresh_frame_flags != 0 && DoBorderExtensionInLoop()) {
636 CopyBordersForOneSuperBlockRow(row4x4, sb4x4, false);
637 if (is_last_row) {
638 CopyBordersForOneSuperBlockRow(row4x4 + sb4x4, 16, false);
639 }
640 }
641 if (is_last_row && !DoBorderExtensionInLoop()) {
642 ExtendBordersForReferenceFrame();
643 }
644 return is_last_row ? frame_header_.height : progress_row_;
645 }
646
647 } // namespace libgav1
648