// Copyright 2020 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 #include "src/post_filter.h"
15 #include "src/utils/blocking_counter.h"
16
17 namespace libgav1 {
18
19 template <typename Pixel>
ApplyLoopRestorationForOneRow(const Pixel * src_buffer,const ptrdiff_t stride,const Plane plane,const int plane_height,const int plane_width,const int unit_y,const int unit_row,const int current_process_unit_height,const int plane_unit_size,Pixel * dst_buffer)20 void PostFilter::ApplyLoopRestorationForOneRow(
21 const Pixel* src_buffer, const ptrdiff_t stride, const Plane plane,
22 const int plane_height, const int plane_width, const int unit_y,
23 const int unit_row, const int current_process_unit_height,
24 const int plane_unit_size, Pixel* dst_buffer) {
25 const int num_horizontal_units =
26 restoration_info_->num_horizontal_units(static_cast<Plane>(plane));
27 const RestorationUnitInfo* const restoration_info =
28 restoration_info_->loop_restoration_info(static_cast<Plane>(plane),
29 unit_row * num_horizontal_units);
30 const bool in_place = DoCdef() || thread_pool_ != nullptr;
31 const Pixel* border = nullptr;
32 ptrdiff_t border_stride = 0;
33 src_buffer += unit_y * stride;
34 if (in_place) {
35 const int border_unit_y = std::max(
36 RightShiftWithCeiling(unit_y, 4 - subsampling_y_[plane]) - 4, 0);
37 border_stride = loop_restoration_border_.stride(plane) / sizeof(Pixel);
38 border =
39 reinterpret_cast<const Pixel*>(loop_restoration_border_.data(plane)) +
40 border_unit_y * border_stride;
41 }
42 int unit_column = 0;
43 int column = 0;
44 do {
45 const int current_process_unit_width =
46 std::min(plane_unit_size, plane_width - column);
47 const Pixel* src = src_buffer + column;
48 unit_column = std::min(unit_column, num_horizontal_units - 1);
49 if (restoration_info[unit_column].type == kLoopRestorationTypeNone) {
50 Pixel* dst = dst_buffer + column;
51 if (in_place) {
52 int k = current_process_unit_height;
53 do {
54 memmove(dst, src, current_process_unit_width * sizeof(Pixel));
55 src += stride;
56 dst += stride;
57 } while (--k != 0);
58 } else {
59 CopyPlane(src, stride, current_process_unit_width,
60 current_process_unit_height, dst, stride);
61 }
62 } else {
63 const Pixel* top_border = src - kRestorationVerticalBorder * stride;
64 ptrdiff_t top_border_stride = stride;
65 const Pixel* bottom_border = src + current_process_unit_height * stride;
66 ptrdiff_t bottom_border_stride = stride;
67 const bool frame_bottom_border =
68 (unit_y + current_process_unit_height >= plane_height);
69 if (in_place && (unit_y != 0 || !frame_bottom_border)) {
70 const Pixel* loop_restoration_border = border + column;
71 if (unit_y != 0) {
72 top_border = loop_restoration_border;
73 top_border_stride = border_stride;
74 loop_restoration_border += 4 * border_stride;
75 }
76 if (!frame_bottom_border) {
77 bottom_border = loop_restoration_border +
78 kRestorationVerticalBorder * border_stride;
79 bottom_border_stride = border_stride;
80 }
81 }
82 #if LIBGAV1_MSAN
83 // The optimized loop filter may read past initialized values within the
84 // buffer.
85 RestorationBuffer restoration_buffer = {};
86 #else
87 RestorationBuffer restoration_buffer;
88 #endif
89 const LoopRestorationType type = restoration_info[unit_column].type;
90 assert(type == kLoopRestorationTypeSgrProj ||
91 type == kLoopRestorationTypeWiener);
92 const dsp::LoopRestorationFunc restoration_func =
93 dsp_.loop_restorations[type - 2];
94 restoration_func(restoration_info[unit_column], src, stride, top_border,
95 top_border_stride, bottom_border, bottom_border_stride,
96 current_process_unit_width, current_process_unit_height,
97 &restoration_buffer, dst_buffer + column);
98 }
99 ++unit_column;
100 column += plane_unit_size;
101 } while (column < plane_width);
102 }
103
104 template <typename Pixel>
ApplyLoopRestorationForOneSuperBlockRow(const int row4x4_start,const int sb4x4)105 void PostFilter::ApplyLoopRestorationForOneSuperBlockRow(const int row4x4_start,
106 const int sb4x4) {
107 assert(row4x4_start >= 0);
108 assert(DoRestoration());
109 int plane = kPlaneY;
110 const int upscaled_width = frame_header_.upscaled_width;
111 const int height = frame_header_.height;
112 do {
113 if (loop_restoration_.type[plane] == kLoopRestorationTypeNone) {
114 continue;
115 }
116 const ptrdiff_t stride = frame_buffer_.stride(plane) / sizeof(Pixel);
117 const int unit_height_offset =
118 kRestorationUnitOffset >> subsampling_y_[plane];
119 const int plane_height = SubsampledValue(height, subsampling_y_[plane]);
120 const int plane_width =
121 SubsampledValue(upscaled_width, subsampling_x_[plane]);
122 const int plane_unit_size = 1 << loop_restoration_.unit_size_log2[plane];
123 const int plane_process_unit_height =
124 kRestorationUnitHeight >> subsampling_y_[plane];
125 int y = (row4x4_start == 0)
126 ? 0
127 : (MultiplyBy4(row4x4_start) >> subsampling_y_[plane]) -
128 unit_height_offset;
129 int expected_height = plane_process_unit_height -
130 ((row4x4_start == 0) ? unit_height_offset : 0);
131 int current_process_unit_height;
132 for (int sb_y = 0; sb_y < sb4x4;
133 sb_y += 16, y += current_process_unit_height) {
134 if (y >= plane_height) break;
135 const int unit_row = std::min(
136 (y + unit_height_offset) >> loop_restoration_.unit_size_log2[plane],
137 restoration_info_->num_vertical_units(static_cast<Plane>(plane)) - 1);
138 current_process_unit_height = std::min(expected_height, plane_height - y);
139 expected_height = plane_process_unit_height;
140 ApplyLoopRestorationForOneRow<Pixel>(
141 reinterpret_cast<Pixel*>(superres_buffer_[plane]), stride,
142 static_cast<Plane>(plane), plane_height, plane_width, y, unit_row,
143 current_process_unit_height, plane_unit_size,
144 reinterpret_cast<Pixel*>(loop_restoration_buffer_[plane]) +
145 y * stride);
146 }
147 } while (++plane < planes_);
148 }
149
ApplyLoopRestoration(const int row4x4_start,const int sb4x4)150 void PostFilter::ApplyLoopRestoration(const int row4x4_start, const int sb4x4) {
151 #if LIBGAV1_MAX_BITDEPTH >= 10
152 if (bitdepth_ >= 10) {
153 ApplyLoopRestorationForOneSuperBlockRow<uint16_t>(row4x4_start, sb4x4);
154 return;
155 }
156 #endif
157 ApplyLoopRestorationForOneSuperBlockRow<uint8_t>(row4x4_start, sb4x4);
158 }
159
ApplyLoopRestorationWorker(std::atomic<int> * row4x4_atomic)160 void PostFilter::ApplyLoopRestorationWorker(std::atomic<int>* row4x4_atomic) {
161 int row4x4;
162 // Loop Restoration operates with a lag of 8 rows (4 for chroma with
163 // subsampling) and hence we need to make sure to cover the last 8 rows of the
164 // last superblock row. So we run this loop for an extra iteration to
165 // accomplish that.
166 const int row4x4_end = frame_header_.rows4x4 + kNum4x4InLoopRestorationUnit;
167 while ((row4x4 = row4x4_atomic->fetch_add(kNum4x4InLoopRestorationUnit,
168 std::memory_order_relaxed)) <
169 row4x4_end) {
170 CopyBordersForOneSuperBlockRow(row4x4, kNum4x4InLoopRestorationUnit,
171 /*for_loop_restoration=*/true);
172 #if LIBGAV1_MAX_BITDEPTH >= 10
173 if (bitdepth_ >= 10) {
174 ApplyLoopRestorationForOneSuperBlockRow<uint16_t>(
175 row4x4, kNum4x4InLoopRestorationUnit);
176 continue;
177 }
178 #endif
179 ApplyLoopRestorationForOneSuperBlockRow<uint8_t>(
180 row4x4, kNum4x4InLoopRestorationUnit);
181 }
182 }
183
184 } // namespace libgav1
185