1*09537850SAkhilesh Sanikop // Copyright 2019 The libgav1 Authors
2*09537850SAkhilesh Sanikop //
3*09537850SAkhilesh Sanikop // Licensed under the Apache License, Version 2.0 (the "License");
4*09537850SAkhilesh Sanikop // you may not use this file except in compliance with the License.
5*09537850SAkhilesh Sanikop // You may obtain a copy of the License at
6*09537850SAkhilesh Sanikop //
7*09537850SAkhilesh Sanikop // http://www.apache.org/licenses/LICENSE-2.0
8*09537850SAkhilesh Sanikop //
9*09537850SAkhilesh Sanikop // Unless required by applicable law or agreed to in writing, software
10*09537850SAkhilesh Sanikop // distributed under the License is distributed on an "AS IS" BASIS,
11*09537850SAkhilesh Sanikop // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*09537850SAkhilesh Sanikop // See the License for the specific language governing permissions and
13*09537850SAkhilesh Sanikop // limitations under the License.
14*09537850SAkhilesh Sanikop
15*09537850SAkhilesh Sanikop #include "src/dsp/convolve.h"
16*09537850SAkhilesh Sanikop
17*09537850SAkhilesh Sanikop #include <cassert>
18*09537850SAkhilesh Sanikop #include <cstddef>
19*09537850SAkhilesh Sanikop #include <cstdint>
20*09537850SAkhilesh Sanikop #include <cstdlib>
21*09537850SAkhilesh Sanikop #include <cstring>
22*09537850SAkhilesh Sanikop
23*09537850SAkhilesh Sanikop #include "src/dsp/constants.h"
24*09537850SAkhilesh Sanikop #include "src/dsp/dsp.h"
25*09537850SAkhilesh Sanikop #include "src/utils/common.h"
26*09537850SAkhilesh Sanikop #include "src/utils/constants.h"
27*09537850SAkhilesh Sanikop
28*09537850SAkhilesh Sanikop namespace libgav1 {
29*09537850SAkhilesh Sanikop namespace dsp {
30*09537850SAkhilesh Sanikop namespace {
31*09537850SAkhilesh Sanikop
// Number of columns the source pointer is moved to the left before the
// horizontal filter taps are applied in the non-scaled convolve functions.
constexpr int kHorizontalOffset = 3;
// Number of rows the source pointer is moved up before the vertical filter
// taps are applied in the non-scaled convolve functions.
constexpr int kVerticalOffset = 3;
34*09537850SAkhilesh Sanikop
// Compound prediction output ranges from ConvolveTest.ShowRange.
// In some cases, the horizontal or vertical filter will be omitted. This table
// shows the general case, where the downscaled horizontal output is input to
// the vertical filter via the |intermediate_result| array. The final output is
// either Pixel or compound values, depending on the |is_compound| variable.
// Bitdepth:  8 Input range:            [       0,      255]
//   Horizontal upscaled range:         [   -7140,    23460]
//   Horizontal downscaled range:       [   -1785,     5865]
//   Vertical upscaled range:           [ -328440,   589560]
//   Pixel output range:                [       0,      255]
//   Compound output range:             [   -5132,     9212]
//
// Bitdepth: 10 Input range:            [       0,     1023]
//   Horizontal upscaled range:         [  -28644,    94116]
//   Horizontal downscaled range:       [   -7161,    23529]
//   Vertical upscaled range:           [-1317624,  2365176]
//   Pixel output range:                [       0,     1023]
//   Compound output range:             [    3988,    61532]
//
// Bitdepth: 12 Input range:            [       0,     4095]
//   Horizontal upscaled range:         [ -114660,   376740]
//   Horizontal downscaled range:       [   -7166,    23546]
//   Vertical upscaled range:           [-1318560,  2366880]
//   Pixel output range:                [       0,     4095]
//   Compound output range:             [    3974,    61559]
60*09537850SAkhilesh Sanikop
61*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveScale2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)62*09537850SAkhilesh Sanikop void ConvolveScale2D_C(const void* LIBGAV1_RESTRICT const reference,
63*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
64*09537850SAkhilesh Sanikop const int horizontal_filter_index,
65*09537850SAkhilesh Sanikop const int vertical_filter_index, const int subpixel_x,
66*09537850SAkhilesh Sanikop const int subpixel_y, const int step_x, const int step_y,
67*09537850SAkhilesh Sanikop const int width, const int height,
68*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction,
69*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
70*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
71*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
72*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
73*09537850SAkhilesh Sanikop constexpr int kRoundBitsVertical =
74*09537850SAkhilesh Sanikop (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
75*09537850SAkhilesh Sanikop const int intermediate_height =
76*09537850SAkhilesh Sanikop (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
77*09537850SAkhilesh Sanikop kScaleSubPixelBits) +
78*09537850SAkhilesh Sanikop kSubPixelTaps;
79*09537850SAkhilesh Sanikop // The output of the horizontal filter, i.e. the intermediate_result, is
80*09537850SAkhilesh Sanikop // guaranteed to fit in int16_t.
81*09537850SAkhilesh Sanikop int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
82*09537850SAkhilesh Sanikop (2 * kMaxSuperBlockSizeInPixels + 8)];
83*09537850SAkhilesh Sanikop const int intermediate_stride = kMaxSuperBlockSizeInPixels;
84*09537850SAkhilesh Sanikop const int max_pixel_value = (1 << bitdepth) - 1;
85*09537850SAkhilesh Sanikop
86*09537850SAkhilesh Sanikop // Horizontal filter.
87*09537850SAkhilesh Sanikop // Filter types used for width <= 4 are different from those for width > 4.
88*09537850SAkhilesh Sanikop // When width > 4, the valid filter index range is always [0, 3].
89*09537850SAkhilesh Sanikop // When width <= 4, the valid filter index range is always [4, 5].
90*09537850SAkhilesh Sanikop // Similarly for height.
91*09537850SAkhilesh Sanikop int filter_index = GetFilterIndex(horizontal_filter_index, width);
92*09537850SAkhilesh Sanikop int16_t* intermediate = intermediate_result;
93*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference);
94*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
95*09537850SAkhilesh Sanikop auto* dest = static_cast<Pixel*>(prediction);
96*09537850SAkhilesh Sanikop const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
97*09537850SAkhilesh Sanikop const int ref_x = subpixel_x >> kScaleSubPixelBits;
98*09537850SAkhilesh Sanikop // Note: assume the input src is already aligned to the correct start
99*09537850SAkhilesh Sanikop // position.
100*09537850SAkhilesh Sanikop int y = 0;
101*09537850SAkhilesh Sanikop do {
102*09537850SAkhilesh Sanikop int p = subpixel_x;
103*09537850SAkhilesh Sanikop int x = 0;
104*09537850SAkhilesh Sanikop do {
105*09537850SAkhilesh Sanikop int sum = 0;
106*09537850SAkhilesh Sanikop const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
107*09537850SAkhilesh Sanikop const int filter_id = (p >> 6) & kSubPixelMask;
108*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
109*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
110*09537850SAkhilesh Sanikop }
111*09537850SAkhilesh Sanikop intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
112*09537850SAkhilesh Sanikop p += step_x;
113*09537850SAkhilesh Sanikop } while (++x < width);
114*09537850SAkhilesh Sanikop
115*09537850SAkhilesh Sanikop src += src_stride;
116*09537850SAkhilesh Sanikop intermediate += intermediate_stride;
117*09537850SAkhilesh Sanikop } while (++y < intermediate_height);
118*09537850SAkhilesh Sanikop
119*09537850SAkhilesh Sanikop // Vertical filter.
120*09537850SAkhilesh Sanikop filter_index = GetFilterIndex(vertical_filter_index, height);
121*09537850SAkhilesh Sanikop intermediate = intermediate_result;
122*09537850SAkhilesh Sanikop int p = subpixel_y & 1023;
123*09537850SAkhilesh Sanikop y = 0;
124*09537850SAkhilesh Sanikop do {
125*09537850SAkhilesh Sanikop const int filter_id = (p >> 6) & kSubPixelMask;
126*09537850SAkhilesh Sanikop int x = 0;
127*09537850SAkhilesh Sanikop do {
128*09537850SAkhilesh Sanikop int sum = 0;
129*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
130*09537850SAkhilesh Sanikop sum +=
131*09537850SAkhilesh Sanikop kHalfSubPixelFilters[filter_index][filter_id][k] *
132*09537850SAkhilesh Sanikop intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
133*09537850SAkhilesh Sanikop x];
134*09537850SAkhilesh Sanikop }
135*09537850SAkhilesh Sanikop dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
136*09537850SAkhilesh Sanikop max_pixel_value);
137*09537850SAkhilesh Sanikop } while (++x < width);
138*09537850SAkhilesh Sanikop
139*09537850SAkhilesh Sanikop dest += dest_stride;
140*09537850SAkhilesh Sanikop p += step_y;
141*09537850SAkhilesh Sanikop } while (++y < height);
142*09537850SAkhilesh Sanikop }
143*09537850SAkhilesh Sanikop
144*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveCompoundScale2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)145*09537850SAkhilesh Sanikop void ConvolveCompoundScale2D_C(
146*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const reference,
147*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride, const int horizontal_filter_index,
148*09537850SAkhilesh Sanikop const int vertical_filter_index, const int subpixel_x, const int subpixel_y,
149*09537850SAkhilesh Sanikop const int step_x, const int step_y, const int width, const int height,
150*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
151*09537850SAkhilesh Sanikop // All compound functions output to the predictor buffer with |pred_stride|
152*09537850SAkhilesh Sanikop // equal to |width|.
153*09537850SAkhilesh Sanikop assert(pred_stride == width);
154*09537850SAkhilesh Sanikop // Compound functions start at 4x4.
155*09537850SAkhilesh Sanikop assert(width >= 4 && height >= 4);
156*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
157*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
158*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
159*09537850SAkhilesh Sanikop constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
160*09537850SAkhilesh Sanikop const int intermediate_height =
161*09537850SAkhilesh Sanikop (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
162*09537850SAkhilesh Sanikop kScaleSubPixelBits) +
163*09537850SAkhilesh Sanikop kSubPixelTaps;
164*09537850SAkhilesh Sanikop // The output of the horizontal filter, i.e. the intermediate_result, is
165*09537850SAkhilesh Sanikop // guaranteed to fit in int16_t.
166*09537850SAkhilesh Sanikop int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
167*09537850SAkhilesh Sanikop (2 * kMaxSuperBlockSizeInPixels + 8)];
168*09537850SAkhilesh Sanikop const int intermediate_stride = kMaxSuperBlockSizeInPixels;
169*09537850SAkhilesh Sanikop
170*09537850SAkhilesh Sanikop // Horizontal filter.
171*09537850SAkhilesh Sanikop // Filter types used for width <= 4 are different from those for width > 4.
172*09537850SAkhilesh Sanikop // When width > 4, the valid filter index range is always [0, 3].
173*09537850SAkhilesh Sanikop // When width <= 4, the valid filter index range is always [4, 5].
174*09537850SAkhilesh Sanikop // Similarly for height.
175*09537850SAkhilesh Sanikop int filter_index = GetFilterIndex(horizontal_filter_index, width);
176*09537850SAkhilesh Sanikop int16_t* intermediate = intermediate_result;
177*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference);
178*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
179*09537850SAkhilesh Sanikop auto* dest = static_cast<uint16_t*>(prediction);
180*09537850SAkhilesh Sanikop const int ref_x = subpixel_x >> kScaleSubPixelBits;
181*09537850SAkhilesh Sanikop // Note: assume the input src is already aligned to the correct start
182*09537850SAkhilesh Sanikop // position.
183*09537850SAkhilesh Sanikop int y = 0;
184*09537850SAkhilesh Sanikop do {
185*09537850SAkhilesh Sanikop int p = subpixel_x;
186*09537850SAkhilesh Sanikop int x = 0;
187*09537850SAkhilesh Sanikop do {
188*09537850SAkhilesh Sanikop int sum = 0;
189*09537850SAkhilesh Sanikop const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
190*09537850SAkhilesh Sanikop const int filter_id = (p >> 6) & kSubPixelMask;
191*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
192*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
193*09537850SAkhilesh Sanikop }
194*09537850SAkhilesh Sanikop intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
195*09537850SAkhilesh Sanikop p += step_x;
196*09537850SAkhilesh Sanikop } while (++x < width);
197*09537850SAkhilesh Sanikop
198*09537850SAkhilesh Sanikop src += src_stride;
199*09537850SAkhilesh Sanikop intermediate += intermediate_stride;
200*09537850SAkhilesh Sanikop } while (++y < intermediate_height);
201*09537850SAkhilesh Sanikop
202*09537850SAkhilesh Sanikop // Vertical filter.
203*09537850SAkhilesh Sanikop filter_index = GetFilterIndex(vertical_filter_index, height);
204*09537850SAkhilesh Sanikop intermediate = intermediate_result;
205*09537850SAkhilesh Sanikop int p = subpixel_y & 1023;
206*09537850SAkhilesh Sanikop y = 0;
207*09537850SAkhilesh Sanikop do {
208*09537850SAkhilesh Sanikop const int filter_id = (p >> 6) & kSubPixelMask;
209*09537850SAkhilesh Sanikop int x = 0;
210*09537850SAkhilesh Sanikop do {
211*09537850SAkhilesh Sanikop int sum = 0;
212*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
213*09537850SAkhilesh Sanikop sum +=
214*09537850SAkhilesh Sanikop kHalfSubPixelFilters[filter_index][filter_id][k] *
215*09537850SAkhilesh Sanikop intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
216*09537850SAkhilesh Sanikop x];
217*09537850SAkhilesh Sanikop }
218*09537850SAkhilesh Sanikop sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
219*09537850SAkhilesh Sanikop sum += (bitdepth == 8) ? 0 : kCompoundOffset;
220*09537850SAkhilesh Sanikop dest[x] = sum;
221*09537850SAkhilesh Sanikop } while (++x < width);
222*09537850SAkhilesh Sanikop
223*09537850SAkhilesh Sanikop dest += pred_stride;
224*09537850SAkhilesh Sanikop p += step_y;
225*09537850SAkhilesh Sanikop } while (++y < height);
226*09537850SAkhilesh Sanikop }
227*09537850SAkhilesh Sanikop
228*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveCompound2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)229*09537850SAkhilesh Sanikop void ConvolveCompound2D_C(const void* LIBGAV1_RESTRICT const reference,
230*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
231*09537850SAkhilesh Sanikop const int horizontal_filter_index,
232*09537850SAkhilesh Sanikop const int vertical_filter_index,
233*09537850SAkhilesh Sanikop const int horizontal_filter_id,
234*09537850SAkhilesh Sanikop const int vertical_filter_id, const int width,
235*09537850SAkhilesh Sanikop const int height, void* LIBGAV1_RESTRICT prediction,
236*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
237*09537850SAkhilesh Sanikop // All compound functions output to the predictor buffer with |pred_stride|
238*09537850SAkhilesh Sanikop // equal to |width|.
239*09537850SAkhilesh Sanikop assert(pred_stride == width);
240*09537850SAkhilesh Sanikop // Compound functions start at 4x4.
241*09537850SAkhilesh Sanikop assert(width >= 4 && height >= 4);
242*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
243*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
244*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
245*09537850SAkhilesh Sanikop constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
246*09537850SAkhilesh Sanikop const int intermediate_height = height + kSubPixelTaps - 1;
247*09537850SAkhilesh Sanikop // The output of the horizontal filter, i.e. the intermediate_result, is
248*09537850SAkhilesh Sanikop // guaranteed to fit in int16_t.
249*09537850SAkhilesh Sanikop int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
250*09537850SAkhilesh Sanikop (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
251*09537850SAkhilesh Sanikop const int intermediate_stride = kMaxSuperBlockSizeInPixels;
252*09537850SAkhilesh Sanikop
253*09537850SAkhilesh Sanikop // Horizontal filter.
254*09537850SAkhilesh Sanikop // Filter types used for width <= 4 are different from those for width > 4.
255*09537850SAkhilesh Sanikop // When width > 4, the valid filter index range is always [0, 3].
256*09537850SAkhilesh Sanikop // When width <= 4, the valid filter index range is always [4, 5].
257*09537850SAkhilesh Sanikop // Similarly for height.
258*09537850SAkhilesh Sanikop int filter_index = GetFilterIndex(horizontal_filter_index, width);
259*09537850SAkhilesh Sanikop int16_t* intermediate = intermediate_result;
260*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
261*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference) -
262*09537850SAkhilesh Sanikop kVerticalOffset * src_stride - kHorizontalOffset;
263*09537850SAkhilesh Sanikop auto* dest = static_cast<uint16_t*>(prediction);
264*09537850SAkhilesh Sanikop
265*09537850SAkhilesh Sanikop // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
266*09537850SAkhilesh Sanikop assert(horizontal_filter_id != 0);
267*09537850SAkhilesh Sanikop int y = 0;
268*09537850SAkhilesh Sanikop do {
269*09537850SAkhilesh Sanikop int x = 0;
270*09537850SAkhilesh Sanikop do {
271*09537850SAkhilesh Sanikop int sum = 0;
272*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
273*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
274*09537850SAkhilesh Sanikop src[x + k];
275*09537850SAkhilesh Sanikop }
276*09537850SAkhilesh Sanikop intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
277*09537850SAkhilesh Sanikop } while (++x < width);
278*09537850SAkhilesh Sanikop
279*09537850SAkhilesh Sanikop src += src_stride;
280*09537850SAkhilesh Sanikop intermediate += intermediate_stride;
281*09537850SAkhilesh Sanikop } while (++y < intermediate_height);
282*09537850SAkhilesh Sanikop
283*09537850SAkhilesh Sanikop // Vertical filter.
284*09537850SAkhilesh Sanikop filter_index = GetFilterIndex(vertical_filter_index, height);
285*09537850SAkhilesh Sanikop intermediate = intermediate_result;
286*09537850SAkhilesh Sanikop // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
287*09537850SAkhilesh Sanikop assert(vertical_filter_id != 0);
288*09537850SAkhilesh Sanikop y = 0;
289*09537850SAkhilesh Sanikop do {
290*09537850SAkhilesh Sanikop int x = 0;
291*09537850SAkhilesh Sanikop do {
292*09537850SAkhilesh Sanikop int sum = 0;
293*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
294*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
295*09537850SAkhilesh Sanikop intermediate[k * intermediate_stride + x];
296*09537850SAkhilesh Sanikop }
297*09537850SAkhilesh Sanikop sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
298*09537850SAkhilesh Sanikop sum += (bitdepth == 8) ? 0 : kCompoundOffset;
299*09537850SAkhilesh Sanikop dest[x] = sum;
300*09537850SAkhilesh Sanikop } while (++x < width);
301*09537850SAkhilesh Sanikop
302*09537850SAkhilesh Sanikop dest += pred_stride;
303*09537850SAkhilesh Sanikop intermediate += intermediate_stride;
304*09537850SAkhilesh Sanikop } while (++y < height);
305*09537850SAkhilesh Sanikop }
306*09537850SAkhilesh Sanikop
307*09537850SAkhilesh Sanikop // This function is a simplified version of ConvolveCompound2D_C.
308*09537850SAkhilesh Sanikop // It is called when it is single prediction mode, where both horizontal and
309*09537850SAkhilesh Sanikop // vertical filtering are required.
310*09537850SAkhilesh Sanikop // The output is the single prediction of the block, clipped to valid pixel
311*09537850SAkhilesh Sanikop // range.
312*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
Convolve2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)313*09537850SAkhilesh Sanikop void Convolve2D_C(const void* LIBGAV1_RESTRICT const reference,
314*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
315*09537850SAkhilesh Sanikop const int horizontal_filter_index,
316*09537850SAkhilesh Sanikop const int vertical_filter_index,
317*09537850SAkhilesh Sanikop const int horizontal_filter_id, const int vertical_filter_id,
318*09537850SAkhilesh Sanikop const int width, const int height,
319*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction,
320*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
321*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
322*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
323*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
324*09537850SAkhilesh Sanikop constexpr int kRoundBitsVertical =
325*09537850SAkhilesh Sanikop (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
326*09537850SAkhilesh Sanikop const int intermediate_height = height + kSubPixelTaps - 1;
327*09537850SAkhilesh Sanikop // The output of the horizontal filter, i.e. the intermediate_result, is
328*09537850SAkhilesh Sanikop // guaranteed to fit in int16_t.
329*09537850SAkhilesh Sanikop int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
330*09537850SAkhilesh Sanikop (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
331*09537850SAkhilesh Sanikop const int intermediate_stride = kMaxSuperBlockSizeInPixels;
332*09537850SAkhilesh Sanikop const int max_pixel_value = (1 << bitdepth) - 1;
333*09537850SAkhilesh Sanikop
334*09537850SAkhilesh Sanikop // Horizontal filter.
335*09537850SAkhilesh Sanikop // Filter types used for width <= 4 are different from those for width > 4.
336*09537850SAkhilesh Sanikop // When width > 4, the valid filter index range is always [0, 3].
337*09537850SAkhilesh Sanikop // When width <= 4, the valid filter index range is always [4, 5].
338*09537850SAkhilesh Sanikop // Similarly for height.
339*09537850SAkhilesh Sanikop int filter_index = GetFilterIndex(horizontal_filter_index, width);
340*09537850SAkhilesh Sanikop int16_t* intermediate = intermediate_result;
341*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
342*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference) -
343*09537850SAkhilesh Sanikop kVerticalOffset * src_stride - kHorizontalOffset;
344*09537850SAkhilesh Sanikop auto* dest = static_cast<Pixel*>(prediction);
345*09537850SAkhilesh Sanikop const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
346*09537850SAkhilesh Sanikop // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
347*09537850SAkhilesh Sanikop assert(horizontal_filter_id != 0);
348*09537850SAkhilesh Sanikop int y = 0;
349*09537850SAkhilesh Sanikop do {
350*09537850SAkhilesh Sanikop int x = 0;
351*09537850SAkhilesh Sanikop do {
352*09537850SAkhilesh Sanikop int sum = 0;
353*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
354*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
355*09537850SAkhilesh Sanikop src[x + k];
356*09537850SAkhilesh Sanikop }
357*09537850SAkhilesh Sanikop intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
358*09537850SAkhilesh Sanikop } while (++x < width);
359*09537850SAkhilesh Sanikop
360*09537850SAkhilesh Sanikop src += src_stride;
361*09537850SAkhilesh Sanikop intermediate += intermediate_stride;
362*09537850SAkhilesh Sanikop } while (++y < intermediate_height);
363*09537850SAkhilesh Sanikop
364*09537850SAkhilesh Sanikop // Vertical filter.
365*09537850SAkhilesh Sanikop filter_index = GetFilterIndex(vertical_filter_index, height);
366*09537850SAkhilesh Sanikop intermediate = intermediate_result;
367*09537850SAkhilesh Sanikop // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
368*09537850SAkhilesh Sanikop assert(vertical_filter_id != 0);
369*09537850SAkhilesh Sanikop y = 0;
370*09537850SAkhilesh Sanikop do {
371*09537850SAkhilesh Sanikop int x = 0;
372*09537850SAkhilesh Sanikop do {
373*09537850SAkhilesh Sanikop int sum = 0;
374*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
375*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
376*09537850SAkhilesh Sanikop intermediate[k * intermediate_stride + x];
377*09537850SAkhilesh Sanikop }
378*09537850SAkhilesh Sanikop dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
379*09537850SAkhilesh Sanikop max_pixel_value);
380*09537850SAkhilesh Sanikop } while (++x < width);
381*09537850SAkhilesh Sanikop
382*09537850SAkhilesh Sanikop dest += dest_stride;
383*09537850SAkhilesh Sanikop intermediate += intermediate_stride;
384*09537850SAkhilesh Sanikop } while (++y < height);
385*09537850SAkhilesh Sanikop }
386*09537850SAkhilesh Sanikop
387*09537850SAkhilesh Sanikop // This function is a simplified version of Convolve2D_C.
388*09537850SAkhilesh Sanikop // It is called when it is single prediction mode, where only horizontal
389*09537850SAkhilesh Sanikop // filtering is required.
390*09537850SAkhilesh Sanikop // The output is the single prediction of the block, clipped to valid pixel
391*09537850SAkhilesh Sanikop // range.
392*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveHorizontal_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)393*09537850SAkhilesh Sanikop void ConvolveHorizontal_C(const void* LIBGAV1_RESTRICT const reference,
394*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
395*09537850SAkhilesh Sanikop const int horizontal_filter_index,
396*09537850SAkhilesh Sanikop const int /*vertical_filter_index*/,
397*09537850SAkhilesh Sanikop const int horizontal_filter_id,
398*09537850SAkhilesh Sanikop const int /*vertical_filter_id*/, const int width,
399*09537850SAkhilesh Sanikop const int height, void* LIBGAV1_RESTRICT prediction,
400*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
401*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
402*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
403*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
404*09537850SAkhilesh Sanikop const int filter_index = GetFilterIndex(horizontal_filter_index, width);
405*09537850SAkhilesh Sanikop const int bits = kFilterBits - kRoundBitsHorizontal;
406*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
407*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
408*09537850SAkhilesh Sanikop auto* dest = static_cast<Pixel*>(prediction);
409*09537850SAkhilesh Sanikop const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
410*09537850SAkhilesh Sanikop const int max_pixel_value = (1 << bitdepth) - 1;
411*09537850SAkhilesh Sanikop int y = 0;
412*09537850SAkhilesh Sanikop do {
413*09537850SAkhilesh Sanikop int x = 0;
414*09537850SAkhilesh Sanikop do {
415*09537850SAkhilesh Sanikop int sum = 0;
416*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
417*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
418*09537850SAkhilesh Sanikop src[x + k];
419*09537850SAkhilesh Sanikop }
420*09537850SAkhilesh Sanikop sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
421*09537850SAkhilesh Sanikop dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
422*09537850SAkhilesh Sanikop } while (++x < width);
423*09537850SAkhilesh Sanikop
424*09537850SAkhilesh Sanikop src += src_stride;
425*09537850SAkhilesh Sanikop dest += dest_stride;
426*09537850SAkhilesh Sanikop } while (++y < height);
427*09537850SAkhilesh Sanikop }
428*09537850SAkhilesh Sanikop
429*09537850SAkhilesh Sanikop // This function is a simplified version of Convolve2D_C.
430*09537850SAkhilesh Sanikop // It is called when it is single prediction mode, where only vertical
431*09537850SAkhilesh Sanikop // filtering is required.
432*09537850SAkhilesh Sanikop // The output is the single prediction of the block, clipped to valid pixel
433*09537850SAkhilesh Sanikop // range.
434*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveVertical_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)435*09537850SAkhilesh Sanikop void ConvolveVertical_C(const void* LIBGAV1_RESTRICT const reference,
436*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
437*09537850SAkhilesh Sanikop const int /*horizontal_filter_index*/,
438*09537850SAkhilesh Sanikop const int vertical_filter_index,
439*09537850SAkhilesh Sanikop const int /*horizontal_filter_id*/,
440*09537850SAkhilesh Sanikop const int vertical_filter_id, const int width,
441*09537850SAkhilesh Sanikop const int height, void* LIBGAV1_RESTRICT prediction,
442*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
443*09537850SAkhilesh Sanikop const int filter_index = GetFilterIndex(vertical_filter_index, height);
444*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
445*09537850SAkhilesh Sanikop const auto* src =
446*09537850SAkhilesh Sanikop static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
447*09537850SAkhilesh Sanikop auto* dest = static_cast<Pixel*>(prediction);
448*09537850SAkhilesh Sanikop const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
449*09537850SAkhilesh Sanikop // Copy filters must call ConvolveCopy().
450*09537850SAkhilesh Sanikop assert(vertical_filter_id != 0);
451*09537850SAkhilesh Sanikop
452*09537850SAkhilesh Sanikop const int max_pixel_value = (1 << bitdepth) - 1;
453*09537850SAkhilesh Sanikop int y = 0;
454*09537850SAkhilesh Sanikop do {
455*09537850SAkhilesh Sanikop int x = 0;
456*09537850SAkhilesh Sanikop do {
457*09537850SAkhilesh Sanikop int sum = 0;
458*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
459*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
460*09537850SAkhilesh Sanikop src[k * src_stride + x];
461*09537850SAkhilesh Sanikop }
462*09537850SAkhilesh Sanikop dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
463*09537850SAkhilesh Sanikop max_pixel_value);
464*09537850SAkhilesh Sanikop } while (++x < width);
465*09537850SAkhilesh Sanikop
466*09537850SAkhilesh Sanikop src += src_stride;
467*09537850SAkhilesh Sanikop dest += dest_stride;
468*09537850SAkhilesh Sanikop } while (++y < height);
469*09537850SAkhilesh Sanikop }
470*09537850SAkhilesh Sanikop
471*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveCopy_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)472*09537850SAkhilesh Sanikop void ConvolveCopy_C(const void* LIBGAV1_RESTRICT const reference,
473*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
474*09537850SAkhilesh Sanikop const int /*horizontal_filter_index*/,
475*09537850SAkhilesh Sanikop const int /*vertical_filter_index*/,
476*09537850SAkhilesh Sanikop const int /*horizontal_filter_id*/,
477*09537850SAkhilesh Sanikop const int /*vertical_filter_id*/, const int width,
478*09537850SAkhilesh Sanikop const int height, void* LIBGAV1_RESTRICT prediction,
479*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
480*09537850SAkhilesh Sanikop const auto* src = static_cast<const uint8_t*>(reference);
481*09537850SAkhilesh Sanikop auto* dest = static_cast<uint8_t*>(prediction);
482*09537850SAkhilesh Sanikop int y = 0;
483*09537850SAkhilesh Sanikop do {
484*09537850SAkhilesh Sanikop memcpy(dest, src, width * sizeof(Pixel));
485*09537850SAkhilesh Sanikop src += reference_stride;
486*09537850SAkhilesh Sanikop dest += pred_stride;
487*09537850SAkhilesh Sanikop } while (++y < height);
488*09537850SAkhilesh Sanikop }
489*09537850SAkhilesh Sanikop
490*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveCompoundCopy_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)491*09537850SAkhilesh Sanikop void ConvolveCompoundCopy_C(const void* LIBGAV1_RESTRICT const reference,
492*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride,
493*09537850SAkhilesh Sanikop const int /*horizontal_filter_index*/,
494*09537850SAkhilesh Sanikop const int /*vertical_filter_index*/,
495*09537850SAkhilesh Sanikop const int /*horizontal_filter_id*/,
496*09537850SAkhilesh Sanikop const int /*vertical_filter_id*/, const int width,
497*09537850SAkhilesh Sanikop const int height, void* LIBGAV1_RESTRICT prediction,
498*09537850SAkhilesh Sanikop const ptrdiff_t pred_stride) {
499*09537850SAkhilesh Sanikop // All compound functions output to the predictor buffer with |pred_stride|
500*09537850SAkhilesh Sanikop // equal to |width|.
501*09537850SAkhilesh Sanikop assert(pred_stride == width);
502*09537850SAkhilesh Sanikop // Compound functions start at 4x4.
503*09537850SAkhilesh Sanikop assert(width >= 4 && height >= 4);
504*09537850SAkhilesh Sanikop constexpr int kRoundBitsVertical =
505*09537850SAkhilesh Sanikop ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
506*09537850SAkhilesh Sanikop : kInterRoundBitsVertical) -
507*09537850SAkhilesh Sanikop kInterRoundBitsCompoundVertical;
508*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference);
509*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
510*09537850SAkhilesh Sanikop auto* dest = static_cast<uint16_t*>(prediction);
511*09537850SAkhilesh Sanikop int y = 0;
512*09537850SAkhilesh Sanikop do {
513*09537850SAkhilesh Sanikop int x = 0;
514*09537850SAkhilesh Sanikop do {
515*09537850SAkhilesh Sanikop int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
516*09537850SAkhilesh Sanikop sum += src[x];
517*09537850SAkhilesh Sanikop dest[x] = sum << kRoundBitsVertical;
518*09537850SAkhilesh Sanikop } while (++x < width);
519*09537850SAkhilesh Sanikop src += src_stride;
520*09537850SAkhilesh Sanikop dest += pred_stride;
521*09537850SAkhilesh Sanikop } while (++y < height);
522*09537850SAkhilesh Sanikop }
523*09537850SAkhilesh Sanikop
524*09537850SAkhilesh Sanikop // This function is a simplified version of ConvolveCompound2D_C.
525*09537850SAkhilesh Sanikop // It is called when it is compound prediction mode, where only horizontal
526*09537850SAkhilesh Sanikop // filtering is required.
527*09537850SAkhilesh Sanikop // The output is not clipped to valid pixel range. Its output will be
528*09537850SAkhilesh Sanikop // blended with another predictor to generate the final prediction of the block.
529*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveCompoundHorizontal_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)530*09537850SAkhilesh Sanikop void ConvolveCompoundHorizontal_C(
531*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const reference,
532*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride, const int horizontal_filter_index,
533*09537850SAkhilesh Sanikop const int /*vertical_filter_index*/, const int horizontal_filter_id,
534*09537850SAkhilesh Sanikop const int /*vertical_filter_id*/, const int width, const int height,
535*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
536*09537850SAkhilesh Sanikop // All compound functions output to the predictor buffer with |pred_stride|
537*09537850SAkhilesh Sanikop // equal to |width|.
538*09537850SAkhilesh Sanikop assert(pred_stride == width);
539*09537850SAkhilesh Sanikop // Compound functions start at 4x4.
540*09537850SAkhilesh Sanikop assert(width >= 4 && height >= 4);
541*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
542*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
543*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
544*09537850SAkhilesh Sanikop const int filter_index = GetFilterIndex(horizontal_filter_index, width);
545*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
546*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
547*09537850SAkhilesh Sanikop auto* dest = static_cast<uint16_t*>(prediction);
548*09537850SAkhilesh Sanikop // Copy filters must call ConvolveCopy().
549*09537850SAkhilesh Sanikop assert(horizontal_filter_id != 0);
550*09537850SAkhilesh Sanikop int y = 0;
551*09537850SAkhilesh Sanikop do {
552*09537850SAkhilesh Sanikop int x = 0;
553*09537850SAkhilesh Sanikop do {
554*09537850SAkhilesh Sanikop int sum = 0;
555*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
556*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
557*09537850SAkhilesh Sanikop src[x + k];
558*09537850SAkhilesh Sanikop }
559*09537850SAkhilesh Sanikop sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
560*09537850SAkhilesh Sanikop sum += (bitdepth == 8) ? 0 : kCompoundOffset;
561*09537850SAkhilesh Sanikop dest[x] = sum;
562*09537850SAkhilesh Sanikop } while (++x < width);
563*09537850SAkhilesh Sanikop
564*09537850SAkhilesh Sanikop src += src_stride;
565*09537850SAkhilesh Sanikop dest += pred_stride;
566*09537850SAkhilesh Sanikop } while (++y < height);
567*09537850SAkhilesh Sanikop }
568*09537850SAkhilesh Sanikop
569*09537850SAkhilesh Sanikop // This function is a simplified version of ConvolveCompound2D_C.
570*09537850SAkhilesh Sanikop // It is called when it is compound prediction mode, where only vertical
571*09537850SAkhilesh Sanikop // filtering is required.
572*09537850SAkhilesh Sanikop // The output is not clipped to valid pixel range. Its output will be
573*09537850SAkhilesh Sanikop // blended with another predictor to generate the final prediction of the block.
574*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveCompoundVertical_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)575*09537850SAkhilesh Sanikop void ConvolveCompoundVertical_C(
576*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const reference,
577*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
578*09537850SAkhilesh Sanikop const int vertical_filter_index, const int /*horizontal_filter_id*/,
579*09537850SAkhilesh Sanikop const int vertical_filter_id, const int width, const int height,
580*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
581*09537850SAkhilesh Sanikop // All compound functions output to the predictor buffer with |pred_stride|
582*09537850SAkhilesh Sanikop // equal to |width|.
583*09537850SAkhilesh Sanikop assert(pred_stride == width);
584*09537850SAkhilesh Sanikop // Compound functions start at 4x4.
585*09537850SAkhilesh Sanikop assert(width >= 4 && height >= 4);
586*09537850SAkhilesh Sanikop constexpr int kRoundBitsHorizontal = (bitdepth == 12)
587*09537850SAkhilesh Sanikop ? kInterRoundBitsHorizontal12bpp
588*09537850SAkhilesh Sanikop : kInterRoundBitsHorizontal;
589*09537850SAkhilesh Sanikop const int filter_index = GetFilterIndex(vertical_filter_index, height);
590*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
591*09537850SAkhilesh Sanikop const auto* src =
592*09537850SAkhilesh Sanikop static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
593*09537850SAkhilesh Sanikop auto* dest = static_cast<uint16_t*>(prediction);
594*09537850SAkhilesh Sanikop // Copy filters must call ConvolveCopy().
595*09537850SAkhilesh Sanikop assert(vertical_filter_id != 0);
596*09537850SAkhilesh Sanikop int y = 0;
597*09537850SAkhilesh Sanikop do {
598*09537850SAkhilesh Sanikop int x = 0;
599*09537850SAkhilesh Sanikop do {
600*09537850SAkhilesh Sanikop int sum = 0;
601*09537850SAkhilesh Sanikop for (int k = 0; k < kSubPixelTaps; ++k) {
602*09537850SAkhilesh Sanikop sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
603*09537850SAkhilesh Sanikop src[k * src_stride + x];
604*09537850SAkhilesh Sanikop }
605*09537850SAkhilesh Sanikop sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
606*09537850SAkhilesh Sanikop sum += (bitdepth == 8) ? 0 : kCompoundOffset;
607*09537850SAkhilesh Sanikop dest[x] = sum;
608*09537850SAkhilesh Sanikop } while (++x < width);
609*09537850SAkhilesh Sanikop src += src_stride;
610*09537850SAkhilesh Sanikop dest += pred_stride;
611*09537850SAkhilesh Sanikop } while (++y < height);
612*09537850SAkhilesh Sanikop }
613*09537850SAkhilesh Sanikop
614*09537850SAkhilesh Sanikop // This function is used when intra block copy is present.
615*09537850SAkhilesh Sanikop // It is called when it is single prediction mode for U/V plane, where the
616*09537850SAkhilesh Sanikop // reference block is from current frame and both horizontal and vertical
617*09537850SAkhilesh Sanikop // filtering are required.
618*09537850SAkhilesh Sanikop // The output is the single prediction of the block, clipped to valid pixel
619*09537850SAkhilesh Sanikop // range.
620*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel>
ConvolveIntraBlockCopy2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)621*09537850SAkhilesh Sanikop void ConvolveIntraBlockCopy2D_C(
622*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const reference,
623*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
624*09537850SAkhilesh Sanikop const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
625*09537850SAkhilesh Sanikop const int /*vertical_filter_id*/, const int width, const int height,
626*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
627*09537850SAkhilesh Sanikop assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
628*09537850SAkhilesh Sanikop assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
629*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference);
630*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
631*09537850SAkhilesh Sanikop auto* dest = static_cast<Pixel*>(prediction);
632*09537850SAkhilesh Sanikop const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
633*09537850SAkhilesh Sanikop const int intermediate_height = height + 1;
634*09537850SAkhilesh Sanikop uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
635*09537850SAkhilesh Sanikop (kMaxSuperBlockSizeInPixels + 1)];
636*09537850SAkhilesh Sanikop uint16_t* intermediate = intermediate_result;
637*09537850SAkhilesh Sanikop // Note: allow vertical access to height + 1. Because this function is only
638*09537850SAkhilesh Sanikop // for u/v plane of intra block copy, such access is guaranteed to be within
639*09537850SAkhilesh Sanikop // the prediction block.
640*09537850SAkhilesh Sanikop int y = 0;
641*09537850SAkhilesh Sanikop do {
642*09537850SAkhilesh Sanikop int x = 0;
643*09537850SAkhilesh Sanikop do {
644*09537850SAkhilesh Sanikop intermediate[x] = src[x] + src[x + 1];
645*09537850SAkhilesh Sanikop } while (++x < width);
646*09537850SAkhilesh Sanikop
647*09537850SAkhilesh Sanikop src += src_stride;
648*09537850SAkhilesh Sanikop intermediate += width;
649*09537850SAkhilesh Sanikop } while (++y < intermediate_height);
650*09537850SAkhilesh Sanikop
651*09537850SAkhilesh Sanikop intermediate = intermediate_result;
652*09537850SAkhilesh Sanikop y = 0;
653*09537850SAkhilesh Sanikop do {
654*09537850SAkhilesh Sanikop int x = 0;
655*09537850SAkhilesh Sanikop do {
656*09537850SAkhilesh Sanikop dest[x] =
657*09537850SAkhilesh Sanikop RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
658*09537850SAkhilesh Sanikop } while (++x < width);
659*09537850SAkhilesh Sanikop
660*09537850SAkhilesh Sanikop intermediate += width;
661*09537850SAkhilesh Sanikop dest += dest_stride;
662*09537850SAkhilesh Sanikop } while (++y < height);
663*09537850SAkhilesh Sanikop }
664*09537850SAkhilesh Sanikop
665*09537850SAkhilesh Sanikop // This function is used when intra block copy is present.
666*09537850SAkhilesh Sanikop // It is called when it is single prediction mode for U/V plane, where the
667*09537850SAkhilesh Sanikop // reference block is from the current frame and only horizontal or vertical
668*09537850SAkhilesh Sanikop // filtering is required.
669*09537850SAkhilesh Sanikop // The output is the single prediction of the block, clipped to valid pixel
670*09537850SAkhilesh Sanikop // range.
671*09537850SAkhilesh Sanikop // The filtering of intra block copy is simply the average of current and
672*09537850SAkhilesh Sanikop // the next pixel.
673*09537850SAkhilesh Sanikop template <int bitdepth, typename Pixel, bool is_horizontal>
ConvolveIntraBlockCopy1D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)674*09537850SAkhilesh Sanikop void ConvolveIntraBlockCopy1D_C(
675*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const reference,
676*09537850SAkhilesh Sanikop const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
677*09537850SAkhilesh Sanikop const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
678*09537850SAkhilesh Sanikop const int /*vertical_filter_id*/, const int width, const int height,
679*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
680*09537850SAkhilesh Sanikop assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
681*09537850SAkhilesh Sanikop assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
682*09537850SAkhilesh Sanikop const auto* src = static_cast<const Pixel*>(reference);
683*09537850SAkhilesh Sanikop const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
684*09537850SAkhilesh Sanikop auto* dest = static_cast<Pixel*>(prediction);
685*09537850SAkhilesh Sanikop const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
686*09537850SAkhilesh Sanikop const ptrdiff_t offset = is_horizontal ? 1 : src_stride;
687*09537850SAkhilesh Sanikop int y = 0;
688*09537850SAkhilesh Sanikop do {
689*09537850SAkhilesh Sanikop int x = 0;
690*09537850SAkhilesh Sanikop do {
691*09537850SAkhilesh Sanikop dest[x] = RightShiftWithRounding(src[x] + src[x + offset], 1);
692*09537850SAkhilesh Sanikop } while (++x < width);
693*09537850SAkhilesh Sanikop
694*09537850SAkhilesh Sanikop src += src_stride;
695*09537850SAkhilesh Sanikop dest += dest_stride;
696*09537850SAkhilesh Sanikop } while (++y < height);
697*09537850SAkhilesh Sanikop }
698*09537850SAkhilesh Sanikop
Init8bpp()699*09537850SAkhilesh Sanikop void Init8bpp() {
700*09537850SAkhilesh Sanikop Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
701*09537850SAkhilesh Sanikop assert(dsp != nullptr);
702*09537850SAkhilesh Sanikop #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
703*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
704*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
705*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
706*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
707*09537850SAkhilesh Sanikop
708*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
709*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
710*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
711*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
712*09537850SAkhilesh Sanikop
713*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
714*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][1] =
715*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
716*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][0] =
717*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
718*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
719*09537850SAkhilesh Sanikop
720*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][0] = nullptr;
721*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][1] = nullptr;
722*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][0] = nullptr;
723*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][1] = nullptr;
724*09537850SAkhilesh Sanikop
725*09537850SAkhilesh Sanikop dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
726*09537850SAkhilesh Sanikop dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
727*09537850SAkhilesh Sanikop #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
728*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy
729*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
730*09537850SAkhilesh Sanikop #endif
731*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal
732*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
733*09537850SAkhilesh Sanikop #endif
734*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical
735*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
736*09537850SAkhilesh Sanikop #endif
737*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_Convolve2D
738*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
739*09537850SAkhilesh Sanikop #endif
740*09537850SAkhilesh Sanikop
741*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy
742*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
743*09537850SAkhilesh Sanikop #endif
744*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal
745*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
746*09537850SAkhilesh Sanikop #endif
747*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical
748*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
749*09537850SAkhilesh Sanikop #endif
750*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D
751*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
752*09537850SAkhilesh Sanikop #endif
753*09537850SAkhilesh Sanikop
754*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy
755*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
756*09537850SAkhilesh Sanikop #endif
757*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal
758*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][1] =
759*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
760*09537850SAkhilesh Sanikop #endif
761*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical
762*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][0] =
763*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
764*09537850SAkhilesh Sanikop #endif
765*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D
766*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
767*09537850SAkhilesh Sanikop #endif
768*09537850SAkhilesh Sanikop
769*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][0] = nullptr;
770*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][1] = nullptr;
771*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][0] = nullptr;
772*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][1] = nullptr;
773*09537850SAkhilesh Sanikop
774*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D
775*09537850SAkhilesh Sanikop dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
776*09537850SAkhilesh Sanikop #endif
777*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D
778*09537850SAkhilesh Sanikop dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
779*09537850SAkhilesh Sanikop #endif
780*09537850SAkhilesh Sanikop #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
781*09537850SAkhilesh Sanikop }
782*09537850SAkhilesh Sanikop
#if LIBGAV1_MAX_BITDEPTH >= 10
// Stores the C convolve functions in the table returned by
// dsp_internal::GetWritableDspTable(10).
// A slot is written when LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero or when
// the slot's LIBGAV1_Dsp10bpp_* macro is not defined. The convolve[1][1][*][*]
// slots are always set to nullptr.
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);

  // convolve[0][0][*][*]: Convolve*_C.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCopy)
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveHorizontal)
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveVertical)
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || !defined(LIBGAV1_Dsp10bpp_Convolve2D)
  dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
#endif

  // convolve[0][1][*][*]: ConvolveCompound*_C.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundCopy)
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal)
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundVertical)
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompound2D)
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
#endif

  // convolve[1][0][*][*]: ConvolveCopy_C and ConvolveIntraBlockCopy*_C.
  // NOTE(review): the last three guards in this group are spelled
  // "ConvolveIntraBlock..." without "Copy", whereas Init8bpp() checks
  // LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal/Vertical/2D. The names
  // are kept unchanged here; confirm them against the headers that define
  // the LIBGAV1_Dsp10bpp_* macros.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy)
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal)
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical)
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D)
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
#endif

  // convolve[1][1][*][*]: no functions are installed.
  dsp->convolve[1][1][0][0] = nullptr;
  dsp->convolve[1][1][0][1] = nullptr;
  dsp->convolve[1][1][1][0] = nullptr;
  dsp->convolve[1][1][1][1] = nullptr;

  // Scaled convolution.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveScale2D)
  dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D)
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
868*09537850SAkhilesh Sanikop
869*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()870*09537850SAkhilesh Sanikop void Init12bpp() {
871*09537850SAkhilesh Sanikop Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
872*09537850SAkhilesh Sanikop assert(dsp != nullptr);
873*09537850SAkhilesh Sanikop #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
874*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][0] = ConvolveCopy_C<12, uint16_t>;
875*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<12, uint16_t>;
876*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][0] = ConvolveVertical_C<12, uint16_t>;
877*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][1] = Convolve2D_C<12, uint16_t>;
878*09537850SAkhilesh Sanikop
879*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<12, uint16_t>;
880*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<12, uint16_t>;
881*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<12, uint16_t>;
882*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<12, uint16_t>;
883*09537850SAkhilesh Sanikop
884*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][0] = ConvolveCopy_C<12, uint16_t>;
885*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][1] =
886*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/true>;
887*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][0] =
888*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/false>;
889*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<12, uint16_t>;
890*09537850SAkhilesh Sanikop
891*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][0] = nullptr;
892*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][1] = nullptr;
893*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][0] = nullptr;
894*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][1] = nullptr;
895*09537850SAkhilesh Sanikop
896*09537850SAkhilesh Sanikop dsp->convolve_scale[0] = ConvolveScale2D_C<12, uint16_t>;
897*09537850SAkhilesh Sanikop dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<12, uint16_t>;
898*09537850SAkhilesh Sanikop #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
899*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveCopy
900*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][0] = ConvolveCopy_C<12, uint16_t>;
901*09537850SAkhilesh Sanikop #endif
902*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveHorizontal
903*09537850SAkhilesh Sanikop dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<12, uint16_t>;
904*09537850SAkhilesh Sanikop #endif
905*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveVertical
906*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][0] = ConvolveVertical_C<12, uint16_t>;
907*09537850SAkhilesh Sanikop #endif
908*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_Convolve2D
909*09537850SAkhilesh Sanikop dsp->convolve[0][0][1][1] = Convolve2D_C<12, uint16_t>;
910*09537850SAkhilesh Sanikop #endif
911*09537850SAkhilesh Sanikop
912*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundCopy
913*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<12, uint16_t>;
914*09537850SAkhilesh Sanikop #endif
915*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundHorizontal
916*09537850SAkhilesh Sanikop dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<12, uint16_t>;
917*09537850SAkhilesh Sanikop #endif
918*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundVertical
919*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<12, uint16_t>;
920*09537850SAkhilesh Sanikop #endif
921*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveCompound2D
922*09537850SAkhilesh Sanikop dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<12, uint16_t>;
923*09537850SAkhilesh Sanikop #endif
924*09537850SAkhilesh Sanikop
925*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopy
926*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][0] = ConvolveCopy_C<12, uint16_t>;
927*09537850SAkhilesh Sanikop #endif
928*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlockHorizontal
929*09537850SAkhilesh Sanikop dsp->convolve[1][0][0][1] =
930*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/true>;
931*09537850SAkhilesh Sanikop #endif
932*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlockVertical
933*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][0] =
934*09537850SAkhilesh Sanikop ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/false>;
935*09537850SAkhilesh Sanikop #endif
936*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlock2D
937*09537850SAkhilesh Sanikop dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<12, uint16_t>;
938*09537850SAkhilesh Sanikop #endif
939*09537850SAkhilesh Sanikop
940*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][0] = nullptr;
941*09537850SAkhilesh Sanikop dsp->convolve[1][1][0][1] = nullptr;
942*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][0] = nullptr;
943*09537850SAkhilesh Sanikop dsp->convolve[1][1][1][1] = nullptr;
944*09537850SAkhilesh Sanikop
945*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveScale2D
946*09537850SAkhilesh Sanikop dsp->convolve_scale[0] = ConvolveScale2D_C<12, uint16_t>;
947*09537850SAkhilesh Sanikop #endif
948*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundScale2D
949*09537850SAkhilesh Sanikop dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<12, uint16_t>;
950*09537850SAkhilesh Sanikop #endif
951*09537850SAkhilesh Sanikop #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
952*09537850SAkhilesh Sanikop }
953*09537850SAkhilesh Sanikop #endif // LIBGAV1_MAX_BITDEPTH == 12
954*09537850SAkhilesh Sanikop
955*09537850SAkhilesh Sanikop } // namespace
956*09537850SAkhilesh Sanikop
ConvolveInit_C()957*09537850SAkhilesh Sanikop void ConvolveInit_C() {
958*09537850SAkhilesh Sanikop Init8bpp();
959*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
960*09537850SAkhilesh Sanikop Init10bpp();
961*09537850SAkhilesh Sanikop #endif
962*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH == 12
963*09537850SAkhilesh Sanikop Init12bpp();
964*09537850SAkhilesh Sanikop #endif
965*09537850SAkhilesh Sanikop }
966*09537850SAkhilesh Sanikop
967*09537850SAkhilesh Sanikop } // namespace dsp
968*09537850SAkhilesh Sanikop } // namespace libgav1
969