// Copyright 2021 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*09537850SAkhilesh Sanikop
15*09537850SAkhilesh Sanikop #include "src/dsp/intrapred_directional.h"
16*09537850SAkhilesh Sanikop
17*09537850SAkhilesh Sanikop #include <cassert>
18*09537850SAkhilesh Sanikop #include <cstddef>
19*09537850SAkhilesh Sanikop #include <cstdint>
20*09537850SAkhilesh Sanikop #include <cstring>
21*09537850SAkhilesh Sanikop
22*09537850SAkhilesh Sanikop #include "src/dsp/constants.h"
23*09537850SAkhilesh Sanikop #include "src/dsp/dsp.h"
24*09537850SAkhilesh Sanikop #include "src/utils/common.h"
25*09537850SAkhilesh Sanikop #include "src/utils/constants.h"
26*09537850SAkhilesh Sanikop #include "src/utils/memory.h"
27*09537850SAkhilesh Sanikop
28*09537850SAkhilesh Sanikop namespace libgav1 {
29*09537850SAkhilesh Sanikop namespace dsp {
30*09537850SAkhilesh Sanikop namespace {
31*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// 7.11.2.4. Directional intra prediction process
34*09537850SAkhilesh Sanikop
35*09537850SAkhilesh Sanikop template <typename Pixel>
DirectionalIntraPredictorZone1_C(void * LIBGAV1_RESTRICT const dest,ptrdiff_t stride,const void * LIBGAV1_RESTRICT const top_row,const int width,const int height,const int xstep,const bool upsampled_top)36*09537850SAkhilesh Sanikop void DirectionalIntraPredictorZone1_C(
37*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
38*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const top_row, const int width,
39*09537850SAkhilesh Sanikop const int height, const int xstep, const bool upsampled_top) {
40*09537850SAkhilesh Sanikop const auto* const top = static_cast<const Pixel*>(top_row);
41*09537850SAkhilesh Sanikop auto* dst = static_cast<Pixel*>(dest);
42*09537850SAkhilesh Sanikop stride /= sizeof(Pixel);
43*09537850SAkhilesh Sanikop
44*09537850SAkhilesh Sanikop assert(xstep > 0);
45*09537850SAkhilesh Sanikop
46*09537850SAkhilesh Sanikop // If xstep == 64 then |shift| always evaluates to 0 which sets |val| to
47*09537850SAkhilesh Sanikop // |top[top_base_x]|. This corresponds to a 45 degree prediction.
48*09537850SAkhilesh Sanikop if (xstep == 64) {
49*09537850SAkhilesh Sanikop // 7.11.2.10. Intra edge upsample selection process
50*09537850SAkhilesh Sanikop // if ( d <= 0 || d >= 40 ) useUpsample = 0
51*09537850SAkhilesh Sanikop // For |upsampled_top| the delta is |predictor_angle - 90|. Since the
52*09537850SAkhilesh Sanikop // |predictor_angle| is 45 the delta is also 45.
53*09537850SAkhilesh Sanikop assert(!upsampled_top);
54*09537850SAkhilesh Sanikop const Pixel* top_ptr = top + 1;
55*09537850SAkhilesh Sanikop for (int y = 0; y < height; ++y, dst += stride, ++top_ptr) {
56*09537850SAkhilesh Sanikop memcpy(dst, top_ptr, sizeof(*top_ptr) * width);
57*09537850SAkhilesh Sanikop }
58*09537850SAkhilesh Sanikop return;
59*09537850SAkhilesh Sanikop }
60*09537850SAkhilesh Sanikop
61*09537850SAkhilesh Sanikop const int upsample_shift = static_cast<int>(upsampled_top);
62*09537850SAkhilesh Sanikop const int max_base_x = ((width + height) - 1) << upsample_shift;
63*09537850SAkhilesh Sanikop const int scale_bits = 6 - upsample_shift;
64*09537850SAkhilesh Sanikop const int base_step = 1 << upsample_shift;
65*09537850SAkhilesh Sanikop int top_x = xstep;
66*09537850SAkhilesh Sanikop int y = 0;
67*09537850SAkhilesh Sanikop do {
68*09537850SAkhilesh Sanikop int top_base_x = top_x >> scale_bits;
69*09537850SAkhilesh Sanikop
70*09537850SAkhilesh Sanikop if (top_base_x >= max_base_x) {
71*09537850SAkhilesh Sanikop for (int i = y; i < height; ++i) {
72*09537850SAkhilesh Sanikop Memset(dst, top[max_base_x], width);
73*09537850SAkhilesh Sanikop dst += stride;
74*09537850SAkhilesh Sanikop }
75*09537850SAkhilesh Sanikop return;
76*09537850SAkhilesh Sanikop }
77*09537850SAkhilesh Sanikop
78*09537850SAkhilesh Sanikop const int shift = ((top_x << upsample_shift) & 0x3F) >> 1;
79*09537850SAkhilesh Sanikop int x = 0;
80*09537850SAkhilesh Sanikop do {
81*09537850SAkhilesh Sanikop if (top_base_x >= max_base_x) {
82*09537850SAkhilesh Sanikop Memset(dst + x, top[max_base_x], width - x);
83*09537850SAkhilesh Sanikop break;
84*09537850SAkhilesh Sanikop }
85*09537850SAkhilesh Sanikop
86*09537850SAkhilesh Sanikop const int val =
87*09537850SAkhilesh Sanikop top[top_base_x] * (32 - shift) + top[top_base_x + 1] * shift;
88*09537850SAkhilesh Sanikop dst[x] = RightShiftWithRounding(val, 5 /*log2(32)*/);
89*09537850SAkhilesh Sanikop top_base_x += base_step;
90*09537850SAkhilesh Sanikop } while (++x < width);
91*09537850SAkhilesh Sanikop
92*09537850SAkhilesh Sanikop dst += stride;
93*09537850SAkhilesh Sanikop top_x += xstep;
94*09537850SAkhilesh Sanikop } while (++y < height);
95*09537850SAkhilesh Sanikop }
96*09537850SAkhilesh Sanikop
// clang 14.0.0 produces incorrect code with LIBGAV1_RESTRICT.
// https://github.com/llvm/llvm-project/issues/54427
// LOCAL_RESTRICT therefore expands to nothing for any clang whose major
// version is 14 and to LIBGAV1_RESTRICT for every other compiler.
#if !defined(__clang__) || __clang_major__ != 14
#define LOCAL_RESTRICT LIBGAV1_RESTRICT
#else
#define LOCAL_RESTRICT
#endif
104*09537850SAkhilesh Sanikop
105*09537850SAkhilesh Sanikop template <typename Pixel>
DirectionalIntraPredictorZone2_C(void * LOCAL_RESTRICT const dest,ptrdiff_t stride,const void * LOCAL_RESTRICT const top_row,const void * LOCAL_RESTRICT const left_column,const int width,const int height,const int xstep,const int ystep,const bool upsampled_top,const bool upsampled_left)106*09537850SAkhilesh Sanikop void DirectionalIntraPredictorZone2_C(
107*09537850SAkhilesh Sanikop void* LOCAL_RESTRICT const dest, ptrdiff_t stride,
108*09537850SAkhilesh Sanikop const void* LOCAL_RESTRICT const top_row,
109*09537850SAkhilesh Sanikop const void* LOCAL_RESTRICT const left_column, const int width,
110*09537850SAkhilesh Sanikop const int height, const int xstep, const int ystep,
111*09537850SAkhilesh Sanikop const bool upsampled_top, const bool upsampled_left) {
112*09537850SAkhilesh Sanikop const auto* const top = static_cast<const Pixel*>(top_row);
113*09537850SAkhilesh Sanikop const auto* const left = static_cast<const Pixel*>(left_column);
114*09537850SAkhilesh Sanikop auto* dst = static_cast<Pixel*>(dest);
115*09537850SAkhilesh Sanikop stride /= sizeof(Pixel);
116*09537850SAkhilesh Sanikop
117*09537850SAkhilesh Sanikop assert(xstep > 0);
118*09537850SAkhilesh Sanikop assert(ystep > 0);
119*09537850SAkhilesh Sanikop
120*09537850SAkhilesh Sanikop const int upsample_top_shift = static_cast<int>(upsampled_top);
121*09537850SAkhilesh Sanikop const int upsample_left_shift = static_cast<int>(upsampled_left);
122*09537850SAkhilesh Sanikop const int scale_bits_x = 6 - upsample_top_shift;
123*09537850SAkhilesh Sanikop const int scale_bits_y = 6 - upsample_left_shift;
124*09537850SAkhilesh Sanikop const int min_base_x = -(1 << upsample_top_shift);
125*09537850SAkhilesh Sanikop const int base_step_x = 1 << upsample_top_shift;
126*09537850SAkhilesh Sanikop int y = 0;
127*09537850SAkhilesh Sanikop int top_x = -xstep;
128*09537850SAkhilesh Sanikop do {
129*09537850SAkhilesh Sanikop int top_base_x = top_x >> scale_bits_x;
130*09537850SAkhilesh Sanikop int left_y = (y << 6) - ystep;
131*09537850SAkhilesh Sanikop int x = 0;
132*09537850SAkhilesh Sanikop do {
133*09537850SAkhilesh Sanikop int val;
134*09537850SAkhilesh Sanikop if (top_base_x >= min_base_x) {
135*09537850SAkhilesh Sanikop const int shift = ((top_x * (1 << upsample_top_shift)) & 0x3F) >> 1;
136*09537850SAkhilesh Sanikop val = top[top_base_x] * (32 - shift) + top[top_base_x + 1] * shift;
137*09537850SAkhilesh Sanikop } else {
138*09537850SAkhilesh Sanikop // Note this assumes an arithmetic shift to handle negative values.
139*09537850SAkhilesh Sanikop const int left_base_y = left_y >> scale_bits_y;
140*09537850SAkhilesh Sanikop const int shift = ((left_y * (1 << upsample_left_shift)) & 0x3F) >> 1;
141*09537850SAkhilesh Sanikop assert(left_base_y >= -(1 << upsample_left_shift));
142*09537850SAkhilesh Sanikop val = left[left_base_y] * (32 - shift) + left[left_base_y + 1] * shift;
143*09537850SAkhilesh Sanikop }
144*09537850SAkhilesh Sanikop dst[x] = RightShiftWithRounding(val, 5);
145*09537850SAkhilesh Sanikop top_base_x += base_step_x;
146*09537850SAkhilesh Sanikop left_y -= ystep;
147*09537850SAkhilesh Sanikop } while (++x < width);
148*09537850SAkhilesh Sanikop
149*09537850SAkhilesh Sanikop top_x -= xstep;
150*09537850SAkhilesh Sanikop dst += stride;
151*09537850SAkhilesh Sanikop } while (++y < height);
152*09537850SAkhilesh Sanikop }
153*09537850SAkhilesh Sanikop
154*09537850SAkhilesh Sanikop #undef LOCAL_RESTRICT
155*09537850SAkhilesh Sanikop
156*09537850SAkhilesh Sanikop template <typename Pixel>
DirectionalIntraPredictorZone3_C(void * LIBGAV1_RESTRICT const dest,ptrdiff_t stride,const void * LIBGAV1_RESTRICT const left_column,const int width,const int height,const int ystep,const bool upsampled_left)157*09537850SAkhilesh Sanikop void DirectionalIntraPredictorZone3_C(
158*09537850SAkhilesh Sanikop void* LIBGAV1_RESTRICT const dest, ptrdiff_t stride,
159*09537850SAkhilesh Sanikop const void* LIBGAV1_RESTRICT const left_column, const int width,
160*09537850SAkhilesh Sanikop const int height, const int ystep, const bool upsampled_left) {
161*09537850SAkhilesh Sanikop const auto* const left = static_cast<const Pixel*>(left_column);
162*09537850SAkhilesh Sanikop stride /= sizeof(Pixel);
163*09537850SAkhilesh Sanikop
164*09537850SAkhilesh Sanikop assert(ystep > 0);
165*09537850SAkhilesh Sanikop
166*09537850SAkhilesh Sanikop const int upsample_shift = static_cast<int>(upsampled_left);
167*09537850SAkhilesh Sanikop const int scale_bits = 6 - upsample_shift;
168*09537850SAkhilesh Sanikop const int base_step = 1 << upsample_shift;
169*09537850SAkhilesh Sanikop // Zone3 never runs out of left_column values.
170*09537850SAkhilesh Sanikop assert((width + height - 1) << upsample_shift > // max_base_y
171*09537850SAkhilesh Sanikop ((ystep * width) >> scale_bits) +
172*09537850SAkhilesh Sanikop base_step * (height - 1)); // left_base_y
173*09537850SAkhilesh Sanikop
174*09537850SAkhilesh Sanikop int left_y = ystep;
175*09537850SAkhilesh Sanikop int x = 0;
176*09537850SAkhilesh Sanikop do {
177*09537850SAkhilesh Sanikop auto* dst = static_cast<Pixel*>(dest);
178*09537850SAkhilesh Sanikop
179*09537850SAkhilesh Sanikop int left_base_y = left_y >> scale_bits;
180*09537850SAkhilesh Sanikop int y = 0;
181*09537850SAkhilesh Sanikop do {
182*09537850SAkhilesh Sanikop const int shift = ((left_y << upsample_shift) & 0x3F) >> 1;
183*09537850SAkhilesh Sanikop const int val =
184*09537850SAkhilesh Sanikop left[left_base_y] * (32 - shift) + left[left_base_y + 1] * shift;
185*09537850SAkhilesh Sanikop dst[x] = RightShiftWithRounding(val, 5);
186*09537850SAkhilesh Sanikop dst += stride;
187*09537850SAkhilesh Sanikop left_base_y += base_step;
188*09537850SAkhilesh Sanikop } while (++y < height);
189*09537850SAkhilesh Sanikop
190*09537850SAkhilesh Sanikop left_y += ystep;
191*09537850SAkhilesh Sanikop } while (++x < width);
192*09537850SAkhilesh Sanikop }
193*09537850SAkhilesh Sanikop
Init8bpp()194*09537850SAkhilesh Sanikop void Init8bpp() {
195*09537850SAkhilesh Sanikop Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
196*09537850SAkhilesh Sanikop assert(dsp != nullptr);
197*09537850SAkhilesh Sanikop #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
198*09537850SAkhilesh Sanikop dsp->directional_intra_predictor_zone1 =
199*09537850SAkhilesh Sanikop DirectionalIntraPredictorZone1_C<uint8_t>;
200*09537850SAkhilesh Sanikop dsp->directional_intra_predictor_zone2 =
201*09537850SAkhilesh Sanikop DirectionalIntraPredictorZone2_C<uint8_t>;
202*09537850SAkhilesh Sanikop dsp->directional_intra_predictor_zone3 =
203*09537850SAkhilesh Sanikop DirectionalIntraPredictorZone3_C<uint8_t>;
204*09537850SAkhilesh Sanikop #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
205*09537850SAkhilesh Sanikop static_cast<void>(dsp);
206*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_DirectionalIntraPredictorZone1
207*09537850SAkhilesh Sanikop dsp->directional_intra_predictor_zone1 =
208*09537850SAkhilesh Sanikop DirectionalIntraPredictorZone1_C<uint8_t>;
209*09537850SAkhilesh Sanikop #endif
210*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_DirectionalIntraPredictorZone2
211*09537850SAkhilesh Sanikop dsp->directional_intra_predictor_zone2 =
212*09537850SAkhilesh Sanikop DirectionalIntraPredictorZone2_C<uint8_t>;
213*09537850SAkhilesh Sanikop #endif
214*09537850SAkhilesh Sanikop #ifndef LIBGAV1_Dsp8bpp_DirectionalIntraPredictorZone3
215*09537850SAkhilesh Sanikop dsp->directional_intra_predictor_zone3 =
216*09537850SAkhilesh Sanikop DirectionalIntraPredictorZone3_C<uint8_t>;
217*09537850SAkhilesh Sanikop #endif
218*09537850SAkhilesh Sanikop #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
219*09537850SAkhilesh Sanikop }
220*09537850SAkhilesh Sanikop
#if LIBGAV1_MAX_BITDEPTH >= 10
// Installs the C directional intra predictors in the 10-bit Dsp table.
// An entry is written exactly once: when LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is
// nonzero, or when the matching LIBGAV1_Dsp10bpp_* macro is not defined
// (presumably that macro signals an optimized replacement; confirm against
// the platform headers).
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);
  // Keeps |dsp| referenced when every assignment below is compiled out.
  static_cast<void>(dsp);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone1)
  dsp->directional_intra_predictor_zone1 =
      DirectionalIntraPredictorZone1_C<uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone2)
  dsp->directional_intra_predictor_zone2 =
      DirectionalIntraPredictorZone2_C<uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_DirectionalIntraPredictorZone3)
  dsp->directional_intra_predictor_zone3 =
      DirectionalIntraPredictorZone3_C<uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
248*09537850SAkhilesh Sanikop
#if LIBGAV1_MAX_BITDEPTH == 12
// Installs the C directional intra predictors in the 12-bit Dsp table.
// An entry is written exactly once: when LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is
// nonzero, or when the matching LIBGAV1_Dsp12bpp_* macro is not defined
// (presumably that macro signals an optimized replacement; confirm against
// the platform headers).
void Init12bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
  assert(dsp != nullptr);
  // Keeps |dsp| referenced when every assignment below is compiled out.
  static_cast<void>(dsp);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_DirectionalIntraPredictorZone1)
  dsp->directional_intra_predictor_zone1 =
      DirectionalIntraPredictorZone1_C<uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_DirectionalIntraPredictorZone2)
  dsp->directional_intra_predictor_zone2 =
      DirectionalIntraPredictorZone2_C<uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_DirectionalIntraPredictorZone3)
  dsp->directional_intra_predictor_zone3 =
      DirectionalIntraPredictorZone3_C<uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH == 12
276*09537850SAkhilesh Sanikop
277*09537850SAkhilesh Sanikop } // namespace
278*09537850SAkhilesh Sanikop
IntraPredDirectionalInit_C()279*09537850SAkhilesh Sanikop void IntraPredDirectionalInit_C() {
280*09537850SAkhilesh Sanikop Init8bpp();
281*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH >= 10
282*09537850SAkhilesh Sanikop Init10bpp();
283*09537850SAkhilesh Sanikop #endif
284*09537850SAkhilesh Sanikop #if LIBGAV1_MAX_BITDEPTH == 12
285*09537850SAkhilesh Sanikop Init12bpp();
286*09537850SAkhilesh Sanikop #endif
287*09537850SAkhilesh Sanikop }
288*09537850SAkhilesh Sanikop
289*09537850SAkhilesh Sanikop } // namespace dsp
290*09537850SAkhilesh Sanikop } // namespace libgav1
291