xref: /aosp_15_r20/external/libgav1/src/dsp/dsp.h (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1*09537850SAkhilesh Sanikop /*
2*09537850SAkhilesh Sanikop  * Copyright 2019 The libgav1 Authors
3*09537850SAkhilesh Sanikop  *
4*09537850SAkhilesh Sanikop  * Licensed under the Apache License, Version 2.0 (the "License");
5*09537850SAkhilesh Sanikop  * you may not use this file except in compliance with the License.
6*09537850SAkhilesh Sanikop  * You may obtain a copy of the License at
7*09537850SAkhilesh Sanikop  *
8*09537850SAkhilesh Sanikop  *      http://www.apache.org/licenses/LICENSE-2.0
9*09537850SAkhilesh Sanikop  *
10*09537850SAkhilesh Sanikop  * Unless required by applicable law or agreed to in writing, software
11*09537850SAkhilesh Sanikop  * distributed under the License is distributed on an "AS IS" BASIS,
12*09537850SAkhilesh Sanikop  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*09537850SAkhilesh Sanikop  * See the License for the specific language governing permissions and
14*09537850SAkhilesh Sanikop  * limitations under the License.
15*09537850SAkhilesh Sanikop  */
16*09537850SAkhilesh Sanikop 
17*09537850SAkhilesh Sanikop #ifndef LIBGAV1_SRC_DSP_DSP_H_
18*09537850SAkhilesh Sanikop #define LIBGAV1_SRC_DSP_DSP_H_
19*09537850SAkhilesh Sanikop 
20*09537850SAkhilesh Sanikop #include <cstddef>
21*09537850SAkhilesh Sanikop #include <cstdint>
22*09537850SAkhilesh Sanikop #include <cstdlib>
23*09537850SAkhilesh Sanikop 
24*09537850SAkhilesh Sanikop #include "src/dsp/common.h"
25*09537850SAkhilesh Sanikop #include "src/dsp/constants.h"
26*09537850SAkhilesh Sanikop #include "src/dsp/film_grain_common.h"
27*09537850SAkhilesh Sanikop #include "src/utils/cpu.h"
28*09537850SAkhilesh Sanikop #include "src/utils/reference_info.h"
29*09537850SAkhilesh Sanikop #include "src/utils/types.h"
30*09537850SAkhilesh Sanikop 
31*09537850SAkhilesh Sanikop namespace libgav1 {
32*09537850SAkhilesh Sanikop namespace dsp {
33*09537850SAkhilesh Sanikop 
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS may be supplied by the build; it falls back
// to 0 when it is not defined.
#ifndef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 0
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
37*09537850SAkhilesh Sanikop 
// Intra predictor kinds. Values are consecutive starting at 0.
// kNumIntraPredictors is the number of entries preceding it and is used as an
// array dimension (see IntraPredictorFuncs).
enum IntraPredictor : uint8_t {
  kIntraPredictorDcFill = 0,
  kIntraPredictorDcTop = 1,
  kIntraPredictorDcLeft = 2,
  kIntraPredictorDc = 3,
  kIntraPredictorVertical = 4,
  kIntraPredictorHorizontal = 5,
  kIntraPredictorPaeth = 6,
  kIntraPredictorSmooth = 7,
  kIntraPredictorSmoothVertical = 8,
  kIntraPredictorSmoothHorizontal = 9,
  kNumIntraPredictors = 10
};
51*09537850SAkhilesh Sanikop 
// The valid 1D transforms. Values are consecutive starting at 0;
// kNumTransform1ds is the number of transforms listed before it.
enum Transform1d : uint8_t {
  // Discrete Cosine Transform.
  kTransform1dDct = 0,
  // Asymmetric Discrete Sine Transform.
  kTransform1dAdst = 1,
  kTransform1dIdentity = 2,
  // Walsh Hadamard Transform.
  kTransform1dWht = 3,
  kNumTransform1ds = 4
};
60*09537850SAkhilesh Sanikop 
// The valid 1D transform sizes. A given transform is not necessarily available
// at every size. Values are consecutive starting at 0; kNumTransform1dSizes is
// the number of sizes listed before it.
enum Transform1dSize : uint8_t {
  kTransform1dSize4 = 0,
  kTransform1dSize8 = 1,
  kTransform1dSize16 = 2,
  kTransform1dSize32 = 3,
  kTransform1dSize64 = 4,
  kNumTransform1dSizes = 5
};
71*09537850SAkhilesh Sanikop 
// Maximum width of the loop filter. Depending on the strength thresholds,
// fewer pixels than this may actually be filtered. Values are consecutive
// starting at 0; kNumLoopFilterSizes is the number of sizes listed before it.
enum LoopFilterSize : uint8_t {
  kLoopFilterSize4 = 0,
  kLoopFilterSize6 = 1,
  kLoopFilterSize8 = 2,
  kLoopFilterSize14 = 3,
  kNumLoopFilterSizes = 4
};
81*09537850SAkhilesh Sanikop 
// Indices for the row (0) and column (1) transforms, e.g. for the final
// dimension of InverseTransformAddFuncs.
enum : uint8_t { kRow, kColumn };
86*09537850SAkhilesh Sanikop 
//------------------------------------------------------------------------------
// ToString()
//
// These functions are meant to be used only in debug logging and within tests.
// They are defined inline to avoid including the strings in the release
// library when logging is disabled; unreferenced functions will not be added to
// any object file in that case.
94*09537850SAkhilesh Sanikop 
ToString(const IntraPredictor predictor)95*09537850SAkhilesh Sanikop inline const char* ToString(const IntraPredictor predictor) {
96*09537850SAkhilesh Sanikop   switch (predictor) {
97*09537850SAkhilesh Sanikop     case kIntraPredictorDcFill:
98*09537850SAkhilesh Sanikop       return "kIntraPredictorDcFill";
99*09537850SAkhilesh Sanikop     case kIntraPredictorDcTop:
100*09537850SAkhilesh Sanikop       return "kIntraPredictorDcTop";
101*09537850SAkhilesh Sanikop     case kIntraPredictorDcLeft:
102*09537850SAkhilesh Sanikop       return "kIntraPredictorDcLeft";
103*09537850SAkhilesh Sanikop     case kIntraPredictorDc:
104*09537850SAkhilesh Sanikop       return "kIntraPredictorDc";
105*09537850SAkhilesh Sanikop     case kIntraPredictorVertical:
106*09537850SAkhilesh Sanikop       return "kIntraPredictorVertical";
107*09537850SAkhilesh Sanikop     case kIntraPredictorHorizontal:
108*09537850SAkhilesh Sanikop       return "kIntraPredictorHorizontal";
109*09537850SAkhilesh Sanikop     case kIntraPredictorPaeth:
110*09537850SAkhilesh Sanikop       return "kIntraPredictorPaeth";
111*09537850SAkhilesh Sanikop     case kIntraPredictorSmooth:
112*09537850SAkhilesh Sanikop       return "kIntraPredictorSmooth";
113*09537850SAkhilesh Sanikop     case kIntraPredictorSmoothVertical:
114*09537850SAkhilesh Sanikop       return "kIntraPredictorSmoothVertical";
115*09537850SAkhilesh Sanikop     case kIntraPredictorSmoothHorizontal:
116*09537850SAkhilesh Sanikop       return "kIntraPredictorSmoothHorizontal";
117*09537850SAkhilesh Sanikop     case kNumIntraPredictors:
118*09537850SAkhilesh Sanikop       return "kNumIntraPredictors";
119*09537850SAkhilesh Sanikop   }
120*09537850SAkhilesh Sanikop   abort();
121*09537850SAkhilesh Sanikop }
122*09537850SAkhilesh Sanikop 
ToString(const Transform1d transform)123*09537850SAkhilesh Sanikop inline const char* ToString(const Transform1d transform) {
124*09537850SAkhilesh Sanikop   switch (transform) {
125*09537850SAkhilesh Sanikop     case kTransform1dDct:
126*09537850SAkhilesh Sanikop       return "kTransform1dDct";
127*09537850SAkhilesh Sanikop     case kTransform1dAdst:
128*09537850SAkhilesh Sanikop       return "kTransform1dAdst";
129*09537850SAkhilesh Sanikop     case kTransform1dIdentity:
130*09537850SAkhilesh Sanikop       return "kTransform1dIdentity";
131*09537850SAkhilesh Sanikop     case kTransform1dWht:
132*09537850SAkhilesh Sanikop       return "kTransform1dWht";
133*09537850SAkhilesh Sanikop     case kNumTransform1ds:
134*09537850SAkhilesh Sanikop       return "kNumTransform1ds";
135*09537850SAkhilesh Sanikop   }
136*09537850SAkhilesh Sanikop   abort();
137*09537850SAkhilesh Sanikop }
138*09537850SAkhilesh Sanikop 
ToString(const Transform1dSize transform_size)139*09537850SAkhilesh Sanikop inline const char* ToString(const Transform1dSize transform_size) {
140*09537850SAkhilesh Sanikop   switch (transform_size) {
141*09537850SAkhilesh Sanikop     case kTransform1dSize4:
142*09537850SAkhilesh Sanikop       return "kTransform1dSize4";
143*09537850SAkhilesh Sanikop     case kTransform1dSize8:
144*09537850SAkhilesh Sanikop       return "kTransform1dSize8";
145*09537850SAkhilesh Sanikop     case kTransform1dSize16:
146*09537850SAkhilesh Sanikop       return "kTransform1dSize16";
147*09537850SAkhilesh Sanikop     case kTransform1dSize32:
148*09537850SAkhilesh Sanikop       return "kTransform1dSize32";
149*09537850SAkhilesh Sanikop     case kTransform1dSize64:
150*09537850SAkhilesh Sanikop       return "kTransform1dSize64";
151*09537850SAkhilesh Sanikop     case kNumTransform1dSizes:
152*09537850SAkhilesh Sanikop       return "kNumTransform1dSizes";
153*09537850SAkhilesh Sanikop   }
154*09537850SAkhilesh Sanikop   abort();
155*09537850SAkhilesh Sanikop }
156*09537850SAkhilesh Sanikop 
ToString(const LoopFilterSize filter_size)157*09537850SAkhilesh Sanikop inline const char* ToString(const LoopFilterSize filter_size) {
158*09537850SAkhilesh Sanikop   switch (filter_size) {
159*09537850SAkhilesh Sanikop     case kLoopFilterSize4:
160*09537850SAkhilesh Sanikop       return "kLoopFilterSize4";
161*09537850SAkhilesh Sanikop     case kLoopFilterSize6:
162*09537850SAkhilesh Sanikop       return "kLoopFilterSize6";
163*09537850SAkhilesh Sanikop     case kLoopFilterSize8:
164*09537850SAkhilesh Sanikop       return "kLoopFilterSize8";
165*09537850SAkhilesh Sanikop     case kLoopFilterSize14:
166*09537850SAkhilesh Sanikop       return "kLoopFilterSize14";
167*09537850SAkhilesh Sanikop     case kNumLoopFilterSizes:
168*09537850SAkhilesh Sanikop       return "kNumLoopFilterSizes";
169*09537850SAkhilesh Sanikop   }
170*09537850SAkhilesh Sanikop   abort();
171*09537850SAkhilesh Sanikop }
172*09537850SAkhilesh Sanikop 
ToString(const LoopFilterType filter_type)173*09537850SAkhilesh Sanikop inline const char* ToString(const LoopFilterType filter_type) {
174*09537850SAkhilesh Sanikop   switch (filter_type) {
175*09537850SAkhilesh Sanikop     case kLoopFilterTypeVertical:
176*09537850SAkhilesh Sanikop       return "kLoopFilterTypeVertical";
177*09537850SAkhilesh Sanikop     case kLoopFilterTypeHorizontal:
178*09537850SAkhilesh Sanikop       return "kLoopFilterTypeHorizontal";
179*09537850SAkhilesh Sanikop     case kNumLoopFilterTypes:
180*09537850SAkhilesh Sanikop       return "kNumLoopFilterTypes";
181*09537850SAkhilesh Sanikop   }
182*09537850SAkhilesh Sanikop   abort();
183*09537850SAkhilesh Sanikop }
184*09537850SAkhilesh Sanikop 
//------------------------------------------------------------------------------
// Intra predictors. Section 7.11.2.
// These require access to one or both of the top row and left column. Some may
// access the top-left (top[-1]), top-right (top[width+N]), bottom-left
// (left[height+N]) or upper-left (left[-1]).
190*09537850SAkhilesh Sanikop 
191*09537850SAkhilesh Sanikop // Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11),
192*09537850SAkhilesh Sanikop // 7.11.2.5, 7.11.2.6.
193*09537850SAkhilesh Sanikop // |dst| is an unaligned pointer to the output block. Pixel size is determined
194*09537850SAkhilesh Sanikop // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
195*09537850SAkhilesh Sanikop // the row above |dst|. |left| is an aligned vector of the column to the left
196*09537850SAkhilesh Sanikop // of |dst|. top-left and bottom-left may be accessed.
197*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
198*09537850SAkhilesh Sanikop using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
199*09537850SAkhilesh Sanikop                                     const void* top, const void* left);
200*09537850SAkhilesh Sanikop using IntraPredictorFuncs =
201*09537850SAkhilesh Sanikop     IntraPredictorFunc[kNumTransformSizes][kNumIntraPredictors];
202*09537850SAkhilesh Sanikop 
// Signature of the zone 1 (0 < angle < 90) directional intra predictor.
// Section 7.11.2.4 (#7).
// Pixel size is determined by bitdepth. top-right is accessed. The pointer
// arguments do not alias one another.
using DirectionalIntraPredictorZone1Func = void (*)(
    void* dst,            // Output block; unaligned.
    ptrdiff_t stride,     // Stride of |dst|, in bytes.
    const void* top,      // Row above |dst|; unaligned.
    int width,            // Block width.
    int height,           // Block height.
    int xstep,            // Scaled starting index to |top|, taken from
                          // kDirectionalIntraPredictorDerivative.
    bool upsampled_top);  // Whether |top| was upsampled per '7.11.2.11. Intra
                          // edge upsample process'; this can occur in cases
                          // with |width| + |height| <= 16.
218*09537850SAkhilesh Sanikop 
// Signature of the zone 2 (90 < angle < 180) directional intra predictor.
// Section 7.11.2.4 (#8).
// Pixel size is determined by bitdepth. top-left and upper-left are accessed,
// up to [-2] in each if |upsampled_top| / |upsampled_left| are set. The
// upsampling referred to below is the one described in '7.11.2.11. Intra edge
// upsample process', which can occur in cases with |width| + |height| <= 16.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone2Func = void (*)(
    void* dst,             // Output block; unaligned.
    ptrdiff_t stride,      // Stride of |dst|, in bytes.
    const void* top,       // Row above |dst|; unaligned.
    const void* left,      // Column to the left of |dst|; aligned vector.
    int width,             // Block width.
    int height,            // Block height.
    int xstep,             // Scaled starting index to |top|, taken from
                           // kDirectionalIntraPredictorDerivative.
    int ystep,             // Scaled starting index to |left|, taken from
                           // kDirectionalIntraPredictorDerivative.
    bool upsampled_top,    // Whether |top| has been upsampled.
    bool upsampled_left);  // Whether |left| has been upsampled.
235*09537850SAkhilesh Sanikop 
// Signature of the zone 3 (180 < angle < 270) directional intra predictor.
// Section 7.11.2.4 (#9).
// Pixel size is determined by bitdepth. bottom-left is accessed. The pointer
// arguments do not alias one another.
using DirectionalIntraPredictorZone3Func = void (*)(
    void* dst,             // Output block; unaligned.
    ptrdiff_t stride,      // Stride of |dst|, in bytes.
    const void* left,      // Column to the left of |dst|; aligned vector.
    int width,             // Block width.
    int height,            // Block height.
    int ystep,             // Scaled starting index to |left|, taken from
                           // kDirectionalIntraPredictorDerivative.
    bool upsampled_left);  // Whether |left| was upsampled per '7.11.2.11.
                           // Intra edge upsample process'; this can occur in
                           // cases with |width| + |height| <= 16.
251*09537850SAkhilesh Sanikop 
252*09537850SAkhilesh Sanikop // Filter intra predictor function signature. Section 7.11.2.3.
253*09537850SAkhilesh Sanikop // |dst| is an unaligned pointer to the output block. Pixel size is determined
254*09537850SAkhilesh Sanikop // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
255*09537850SAkhilesh Sanikop // the row above |dst|. |left| is an aligned vector of the column to the left
256*09537850SAkhilesh Sanikop // of |dst|. |width| and |height| are the size of the block in pixels.
257*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
258*09537850SAkhilesh Sanikop using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
259*09537850SAkhilesh Sanikop                                           const void* top, const void* left,
260*09537850SAkhilesh Sanikop                                           FilterIntraPredictor pred, int width,
261*09537850SAkhilesh Sanikop                                           int height);
262*09537850SAkhilesh Sanikop 
//------------------------------------------------------------------------------
// Chroma from Luma (Cfl) prediction. Section 7.11.5.
265*09537850SAkhilesh Sanikop 
266*09537850SAkhilesh Sanikop // Chroma from Luma (Cfl) intra prediction function signature. |dst| is an
267*09537850SAkhilesh Sanikop // unaligned pointer to the output block. Pixel size is determined by bitdepth
268*09537850SAkhilesh Sanikop // with |stride| given in bytes. |luma| contains subsampled luma pixels with 3
269*09537850SAkhilesh Sanikop // fractional bits of precision. |alpha| is the signed Cfl alpha value for the
270*09537850SAkhilesh Sanikop // appropriate plane.
271*09537850SAkhilesh Sanikop using CflIntraPredictorFunc = void (*)(
272*09537850SAkhilesh Sanikop     void* dst, ptrdiff_t stride,
273*09537850SAkhilesh Sanikop     const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], int alpha);
274*09537850SAkhilesh Sanikop using CflIntraPredictorFuncs = CflIntraPredictorFunc[kNumTransformSizes];
275*09537850SAkhilesh Sanikop 
276*09537850SAkhilesh Sanikop // Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned
277*09537850SAkhilesh Sanikop // pointer to the output block. |src| is an unaligned pointer to the input
278*09537850SAkhilesh Sanikop // block. Pixel size is determined by bitdepth with |stride| given in bytes.
279*09537850SAkhilesh Sanikop using CflSubsamplerFunc =
280*09537850SAkhilesh Sanikop     void (*)(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
281*09537850SAkhilesh Sanikop              int max_luma_width, int max_luma_height, const void* source,
282*09537850SAkhilesh Sanikop              ptrdiff_t stride);
283*09537850SAkhilesh Sanikop using CflSubsamplerFuncs =
284*09537850SAkhilesh Sanikop     CflSubsamplerFunc[kNumTransformSizes][kNumSubsamplingTypes];
285*09537850SAkhilesh Sanikop 
//------------------------------------------------------------------------------
// Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4.
288*09537850SAkhilesh Sanikop 
// Signature of the intra edge filter. Section 7.11.2.12 in the spec.
// Typically index -1 of top_row / left_column has to be filtered as well, so
// the caller can simply pass top_row[-1] or left_column[-1] as |buffer|.
// Pixel size is determined by bitdepth.
using IntraEdgeFilterFunc = void (*)(
    void* buffer,   // The top_row or left_column that needs to be filtered.
    int size,       // Number of pixels to be filtered.
    int strength);  // Filter strength.
296*09537850SAkhilesh Sanikop 
// Signature of the intra edge upsampler. Section 7.11.2.11 in the spec.
// The function needs access to the negative indices -1 and -2 of |buffer|.
// Pixel size is determined by bitdepth.
using IntraEdgeUpsamplerFunc = void (*)(
    void* buffer,  // The top_row or left_column that needs to be upsampled.
    int size);     // Number of pixels to be upsampled; valid values are
                   // 4, 8, 12, 16.
303*09537850SAkhilesh Sanikop 
304*09537850SAkhilesh Sanikop //------------------------------------------------------------------------------
305*09537850SAkhilesh Sanikop // Inverse transform add function signature.
306*09537850SAkhilesh Sanikop //
307*09537850SAkhilesh Sanikop // Steps 2 and 3 of section 7.12.3 (contains the implementation of section
308*09537850SAkhilesh Sanikop // 7.13.3).
309*09537850SAkhilesh Sanikop // Apply the inverse transforms and add the residual to the destination frame
310*09537850SAkhilesh Sanikop // for the transform type and block size |tx_size| starting at position
311*09537850SAkhilesh Sanikop // |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D of Pixel
312*09537850SAkhilesh Sanikop // values. |adjusted_tx_height| is the number of rows to process based on the
313*09537850SAkhilesh Sanikop // non-zero coefficient count in the block. It will be 1 (non-zero coefficient
314*09537850SAkhilesh Sanikop // count == 1), 4 or a multiple of 8 up to 32 or the original transform height,
315*09537850SAkhilesh Sanikop // whichever is less. |src_buffer| is a pointer to an Array2D of Residual
316*09537850SAkhilesh Sanikop // values. On input |src_buffer| contains the dequantized values, on output it
317*09537850SAkhilesh Sanikop // contains the residual.
318*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
319*09537850SAkhilesh Sanikop using InverseTransformAddFunc = void (*)(TransformType tx_type,
320*09537850SAkhilesh Sanikop                                          TransformSize tx_size,
321*09537850SAkhilesh Sanikop                                          int adjusted_tx_height,
322*09537850SAkhilesh Sanikop                                          void* src_buffer, int start_x,
323*09537850SAkhilesh Sanikop                                          int start_y, void* dst_frame);
324*09537850SAkhilesh Sanikop // The final dimension holds row and column transforms indexed with kRow and
325*09537850SAkhilesh Sanikop // kColumn.
326*09537850SAkhilesh Sanikop using InverseTransformAddFuncs =
327*09537850SAkhilesh Sanikop     InverseTransformAddFunc[kNumTransform1ds][kNumTransform1dSizes][2];
328*09537850SAkhilesh Sanikop 
//------------------------------------------------------------------------------
// Post processing.
331*09537850SAkhilesh Sanikop 
332*09537850SAkhilesh Sanikop // Loop filter function signature. Section 7.14.
333*09537850SAkhilesh Sanikop // |dst| is an unaligned pointer to the output block. Pixel size is determined
334*09537850SAkhilesh Sanikop // by bitdepth with |stride| given in bytes.
335*09537850SAkhilesh Sanikop // <threshold param> <spec name> <range>
336*09537850SAkhilesh Sanikop // |outer_thresh|    blimit      [7, 193]
337*09537850SAkhilesh Sanikop // |inner_thresh|    limit       [1, 63]
338*09537850SAkhilesh Sanikop // |hev_thresh|      thresh      [0, 63]
339*09537850SAkhilesh Sanikop // These are scaled by the implementation by 'bitdepth - 8' to produce
340*09537850SAkhilesh Sanikop // the spec variables blimitBd, limitBd and threshBd.
341*09537850SAkhilesh Sanikop // Note these functions are not called when the loop filter level is 0.
342*09537850SAkhilesh Sanikop using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh,
343*09537850SAkhilesh Sanikop                                 int inner_thresh, int hev_thresh);
344*09537850SAkhilesh Sanikop using LoopFilterFuncs =
345*09537850SAkhilesh Sanikop     LoopFilterFunc[kNumLoopFilterSizes][kNumLoopFilterTypes];
346*09537850SAkhilesh Sanikop 
// Signature of the Cdef direction function. Section 7.15.2.
// Pixel size is determined by bitdepth. The pointer arguments do not alias one
// another.
using CdefDirectionFunc = void (*)(
    const void* src,     // Source block.
    ptrdiff_t stride,    // Stride of |src|, in bytes.
    uint8_t* direction,  // Output parameter; must not be nullptr.
    int* variance);      // Output parameter; must not be nullptr.
354*09537850SAkhilesh Sanikop 
// Signature of the Cdef filtering functions. Section 7.15.3.
// The block width is not passed as an argument; it is selected through the
// first index of CdefFilteringFuncs below.
// The pointer arguments do not alias one another.
using CdefFilteringFunc = void (*)(
    const uint16_t* source,   // Input block, padded with kCdefLargeValue if at
                              // a frame border.
    ptrdiff_t source_stride,  // Stride of |source|, in units of uint16_t.
    int block_height,         // Height of the input block.
    int primary_strength,     // Cdef filtering parameter.
    int secondary_strength,   // Cdef filtering parameter.
    int damping,              // Cdef filtering parameter.
    int direction,            // Filtering direction.
    void* dest,               // Output buffer.
    ptrdiff_t dest_stride);   // Stride of |dest|, in bytes.

// First index: block width ([0]: 4, [1]: 8).
// Second index: which strengths are non-zero
//   [0]: both |primary_strength| and |secondary_strength|,
//   [1]: |primary_strength| only,
//   [2]: |secondary_strength| only.
using CdefFilteringFuncs = CdefFilteringFunc[2][3];
374*09537850SAkhilesh Sanikop 
// Signature of the upscaling coefficients function. Section 7.16.
// This is an auxiliary function for SIMD optimizations; it has no
// corresponding C function. Different SIMD versions may produce different
// outputs, so it must be paired with the matching version of SuperResFunc.
using SuperResCoefficientsFunc = void (*)(
    int upscaled_width,      // Width of the output frame.
    int initial_subpixel_x,  // Base offset from which |step| increments.
    int step,                // Number of subpixels to move the kernel for the
                             // next destination pixel.
    void* coefficients);     // Upscale filter used by each pixel in a row.
387*09537850SAkhilesh Sanikop 
// Signature of the upscaling process functions. Section 7.16.
// The pointer arguments do not alias one another.
using SuperResFunc = void (*)(
    const void* coefficients,  // Upscale filter used by each pixel in a row.
                               // Not used by the C function.
    void* source,              // Input frame buffer. It will be line extended.
    ptrdiff_t source_stride,   // Stride of |source|, in pixels.
    int height,                // Height of the block to be processed.
    int downscaled_width,      // Width of the input frame.
    int upscaled_width,        // Width of the output frame.
    int initial_subpixel_x,    // Base offset from which |step| increments.
    int step,                  // Number of subpixels to move the kernel for
                               // the next destination pixel.
    void* dest,                // Output buffer.
    ptrdiff_t dest_stride);    // Stride of |dest|, in pixels.
407*09537850SAkhilesh Sanikop 
408*09537850SAkhilesh Sanikop // Loop restoration function signature. Sections 7.16, 7.17.
409*09537850SAkhilesh Sanikop // |restoration_info| contains loop restoration information, such as filter
410*09537850SAkhilesh Sanikop // type, strength.
411*09537850SAkhilesh Sanikop // |source| is the input frame buffer, which is deblocked and cdef filtered.
412*09537850SAkhilesh Sanikop // |top_border| and |bottom_border| are the top and bottom borders.
413*09537850SAkhilesh Sanikop // |dest| is the output.
414*09537850SAkhilesh Sanikop // |stride| is given in pixels, and shared by |source| and |dest|.
415*09537850SAkhilesh Sanikop // |top_border_stride| and |bottom_border_stride| are given in pixels.
416*09537850SAkhilesh Sanikop // |restoration_buffer| contains buffers required for self guided filter and
417*09537850SAkhilesh Sanikop // wiener filter. They must be initialized before calling.
418*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
419*09537850SAkhilesh Sanikop using LoopRestorationFunc = void (*)(
420*09537850SAkhilesh Sanikop     const RestorationUnitInfo& restoration_info, const void* source,
421*09537850SAkhilesh Sanikop     ptrdiff_t stride, const void* top_border, ptrdiff_t top_border_stride,
422*09537850SAkhilesh Sanikop     const void* bottom_border, ptrdiff_t bottom_border_stride, int width,
423*09537850SAkhilesh Sanikop     int height, RestorationBuffer* restoration_buffer, void* dest);
424*09537850SAkhilesh Sanikop 
425*09537850SAkhilesh Sanikop // Index 0 is Wiener Filter.
426*09537850SAkhilesh Sanikop // Index 1 is Self Guided Restoration Filter.
427*09537850SAkhilesh Sanikop // This can be accessed as LoopRestorationType - 2.
428*09537850SAkhilesh Sanikop using LoopRestorationFuncs = LoopRestorationFunc[2];
429*09537850SAkhilesh Sanikop 
430*09537850SAkhilesh Sanikop // Convolve function signature. Section 7.11.3.4.
431*09537850SAkhilesh Sanikop // This function applies a horizontal filter followed by a vertical filter.
432*09537850SAkhilesh Sanikop // |reference| is the input block (reference frame buffer). |reference_stride|
433*09537850SAkhilesh Sanikop // is the corresponding frame stride.
434*09537850SAkhilesh Sanikop // |vertical_filter_index|/|horizontal_filter_index| is the index to
435*09537850SAkhilesh Sanikop // retrieve the type of filter to be applied for vertical/horizontal direction
436*09537850SAkhilesh Sanikop // from the filter lookup table 'kSubPixelFilters'.
437*09537850SAkhilesh Sanikop // |horizontal_filter_id| and |vertical_filter_id| are the filter ids.
438*09537850SAkhilesh Sanikop // |width| and |height| are width and height of the block to be filtered.
439*09537850SAkhilesh Sanikop // |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in
440*09537850SAkhilesh Sanikop // x/y direction. NOTE(review): neither is a parameter of ConvolveFunc.
441*09537850SAkhilesh Sanikop // |prediction| is the output block (output frame buffer).
442*09537850SAkhilesh Sanikop // Rounding precision is derived from the function being called. For horizontal
443*09537850SAkhilesh Sanikop // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
444*09537850SAkhilesh Sanikop // used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
445*09537850SAkhilesh Sanikop // used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
446*09537850SAkhilesh Sanikop // be used.
447*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
448*09537850SAkhilesh Sanikop using ConvolveFunc = void (*)(const void* reference, ptrdiff_t reference_stride,
449*09537850SAkhilesh Sanikop                               int horizontal_filter_index,
450*09537850SAkhilesh Sanikop                               int vertical_filter_index,
451*09537850SAkhilesh Sanikop                               int horizontal_filter_id, int vertical_filter_id,
452*09537850SAkhilesh Sanikop                               int width, int height, void* prediction,
453*09537850SAkhilesh Sanikop                               ptrdiff_t pred_stride);
454*09537850SAkhilesh Sanikop 
455*09537850SAkhilesh Sanikop // Convolve functions signature. Each points to one convolve function with
456*09537850SAkhilesh Sanikop // a specific setting:
457*09537850SAkhilesh Sanikop // ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter]
458*09537850SAkhilesh Sanikop // [has_horizontal_filter].
459*09537850SAkhilesh Sanikop // If is_compound is false, the prediction is clipped to Pixel.
460*09537850SAkhilesh Sanikop // If is_compound is true, the range of prediction is:
461*09537850SAkhilesh Sanikop //   8bpp:  [-5132,  9212] (int16_t)
462*09537850SAkhilesh Sanikop //   10bpp: [ 3988, 61532] (uint16_t)
463*09537850SAkhilesh Sanikop //   12bpp: [ 3974, 61559] (uint16_t)
464*09537850SAkhilesh Sanikop // See src/dsp/convolve.cc
465*09537850SAkhilesh Sanikop using ConvolveFuncs = ConvolveFunc[2][2][2][2];
466*09537850SAkhilesh Sanikop 
467*09537850SAkhilesh Sanikop // Convolve + scale function signature. Section 7.11.3.4.
468*09537850SAkhilesh Sanikop // This function applies a horizontal filter followed by a vertical filter.
469*09537850SAkhilesh Sanikop // |reference| is the input block (reference frame buffer). |reference_stride|
470*09537850SAkhilesh Sanikop // is the corresponding frame stride.
471*09537850SAkhilesh Sanikop // |vertical_filter_index|/|horizontal_filter_index| is the index to
472*09537850SAkhilesh Sanikop // retrieve the type of filter to be applied for vertical/horizontal direction
473*09537850SAkhilesh Sanikop // from the filter lookup table 'kSubPixelFilters'.
474*09537850SAkhilesh Sanikop // |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024.
475*09537850SAkhilesh Sanikop // |step_x| and |step_y| are step sizes in units of 1/1024 of a pixel.
476*09537850SAkhilesh Sanikop // |width| and |height| are width and height of the block to be filtered.
477*09537850SAkhilesh Sanikop // |ref_last_x| and |ref_last_y| are the last pixel of the reference frame in
478*09537850SAkhilesh Sanikop // x/y direction. NOTE(review): neither is a parameter of ConvolveScaleFunc.
479*09537850SAkhilesh Sanikop // |prediction| is the output block (output frame buffer).
480*09537850SAkhilesh Sanikop // Rounding precision is derived from the function being called. For horizontal
481*09537850SAkhilesh Sanikop // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
482*09537850SAkhilesh Sanikop // used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
483*09537850SAkhilesh Sanikop // used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
484*09537850SAkhilesh Sanikop // be used.
485*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
486*09537850SAkhilesh Sanikop using ConvolveScaleFunc = void (*)(const void* reference,
487*09537850SAkhilesh Sanikop                                    ptrdiff_t reference_stride,
488*09537850SAkhilesh Sanikop                                    int horizontal_filter_index,
489*09537850SAkhilesh Sanikop                                    int vertical_filter_index, int subpixel_x,
490*09537850SAkhilesh Sanikop                                    int subpixel_y, int step_x, int step_y,
491*09537850SAkhilesh Sanikop                                    int width, int height, void* prediction,
492*09537850SAkhilesh Sanikop                                    ptrdiff_t pred_stride);
493*09537850SAkhilesh Sanikop 
494*09537850SAkhilesh Sanikop // Convolve functions signature for scaling version.
495*09537850SAkhilesh Sanikop // 0: single predictor. 1: compound predictor.
496*09537850SAkhilesh Sanikop using ConvolveScaleFuncs = ConvolveScaleFunc[2];
497*09537850SAkhilesh Sanikop 
498*09537850SAkhilesh Sanikop // Weight mask function signature. Section 7.11.3.12.
499*09537850SAkhilesh Sanikop // |prediction_0| is the first input block.
500*09537850SAkhilesh Sanikop // |prediction_1| is the second input block. Both blocks are int16_t* when
501*09537850SAkhilesh Sanikop // bitdepth == 8 and uint16_t* otherwise.
502*09537850SAkhilesh Sanikop // |width| and |height| (not parameters) are the prediction width and height.
503*09537850SAkhilesh Sanikop // The stride for the input buffers is equal to |width|.
504*09537850SAkhilesh Sanikop // The valid range of block size is [8x8, 128x128] for the luma plane.
505*09537850SAkhilesh Sanikop // |mask| is the output buffer. |mask_stride| is the output buffer stride.
506*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
507*09537850SAkhilesh Sanikop using WeightMaskFunc = void (*)(const void* prediction_0,
508*09537850SAkhilesh Sanikop                                 const void* prediction_1, uint8_t* mask,
509*09537850SAkhilesh Sanikop                                 ptrdiff_t mask_stride);
510*09537850SAkhilesh Sanikop 
511*09537850SAkhilesh Sanikop // Weight mask functions signature. The dimensions (in order) are:
512*09537850SAkhilesh Sanikop //   * Width index (4 => 0, 8 => 1, 16 => 2 and so on).
513*09537850SAkhilesh Sanikop //   * Height index (4 => 0, 8 => 1, 16 => 2 and so on).
514*09537850SAkhilesh Sanikop //   * mask_is_inverse.
515*09537850SAkhilesh Sanikop using WeightMaskFuncs = WeightMaskFunc[6][6][2];
516*09537850SAkhilesh Sanikop 
517*09537850SAkhilesh Sanikop // Average blending function signature.
518*09537850SAkhilesh Sanikop // Two predictors are averaged to generate the output.
519*09537850SAkhilesh Sanikop // Input predictor values are int16_t. Output type is uint8_t, with actual
520*09537850SAkhilesh Sanikop // range of Pixel value.
521*09537850SAkhilesh Sanikop // Average blending is in the bottom of Section 7.11.3.1 (COMPOUND_AVERAGE).
522*09537850SAkhilesh Sanikop // |prediction_0| is the first input block.
523*09537850SAkhilesh Sanikop // |prediction_1| is the second input block. Both blocks are int16_t* when
524*09537850SAkhilesh Sanikop // bitdepth == 8 and uint16_t* otherwise.
525*09537850SAkhilesh Sanikop // |width| and |height| are the same for the first and second input blocks.
526*09537850SAkhilesh Sanikop // The stride for the input buffers is equal to |width|.
527*09537850SAkhilesh Sanikop // The valid range of block size is [8x8, 128x128] for the luma plane.
528*09537850SAkhilesh Sanikop // |dest| is the output buffer. |dest_stride| is the output buffer stride.
529*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
530*09537850SAkhilesh Sanikop using AverageBlendFunc = void (*)(const void* prediction_0,
531*09537850SAkhilesh Sanikop                                   const void* prediction_1, int width,
532*09537850SAkhilesh Sanikop                                   int height, void* dest,
533*09537850SAkhilesh Sanikop                                   ptrdiff_t dest_stride);
534*09537850SAkhilesh Sanikop 
535*09537850SAkhilesh Sanikop // Distance weighted blending function signature.
536*09537850SAkhilesh Sanikop // Weights are generated in Section 7.11.3.15.
537*09537850SAkhilesh Sanikop // Weighted blending is in the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE).
538*09537850SAkhilesh Sanikop // This function takes two blocks (inter frame prediction) and produces a
539*09537850SAkhilesh Sanikop // weighted output.
540*09537850SAkhilesh Sanikop // |prediction_0| is the first input block.
541*09537850SAkhilesh Sanikop // |prediction_1| is the second input block. Both blocks are int16_t* when
542*09537850SAkhilesh Sanikop // bitdepth == 8 and uint16_t* otherwise.
543*09537850SAkhilesh Sanikop // |weight_0| is the weight for the first block. It is derived from the relative
544*09537850SAkhilesh Sanikop // distance of the first reference frame and the current frame.
545*09537850SAkhilesh Sanikop // |weight_1| is the weight for the second block. It is derived from the
546*09537850SAkhilesh Sanikop // relative distance of the second reference frame and the current frame.
547*09537850SAkhilesh Sanikop // |width| and |height| are the same for the first and second input blocks.
548*09537850SAkhilesh Sanikop // The stride for the input buffers is equal to |width|.
549*09537850SAkhilesh Sanikop // The valid range of block size is [8x8, 128x128] for the luma plane.
550*09537850SAkhilesh Sanikop // |dest| is the output buffer. |dest_stride| is the output buffer stride.
551*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
552*09537850SAkhilesh Sanikop using DistanceWeightedBlendFunc = void (*)(const void* prediction_0,
553*09537850SAkhilesh Sanikop                                            const void* prediction_1,
554*09537850SAkhilesh Sanikop                                            uint8_t weight_0, uint8_t weight_1,
555*09537850SAkhilesh Sanikop                                            int width, int height, void* dest,
556*09537850SAkhilesh Sanikop                                            ptrdiff_t dest_stride);
557*09537850SAkhilesh Sanikop 
558*09537850SAkhilesh Sanikop // Mask blending function signature. Section 7.11.3.14.
559*09537850SAkhilesh Sanikop // This function takes two blocks and produces a blended output stored into the
560*09537850SAkhilesh Sanikop // output block |dest|. The blending is a weighted average process, controlled
561*09537850SAkhilesh Sanikop // by values of the mask.
562*09537850SAkhilesh Sanikop // |prediction_0| is the first input block. When prediction mode is inter_intra
563*09537850SAkhilesh Sanikop // (or wedge_inter_intra), this refers to the inter frame prediction. It is
564*09537850SAkhilesh Sanikop // int16_t* when bitdepth == 8 and uint16_t* otherwise.
565*09537850SAkhilesh Sanikop // The stride for |prediction_0| is equal to |width|.
566*09537850SAkhilesh Sanikop // |prediction_1| is the second input block. When prediction mode is inter_intra
567*09537850SAkhilesh Sanikop // (or wedge_inter_intra), this refers to the intra frame prediction and uses
568*09537850SAkhilesh Sanikop // Pixel values. It is only used for intra frame prediction when bitdepth >= 10.
569*09537850SAkhilesh Sanikop // It is int16_t* when bitdepth == 8 and uint16_t* otherwise.
570*09537850SAkhilesh Sanikop // |prediction_stride_1| is the stride, given in units of [u]int16_t. When
571*09537850SAkhilesh Sanikop // |is_inter_intra| is false (compound prediction) then |prediction_stride_1| is
572*09537850SAkhilesh Sanikop // equal to |width|.
573*09537850SAkhilesh Sanikop // |mask| is an integer array, whose value indicates the weight of the blending.
574*09537850SAkhilesh Sanikop // |mask_stride| is corresponding stride.
575*09537850SAkhilesh Sanikop // |width|, |height| are the same for both input blocks.
576*09537850SAkhilesh Sanikop // If it's inter_intra (or wedge_inter_intra), the valid range of block size is
577*09537850SAkhilesh Sanikop // [8x8, 32x32], no 4:1/1:4 blocks (Section 5.11.28). Otherwise (including
578*09537850SAkhilesh Sanikop // difference weighted prediction and compound average prediction), the valid
579*09537850SAkhilesh Sanikop // range is [8x8, 128x128].
580*09537850SAkhilesh Sanikop // If there's subsampling, the corresponding width and height are halved for
581*09537850SAkhilesh Sanikop // chroma planes.
582*09537850SAkhilesh Sanikop // |is_inter_intra| stands for the prediction mode. If it is true, one of the
583*09537850SAkhilesh Sanikop // prediction blocks is from intra prediction of current frame. Otherwise, two
584*09537850SAkhilesh Sanikop // prediction blocks are both inter frame predictions.
585*09537850SAkhilesh Sanikop // |is_wedge_inter_intra| indicates if the mask is for the wedge prediction.
586*09537850SAkhilesh Sanikop // |dest| is the output block.
587*09537850SAkhilesh Sanikop // |dest_stride| is the corresponding stride for dest.
588*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
589*09537850SAkhilesh Sanikop using MaskBlendFunc = void (*)(const void* prediction_0,
590*09537850SAkhilesh Sanikop                                const void* prediction_1,
591*09537850SAkhilesh Sanikop                                ptrdiff_t prediction_stride_1,
592*09537850SAkhilesh Sanikop                                const uint8_t* mask, ptrdiff_t mask_stride,
593*09537850SAkhilesh Sanikop                                int width, int height, void* dest,
594*09537850SAkhilesh Sanikop                                ptrdiff_t dest_stride);
595*09537850SAkhilesh Sanikop 
596*09537850SAkhilesh Sanikop // Mask blending functions signature. Each points to one function with
597*09537850SAkhilesh Sanikop // a specific setting:
598*09537850SAkhilesh Sanikop // MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra].
599*09537850SAkhilesh Sanikop using MaskBlendFuncs = MaskBlendFunc[3][2];
600*09537850SAkhilesh Sanikop 
601*09537850SAkhilesh Sanikop // This function is similar to the MaskBlendFunc. It is only used when
602*09537850SAkhilesh Sanikop // |is_inter_intra| is true and |bitdepth| == 8.
603*09537850SAkhilesh Sanikop // |prediction_[01]| are Pixel values (uint8_t).
604*09537850SAkhilesh Sanikop // |prediction_1| is also the output buffer.
605*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
606*09537850SAkhilesh Sanikop using InterIntraMaskBlendFunc8bpp = void (*)(const uint8_t* prediction_0,
607*09537850SAkhilesh Sanikop                                              uint8_t* prediction_1,
608*09537850SAkhilesh Sanikop                                              ptrdiff_t prediction_stride_1,
609*09537850SAkhilesh Sanikop                                              const uint8_t* mask,
610*09537850SAkhilesh Sanikop                                              ptrdiff_t mask_stride, int width,
611*09537850SAkhilesh Sanikop                                              int height);
612*09537850SAkhilesh Sanikop 
613*09537850SAkhilesh Sanikop // InterIntra8bpp mask blending functions signature. When is_wedge_inter_intra
614*09537850SAkhilesh Sanikop // is false, the function at index 0 must be used. Otherwise, the function at
615*09537850SAkhilesh Sanikop // index subsampling_x + subsampling_y must be used.
616*09537850SAkhilesh Sanikop using InterIntraMaskBlendFuncs8bpp = InterIntraMaskBlendFunc8bpp[3];
617*09537850SAkhilesh Sanikop 
618*09537850SAkhilesh Sanikop // Obmc (overlapped block motion compensation) blending function signature.
619*09537850SAkhilesh Sanikop // Section 7.11.3.10.
620*09537850SAkhilesh Sanikop // This function takes two blocks and produces a blended output stored into the
621*09537850SAkhilesh Sanikop // first input block. The blending is a weighted average process, controlled by
622*09537850SAkhilesh Sanikop // values of the mask.
623*09537850SAkhilesh Sanikop // Obmc is not a compound mode. It is different from other compound blending,
624*09537850SAkhilesh Sanikop // in terms of precision. The current block is computed using convolution with
625*09537850SAkhilesh Sanikop // clipping to the range of pixel values. Its above and left blocks are also
626*09537850SAkhilesh Sanikop // clipped. Therefore obmc blending process doesn't need to clip the output.
627*09537850SAkhilesh Sanikop // |prediction| is the first input block, which will be overwritten.
628*09537850SAkhilesh Sanikop // |prediction_stride| is the stride, given in bytes.
629*09537850SAkhilesh Sanikop // |width|, |height| are the same for both input blocks. The range is [4x2,
630*09537850SAkhilesh Sanikop // 32x32] for kObmcDirectionVertical and [2x4, 32x32] for
631*09537850SAkhilesh Sanikop // kObmcDirectionHorizontal, see Section 7.11.3.9.
632*09537850SAkhilesh Sanikop // |obmc_prediction| is the second input block.
633*09537850SAkhilesh Sanikop // |obmc_prediction_stride| is its stride, given in bytes.
634*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
635*09537850SAkhilesh Sanikop using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride,
636*09537850SAkhilesh Sanikop                                int width, int height,
637*09537850SAkhilesh Sanikop                                const void* obmc_prediction,
638*09537850SAkhilesh Sanikop                                ptrdiff_t obmc_prediction_stride);
639*09537850SAkhilesh Sanikop using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections];
640*09537850SAkhilesh Sanikop 
641*09537850SAkhilesh Sanikop // Warp function signature. Section 7.11.3.5.
642*09537850SAkhilesh Sanikop // This function applies warp filtering for each 8x8 block inside the current
643*09537850SAkhilesh Sanikop // coding block. The filtering process is similar to 2d convolve filtering.
644*09537850SAkhilesh Sanikop // The horizontal filter is applied followed by the vertical filter.
645*09537850SAkhilesh Sanikop // The function has to calculate corresponding pixel positions before and
646*09537850SAkhilesh Sanikop // after warping.
647*09537850SAkhilesh Sanikop // |source| is the input reference frame buffer.
648*09537850SAkhilesh Sanikop // |source_stride|, |source_width|, |source_height| are corresponding frame
649*09537850SAkhilesh Sanikop // stride, width, and height. |source_stride| is given in bytes.
650*09537850SAkhilesh Sanikop // |warp_params| is the matrix of warp motion: warp_params[i] = mN.
651*09537850SAkhilesh Sanikop //         [x'     (m2 m3 m0   [x
652*09537850SAkhilesh Sanikop //     z .  y'  =   m4 m5 m1 *  y
653*09537850SAkhilesh Sanikop //          1]      m6 m7 1)    1]
654*09537850SAkhilesh Sanikop // |subsampling_x/y| is the current frame's plane subsampling factor.
655*09537850SAkhilesh Sanikop // |block_start_x| and |block_start_y| are the starting position of the current
656*09537850SAkhilesh Sanikop // coding block.
657*09537850SAkhilesh Sanikop // |block_width| and |block_height| are width and height of the current coding
658*09537850SAkhilesh Sanikop // block. |block_width| and |block_height| are at least 8.
659*09537850SAkhilesh Sanikop // |alpha|, |beta|, |gamma|, |delta| are valid warp parameters. See the
660*09537850SAkhilesh Sanikop // comments in the definition of struct GlobalMotion for the range of their
661*09537850SAkhilesh Sanikop // values.
662*09537850SAkhilesh Sanikop // |dest| is the output buffer of type Pixel. The output values are clipped to
663*09537850SAkhilesh Sanikop // Pixel values.
664*09537850SAkhilesh Sanikop // |dest_stride| is the stride, in units of bytes.
665*09537850SAkhilesh Sanikop // Rounding precision is derived from the function being called. For horizontal
666*09537850SAkhilesh Sanikop // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
667*09537850SAkhilesh Sanikop // used. For vertical filtering kInterRoundBitsVertical &
668*09537850SAkhilesh Sanikop // kInterRoundBitsVertical12bpp will be used.
669*09537850SAkhilesh Sanikop //
670*09537850SAkhilesh Sanikop // NOTE: WarpFunc assumes the source frame has left, right, top, and bottom
671*09537850SAkhilesh Sanikop // borders that extend the frame boundary pixels.
672*09537850SAkhilesh Sanikop // * The left and right borders must be at least 13 pixels wide. In addition,
673*09537850SAkhilesh Sanikop //   Warp_NEON() may read up to 14 bytes after a row in the |source| buffer.
674*09537850SAkhilesh Sanikop //   Therefore, there must be at least one extra padding byte after the right
675*09537850SAkhilesh Sanikop //   border of the last row in the source buffer.
676*09537850SAkhilesh Sanikop // * The top and bottom borders must be at least 13 pixels high.
677*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
678*09537850SAkhilesh Sanikop using WarpFunc = void (*)(const void* source, ptrdiff_t source_stride,
679*09537850SAkhilesh Sanikop                           int source_width, int source_height,
680*09537850SAkhilesh Sanikop                           const int* warp_params, int subsampling_x,
681*09537850SAkhilesh Sanikop                           int subsampling_y, int block_start_x,
682*09537850SAkhilesh Sanikop                           int block_start_y, int block_width, int block_height,
683*09537850SAkhilesh Sanikop                           int16_t alpha, int16_t beta, int16_t gamma,
684*09537850SAkhilesh Sanikop                           int16_t delta, void* dest, ptrdiff_t dest_stride);
685*09537850SAkhilesh Sanikop 
686*09537850SAkhilesh Sanikop // Warp for compound predictions. Section 7.11.3.5.
687*09537850SAkhilesh Sanikop // Similar to WarpFunc, but |dest| is a uint16_t predictor buffer,
688*09537850SAkhilesh Sanikop // |dest_stride| is given in units of uint16_t and |inter_round_bits_vertical|
689*09537850SAkhilesh Sanikop // is always 7 (kInterRoundBitsCompoundVertical).
690*09537850SAkhilesh Sanikop // Rounding precision is derived from the function being called. For horizontal
691*09537850SAkhilesh Sanikop // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
692*09537850SAkhilesh Sanikop // used. For vertical filtering kInterRoundBitsCompoundVertical will be used.
693*09537850SAkhilesh Sanikop using WarpCompoundFunc = WarpFunc;
694*09537850SAkhilesh Sanikop 
695*09537850SAkhilesh Sanikop constexpr int kNumAutoRegressionLags = 4;
696*09537850SAkhilesh Sanikop // Applies an auto-regressive filter to the white noise in |luma_grain_buffer|.
697*09537850SAkhilesh Sanikop // Section 7.18.3.3, second code block
698*09537850SAkhilesh Sanikop // |params| are parameters read from frame header, mainly providing
699*09537850SAkhilesh Sanikop // auto_regression_coeff_y for the filter and auto_regression_shift to right
700*09537850SAkhilesh Sanikop // shift the filter sum by. Note: This method assumes
701*09537850SAkhilesh Sanikop // params.auto_regression_coeff_lag is not 0. Do not call this method if
702*09537850SAkhilesh Sanikop // params.auto_regression_coeff_lag is 0.
703*09537850SAkhilesh Sanikop using LumaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
704*09537850SAkhilesh Sanikop                                         void* luma_grain_buffer);
705*09537850SAkhilesh Sanikop // Function index is auto_regression_coeff_lag - 1.
706*09537850SAkhilesh Sanikop using LumaAutoRegressionFuncs =
707*09537850SAkhilesh Sanikop     LumaAutoRegressionFunc[kNumAutoRegressionLags - 1];
708*09537850SAkhilesh Sanikop 
709*09537850SAkhilesh Sanikop // Applies an auto-regressive filter to the white noise in u_grain and v_grain.
710*09537850SAkhilesh Sanikop // Section 7.18.3.3, third code block
711*09537850SAkhilesh Sanikop // The |luma_grain_buffer| provides samples that are added to the autoregressive
712*09537850SAkhilesh Sanikop // sum when num_y_points > 0.
713*09537850SAkhilesh Sanikop // |u_grain_buffer| and |v_grain_buffer| point to the buffers of chroma noise
714*09537850SAkhilesh Sanikop // that were generated from the stored Gaussian sequence, and are overwritten
715*09537850SAkhilesh Sanikop // with the results of the autoregressive filter. |params| are parameters read
716*09537850SAkhilesh Sanikop // from frame header, mainly providing auto_regression_coeff_u and
717*09537850SAkhilesh Sanikop // auto_regression_coeff_v for each chroma plane's filter, and
718*09537850SAkhilesh Sanikop // auto_regression_shift to right shift the filter sums by.
719*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
720*09537850SAkhilesh Sanikop using ChromaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
721*09537850SAkhilesh Sanikop                                           const void* luma_grain_buffer,
722*09537850SAkhilesh Sanikop                                           int subsampling_x, int subsampling_y,
723*09537850SAkhilesh Sanikop                                           void* u_grain_buffer,
724*09537850SAkhilesh Sanikop                                           void* v_grain_buffer);
725*09537850SAkhilesh Sanikop using ChromaAutoRegressionFuncs =
726*09537850SAkhilesh Sanikop     ChromaAutoRegressionFunc[/*use_luma*/ 2][kNumAutoRegressionLags];
727*09537850SAkhilesh Sanikop 
728*09537850SAkhilesh Sanikop // Build an image-wide "stripe" of grain noise for every 32 rows in the image.
729*09537850SAkhilesh Sanikop // Section 7.18.3.5, first code block.
730*09537850SAkhilesh Sanikop // Each 32x32 luma block is copied at a random offset specified via
731*09537850SAkhilesh Sanikop // |grain_seed| from the grain template produced by autoregression, and the same
732*09537850SAkhilesh Sanikop // is done for chroma grains, subject to subsampling.
733*09537850SAkhilesh Sanikop // |width| and |height| are the dimensions of the overall image.
734*09537850SAkhilesh Sanikop // |noise_stripes_buffer| points to an Array2DView with one row for each stripe.
735*09537850SAkhilesh Sanikop // Because this function treats all planes identically and independently, it is
736*09537850SAkhilesh Sanikop // simplified to take one grain buffer at a time. This means duplicating some
737*09537850SAkhilesh Sanikop // random number generations, but that work can be reduced in other ways.
738*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
739*09537850SAkhilesh Sanikop using ConstructNoiseStripesFunc = void (*)(const void* grain_buffer,
740*09537850SAkhilesh Sanikop                                            int grain_seed, int width,
741*09537850SAkhilesh Sanikop                                            int height, int subsampling_x,
742*09537850SAkhilesh Sanikop                                            int subsampling_y,
743*09537850SAkhilesh Sanikop                                            void* noise_stripes_buffer);
744*09537850SAkhilesh Sanikop using ConstructNoiseStripesFuncs =
745*09537850SAkhilesh Sanikop     ConstructNoiseStripesFunc[/*overlap_flag*/ 2];
746*09537850SAkhilesh Sanikop 
747*09537850SAkhilesh Sanikop // Compute the one or two overlap rows for each stripe copied to the noise
748*09537850SAkhilesh Sanikop // image.
749*09537850SAkhilesh Sanikop // Section 7.18.3.5, second code block. |width| and |height| are the
750*09537850SAkhilesh Sanikop // dimensions of the overall image. |noise_stripes_buffer| points to an
751*09537850SAkhilesh Sanikop // Array2DView with one row for each stripe. |noise_image_buffer| points to an
752*09537850SAkhilesh Sanikop // Array2D containing the allocated plane for this frame. Because this function
753*09537850SAkhilesh Sanikop // treats all planes identically and independently, it is simplified to take one
754*09537850SAkhilesh Sanikop // grain buffer at a time.
755*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
756*09537850SAkhilesh Sanikop using ConstructNoiseImageOverlapFunc =
757*09537850SAkhilesh Sanikop     void (*)(const void* noise_stripes_buffer, int width, int height,
758*09537850SAkhilesh Sanikop              int subsampling_x, int subsampling_y, void* noise_image_buffer);
759*09537850SAkhilesh Sanikop 
760*09537850SAkhilesh Sanikop // Populate a scaling lookup table with interpolated values of a piecewise
761*09537850SAkhilesh Sanikop // linear function where values in |point_value| are mapped to the values in
762*09537850SAkhilesh Sanikop // |point_scaling|.
763*09537850SAkhilesh Sanikop // |num_points| can be between 0 and 15. When 0, the lookup table is set to
764*09537850SAkhilesh Sanikop // zero.
765*09537850SAkhilesh Sanikop // |point_value| and |point_scaling| have |num_points| valid elements.
766*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
767*09537850SAkhilesh Sanikop using InitializeScalingLutFunc = void (*)(int num_points,
768*09537850SAkhilesh Sanikop                                           const uint8_t point_value[],
769*09537850SAkhilesh Sanikop                                           const uint8_t point_scaling[],
770*09537850SAkhilesh Sanikop                                           int16_t* scaling_lut,
771*09537850SAkhilesh Sanikop                                           const int scaling_lut_length);
772*09537850SAkhilesh Sanikop 
773*09537850SAkhilesh Sanikop // Blend noise with image. Section 7.18.3.5, third code block.
774*09537850SAkhilesh Sanikop // |width| is the width of each row, while |height| is how many rows to compute.
775*09537850SAkhilesh Sanikop // |start_height| is an offset for the noise image, to support multithreading.
776*09537850SAkhilesh Sanikop // |min_value| and |max_value| (the spec's max_luma or max_chroma) are computed
777*09537850SAkhilesh Sanikop // by the caller of these functions, according to the code in the spec.
778*09537850SAkhilesh Sanikop // |source_plane_y| and |source_plane_uv| are the plane buffers of the decoded
779*09537850SAkhilesh Sanikop // frame. They are blended with the film grain noise and written to
780*09537850SAkhilesh Sanikop // |dest_plane_y| and |dest_plane_uv| as final output for display.
781*09537850SAkhilesh Sanikop // source_plane_* and dest_plane_* may point to the same buffer, in which case
782*09537850SAkhilesh Sanikop // the film grain noise is added in place.
783*09537850SAkhilesh Sanikop // |scaling_lut_y| and |scaling_lut| represent a piecewise linear mapping from
784*09537850SAkhilesh Sanikop // the frame's raw pixel value, to a scaling factor for the noise sample.
785*09537850SAkhilesh Sanikop // |scaling_shift| is applied as a right shift after scaling, so that scaling
786*09537850SAkhilesh Sanikop // down is possible. It is found in FilmGrainParams, but supplied directly to
787*09537850SAkhilesh Sanikop // BlendNoiseWithImageLumaFunc because it's the only member used.
788*09537850SAkhilesh Sanikop // The dest plane may point to the source plane, depending on the value of
789*09537850SAkhilesh Sanikop // frame_header.show_existing_frame. |noise_image_ptr| and scaling_lut.* do not
790*09537850SAkhilesh Sanikop // alias other arguments.
791*09537850SAkhilesh Sanikop using BlendNoiseWithImageLumaFunc = void (*)(
792*09537850SAkhilesh Sanikop     const void* noise_image_ptr, int min_value, int max_value,
793*09537850SAkhilesh Sanikop     int scaling_shift, int width, int height, int start_height,
794*09537850SAkhilesh Sanikop     const int16_t* scaling_lut_y, const void* source_plane_y,
795*09537850SAkhilesh Sanikop     ptrdiff_t source_stride_y, void* dest_plane_y, ptrdiff_t dest_stride_y);
796*09537850SAkhilesh Sanikop 
// Chroma counterpart of BlendNoiseWithImageLumaFunc.
// Unlike the luma signature, it receives the whole FilmGrainParams as |params|
// (rather than only a scaling shift), the |plane| being processed, and the
// chroma subsampling factors |subsampling_x| and |subsampling_y|.
// Both the luma source plane and the chroma source plane are passed as const
// inputs; |dest_plane_uv| is the only non-const pointer.
797*09537850SAkhilesh Sanikop using BlendNoiseWithImageChromaFunc = void (*)(
798*09537850SAkhilesh Sanikop     Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
799*09537850SAkhilesh Sanikop     int min_value, int max_value, int width, int height, int start_height,
800*09537850SAkhilesh Sanikop     int subsampling_x, int subsampling_y, const int16_t* scaling_lut,
801*09537850SAkhilesh Sanikop     const void* source_plane_y, ptrdiff_t source_stride_y,
802*09537850SAkhilesh Sanikop     const void* source_plane_uv, ptrdiff_t source_stride_uv,
803*09537850SAkhilesh Sanikop     void* dest_plane_uv, ptrdiff_t dest_stride_uv);
804*09537850SAkhilesh Sanikop 
// Two-entry table of chroma blend functions, indexed by
// chroma_scaling_from_luma.
// NOTE(review): presumably index 0 is used when the flag is unset and index 1
// when it is set; confirm against the callers.
805*09537850SAkhilesh Sanikop using BlendNoiseWithImageChromaFuncs =
806*09537850SAkhilesh Sanikop     BlendNoiseWithImageChromaFunc[/*chroma_scaling_from_luma*/ 2];
807*09537850SAkhilesh Sanikop 
808*09537850SAkhilesh Sanikop //------------------------------------------------------------------------------
809*09537850SAkhilesh Sanikop 
// Groups the film grain function pointers and function-pointer tables. An
// instance is embedded in Dsp as the |film_grain| member.
810*09537850SAkhilesh Sanikop struct FilmGrainFuncs {
811*09537850SAkhilesh Sanikop   LumaAutoRegressionFuncs luma_auto_regression;
812*09537850SAkhilesh Sanikop   ChromaAutoRegressionFuncs chroma_auto_regression;
813*09537850SAkhilesh Sanikop   ConstructNoiseStripesFuncs construct_noise_stripes;
814*09537850SAkhilesh Sanikop   ConstructNoiseImageOverlapFunc construct_noise_image_overlap;
815*09537850SAkhilesh Sanikop   InitializeScalingLutFunc initialize_scaling_lut;
816*09537850SAkhilesh Sanikop   BlendNoiseWithImageLumaFunc blend_noise_luma;
  // Indexed by chroma_scaling_from_luma (two entries).
817*09537850SAkhilesh Sanikop   BlendNoiseWithImageChromaFuncs blend_noise_chroma;
818*09537850SAkhilesh Sanikop };
819*09537850SAkhilesh Sanikop 
820*09537850SAkhilesh Sanikop // Motion field projection function signature. Section 7.9.
821*09537850SAkhilesh Sanikop // |reference_info| provides reference information for motion field projection.
// It is passed by const reference, so it is an input only.
822*09537850SAkhilesh Sanikop // |reference_to_current_with_sign| is the precalculated reference frame id
823*09537850SAkhilesh Sanikop // distance from current frame.
824*09537850SAkhilesh Sanikop // |dst_sign| is -1 for LAST_FRAME and LAST2_FRAME, or 0 (1 in spec) for others.
825*09537850SAkhilesh Sanikop // |y8_start| and |y8_end| are the start and end 8x8 rows of the current tile.
826*09537850SAkhilesh Sanikop // |x8_start| and |x8_end| are the start and end 8x8 columns of the current
827*09537850SAkhilesh Sanikop // tile.
828*09537850SAkhilesh Sanikop // |motion_field| is the output which saves the projected motion field
829*09537850SAkhilesh Sanikop // information. It is the only non-const argument.
830*09537850SAkhilesh Sanikop // Note: Only the entry from the 8-bit Dsp table is used as this function is
831*09537850SAkhilesh Sanikop // bitdepth agnostic.
832*09537850SAkhilesh Sanikop using MotionFieldProjectionKernelFunc = void (*)(
833*09537850SAkhilesh Sanikop     const ReferenceInfo& reference_info, int reference_to_current_with_sign,
834*09537850SAkhilesh Sanikop     int dst_sign, int y8_start, int y8_end, int x8_start, int x8_end,
835*09537850SAkhilesh Sanikop     TemporalMotionField* motion_field);
836*09537850SAkhilesh Sanikop 
837*09537850SAkhilesh Sanikop // Compound temporal motion vector projection function signature.
838*09537850SAkhilesh Sanikop // Section 7.9.3 and 7.10.2.10.
839*09537850SAkhilesh Sanikop // |temporal_mvs| is the aligned set of temporal reference motion vectors.
840*09537850SAkhilesh Sanikop // |temporal_reference_offsets| specifies the number of frames covered by the
841*09537850SAkhilesh Sanikop // original motion vector.
842*09537850SAkhilesh Sanikop // |reference_offsets| specifies the number of frames to be covered by the
843*09537850SAkhilesh Sanikop // projected motion vector. It has two entries.
// NOTE(review): presumably one entry per motion vector of the compound pair;
// confirm against the implementations.
844*09537850SAkhilesh Sanikop // |count| is the number of the temporal motion vectors.
845*09537850SAkhilesh Sanikop // |candidate_mvs| is the aligned set of projected motion vectors (output).
846*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
847*09537850SAkhilesh Sanikop // Note: Only the entry from the 8-bit Dsp table is used as this function is
848*09537850SAkhilesh Sanikop // bitdepth agnostic.
849*09537850SAkhilesh Sanikop using MvProjectionCompoundFunc = void (*)(
850*09537850SAkhilesh Sanikop     const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
851*09537850SAkhilesh Sanikop     const int reference_offsets[2], int count,
852*09537850SAkhilesh Sanikop     CompoundMotionVector* candidate_mvs);
853*09537850SAkhilesh Sanikop 
854*09537850SAkhilesh Sanikop // Single temporal motion vector projection function signature.
855*09537850SAkhilesh Sanikop // Section 7.9.3 and 7.10.2.10.
// Single-reference counterpart of MvProjectionCompoundFunc: it takes one
// |reference_offset| and writes MotionVector entries instead of
// CompoundMotionVector entries.
856*09537850SAkhilesh Sanikop // |temporal_mvs| is the aligned set of temporal reference motion vectors.
857*09537850SAkhilesh Sanikop // |temporal_reference_offsets| specifies the number of frames covered by the
858*09537850SAkhilesh Sanikop // original motion vector.
859*09537850SAkhilesh Sanikop // |reference_offset| specifies the number of frames to be covered by the
860*09537850SAkhilesh Sanikop // projected motion vector.
861*09537850SAkhilesh Sanikop // |count| is the number of the temporal motion vectors.
862*09537850SAkhilesh Sanikop // |candidate_mvs| is the aligned set of projected motion vectors (output).
863*09537850SAkhilesh Sanikop // The pointer arguments do not alias one another.
864*09537850SAkhilesh Sanikop // Note: Only the entry from the 8-bit Dsp table is used as this function is
865*09537850SAkhilesh Sanikop // bitdepth agnostic.
866*09537850SAkhilesh Sanikop using MvProjectionSingleFunc = void (*)(
867*09537850SAkhilesh Sanikop     const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
868*09537850SAkhilesh Sanikop     int reference_offset, int count, MotionVector* candidate_mvs);
869*09537850SAkhilesh Sanikop 
// Table of DSP function pointers and function-pointer tables. A const table
// for a given bitdepth is obtained with GetDspTable().
870*09537850SAkhilesh Sanikop struct Dsp {
871*09537850SAkhilesh Sanikop   AverageBlendFunc average_blend;
872*09537850SAkhilesh Sanikop   CdefDirectionFunc cdef_direction;
873*09537850SAkhilesh Sanikop   CdefFilteringFuncs cdef_filters;
874*09537850SAkhilesh Sanikop   CflIntraPredictorFuncs cfl_intra_predictors;
875*09537850SAkhilesh Sanikop   CflSubsamplerFuncs cfl_subsamplers;
876*09537850SAkhilesh Sanikop   ConvolveFuncs convolve;
877*09537850SAkhilesh Sanikop   ConvolveScaleFuncs convolve_scale;
878*09537850SAkhilesh Sanikop   DirectionalIntraPredictorZone1Func directional_intra_predictor_zone1;
879*09537850SAkhilesh Sanikop   DirectionalIntraPredictorZone2Func directional_intra_predictor_zone2;
880*09537850SAkhilesh Sanikop   DirectionalIntraPredictorZone3Func directional_intra_predictor_zone3;
881*09537850SAkhilesh Sanikop   DistanceWeightedBlendFunc distance_weighted_blend;
882*09537850SAkhilesh Sanikop   FilmGrainFuncs film_grain;
883*09537850SAkhilesh Sanikop   FilterIntraPredictorFunc filter_intra_predictor;
884*09537850SAkhilesh Sanikop   InterIntraMaskBlendFuncs8bpp inter_intra_mask_blend_8bpp;
885*09537850SAkhilesh Sanikop   IntraEdgeFilterFunc intra_edge_filter;
886*09537850SAkhilesh Sanikop   IntraEdgeUpsamplerFunc intra_edge_upsampler;
887*09537850SAkhilesh Sanikop   IntraPredictorFuncs intra_predictors;
888*09537850SAkhilesh Sanikop   InverseTransformAddFuncs inverse_transforms;
889*09537850SAkhilesh Sanikop   LoopFilterFuncs loop_filters;
890*09537850SAkhilesh Sanikop   LoopRestorationFuncs loop_restorations;
891*09537850SAkhilesh Sanikop   MaskBlendFuncs mask_blend;
  // The three motion projection members below are bitdepth agnostic; only the
  // entries from the 8-bit Dsp table are used.
892*09537850SAkhilesh Sanikop   MotionFieldProjectionKernelFunc motion_field_projection_kernel;
  // NOTE(review): the meaning of the 3 indices of the two arrays below is not
  // visible in this header; confirm against the code that fills them.
893*09537850SAkhilesh Sanikop   MvProjectionCompoundFunc mv_projection_compound[3];
894*09537850SAkhilesh Sanikop   MvProjectionSingleFunc mv_projection_single[3];
895*09537850SAkhilesh Sanikop   ObmcBlendFuncs obmc_blend;
896*09537850SAkhilesh Sanikop   SuperResCoefficientsFunc super_res_coefficients;
897*09537850SAkhilesh Sanikop   SuperResFunc super_res;
898*09537850SAkhilesh Sanikop   WarpCompoundFunc warp_compound;
899*09537850SAkhilesh Sanikop   WarpFunc warp;
900*09537850SAkhilesh Sanikop   WeightMaskFuncs weight_mask;
901*09537850SAkhilesh Sanikop };
902*09537850SAkhilesh Sanikop 
903*09537850SAkhilesh Sanikop // Initializes function pointers based on build config and runtime
904*09537850SAkhilesh Sanikop // environment. Must be called once before first use. This function is
905*09537850SAkhilesh Sanikop // thread-safe.
// Takes no arguments and returns nothing, so no status is reported to the
// caller.
906*09537850SAkhilesh Sanikop void DspInit();
907*09537850SAkhilesh Sanikop 
908*09537850SAkhilesh Sanikop // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
909*09537850SAkhilesh Sanikop // exist.
// The table is returned as a pointer to const, so callers must check for
// nullptr and cannot modify the entries through it.
// NOTE(review): presumably DspInit() must have run before the returned
// entries are usable; confirm against the implementation.
910*09537850SAkhilesh Sanikop const Dsp* GetDspTable(int bitdepth);
911*09537850SAkhilesh Sanikop 
912*09537850SAkhilesh Sanikop }  // namespace dsp
913*09537850SAkhilesh Sanikop 
914*09537850SAkhilesh Sanikop namespace dsp_internal {
915*09537850SAkhilesh Sanikop 
916*09537850SAkhilesh Sanikop // Visual Studio builds don't have a way to detect SSE4_1. Only exclude the C
917*09537850SAkhilesh Sanikop // functions if /arch:AVX2 is used across all sources.
// The override applies only to MSVC builds that target x86 or x64: the
// compiler check and the architecture check must both hold. Other MSVC
// targets (e.g. ARM) are not affected by the SSE4_1 detection problem, so
// they keep whatever value LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS already has.
918*09537850SAkhilesh Sanikop #if !LIBGAV1_TARGETING_AVX2 && \
919*09537850SAkhilesh Sanikop     (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
920*09537850SAkhilesh Sanikop #undef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
921*09537850SAkhilesh Sanikop #define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 1
922*09537850SAkhilesh Sanikop #endif
923*09537850SAkhilesh Sanikop 
924*09537850SAkhilesh Sanikop // Returns true if a more highly optimized version of |func| is not defined for
925*09537850SAkhilesh Sanikop // the associated bitdepth or if it is forcibly enabled with
926*09537850SAkhilesh Sanikop // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define checked for |func| corresponds
927*09537850SAkhilesh Sanikop // to the LIBGAV1_Dsp<bitdepth>bpp_|func| define in the header file associated
928*09537850SAkhilesh Sanikop // with the module.
929*09537850SAkhilesh Sanikop // |func| is one of:
930*09537850SAkhilesh Sanikop //   - FunctionName, e.g., SelfGuidedFilter.
931*09537850SAkhilesh Sanikop //   - [sub-table-index1][...-indexN] e.g.,
932*09537850SAkhilesh Sanikop //     TransformSize4x4_IntraPredictorDc. The indices correspond to enum values
933*09537850SAkhilesh Sanikop //     used as lookups with leading 'k' removed.
934*09537850SAkhilesh Sanikop //
935*09537850SAkhilesh Sanikop // NEON support is the only extension available for ARM and it is always
936*09537850SAkhilesh Sanikop // required. Because of this restriction DSP_ENABLED_8BPP_NEON(func) is always
937*09537850SAkhilesh Sanikop // true and can be omitted.
//
// Each macro pastes |func| onto the LIBGAV1_Dsp8bpp_ or LIBGAV1_Dsp10bpp_
// prefix with ## and compares the resulting identifier against the
// LIBGAV1_CPU_* constant named in the macro (AVX2 or SSE4_1). The expansion is
// a single parenthesized boolean expression.
938*09537850SAkhilesh Sanikop #define DSP_ENABLED_8BPP_AVX2(func)    \
939*09537850SAkhilesh Sanikop   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
940*09537850SAkhilesh Sanikop    LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_AVX2)
941*09537850SAkhilesh Sanikop #define DSP_ENABLED_10BPP_AVX2(func)   \
942*09537850SAkhilesh Sanikop   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
943*09537850SAkhilesh Sanikop    LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_AVX2)
944*09537850SAkhilesh Sanikop #define DSP_ENABLED_8BPP_SSE4_1(func)  \
945*09537850SAkhilesh Sanikop   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
946*09537850SAkhilesh Sanikop    LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_SSE4_1)
947*09537850SAkhilesh Sanikop #define DSP_ENABLED_10BPP_SSE4_1(func) \
948*09537850SAkhilesh Sanikop   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
949*09537850SAkhilesh Sanikop    LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_SSE4_1)
950*09537850SAkhilesh Sanikop 
951*09537850SAkhilesh Sanikop // Initializes C-only function pointers. Note some entries may be set to
952*09537850SAkhilesh Sanikop // nullptr if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is not defined. This is meant
953*09537850SAkhilesh Sanikop // for use in tests only, it is not thread-safe.
// Declared in namespace dsp_internal rather than dsp. Unlike dsp::DspInit(),
// which is documented as thread-safe, this function must not be called
// concurrently.
954*09537850SAkhilesh Sanikop void DspInit_C();
955*09537850SAkhilesh Sanikop 
956*09537850SAkhilesh Sanikop // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
957*09537850SAkhilesh Sanikop // exist. This version is meant for use by test or dsp/*Init() functions only.
// Unlike dsp::GetDspTable(), the returned pointer is non-const, so the caller
// can assign entries in the table.
958*09537850SAkhilesh Sanikop dsp::Dsp* GetWritableDspTable(int bitdepth);
959*09537850SAkhilesh Sanikop 
960*09537850SAkhilesh Sanikop }  // namespace dsp_internal
961*09537850SAkhilesh Sanikop }  // namespace libgav1
962*09537850SAkhilesh Sanikop 
963*09537850SAkhilesh Sanikop #endif  // LIBGAV1_SRC_DSP_DSP_H_
964