xref: /aosp_15_r20/external/libgav1/src/utils/common.h (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1*09537850SAkhilesh Sanikop /*
2*09537850SAkhilesh Sanikop  * Copyright 2019 The libgav1 Authors
3*09537850SAkhilesh Sanikop  *
4*09537850SAkhilesh Sanikop  * Licensed under the Apache License, Version 2.0 (the "License");
5*09537850SAkhilesh Sanikop  * you may not use this file except in compliance with the License.
6*09537850SAkhilesh Sanikop  * You may obtain a copy of the License at
7*09537850SAkhilesh Sanikop  *
8*09537850SAkhilesh Sanikop  *      http://www.apache.org/licenses/LICENSE-2.0
9*09537850SAkhilesh Sanikop  *
10*09537850SAkhilesh Sanikop  * Unless required by applicable law or agreed to in writing, software
11*09537850SAkhilesh Sanikop  * distributed under the License is distributed on an "AS IS" BASIS,
12*09537850SAkhilesh Sanikop  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*09537850SAkhilesh Sanikop  * See the License for the specific language governing permissions and
14*09537850SAkhilesh Sanikop  * limitations under the License.
15*09537850SAkhilesh Sanikop  */
16*09537850SAkhilesh Sanikop 
17*09537850SAkhilesh Sanikop #ifndef LIBGAV1_SRC_UTILS_COMMON_H_
18*09537850SAkhilesh Sanikop #define LIBGAV1_SRC_UTILS_COMMON_H_
19*09537850SAkhilesh Sanikop 
20*09537850SAkhilesh Sanikop #if defined(_MSC_VER)
21*09537850SAkhilesh Sanikop #include <intrin.h>
22*09537850SAkhilesh Sanikop #pragma intrinsic(_BitScanForward)
23*09537850SAkhilesh Sanikop #pragma intrinsic(_BitScanReverse)
24*09537850SAkhilesh Sanikop #if defined(_M_X64) || defined(_M_ARM64)
25*09537850SAkhilesh Sanikop #pragma intrinsic(_BitScanReverse64)
26*09537850SAkhilesh Sanikop #define HAVE_BITSCANREVERSE64
27*09537850SAkhilesh Sanikop #endif  // defined(_M_X64) || defined(_M_ARM64)
28*09537850SAkhilesh Sanikop #endif  // defined(_MSC_VER)
29*09537850SAkhilesh Sanikop 
30*09537850SAkhilesh Sanikop #include <algorithm>
31*09537850SAkhilesh Sanikop #include <cassert>
32*09537850SAkhilesh Sanikop #include <cstddef>
33*09537850SAkhilesh Sanikop #include <cstdint>
34*09537850SAkhilesh Sanikop #include <cstdlib>
35*09537850SAkhilesh Sanikop #include <cstring>
36*09537850SAkhilesh Sanikop #include <type_traits>
37*09537850SAkhilesh Sanikop 
38*09537850SAkhilesh Sanikop #include "src/utils/bit_mask_set.h"
39*09537850SAkhilesh Sanikop #include "src/utils/constants.h"
40*09537850SAkhilesh Sanikop #include "src/utils/memory.h"
41*09537850SAkhilesh Sanikop #include "src/utils/types.h"
42*09537850SAkhilesh Sanikop 
43*09537850SAkhilesh Sanikop namespace libgav1 {
44*09537850SAkhilesh Sanikop 
45*09537850SAkhilesh Sanikop // LIBGAV1_RESTRICT
46*09537850SAkhilesh Sanikop // Declares a pointer with the restrict type qualifier if available.
47*09537850SAkhilesh Sanikop // This allows code to hint to the compiler that only this pointer references a
48*09537850SAkhilesh Sanikop // particular object or memory region within the scope of the block in which it
49*09537850SAkhilesh Sanikop // is declared. This may allow for improved optimizations due to the lack of
50*09537850SAkhilesh Sanikop // pointer aliasing. See also:
51*09537850SAkhilesh Sanikop // https://en.cppreference.com/w/c/language/restrict
52*09537850SAkhilesh Sanikop // Note a template alias is not used for compatibility with older compilers
53*09537850SAkhilesh Sanikop // (e.g., gcc < 10) that do not expand the type when instantiating a template
54*09537850SAkhilesh Sanikop // function, either explicitly or in an assignment to a function pointer as is
55*09537850SAkhilesh Sanikop // done within the dsp code. RestrictPtr<T>::type is an alternative to this,
56*09537850SAkhilesh Sanikop // similar to std::add_const, but for conciseness the macro is preferred.
57*09537850SAkhilesh Sanikop #ifdef __GNUC__
58*09537850SAkhilesh Sanikop #define LIBGAV1_RESTRICT __restrict__
59*09537850SAkhilesh Sanikop #elif defined(_MSC_VER)
60*09537850SAkhilesh Sanikop #define LIBGAV1_RESTRICT __restrict
61*09537850SAkhilesh Sanikop #else
62*09537850SAkhilesh Sanikop #define LIBGAV1_RESTRICT
63*09537850SAkhilesh Sanikop #endif
64*09537850SAkhilesh Sanikop 
65*09537850SAkhilesh Sanikop // Aligns |value| to the desired |alignment|. |alignment| must be a power of 2.
66*09537850SAkhilesh Sanikop template <typename T>
Align(T value,T alignment)67*09537850SAkhilesh Sanikop inline T Align(T value, T alignment) {
68*09537850SAkhilesh Sanikop   assert(alignment != 0);
69*09537850SAkhilesh Sanikop   const T alignment_mask = alignment - 1;
70*09537850SAkhilesh Sanikop   return (value + alignment_mask) & ~alignment_mask;
71*09537850SAkhilesh Sanikop }
72*09537850SAkhilesh Sanikop 
73*09537850SAkhilesh Sanikop // Aligns |addr| to the desired |alignment|. |alignment| must be a power of 2.
AlignAddr(uint8_t * const addr,const uintptr_t alignment)74*09537850SAkhilesh Sanikop inline uint8_t* AlignAddr(uint8_t* const addr, const uintptr_t alignment) {
75*09537850SAkhilesh Sanikop   const auto value = reinterpret_cast<uintptr_t>(addr);
76*09537850SAkhilesh Sanikop   return reinterpret_cast<uint8_t*>(Align(value, alignment));
77*09537850SAkhilesh Sanikop }
78*09537850SAkhilesh Sanikop 
Clip3(int32_t value,int32_t low,int32_t high)79*09537850SAkhilesh Sanikop inline int32_t Clip3(int32_t value, int32_t low, int32_t high) {
80*09537850SAkhilesh Sanikop   return value < low ? low : (value > high ? high : value);
81*09537850SAkhilesh Sanikop }
82*09537850SAkhilesh Sanikop 
83*09537850SAkhilesh Sanikop template <typename Pixel>
ExtendLine(void * const line_start,const int width,const int left,const int right)84*09537850SAkhilesh Sanikop void ExtendLine(void* const line_start, const int width, const int left,
85*09537850SAkhilesh Sanikop                 const int right) {
86*09537850SAkhilesh Sanikop   auto* const start = static_cast<Pixel*>(line_start);
87*09537850SAkhilesh Sanikop   const Pixel* src = start;
88*09537850SAkhilesh Sanikop   Pixel* dst = start - left;
89*09537850SAkhilesh Sanikop   // Copy to left and right borders.
90*09537850SAkhilesh Sanikop   Memset(dst, src[0], left);
91*09537850SAkhilesh Sanikop   Memset(dst + left + width, src[width - 1], right);
92*09537850SAkhilesh Sanikop }
93*09537850SAkhilesh Sanikop 
94*09537850SAkhilesh Sanikop // The following 2 templates set a block of data with uncontiguous memory to
95*09537850SAkhilesh Sanikop // |value|. The compilers usually generate several branches to handle different
96*09537850SAkhilesh Sanikop // cases of |columns| when inlining memset() and std::fill(), and these branches
97*09537850SAkhilesh Sanikop // are unfortunately within the loop of |rows|. So calling these templates
98*09537850SAkhilesh Sanikop // directly could be inefficient. It is recommended to specialize common cases
99*09537850SAkhilesh Sanikop // of |columns|, such as 1, 2, 4, 8, 16 and 32, etc. in advance before
100*09537850SAkhilesh Sanikop // processing the generic case of |columns|. The code size may be larger, but
101*09537850SAkhilesh Sanikop // there would be big speed gains.
102*09537850SAkhilesh Sanikop // Call template MemSetBlock<> when sizeof(|T|) is 1.
103*09537850SAkhilesh Sanikop // Call template SetBlock<> when sizeof(|T|) is larger than 1.
104*09537850SAkhilesh Sanikop template <typename T>
MemSetBlock(int rows,int columns,T value,T * dst,ptrdiff_t stride)105*09537850SAkhilesh Sanikop void MemSetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
106*09537850SAkhilesh Sanikop   static_assert(sizeof(T) == 1, "");
107*09537850SAkhilesh Sanikop   do {
108*09537850SAkhilesh Sanikop     memset(dst, value, columns);
109*09537850SAkhilesh Sanikop     dst += stride;
110*09537850SAkhilesh Sanikop   } while (--rows != 0);
111*09537850SAkhilesh Sanikop }
112*09537850SAkhilesh Sanikop 
113*09537850SAkhilesh Sanikop template <typename T>
SetBlock(int rows,int columns,T value,T * dst,ptrdiff_t stride)114*09537850SAkhilesh Sanikop void SetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
115*09537850SAkhilesh Sanikop   do {
116*09537850SAkhilesh Sanikop     std::fill(dst, dst + columns, value);
117*09537850SAkhilesh Sanikop     dst += stride;
118*09537850SAkhilesh Sanikop   } while (--rows != 0);
119*09537850SAkhilesh Sanikop }
120*09537850SAkhilesh Sanikop 
121*09537850SAkhilesh Sanikop #if defined(__GNUC__)
122*09537850SAkhilesh Sanikop 
CountLeadingZeros(uint32_t n)123*09537850SAkhilesh Sanikop inline int CountLeadingZeros(uint32_t n) {
124*09537850SAkhilesh Sanikop   assert(n != 0);
125*09537850SAkhilesh Sanikop   return __builtin_clz(n);
126*09537850SAkhilesh Sanikop }
127*09537850SAkhilesh Sanikop 
CountLeadingZeros(uint64_t n)128*09537850SAkhilesh Sanikop inline int CountLeadingZeros(uint64_t n) {
129*09537850SAkhilesh Sanikop   assert(n != 0);
130*09537850SAkhilesh Sanikop   return __builtin_clzll(n);
131*09537850SAkhilesh Sanikop }
132*09537850SAkhilesh Sanikop 
CountTrailingZeros(uint32_t n)133*09537850SAkhilesh Sanikop inline int CountTrailingZeros(uint32_t n) {
134*09537850SAkhilesh Sanikop   assert(n != 0);
135*09537850SAkhilesh Sanikop   return __builtin_ctz(n);
136*09537850SAkhilesh Sanikop }
137*09537850SAkhilesh Sanikop 
138*09537850SAkhilesh Sanikop #elif defined(_MSC_VER)
139*09537850SAkhilesh Sanikop 
CountLeadingZeros(uint32_t n)140*09537850SAkhilesh Sanikop inline int CountLeadingZeros(uint32_t n) {
141*09537850SAkhilesh Sanikop   assert(n != 0);
142*09537850SAkhilesh Sanikop   unsigned long first_set_bit;  // NOLINT(runtime/int)
143*09537850SAkhilesh Sanikop   const unsigned char bit_set = _BitScanReverse(&first_set_bit, n);
144*09537850SAkhilesh Sanikop   assert(bit_set != 0);
145*09537850SAkhilesh Sanikop   static_cast<void>(bit_set);
146*09537850SAkhilesh Sanikop   return 31 ^ static_cast<int>(first_set_bit);
147*09537850SAkhilesh Sanikop }
148*09537850SAkhilesh Sanikop 
CountLeadingZeros(uint64_t n)149*09537850SAkhilesh Sanikop inline int CountLeadingZeros(uint64_t n) {
150*09537850SAkhilesh Sanikop   assert(n != 0);
151*09537850SAkhilesh Sanikop   unsigned long first_set_bit;  // NOLINT(runtime/int)
152*09537850SAkhilesh Sanikop #if defined(HAVE_BITSCANREVERSE64)
153*09537850SAkhilesh Sanikop   const unsigned char bit_set =
154*09537850SAkhilesh Sanikop       _BitScanReverse64(&first_set_bit, static_cast<unsigned __int64>(n));
155*09537850SAkhilesh Sanikop #else   // !defined(HAVE_BITSCANREVERSE64)
156*09537850SAkhilesh Sanikop   const auto n_hi = static_cast<unsigned long>(n >> 32);  // NOLINT(runtime/int)
157*09537850SAkhilesh Sanikop   if (n_hi != 0) {
158*09537850SAkhilesh Sanikop     const unsigned char bit_set = _BitScanReverse(&first_set_bit, n_hi);
159*09537850SAkhilesh Sanikop     assert(bit_set != 0);
160*09537850SAkhilesh Sanikop     static_cast<void>(bit_set);
161*09537850SAkhilesh Sanikop     return 31 ^ static_cast<int>(first_set_bit);
162*09537850SAkhilesh Sanikop   }
163*09537850SAkhilesh Sanikop   const unsigned char bit_set = _BitScanReverse(
164*09537850SAkhilesh Sanikop       &first_set_bit, static_cast<unsigned long>(n));  // NOLINT(runtime/int)
165*09537850SAkhilesh Sanikop #endif  // defined(HAVE_BITSCANREVERSE64)
166*09537850SAkhilesh Sanikop   assert(bit_set != 0);
167*09537850SAkhilesh Sanikop   static_cast<void>(bit_set);
168*09537850SAkhilesh Sanikop   return 63 ^ static_cast<int>(first_set_bit);
169*09537850SAkhilesh Sanikop }
170*09537850SAkhilesh Sanikop 
171*09537850SAkhilesh Sanikop #undef HAVE_BITSCANREVERSE64
172*09537850SAkhilesh Sanikop 
CountTrailingZeros(uint32_t n)173*09537850SAkhilesh Sanikop inline int CountTrailingZeros(uint32_t n) {
174*09537850SAkhilesh Sanikop   assert(n != 0);
175*09537850SAkhilesh Sanikop   unsigned long first_set_bit;  // NOLINT(runtime/int)
176*09537850SAkhilesh Sanikop   const unsigned char bit_set = _BitScanForward(&first_set_bit, n);
177*09537850SAkhilesh Sanikop   assert(bit_set != 0);
178*09537850SAkhilesh Sanikop   static_cast<void>(bit_set);
179*09537850SAkhilesh Sanikop   return static_cast<int>(first_set_bit);
180*09537850SAkhilesh Sanikop }
181*09537850SAkhilesh Sanikop 
182*09537850SAkhilesh Sanikop #else  // !defined(__GNUC__) && !defined(_MSC_VER)
183*09537850SAkhilesh Sanikop 
184*09537850SAkhilesh Sanikop template <const int kMSB, typename T>
CountLeadingZeros(T n)185*09537850SAkhilesh Sanikop inline int CountLeadingZeros(T n) {
186*09537850SAkhilesh Sanikop   assert(n != 0);
187*09537850SAkhilesh Sanikop   const T msb = T{1} << kMSB;
188*09537850SAkhilesh Sanikop   int count = 0;
189*09537850SAkhilesh Sanikop   while ((n & msb) == 0) {
190*09537850SAkhilesh Sanikop     ++count;
191*09537850SAkhilesh Sanikop     n <<= 1;
192*09537850SAkhilesh Sanikop   }
193*09537850SAkhilesh Sanikop   return count;
194*09537850SAkhilesh Sanikop }
195*09537850SAkhilesh Sanikop 
CountLeadingZeros(uint32_t n)196*09537850SAkhilesh Sanikop inline int CountLeadingZeros(uint32_t n) { return CountLeadingZeros<31>(n); }
197*09537850SAkhilesh Sanikop 
CountLeadingZeros(uint64_t n)198*09537850SAkhilesh Sanikop inline int CountLeadingZeros(uint64_t n) { return CountLeadingZeros<63>(n); }
199*09537850SAkhilesh Sanikop 
200*09537850SAkhilesh Sanikop // This is the algorithm on the left in Figure 5-23, Hacker's Delight, Second
201*09537850SAkhilesh Sanikop // Edition, page 109. The book says:
202*09537850SAkhilesh Sanikop //   If the number of trailing 0's is expected to be small or large, then the
203*09537850SAkhilesh Sanikop //   simple loops shown in Figure 5-23 are quite fast.
CountTrailingZeros(uint32_t n)204*09537850SAkhilesh Sanikop inline int CountTrailingZeros(uint32_t n) {
205*09537850SAkhilesh Sanikop   assert(n != 0);
206*09537850SAkhilesh Sanikop   // Create a word with 1's at the positions of the trailing 0's in |n|, and
207*09537850SAkhilesh Sanikop   // 0's elsewhere (e.g., 01011000 => 00000111).
208*09537850SAkhilesh Sanikop   n = ~n & (n - 1);
209*09537850SAkhilesh Sanikop   int count = 0;
210*09537850SAkhilesh Sanikop   while (n != 0) {
211*09537850SAkhilesh Sanikop     ++count;
212*09537850SAkhilesh Sanikop     n >>= 1;
213*09537850SAkhilesh Sanikop   }
214*09537850SAkhilesh Sanikop   return count;
215*09537850SAkhilesh Sanikop }
216*09537850SAkhilesh Sanikop 
217*09537850SAkhilesh Sanikop #endif  // defined(__GNUC__)
218*09537850SAkhilesh Sanikop 
FloorLog2(int32_t n)219*09537850SAkhilesh Sanikop inline int FloorLog2(int32_t n) {
220*09537850SAkhilesh Sanikop   assert(n > 0);
221*09537850SAkhilesh Sanikop   return 31 ^ CountLeadingZeros(static_cast<uint32_t>(n));
222*09537850SAkhilesh Sanikop }
223*09537850SAkhilesh Sanikop 
FloorLog2(uint32_t n)224*09537850SAkhilesh Sanikop inline int FloorLog2(uint32_t n) {
225*09537850SAkhilesh Sanikop   assert(n > 0);
226*09537850SAkhilesh Sanikop   return 31 ^ CountLeadingZeros(n);
227*09537850SAkhilesh Sanikop }
228*09537850SAkhilesh Sanikop 
FloorLog2(int64_t n)229*09537850SAkhilesh Sanikop inline int FloorLog2(int64_t n) {
230*09537850SAkhilesh Sanikop   assert(n > 0);
231*09537850SAkhilesh Sanikop   return 63 ^ CountLeadingZeros(static_cast<uint64_t>(n));
232*09537850SAkhilesh Sanikop }
233*09537850SAkhilesh Sanikop 
FloorLog2(uint64_t n)234*09537850SAkhilesh Sanikop inline int FloorLog2(uint64_t n) {
235*09537850SAkhilesh Sanikop   assert(n > 0);
236*09537850SAkhilesh Sanikop   return 63 ^ CountLeadingZeros(n);
237*09537850SAkhilesh Sanikop }
238*09537850SAkhilesh Sanikop 
CeilLog2(unsigned int n)239*09537850SAkhilesh Sanikop inline int CeilLog2(unsigned int n) {
240*09537850SAkhilesh Sanikop   // The expression FloorLog2(n - 1) + 1 is undefined not only for n == 0 but
241*09537850SAkhilesh Sanikop   // also for n == 1, so this expression must be guarded by the n < 2 test. An
242*09537850SAkhilesh Sanikop   // alternative implementation is:
243*09537850SAkhilesh Sanikop   // return (n == 0) ? 0 : FloorLog2(n) + static_cast<int>((n & (n - 1)) != 0);
244*09537850SAkhilesh Sanikop   return (n < 2) ? 0 : FloorLog2(n - 1) + 1;
245*09537850SAkhilesh Sanikop }
246*09537850SAkhilesh Sanikop 
RightShiftWithCeiling(int value,int bits)247*09537850SAkhilesh Sanikop inline int RightShiftWithCeiling(int value, int bits) {
248*09537850SAkhilesh Sanikop   assert(bits > 0);
249*09537850SAkhilesh Sanikop   return (value + (1 << bits) - 1) >> bits;
250*09537850SAkhilesh Sanikop }
251*09537850SAkhilesh Sanikop 
RightShiftWithRounding(int32_t value,int bits)252*09537850SAkhilesh Sanikop inline int32_t RightShiftWithRounding(int32_t value, int bits) {
253*09537850SAkhilesh Sanikop   assert(bits >= 0);
254*09537850SAkhilesh Sanikop   return (value + ((1 << bits) >> 1)) >> bits;
255*09537850SAkhilesh Sanikop }
256*09537850SAkhilesh Sanikop 
RightShiftWithRounding(uint32_t value,int bits)257*09537850SAkhilesh Sanikop inline uint32_t RightShiftWithRounding(uint32_t value, int bits) {
258*09537850SAkhilesh Sanikop   assert(bits >= 0);
259*09537850SAkhilesh Sanikop   return (value + ((1 << bits) >> 1)) >> bits;
260*09537850SAkhilesh Sanikop }
261*09537850SAkhilesh Sanikop 
262*09537850SAkhilesh Sanikop // This variant is used when |value| can exceed 32 bits. Although the final
263*09537850SAkhilesh Sanikop // result must always fit into int32_t.
RightShiftWithRounding(int64_t value,int bits)264*09537850SAkhilesh Sanikop inline int32_t RightShiftWithRounding(int64_t value, int bits) {
265*09537850SAkhilesh Sanikop   assert(bits >= 0);
266*09537850SAkhilesh Sanikop   return static_cast<int32_t>((value + ((int64_t{1} << bits) >> 1)) >> bits);
267*09537850SAkhilesh Sanikop }
268*09537850SAkhilesh Sanikop 
RightShiftWithRoundingSigned(int32_t value,int bits)269*09537850SAkhilesh Sanikop inline int32_t RightShiftWithRoundingSigned(int32_t value, int bits) {
270*09537850SAkhilesh Sanikop   assert(bits > 0);
271*09537850SAkhilesh Sanikop   // The next line is equivalent to:
272*09537850SAkhilesh Sanikop   // return (value >= 0) ? RightShiftWithRounding(value, bits)
273*09537850SAkhilesh Sanikop   //                     : -RightShiftWithRounding(-value, bits);
274*09537850SAkhilesh Sanikop   return RightShiftWithRounding(value + (value >> 31), bits);
275*09537850SAkhilesh Sanikop }
276*09537850SAkhilesh Sanikop 
277*09537850SAkhilesh Sanikop // This variant is used when |value| can exceed 32 bits. Although the final
278*09537850SAkhilesh Sanikop // result must always fit into int32_t.
RightShiftWithRoundingSigned(int64_t value,int bits)279*09537850SAkhilesh Sanikop inline int32_t RightShiftWithRoundingSigned(int64_t value, int bits) {
280*09537850SAkhilesh Sanikop   assert(bits > 0);
281*09537850SAkhilesh Sanikop   // The next line is equivalent to:
282*09537850SAkhilesh Sanikop   // return (value >= 0) ? RightShiftWithRounding(value, bits)
283*09537850SAkhilesh Sanikop   //                     : -RightShiftWithRounding(-value, bits);
284*09537850SAkhilesh Sanikop   return RightShiftWithRounding(value + (value >> 63), bits);
285*09537850SAkhilesh Sanikop }
286*09537850SAkhilesh Sanikop 
DivideBy2(int n)287*09537850SAkhilesh Sanikop constexpr int DivideBy2(int n) { return n >> 1; }
DivideBy4(int n)288*09537850SAkhilesh Sanikop constexpr int DivideBy4(int n) { return n >> 2; }
DivideBy8(int n)289*09537850SAkhilesh Sanikop constexpr int DivideBy8(int n) { return n >> 3; }
DivideBy16(int n)290*09537850SAkhilesh Sanikop constexpr int DivideBy16(int n) { return n >> 4; }
DivideBy32(int n)291*09537850SAkhilesh Sanikop constexpr int DivideBy32(int n) { return n >> 5; }
DivideBy64(int n)292*09537850SAkhilesh Sanikop constexpr int DivideBy64(int n) { return n >> 6; }
DivideBy128(int n)293*09537850SAkhilesh Sanikop constexpr int DivideBy128(int n) { return n >> 7; }
294*09537850SAkhilesh Sanikop 
295*09537850SAkhilesh Sanikop // Convert |value| to unsigned before shifting to avoid undefined behavior with
296*09537850SAkhilesh Sanikop // negative values.
LeftShift(int value,int bits)297*09537850SAkhilesh Sanikop inline int LeftShift(int value, int bits) {
298*09537850SAkhilesh Sanikop   assert(bits >= 0);
299*09537850SAkhilesh Sanikop   assert(value >= -(int64_t{1} << (31 - bits)));
300*09537850SAkhilesh Sanikop   assert(value <= (int64_t{1} << (31 - bits)) - ((bits == 0) ? 1 : 0));
301*09537850SAkhilesh Sanikop   return static_cast<int>(static_cast<uint32_t>(value) << bits);
302*09537850SAkhilesh Sanikop }
MultiplyBy2(int n)303*09537850SAkhilesh Sanikop inline int MultiplyBy2(int n) { return LeftShift(n, 1); }
MultiplyBy4(int n)304*09537850SAkhilesh Sanikop inline int MultiplyBy4(int n) { return LeftShift(n, 2); }
MultiplyBy8(int n)305*09537850SAkhilesh Sanikop inline int MultiplyBy8(int n) { return LeftShift(n, 3); }
MultiplyBy16(int n)306*09537850SAkhilesh Sanikop inline int MultiplyBy16(int n) { return LeftShift(n, 4); }
MultiplyBy32(int n)307*09537850SAkhilesh Sanikop inline int MultiplyBy32(int n) { return LeftShift(n, 5); }
MultiplyBy64(int n)308*09537850SAkhilesh Sanikop inline int MultiplyBy64(int n) { return LeftShift(n, 6); }
309*09537850SAkhilesh Sanikop 
Mod32(int n)310*09537850SAkhilesh Sanikop constexpr int Mod32(int n) { return n & 0x1f; }
Mod64(int n)311*09537850SAkhilesh Sanikop constexpr int Mod64(int n) { return n & 0x3f; }
312*09537850SAkhilesh Sanikop 
313*09537850SAkhilesh Sanikop //------------------------------------------------------------------------------
314*09537850SAkhilesh Sanikop // Bitstream functions
315*09537850SAkhilesh Sanikop 
IsIntraFrame(FrameType type)316*09537850SAkhilesh Sanikop constexpr bool IsIntraFrame(FrameType type) {
317*09537850SAkhilesh Sanikop   return type == kFrameKey || type == kFrameIntraOnly;
318*09537850SAkhilesh Sanikop }
319*09537850SAkhilesh Sanikop 
GetTransformClass(TransformType tx_type)320*09537850SAkhilesh Sanikop inline TransformClass GetTransformClass(TransformType tx_type) {
321*09537850SAkhilesh Sanikop   constexpr BitMaskSet kTransformClassVerticalMask(
322*09537850SAkhilesh Sanikop       kTransformTypeIdentityDct, kTransformTypeIdentityAdst,
323*09537850SAkhilesh Sanikop       kTransformTypeIdentityFlipadst);
324*09537850SAkhilesh Sanikop   if (kTransformClassVerticalMask.Contains(tx_type)) {
325*09537850SAkhilesh Sanikop     return kTransformClassVertical;
326*09537850SAkhilesh Sanikop   }
327*09537850SAkhilesh Sanikop   constexpr BitMaskSet kTransformClassHorizontalMask(
328*09537850SAkhilesh Sanikop       kTransformTypeDctIdentity, kTransformTypeAdstIdentity,
329*09537850SAkhilesh Sanikop       kTransformTypeFlipadstIdentity);
330*09537850SAkhilesh Sanikop   if (kTransformClassHorizontalMask.Contains(tx_type)) {
331*09537850SAkhilesh Sanikop     return kTransformClassHorizontal;
332*09537850SAkhilesh Sanikop   }
333*09537850SAkhilesh Sanikop   return kTransformClass2D;
334*09537850SAkhilesh Sanikop }
335*09537850SAkhilesh Sanikop 
RowOrColumn4x4ToPixel(int row_or_column4x4,Plane plane,int8_t subsampling)336*09537850SAkhilesh Sanikop inline int RowOrColumn4x4ToPixel(int row_or_column4x4, Plane plane,
337*09537850SAkhilesh Sanikop                                  int8_t subsampling) {
338*09537850SAkhilesh Sanikop   return MultiplyBy4(row_or_column4x4) >> (plane == kPlaneY ? 0 : subsampling);
339*09537850SAkhilesh Sanikop }
340*09537850SAkhilesh Sanikop 
GetPlaneType(Plane plane)341*09537850SAkhilesh Sanikop constexpr PlaneType GetPlaneType(Plane plane) {
342*09537850SAkhilesh Sanikop   return static_cast<PlaneType>(plane != kPlaneY);
343*09537850SAkhilesh Sanikop }
344*09537850SAkhilesh Sanikop 
345*09537850SAkhilesh Sanikop // 5.11.44.
IsDirectionalMode(PredictionMode mode)346*09537850SAkhilesh Sanikop constexpr bool IsDirectionalMode(PredictionMode mode) {
347*09537850SAkhilesh Sanikop   return mode >= kPredictionModeVertical && mode <= kPredictionModeD67;
348*09537850SAkhilesh Sanikop }
349*09537850SAkhilesh Sanikop 
350*09537850SAkhilesh Sanikop // 5.9.3.
351*09537850SAkhilesh Sanikop //
352*09537850SAkhilesh Sanikop // |a| and |b| are order hints, treated as unsigned order_hint_bits-bit
353*09537850SAkhilesh Sanikop // integers. |order_hint_shift_bits| equals (32 - order_hint_bits) % 32.
354*09537850SAkhilesh Sanikop // order_hint_bits is at most 8, so |order_hint_shift_bits| is zero or a
355*09537850SAkhilesh Sanikop // value between 24 and 31 (inclusive).
356*09537850SAkhilesh Sanikop //
357*09537850SAkhilesh Sanikop // If |order_hint_shift_bits| is zero, |a| and |b| are both zeros, and the
358*09537850SAkhilesh Sanikop // result is zero. If |order_hint_shift_bits| is not zero, returns the
359*09537850SAkhilesh Sanikop // signed difference |a| - |b| using "modular arithmetic". More precisely, the
360*09537850SAkhilesh Sanikop // signed difference |a| - |b| is treated as a signed order_hint_bits-bit
361*09537850SAkhilesh Sanikop // integer and cast to an int. The returned difference is between
362*09537850SAkhilesh Sanikop // -(1 << (order_hint_bits - 1)) and (1 << (order_hint_bits - 1)) - 1
363*09537850SAkhilesh Sanikop // (inclusive).
364*09537850SAkhilesh Sanikop //
365*09537850SAkhilesh Sanikop // NOTE: |a| and |b| are the order_hint_bits least significant bits of the
366*09537850SAkhilesh Sanikop // actual values. This function returns the signed difference between the
367*09537850SAkhilesh Sanikop // actual values. The returned difference is correct as long as the actual
368*09537850SAkhilesh Sanikop // values are not more than 1 << (order_hint_bits - 1) - 1 apart.
369*09537850SAkhilesh Sanikop //
370*09537850SAkhilesh Sanikop // Example: Suppose order_hint_bits is 4 and |order_hint_shift_bits|
371*09537850SAkhilesh Sanikop // is 28. Then |a| and |b| are in the range [0, 15], and the actual values for
372*09537850SAkhilesh Sanikop // |a| and |b| must not be more than 7 apart. (If the actual values for |a| and
373*09537850SAkhilesh Sanikop // |b| are exactly 8 apart, this function cannot tell whether the actual value
374*09537850SAkhilesh Sanikop // for |a| is before or after the actual value for |b|.)
375*09537850SAkhilesh Sanikop //
376*09537850SAkhilesh Sanikop // First, consider the order hints 2 and 6. For this simple case, we have
377*09537850SAkhilesh Sanikop //   GetRelativeDistance(2, 6, 28) = 2 - 6 = -4, and
378*09537850SAkhilesh Sanikop //   GetRelativeDistance(6, 2, 28) = 6 - 2 = 4.
379*09537850SAkhilesh Sanikop //
380*09537850SAkhilesh Sanikop // On the other hand, consider the order hints 2 and 14. The order hints are
381*09537850SAkhilesh Sanikop // 12 (> 7) apart, so we need to use the actual values instead. The actual
382*09537850SAkhilesh Sanikop // values may be 34 (= 2 mod 16) and 30 (= 14 mod 16), respectively. Therefore
383*09537850SAkhilesh Sanikop // we have
384*09537850SAkhilesh Sanikop //   GetRelativeDistance(2, 14, 28) = 34 - 30 = 4, and
385*09537850SAkhilesh Sanikop //   GetRelativeDistance(14, 2, 28) = 30 - 34 = -4.
386*09537850SAkhilesh Sanikop //
387*09537850SAkhilesh Sanikop // The following comments apply only to specific CPUs' SIMD implementations,
388*09537850SAkhilesh Sanikop // such as intrinsics code.
389*09537850SAkhilesh Sanikop // For the 2 shift operations in this function, if the SIMD packed data is
390*09537850SAkhilesh Sanikop // 16-bit wide, try to use |order_hint_shift_bits| - 16 as the number of bits to
391*09537850SAkhilesh Sanikop // shift; If the SIMD packed data is 8-bit wide, try to use
392*09537850SAkhilesh Sanikop // |order_hint_shift_bits| - 24 as as the number of bits to shift.
393*09537850SAkhilesh Sanikop // |order_hint_shift_bits| - 16 and |order_hint_shift_bits| - 24 could be -16 or
394*09537850SAkhilesh Sanikop // -24. In these cases diff is 0, and the behavior of left or right shifting -16
395*09537850SAkhilesh Sanikop // or -24 bits is defined for x86 SIMD instructions and ARM NEON instructions,
396*09537850SAkhilesh Sanikop // and the result of shifting 0 is still 0. There is no guarantee that this
397*09537850SAkhilesh Sanikop // behavior and result apply to other CPUs' SIMD instructions.
GetRelativeDistance(const unsigned int a,const unsigned int b,const unsigned int order_hint_shift_bits)398*09537850SAkhilesh Sanikop inline int GetRelativeDistance(const unsigned int a, const unsigned int b,
399*09537850SAkhilesh Sanikop                                const unsigned int order_hint_shift_bits) {
400*09537850SAkhilesh Sanikop   const int diff = static_cast<int>(a) - static_cast<int>(b);
401*09537850SAkhilesh Sanikop   assert(order_hint_shift_bits <= 31);
402*09537850SAkhilesh Sanikop   if (order_hint_shift_bits == 0) {
403*09537850SAkhilesh Sanikop     assert(a == 0);
404*09537850SAkhilesh Sanikop     assert(b == 0);
405*09537850SAkhilesh Sanikop   } else {
406*09537850SAkhilesh Sanikop     assert(order_hint_shift_bits >= 24);  // i.e., order_hint_bits <= 8
407*09537850SAkhilesh Sanikop     assert(a < (1u << (32 - order_hint_shift_bits)));
408*09537850SAkhilesh Sanikop     assert(b < (1u << (32 - order_hint_shift_bits)));
409*09537850SAkhilesh Sanikop     assert(diff < (1 << (32 - order_hint_shift_bits)));
410*09537850SAkhilesh Sanikop     assert(diff >= -(1 << (32 - order_hint_shift_bits)));
411*09537850SAkhilesh Sanikop   }
412*09537850SAkhilesh Sanikop   // Sign extend the result of subtracting the values.
413*09537850SAkhilesh Sanikop   // Cast to unsigned int and then left shift to avoid undefined behavior with
414*09537850SAkhilesh Sanikop   // negative values. Cast to int to do the sign extension through right shift.
415*09537850SAkhilesh Sanikop   // This requires the right shift of a signed integer be an arithmetic shift,
416*09537850SAkhilesh Sanikop   // which is true for clang, gcc, and Visual C++.
417*09537850SAkhilesh Sanikop   // These two casts do not generate extra instructions.
418*09537850SAkhilesh Sanikop   // Don't use LeftShift(diff) since a valid diff may fail its assertions.
419*09537850SAkhilesh Sanikop   // For example, GetRelativeDistance(2, 14, 28), diff equals -12 and is less
420*09537850SAkhilesh Sanikop   // than the minimum allowed value of LeftShift() which is -8.
421*09537850SAkhilesh Sanikop   // The next 3 lines are equivalent to:
422*09537850SAkhilesh Sanikop   // const int order_hint_bits = Mod32(32 - order_hint_shift_bits);
423*09537850SAkhilesh Sanikop   // const int m = (1 << order_hint_bits) >> 1;
424*09537850SAkhilesh Sanikop   // return (diff & (m - 1)) - (diff & m);
425*09537850SAkhilesh Sanikop   return static_cast<int>(static_cast<unsigned int>(diff)
426*09537850SAkhilesh Sanikop                           << order_hint_shift_bits) >>
427*09537850SAkhilesh Sanikop          order_hint_shift_bits;
428*09537850SAkhilesh Sanikop }
429*09537850SAkhilesh Sanikop 
430*09537850SAkhilesh Sanikop // Applies |sign| (must be 0 or -1) to |value|, i.e.,
431*09537850SAkhilesh Sanikop //   return (sign == 0) ? value : -value;
432*09537850SAkhilesh Sanikop // and does so without a branch.
ApplySign(int value,int sign)433*09537850SAkhilesh Sanikop constexpr int ApplySign(int value, int sign) { return (value ^ sign) - sign; }
434*09537850SAkhilesh Sanikop 
435*09537850SAkhilesh Sanikop // 7.9.3. (without the clamp for numerator and denominator).
GetMvProjection(const MotionVector & mv,int numerator,int division_multiplier,MotionVector * projection_mv)436*09537850SAkhilesh Sanikop inline void GetMvProjection(const MotionVector& mv, int numerator,
437*09537850SAkhilesh Sanikop                             int division_multiplier,
438*09537850SAkhilesh Sanikop                             MotionVector* projection_mv) {
439*09537850SAkhilesh Sanikop   // Allow numerator and to be 0 so that this function can be called
440*09537850SAkhilesh Sanikop   // unconditionally. When numerator is 0, |projection_mv| will be 0, and this
441*09537850SAkhilesh Sanikop   // is what we want.
442*09537850SAkhilesh Sanikop   assert(std::abs(numerator) <= kMaxFrameDistance);
443*09537850SAkhilesh Sanikop   for (int i = 0; i < 2; ++i) {
444*09537850SAkhilesh Sanikop     projection_mv->mv[i] =
445*09537850SAkhilesh Sanikop         Clip3(RightShiftWithRoundingSigned(
446*09537850SAkhilesh Sanikop                   mv.mv[i] * numerator * division_multiplier, 14),
447*09537850SAkhilesh Sanikop               -kProjectionMvClamp, kProjectionMvClamp);
448*09537850SAkhilesh Sanikop   }
449*09537850SAkhilesh Sanikop }
450*09537850SAkhilesh Sanikop 
451*09537850SAkhilesh Sanikop // 7.9.4.
Project(int value,int delta,int dst_sign)452*09537850SAkhilesh Sanikop constexpr int Project(int value, int delta, int dst_sign) {
453*09537850SAkhilesh Sanikop   return value + ApplySign(delta / 64, dst_sign);
454*09537850SAkhilesh Sanikop }
455*09537850SAkhilesh Sanikop 
IsBlockSmallerThan8x8(BlockSize size)456*09537850SAkhilesh Sanikop inline bool IsBlockSmallerThan8x8(BlockSize size) {
457*09537850SAkhilesh Sanikop   return size < kBlock8x8 && size != kBlock4x16;
458*09537850SAkhilesh Sanikop }
459*09537850SAkhilesh Sanikop 
460*09537850SAkhilesh Sanikop // Returns true if the either the width or the height of the block is equal to
461*09537850SAkhilesh Sanikop // four.
IsBlockDimension4(BlockSize size)462*09537850SAkhilesh Sanikop inline bool IsBlockDimension4(BlockSize size) {
463*09537850SAkhilesh Sanikop   return size < kBlock8x8 || size == kBlock16x4;
464*09537850SAkhilesh Sanikop }
465*09537850SAkhilesh Sanikop 
466*09537850SAkhilesh Sanikop // Converts bitdepth 8, 10, and 12 to array index 0, 1, and 2, respectively.
BitdepthToArrayIndex(int bitdepth)467*09537850SAkhilesh Sanikop constexpr int BitdepthToArrayIndex(int bitdepth) { return (bitdepth - 8) >> 1; }
468*09537850SAkhilesh Sanikop 
469*09537850SAkhilesh Sanikop // Maps a square transform to an index between [0, 4]. kTransformSize4x4 maps
470*09537850SAkhilesh Sanikop // to 0, kTransformSize8x8 maps to 1 and so on.
TransformSizeToSquareTransformIndex(TransformSize tx_size)471*09537850SAkhilesh Sanikop inline int TransformSizeToSquareTransformIndex(TransformSize tx_size) {
472*09537850SAkhilesh Sanikop   assert(kTransformWidth[tx_size] == kTransformHeight[tx_size]);
473*09537850SAkhilesh Sanikop 
474*09537850SAkhilesh Sanikop   // The values of the square transform sizes happen to be in the right
475*09537850SAkhilesh Sanikop   // ranges, so we can just divide them by 4 to get the indexes.
476*09537850SAkhilesh Sanikop   static_assert(
477*09537850SAkhilesh Sanikop       std::is_unsigned<std::underlying_type<TransformSize>::type>::value, "");
478*09537850SAkhilesh Sanikop   static_assert(kTransformSize4x4 < 4, "");
479*09537850SAkhilesh Sanikop   static_assert(4 <= kTransformSize8x8 && kTransformSize8x8 < 8, "");
480*09537850SAkhilesh Sanikop   static_assert(8 <= kTransformSize16x16 && kTransformSize16x16 < 12, "");
481*09537850SAkhilesh Sanikop   static_assert(12 <= kTransformSize32x32 && kTransformSize32x32 < 16, "");
482*09537850SAkhilesh Sanikop   static_assert(16 <= kTransformSize64x64 && kTransformSize64x64 < 20, "");
483*09537850SAkhilesh Sanikop   return DivideBy4(tx_size);
484*09537850SAkhilesh Sanikop }
485*09537850SAkhilesh Sanikop 
486*09537850SAkhilesh Sanikop // Gets the corresponding Y/U/V position, to set and get filter masks
487*09537850SAkhilesh Sanikop // in deblock filtering.
488*09537850SAkhilesh Sanikop // Returns luma_position if it's Y plane, whose subsampling must be 0.
489*09537850SAkhilesh Sanikop // Returns the odd position for U/V plane, if there is subsampling.
GetDeblockPosition(const int luma_position,const int subsampling)490*09537850SAkhilesh Sanikop constexpr int GetDeblockPosition(const int luma_position,
491*09537850SAkhilesh Sanikop                                  const int subsampling) {
492*09537850SAkhilesh Sanikop   return luma_position | subsampling;
493*09537850SAkhilesh Sanikop }
494*09537850SAkhilesh Sanikop 
495*09537850SAkhilesh Sanikop // Returns the size of the residual buffer required to hold the residual values
496*09537850SAkhilesh Sanikop // for a block or frame of size |rows| by |columns| (taking into account
497*09537850SAkhilesh Sanikop // |subsampling_x|, |subsampling_y| and |residual_size|). |residual_size| is the
498*09537850SAkhilesh Sanikop // number of bytes required to represent one residual value.
GetResidualBufferSize(const int rows,const int columns,const int subsampling_x,const int subsampling_y,const size_t residual_size)499*09537850SAkhilesh Sanikop inline size_t GetResidualBufferSize(const int rows, const int columns,
500*09537850SAkhilesh Sanikop                                     const int subsampling_x,
501*09537850SAkhilesh Sanikop                                     const int subsampling_y,
502*09537850SAkhilesh Sanikop                                     const size_t residual_size) {
503*09537850SAkhilesh Sanikop   // The subsampling multipliers are:
504*09537850SAkhilesh Sanikop   //   Both x and y are subsampled: 3 / 2.
505*09537850SAkhilesh Sanikop   //   Only x or y is subsampled: 2 / 1 (which is equivalent to 4 / 2).
506*09537850SAkhilesh Sanikop   //   Both x and y are not subsampled: 3 / 1 (which is equivalent to 6 / 2).
507*09537850SAkhilesh Sanikop   // So we compute the final subsampling multiplier as follows:
508*09537850SAkhilesh Sanikop   //   multiplier = (2 + (4 >> subsampling_x >> subsampling_y)) / 2.
509*09537850SAkhilesh Sanikop   // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary checks
510*09537850SAkhilesh Sanikop   // when parsing quantized coefficients.
511*09537850SAkhilesh Sanikop   const int subsampling_multiplier_num =
512*09537850SAkhilesh Sanikop       2 + (4 >> subsampling_x >> subsampling_y);
513*09537850SAkhilesh Sanikop   const int number_elements =
514*09537850SAkhilesh Sanikop       (rows * columns * subsampling_multiplier_num) >> 1;
515*09537850SAkhilesh Sanikop   const int tx_padding = 32 * kResidualPaddingVertical;
516*09537850SAkhilesh Sanikop   return residual_size * (number_elements + tx_padding);
517*09537850SAkhilesh Sanikop }
518*09537850SAkhilesh Sanikop 
519*09537850SAkhilesh Sanikop // This function is equivalent to:
520*09537850SAkhilesh Sanikop // std::min({kTransformWidthLog2[tx_size] - 2,
521*09537850SAkhilesh Sanikop //           kTransformWidthLog2[left_tx_size] - 2,
522*09537850SAkhilesh Sanikop //           2});
GetTransformSizeIdWidth(TransformSize tx_size,TransformSize left_tx_size)523*09537850SAkhilesh Sanikop constexpr LoopFilterTransformSizeId GetTransformSizeIdWidth(
524*09537850SAkhilesh Sanikop     TransformSize tx_size, TransformSize left_tx_size) {
525*09537850SAkhilesh Sanikop   return static_cast<LoopFilterTransformSizeId>(
526*09537850SAkhilesh Sanikop       static_cast<int>(tx_size > kTransformSize4x16 &&
527*09537850SAkhilesh Sanikop                        left_tx_size > kTransformSize4x16) +
528*09537850SAkhilesh Sanikop       static_cast<int>(tx_size > kTransformSize8x32 &&
529*09537850SAkhilesh Sanikop                        left_tx_size > kTransformSize8x32));
530*09537850SAkhilesh Sanikop }
531*09537850SAkhilesh Sanikop 
532*09537850SAkhilesh Sanikop // This is used for 7.11.3.4 Block Inter Prediction Process, to select convolve
533*09537850SAkhilesh Sanikop // filters.
GetFilterIndex(const int filter_index,const int length)534*09537850SAkhilesh Sanikop inline int GetFilterIndex(const int filter_index, const int length) {
535*09537850SAkhilesh Sanikop   if (length <= 4) {
536*09537850SAkhilesh Sanikop     if (filter_index == kInterpolationFilterEightTap ||
537*09537850SAkhilesh Sanikop         filter_index == kInterpolationFilterEightTapSharp) {
538*09537850SAkhilesh Sanikop       return 4;
539*09537850SAkhilesh Sanikop     }
540*09537850SAkhilesh Sanikop     if (filter_index == kInterpolationFilterEightTapSmooth) {
541*09537850SAkhilesh Sanikop       return 5;
542*09537850SAkhilesh Sanikop     }
543*09537850SAkhilesh Sanikop   }
544*09537850SAkhilesh Sanikop   return filter_index;
545*09537850SAkhilesh Sanikop }
546*09537850SAkhilesh Sanikop 
547*09537850SAkhilesh Sanikop // This has identical results as RightShiftWithRounding since |subsampling| can
548*09537850SAkhilesh Sanikop // only be 0 or 1.
SubsampledValue(int value,int subsampling)549*09537850SAkhilesh Sanikop constexpr int SubsampledValue(int value, int subsampling) {
550*09537850SAkhilesh Sanikop   return (value + subsampling) >> subsampling;
551*09537850SAkhilesh Sanikop }
552*09537850SAkhilesh Sanikop 
553*09537850SAkhilesh Sanikop }  // namespace libgav1
554*09537850SAkhilesh Sanikop 
555*09537850SAkhilesh Sanikop #endif  // LIBGAV1_SRC_UTILS_COMMON_H_
556