src/utils/common.h

/*
 * Copyright 2019 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#ifndef LIBGAV1_SRC_UTILS_COMMON_H_
*09537850SAkhilesh Sanikop#define LIBGAV1_SRC_UTILS_COMMON_H_
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#if defined(_MSC_VER)
*09537850SAkhilesh Sanikop#include <intrin.h>
*09537850SAkhilesh Sanikop#pragma intrinsic(_BitScanForward)
*09537850SAkhilesh Sanikop#pragma intrinsic(_BitScanReverse)
*09537850SAkhilesh Sanikop#if defined(_M_X64) || defined(_M_ARM64)
*09537850SAkhilesh Sanikop#pragma intrinsic(_BitScanReverse64)
*09537850SAkhilesh Sanikop#define HAVE_BITSCANREVERSE64
*09537850SAkhilesh Sanikop#endif  // defined(_M_X64) || defined(_M_ARM64)
*09537850SAkhilesh Sanikop#endif  // defined(_MSC_VER)
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#include <algorithm>
*09537850SAkhilesh Sanikop#include <cassert>
*09537850SAkhilesh Sanikop#include <cstddef>
*09537850SAkhilesh Sanikop#include <cstdint>
*09537850SAkhilesh Sanikop#include <cstdlib>
*09537850SAkhilesh Sanikop#include <cstring>
*09537850SAkhilesh Sanikop#include <type_traits>
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#include "src/utils/bit_mask_set.h"
*09537850SAkhilesh Sanikop#include "src/utils/constants.h"
*09537850SAkhilesh Sanikop#include "src/utils/memory.h"
*09537850SAkhilesh Sanikop#include "src/utils/types.h"
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopnamespace libgav1 {
*09537850SAkhilesh Sanikop
// LIBGAV1_RESTRICT
// Declares a pointer with the restrict type qualifier if available.
// This allows code to hint to the compiler that only this pointer references a
// particular object or memory region within the scope of the block in which it
// is declared. This may allow for improved optimizations due to the lack of
// pointer aliasing. See also:
// https://en.cppreference.com/w/c/language/restrict
// Note a template alias is not used for compatibility with older compilers
// (e.g., gcc < 10) that do not expand the type when instantiating a template
// function, either explicitly or in an assignment to a function pointer as is
// done within the dsp code. RestrictPtr<T>::type is an alternative to this,
// similar to std::add_const, but for conciseness the macro is preferred.
//
// Expands to __restrict__ when __GNUC__ is defined, to __restrict under
// Visual C++, and to nothing on any other compiler.
#ifdef __GNUC__
#define LIBGAV1_RESTRICT __restrict__
#elif defined(_MSC_VER)
#define LIBGAV1_RESTRICT __restrict
#else
#define LIBGAV1_RESTRICT
#endif
*09537850SAkhilesh Sanikop
// Rounds |value| up to the nearest multiple of |alignment| and returns the
// result. A |value| that is already a multiple of |alignment| is returned
// unchanged. |alignment| must be a nonzero power of 2.
template <typename T>
inline T Align(T value, T alignment) {
  assert(alignment != 0);
  // The mask arithmetic below is only correct for powers of 2.
  assert((alignment & (alignment - 1)) == 0);
  const T alignment_mask = alignment - 1;
  return (value + alignment_mask) & ~alignment_mask;
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// Aligns |addr| to the desired |alignment|. |alignment| must be a power of 2.
*09537850SAkhilesh Sanikopinline uint8_t* AlignAddr(uint8_t* const addr, const uintptr_t alignment) {
*09537850SAkhilesh Sanikop  const auto value = reinterpret_cast<uintptr_t>(addr);
*09537850SAkhilesh Sanikop  return reinterpret_cast<uint8_t*>(Align(value, alignment));
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// Clamps |value| to the range [|low|, |high|]: returns |low| if |value| is
// below it, |high| if |value| is above it, and |value| otherwise. The lower
// bound is tested first.
inline int32_t Clip3(int32_t value, int32_t low, int32_t high) {
  return value < low ? low : (value > high ? high : value);
}
*09537850SAkhilesh Sanikop
// Replicates the edge pixels of one row into its left and right borders.
// |line_start| points to the first of |width| pixels of type |Pixel|. The
// |left| elements immediately before |line_start| are set to the first pixel
// of the row and the |right| elements immediately after the row are set to the
// last pixel of the row, so the storage on both sides must be writable.
template <typename Pixel>
void ExtendLine(void* const line_start, const int width, const int left,
                const int right) {
  auto* const start = static_cast<Pixel*>(line_start);
  // Left border: |left| copies of the first pixel.
  Memset(start - left, start[0], left);
  // Right border: |right| copies of the last pixel.
  Memset(start + width, start[width - 1], right);
}
*09537850SAkhilesh Sanikop
// The following 2 templates set a block of data with uncontiguous memory to
// |value|. The compilers usually generate several branches to handle different
// cases of |columns| when inlining memset() and std::fill(), and these branches
// are unfortunately within the loop of |rows|. So calling these templates
// directly could be inefficient. It is recommended to specialize common cases
// of |columns|, such as 1, 2, 4, 8, 16 and 32, etc. in advance before
// processing the generic case of |columns|. The code size may be larger, but
// there would be big speed gains.
// Call template MemSetBlock<> when sizeof(|T|) is 1.
// Call template SetBlock<> when sizeof(|T|) is larger than 1.
//
// MemSetBlock() writes |value| to the first |columns| elements of each of
// |rows| rows. Consecutive rows start |stride| elements apart, beginning at
// |dst|. |rows| must be positive because the loop body runs before the row
// count is tested.
template <typename T>
void MemSetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  static_assert(sizeof(T) == 1, "");
  assert(rows > 0);
  do {
    memset(dst, value, columns);
    dst += stride;
  } while (--rows != 0);
}
*09537850SAkhilesh Sanikop
// Writes |value| to the first |columns| elements of each of |rows| rows using
// std::fill(). Consecutive rows start |stride| elements apart, beginning at
// |dst|. Intended for element types wider than one byte; |rows| must be
// positive because the loop body runs before the row count is tested.
template <typename T>
void SetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  assert(rows > 0);
  do {
    std::fill(dst, dst + columns, value);
    dst += stride;
  } while (--rows != 0);
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#if defined(__GNUC__)
*09537850SAkhilesh Sanikop
// Returns the number of zero bits above the most significant set bit of the
// 32-bit value |n|. |n| must be nonzero: the result of __builtin_clz() is
// undefined for 0.
inline int CountLeadingZeros(uint32_t n) {
  assert(n != 0);
  return __builtin_clz(n);
}

// 64-bit overload of CountLeadingZeros(). |n| must be nonzero: the result of
// __builtin_clzll() is undefined for 0.
inline int CountLeadingZeros(uint64_t n) {
  assert(n != 0);
  return __builtin_clzll(n);
}
*09537850SAkhilesh Sanikop
// Returns the number of zero bits below the least significant set bit of |n|.
// |n| must be nonzero: the result of __builtin_ctz() is undefined for 0.
inline int CountTrailingZeros(uint32_t n) {
  assert(n != 0);
  return __builtin_ctz(n);
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#elif defined(_MSC_VER)
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int CountLeadingZeros(uint32_t n) {
*09537850SAkhilesh Sanikop  assert(n != 0);
*09537850SAkhilesh Sanikop  unsigned long first_set_bit;  // NOLINT(runtime/int)
*09537850SAkhilesh Sanikop  const unsigned char bit_set = _BitScanReverse(&first_set_bit, n);
*09537850SAkhilesh Sanikop  assert(bit_set != 0);
*09537850SAkhilesh Sanikop  static_cast<void>(bit_set);
*09537850SAkhilesh Sanikop  return 31 ^ static_cast<int>(first_set_bit);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int CountLeadingZeros(uint64_t n) {
*09537850SAkhilesh Sanikop  assert(n != 0);
*09537850SAkhilesh Sanikop  unsigned long first_set_bit;  // NOLINT(runtime/int)
*09537850SAkhilesh Sanikop#if defined(HAVE_BITSCANREVERSE64)
*09537850SAkhilesh Sanikop  const unsigned char bit_set =
*09537850SAkhilesh Sanikop      _BitScanReverse64(&first_set_bit, static_cast<unsigned __int64>(n));
*09537850SAkhilesh Sanikop#else   // !defined(HAVE_BITSCANREVERSE64)
*09537850SAkhilesh Sanikop  const auto n_hi = static_cast<unsigned long>(n >> 32);  // NOLINT(runtime/int)
*09537850SAkhilesh Sanikop  if (n_hi != 0) {
*09537850SAkhilesh Sanikop    const unsigned char bit_set = _BitScanReverse(&first_set_bit, n_hi);
*09537850SAkhilesh Sanikop    assert(bit_set != 0);
*09537850SAkhilesh Sanikop    static_cast<void>(bit_set);
*09537850SAkhilesh Sanikop    return 31 ^ static_cast<int>(first_set_bit);
*09537850SAkhilesh Sanikop  }
*09537850SAkhilesh Sanikop  const unsigned char bit_set = _BitScanReverse(
*09537850SAkhilesh Sanikop      &first_set_bit, static_cast<unsigned long>(n));  // NOLINT(runtime/int)
*09537850SAkhilesh Sanikop#endif  // defined(HAVE_BITSCANREVERSE64)
*09537850SAkhilesh Sanikop  assert(bit_set != 0);
*09537850SAkhilesh Sanikop  static_cast<void>(bit_set);
*09537850SAkhilesh Sanikop  return 63 ^ static_cast<int>(first_set_bit);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#undef HAVE_BITSCANREVERSE64
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int CountTrailingZeros(uint32_t n) {
*09537850SAkhilesh Sanikop  assert(n != 0);
*09537850SAkhilesh Sanikop  unsigned long first_set_bit;  // NOLINT(runtime/int)
*09537850SAkhilesh Sanikop  const unsigned char bit_set = _BitScanForward(&first_set_bit, n);
*09537850SAkhilesh Sanikop  assert(bit_set != 0);
*09537850SAkhilesh Sanikop  static_cast<void>(bit_set);
*09537850SAkhilesh Sanikop  return static_cast<int>(first_set_bit);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#else  // !defined(__GNUC__) && !defined(_MSC_VER)
*09537850SAkhilesh Sanikop
// Portable fallback used when neither the GNU builtins nor the Visual C++
// intrinsics are available. Shifts |n| left one bit at a time until bit
// |kMSB| (the index of the most significant bit of |T|) is set, and returns
// the number of shifts. |n| must be nonzero, otherwise the loop would not
// terminate.
template <const int kMSB, typename T>
inline int CountLeadingZeros(T n) {
  assert(n != 0);
  const T msb = T{1} << kMSB;
  int count = 0;
  while ((n & msb) == 0) {
    ++count;
    n <<= 1;
  }
  return count;
}

// Number of leading zero bits in the nonzero 32-bit value |n|.
inline int CountLeadingZeros(uint32_t n) { return CountLeadingZeros<31>(n); }

// Number of leading zero bits in the nonzero 64-bit value |n|.
inline int CountLeadingZeros(uint64_t n) { return CountLeadingZeros<63>(n); }
*09537850SAkhilesh Sanikop
// Portable fallback that returns the number of zero bits below the least
// significant set bit of |n|. |n| must be nonzero.
//
// This is the algorithm on the left in Figure 5-23, Hacker's Delight, Second
// Edition, page 109. The book says:
//   If the number of trailing 0's is expected to be small or large, then the
//   simple loops shown in Figure 5-23 are quite fast.
inline int CountTrailingZeros(uint32_t n) {
  assert(n != 0);
  // Create a word with 1's at the positions of the trailing 0's in |n|, and
  // 0's elsewhere (e.g., 01011000 => 00000111).
  n = ~n & (n - 1);
  // The number of 1's in that word is the answer; count them by shifting the
  // word out to the right.
  int count = 0;
  while (n != 0) {
    ++count;
    n >>= 1;
  }
  return count;
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#endif  // defined(__GNUC__)
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int FloorLog2(int32_t n) {
*09537850SAkhilesh Sanikop  assert(n > 0);
*09537850SAkhilesh Sanikop  return 31 ^ CountLeadingZeros(static_cast<uint32_t>(n));
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int FloorLog2(uint32_t n) {
*09537850SAkhilesh Sanikop  assert(n > 0);
*09537850SAkhilesh Sanikop  return 31 ^ CountLeadingZeros(n);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int FloorLog2(int64_t n) {
*09537850SAkhilesh Sanikop  assert(n > 0);
*09537850SAkhilesh Sanikop  return 63 ^ CountLeadingZeros(static_cast<uint64_t>(n));
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int FloorLog2(uint64_t n) {
*09537850SAkhilesh Sanikop  assert(n > 0);
*09537850SAkhilesh Sanikop  return 63 ^ CountLeadingZeros(n);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int CeilLog2(unsigned int n) {
*09537850SAkhilesh Sanikop  // The expression FloorLog2(n - 1) + 1 is undefined not only for n == 0 but
*09537850SAkhilesh Sanikop  // also for n == 1, so this expression must be guarded by the n < 2 test. An
*09537850SAkhilesh Sanikop  // alternative implementation is:
*09537850SAkhilesh Sanikop  // return (n == 0) ? 0 : FloorLog2(n) + static_cast<int>((n & (n - 1)) != 0);
*09537850SAkhilesh Sanikop  return (n < 2) ? 0 : FloorLog2(n - 1) + 1;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// Divides |value| by 2^|bits|, rounding the quotient up: (1 << bits) - 1 is
// added before the right shift. |bits| must be positive.
inline int RightShiftWithCeiling(int value, int bits) {
  assert(bits > 0);
  return (value + (1 << bits) - 1) >> bits;
}
*09537850SAkhilesh Sanikop
// RightShiftWithRounding() divides |value| by 2^|bits| and rounds to nearest:
// half of the divisor, (1 << bits) >> 1, is added before the right shift.
// |bits| must be non-negative; with |bits| == 0 the rounding term is 0 and
// |value| is returned unchanged.
inline int32_t RightShiftWithRounding(int32_t value, int bits) {
  assert(bits >= 0);
  return (value + ((1 << bits) >> 1)) >> bits;
}

// Unsigned 32-bit overload. The rounding term is computed in uint32_t so that
// |bits| == 31 yields 1 << 30. A signed 1 << 31 does not fit in int, and
// halving the wrapped value would sign-extend into a wrong rounding term.
// |bits| must be less than 32.
inline uint32_t RightShiftWithRounding(uint32_t value, int bits) {
  assert(bits >= 0);
  assert(bits < 32);
  return (value + ((uint32_t{1} << bits) >> 1)) >> bits;
}

// This variant is used when |value| can exceed 32 bits. Although the final
// result must always fit into int32_t.
inline int32_t RightShiftWithRounding(int64_t value, int bits) {
  assert(bits >= 0);
  return static_cast<int32_t>((value + ((int64_t{1} << bits) >> 1)) >> bits);
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int32_t RightShiftWithRoundingSigned(int32_t value, int bits) {
*09537850SAkhilesh Sanikop  assert(bits > 0);
*09537850SAkhilesh Sanikop  // The next line is equivalent to:
*09537850SAkhilesh Sanikop  // return (value >= 0) ? RightShiftWithRounding(value, bits)
*09537850SAkhilesh Sanikop  //                     : -RightShiftWithRounding(-value, bits);
*09537850SAkhilesh Sanikop  return RightShiftWithRounding(value + (value >> 31), bits);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// This variant is used when |value| can exceed 32 bits. Although the final
*09537850SAkhilesh Sanikop// result must always fit into int32_t.
*09537850SAkhilesh Sanikopinline int32_t RightShiftWithRoundingSigned(int64_t value, int bits) {
*09537850SAkhilesh Sanikop  assert(bits > 0);
*09537850SAkhilesh Sanikop  // The next line is equivalent to:
*09537850SAkhilesh Sanikop  // return (value >= 0) ? RightShiftWithRounding(value, bits)
*09537850SAkhilesh Sanikop  //                     : -RightShiftWithRounding(-value, bits);
*09537850SAkhilesh Sanikop  return RightShiftWithRounding(value + (value >> 63), bits);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// DivideByN(n) returns |n| >> log2(N). These are right shifts rather than
// divisions, so on compilers where the right shift of a signed integer is an
// arithmetic shift, negative values round toward negative infinity instead of
// toward zero.
constexpr int DivideBy2(int n) { return n >> 1; }
constexpr int DivideBy4(int n) { return n >> 2; }
constexpr int DivideBy8(int n) { return n >> 3; }
constexpr int DivideBy16(int n) { return n >> 4; }
constexpr int DivideBy32(int n) { return n >> 5; }
constexpr int DivideBy64(int n) { return n >> 6; }
constexpr int DivideBy128(int n) { return n >> 7; }
*09537850SAkhilesh Sanikop
// Returns |value| shifted left by |bits|, i.e. |value| * 2^|bits|.
// Convert |value| to unsigned before shifting to avoid undefined behavior with
// negative values.
// |bits| must be in [0, 31]; the range assertions below shift by 31 - |bits|.
// NOTE(review): for |bits| > 0 the upper-bound assertion admits
// |value| == 1 << (31 - bits), whose shifted result wraps to INT_MIN; confirm
// whether that is intentional.
inline int LeftShift(int value, int bits) {
  assert(bits >= 0);
  assert(bits <= 31);
  assert(value >= -(int64_t{1} << (31 - bits)));
  assert(value <= (int64_t{1} << (31 - bits)) - ((bits == 0) ? 1 : 0));
  return static_cast<int>(static_cast<uint32_t>(value) << bits);
}
*09537850SAkhilesh Sanikopinline int MultiplyBy2(int n) { return LeftShift(n, 1); }
*09537850SAkhilesh Sanikopinline int MultiplyBy4(int n) { return LeftShift(n, 2); }
*09537850SAkhilesh Sanikopinline int MultiplyBy8(int n) { return LeftShift(n, 3); }
*09537850SAkhilesh Sanikopinline int MultiplyBy16(int n) { return LeftShift(n, 4); }
*09537850SAkhilesh Sanikopinline int MultiplyBy32(int n) { return LeftShift(n, 5); }
*09537850SAkhilesh Sanikopinline int MultiplyBy64(int n) { return LeftShift(n, 6); }
*09537850SAkhilesh Sanikop
// Mod32() and Mod64() return the low 5 and 6 bits of |n| respectively. For
// negative |n| the result is still in [0, 31] or [0, 63], unlike the %
// operator.
constexpr int Mod32(int n) { return n & 0x1f; }
constexpr int Mod64(int n) { return n & 0x3f; }
*09537850SAkhilesh Sanikop
//------------------------------------------------------------------------------
// Bitstream functions
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopconstexpr bool IsIntraFrame(FrameType type) {
*09537850SAkhilesh Sanikop  return type == kFrameKey || type == kFrameIntraOnly;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline TransformClass GetTransformClass(TransformType tx_type) {
*09537850SAkhilesh Sanikop  constexpr BitMaskSet kTransformClassVerticalMask(
*09537850SAkhilesh Sanikop      kTransformTypeIdentityDct, kTransformTypeIdentityAdst,
*09537850SAkhilesh Sanikop      kTransformTypeIdentityFlipadst);
*09537850SAkhilesh Sanikop  if (kTransformClassVerticalMask.Contains(tx_type)) {
*09537850SAkhilesh Sanikop    return kTransformClassVertical;
*09537850SAkhilesh Sanikop  }
*09537850SAkhilesh Sanikop  constexpr BitMaskSet kTransformClassHorizontalMask(
*09537850SAkhilesh Sanikop      kTransformTypeDctIdentity, kTransformTypeAdstIdentity,
*09537850SAkhilesh Sanikop      kTransformTypeFlipadstIdentity);
*09537850SAkhilesh Sanikop  if (kTransformClassHorizontalMask.Contains(tx_type)) {
*09537850SAkhilesh Sanikop    return kTransformClassHorizontal;
*09537850SAkhilesh Sanikop  }
*09537850SAkhilesh Sanikop  return kTransformClass2D;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline int RowOrColumn4x4ToPixel(int row_or_column4x4, Plane plane,
*09537850SAkhilesh Sanikop                                 int8_t subsampling) {
*09537850SAkhilesh Sanikop  return MultiplyBy4(row_or_column4x4) >> (plane == kPlaneY ? 0 : subsampling);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopconstexpr PlaneType GetPlaneType(Plane plane) {
*09537850SAkhilesh Sanikop  return static_cast<PlaneType>(plane != kPlaneY);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// 5.11.44.
*09537850SAkhilesh Sanikopconstexpr bool IsDirectionalMode(PredictionMode mode) {
*09537850SAkhilesh Sanikop  return mode >= kPredictionModeVertical && mode <= kPredictionModeD67;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// 5.9.3.
//
// |a| and |b| are order hints, treated as unsigned order_hint_bits-bit
// integers. |order_hint_shift_bits| equals (32 - order_hint_bits) % 32.
// order_hint_bits is at most 8, so |order_hint_shift_bits| is zero or a
// value between 24 and 31 (inclusive).
//
// If |order_hint_shift_bits| is zero, |a| and |b| are both zeros, and the
// result is zero. If |order_hint_shift_bits| is not zero, returns the
// signed difference |a| - |b| using "modular arithmetic". More precisely, the
// signed difference |a| - |b| is treated as a signed order_hint_bits-bit
// integer and cast to an int. The returned difference is between
// -(1 << (order_hint_bits - 1)) and (1 << (order_hint_bits - 1)) - 1
// (inclusive).
//
// NOTE: |a| and |b| are the order_hint_bits least significant bits of the
// actual values. This function returns the signed difference between the
// actual values. The returned difference is correct as long as the actual
// values are not more than (1 << (order_hint_bits - 1)) - 1 apart.
//
// Example: Suppose order_hint_bits is 4 and |order_hint_shift_bits|
// is 28. Then |a| and |b| are in the range [0, 15], and the actual values for
// |a| and |b| must not be more than 7 apart. (If the actual values for |a| and
// |b| are exactly 8 apart, this function cannot tell whether the actual value
// for |a| is before or after the actual value for |b|.)
//
// First, consider the order hints 2 and 6. For this simple case, we have
//   GetRelativeDistance(2, 6, 28) = 2 - 6 = -4, and
//   GetRelativeDistance(6, 2, 28) = 6 - 2 = 4.
//
// On the other hand, consider the order hints 2 and 14. The order hints are
// 12 (> 7) apart, so we need to use the actual values instead. The actual
// values may be 34 (= 2 mod 16) and 30 (= 14 mod 16), respectively. Therefore
// we have
//   GetRelativeDistance(2, 14, 28) = 34 - 30 = 4, and
//   GetRelativeDistance(14, 2, 28) = 30 - 34 = -4.
//
// The following comments apply only to specific CPUs' SIMD implementations,
// such as intrinsics code.
// For the 2 shift operations in this function, if the SIMD packed data is
// 16-bit wide, try to use |order_hint_shift_bits| - 16 as the number of bits to
// shift; if the SIMD packed data is 8-bit wide, try to use
// |order_hint_shift_bits| - 24 as the number of bits to shift.
// |order_hint_shift_bits| - 16 and |order_hint_shift_bits| - 24 could be -16 or
// -24. In these cases diff is 0, and the behavior of left or right shifting -16
// or -24 bits is defined for x86 SIMD instructions and ARM NEON instructions,
// and the result of shifting 0 is still 0. There is no guarantee that this
// behavior and result apply to other CPUs' SIMD instructions.
inline int GetRelativeDistance(const unsigned int a, const unsigned int b,
                               const unsigned int order_hint_shift_bits) {
  const int diff = static_cast<int>(a) - static_cast<int>(b);
  assert(order_hint_shift_bits <= 31);
  if (order_hint_shift_bits == 0) {
    assert(a == 0);
    assert(b == 0);
  } else {
    assert(order_hint_shift_bits >= 24);  // i.e., order_hint_bits <= 8
    assert(a < (1u << (32 - order_hint_shift_bits)));
    assert(b < (1u << (32 - order_hint_shift_bits)));
    assert(diff < (1 << (32 - order_hint_shift_bits)));
    assert(diff >= -(1 << (32 - order_hint_shift_bits)));
  }
  // Sign extend the result of subtracting the values.
  // Cast to unsigned int and then left shift to avoid undefined behavior with
  // negative values. Cast to int to do the sign extension through right shift.
  // This requires the right shift of a signed integer be an arithmetic shift,
  // which is true for clang, gcc, and Visual C++.
  // These two casts do not generate extra instructions.
  // Don't use LeftShift(diff) since a valid diff may fail its assertions.
  // For example, GetRelativeDistance(2, 14, 28), diff equals -12 and is less
  // than the minimum allowed value of LeftShift() which is -8.
  // The next 3 lines are equivalent to:
  // const int order_hint_bits = Mod32(32 - order_hint_shift_bits);
  // const int m = (1 << order_hint_bits) >> 1;
  // return (diff & (m - 1)) - (diff & m);
  return static_cast<int>(static_cast<unsigned int>(diff)
                          << order_hint_shift_bits) >>
         order_hint_shift_bits;
}
*09537850SAkhilesh Sanikop
// Applies |sign| (must be 0 or -1) to |value|, i.e.,
//   return (sign == 0) ? value : -value;
// and does so without a branch. With |sign| == -1 the XOR flips every bit of
// |value| and subtracting -1 adds one, which is two's complement negation.
constexpr int ApplySign(int value, int sign) { return (value ^ sign) - sign; }
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// 7.9.3. (without the clamp for numerator and denominator).
*09537850SAkhilesh Sanikopinline void GetMvProjection(const MotionVector& mv, int numerator,
*09537850SAkhilesh Sanikop                            int division_multiplier,
*09537850SAkhilesh Sanikop                            MotionVector* projection_mv) {
*09537850SAkhilesh Sanikop  // Allow numerator and to be 0 so that this function can be called
*09537850SAkhilesh Sanikop  // unconditionally. When numerator is 0, |projection_mv| will be 0, and this
*09537850SAkhilesh Sanikop  // is what we want.
*09537850SAkhilesh Sanikop  assert(std::abs(numerator) <= kMaxFrameDistance);
*09537850SAkhilesh Sanikop  for (int i = 0; i < 2; ++i) {
*09537850SAkhilesh Sanikop    projection_mv->mv[i] =
*09537850SAkhilesh Sanikop        Clip3(RightShiftWithRoundingSigned(
*09537850SAkhilesh Sanikop                  mv.mv[i] * numerator * division_multiplier, 14),
*09537850SAkhilesh Sanikop              -kProjectionMvClamp, kProjectionMvClamp);
*09537850SAkhilesh Sanikop  }
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// 7.9.4.
*09537850SAkhilesh Sanikopconstexpr int Project(int value, int delta, int dst_sign) {
*09537850SAkhilesh Sanikop  return value + ApplySign(delta / 64, dst_sign);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikopinline bool IsBlockSmallerThan8x8(BlockSize size) {
*09537850SAkhilesh Sanikop  return size < kBlock8x8 && size != kBlock4x16;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// Returns true if the either the width or the height of the block is equal to
*09537850SAkhilesh Sanikop// four.
*09537850SAkhilesh Sanikopinline bool IsBlockDimension4(BlockSize size) {
*09537850SAkhilesh Sanikop  return size < kBlock8x8 || size == kBlock16x4;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// Converts bitdepth 8, 10, and 12 to array index 0, 1, and 2, respectively,
// by computing (bitdepth - 8) / 2 with a right shift.
constexpr int BitdepthToArrayIndex(int bitdepth) { return (bitdepth - 8) >> 1; }
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// Maps a square transform to an index between [0, 4]. kTransformSize4x4 maps
*09537850SAkhilesh Sanikop// to 0, kTransformSize8x8 maps to 1 and so on.
*09537850SAkhilesh Sanikopinline int TransformSizeToSquareTransformIndex(TransformSize tx_size) {
*09537850SAkhilesh Sanikop  assert(kTransformWidth[tx_size] == kTransformHeight[tx_size]);
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop  // The values of the square transform sizes happen to be in the right
*09537850SAkhilesh Sanikop  // ranges, so we can just divide them by 4 to get the indexes.
*09537850SAkhilesh Sanikop  static_assert(
*09537850SAkhilesh Sanikop      std::is_unsigned<std::underlying_type<TransformSize>::type>::value, "");
*09537850SAkhilesh Sanikop  static_assert(kTransformSize4x4 < 4, "");
*09537850SAkhilesh Sanikop  static_assert(4 <= kTransformSize8x8 && kTransformSize8x8 < 8, "");
*09537850SAkhilesh Sanikop  static_assert(8 <= kTransformSize16x16 && kTransformSize16x16 < 12, "");
*09537850SAkhilesh Sanikop  static_assert(12 <= kTransformSize32x32 && kTransformSize32x32 < 16, "");
*09537850SAkhilesh Sanikop  static_assert(16 <= kTransformSize64x64 && kTransformSize64x64 < 20, "");
*09537850SAkhilesh Sanikop  return DivideBy4(tx_size);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// Gets the corresponding Y/U/V position, to set and get filter masks
// in deblock filtering.
// Returns luma_position if it's Y plane, whose subsampling must be 0.
// Returns the odd position for U/V plane, if there is subsampling: OR-ing
// with a |subsampling| of 1 sets the lowest bit of |luma_position|.
constexpr int GetDeblockPosition(const int luma_position,
                                 const int subsampling) {
  return luma_position | subsampling;
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// Returns the size of the residual buffer required to hold the residual values
*09537850SAkhilesh Sanikop// for a block or frame of size |rows| by |columns| (taking into account
*09537850SAkhilesh Sanikop// |subsampling_x|, |subsampling_y| and |residual_size|). |residual_size| is the
*09537850SAkhilesh Sanikop// number of bytes required to represent one residual value.
*09537850SAkhilesh Sanikopinline size_t GetResidualBufferSize(const int rows, const int columns,
*09537850SAkhilesh Sanikop                                    const int subsampling_x,
*09537850SAkhilesh Sanikop                                    const int subsampling_y,
*09537850SAkhilesh Sanikop                                    const size_t residual_size) {
*09537850SAkhilesh Sanikop  // The subsampling multipliers are:
*09537850SAkhilesh Sanikop  //   Both x and y are subsampled: 3 / 2.
*09537850SAkhilesh Sanikop  //   Only x or y is subsampled: 2 / 1 (which is equivalent to 4 / 2).
*09537850SAkhilesh Sanikop  //   Both x and y are not subsampled: 3 / 1 (which is equivalent to 6 / 2).
*09537850SAkhilesh Sanikop  // So we compute the final subsampling multiplier as follows:
*09537850SAkhilesh Sanikop  //   multiplier = (2 + (4 >> subsampling_x >> subsampling_y)) / 2.
*09537850SAkhilesh Sanikop  // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary checks
*09537850SAkhilesh Sanikop  // when parsing quantized coefficients.
*09537850SAkhilesh Sanikop  const int subsampling_multiplier_num =
*09537850SAkhilesh Sanikop      2 + (4 >> subsampling_x >> subsampling_y);
*09537850SAkhilesh Sanikop  const int number_elements =
*09537850SAkhilesh Sanikop      (rows * columns * subsampling_multiplier_num) >> 1;
*09537850SAkhilesh Sanikop  const int tx_padding = 32 * kResidualPaddingVertical;
*09537850SAkhilesh Sanikop  return residual_size * (number_elements + tx_padding);
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// This function is equivalent to:
*09537850SAkhilesh Sanikop// std::min({kTransformWidthLog2[tx_size] - 2,
*09537850SAkhilesh Sanikop//           kTransformWidthLog2[left_tx_size] - 2,
*09537850SAkhilesh Sanikop//           2});
*09537850SAkhilesh Sanikopconstexpr LoopFilterTransformSizeId GetTransformSizeIdWidth(
*09537850SAkhilesh Sanikop    TransformSize tx_size, TransformSize left_tx_size) {
*09537850SAkhilesh Sanikop  return static_cast<LoopFilterTransformSizeId>(
*09537850SAkhilesh Sanikop      static_cast<int>(tx_size > kTransformSize4x16 &&
*09537850SAkhilesh Sanikop                       left_tx_size > kTransformSize4x16) +
*09537850SAkhilesh Sanikop      static_cast<int>(tx_size > kTransformSize8x32 &&
*09537850SAkhilesh Sanikop                       left_tx_size > kTransformSize8x32));
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop// This is used for 7.11.3.4 Block Inter Prediction Process, to select convolve
*09537850SAkhilesh Sanikop// filters.
*09537850SAkhilesh Sanikopinline int GetFilterIndex(const int filter_index, const int length) {
*09537850SAkhilesh Sanikop  if (length <= 4) {
*09537850SAkhilesh Sanikop    if (filter_index == kInterpolationFilterEightTap ||
*09537850SAkhilesh Sanikop        filter_index == kInterpolationFilterEightTapSharp) {
*09537850SAkhilesh Sanikop      return 4;
*09537850SAkhilesh Sanikop    }
*09537850SAkhilesh Sanikop    if (filter_index == kInterpolationFilterEightTapSmooth) {
*09537850SAkhilesh Sanikop      return 5;
*09537850SAkhilesh Sanikop    }
*09537850SAkhilesh Sanikop  }
*09537850SAkhilesh Sanikop  return filter_index;
*09537850SAkhilesh Sanikop}
*09537850SAkhilesh Sanikop
// Returns (|value| + |subsampling|) >> |subsampling|: |value| unchanged when
// |subsampling| is 0, and |value| halved with rounding up when it is 1.
// This has identical results as RightShiftWithRounding since |subsampling| can
// only be 0 or 1.
constexpr int SubsampledValue(int value, int subsampling) {
  return (value + subsampling) >> subsampling;
}
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop}  // namespace libgav1
*09537850SAkhilesh Sanikop
*09537850SAkhilesh Sanikop#endif  // LIBGAV1_SRC_UTILS_COMMON_H_