1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_ 18 #define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_ 19 20 #include "src/utils/compiler_attributes.h" 21 #include "src/utils/cpu.h" 22 23 #if LIBGAV1_TARGETING_SSE4_1 24 25 #include <emmintrin.h> 26 #include <smmintrin.h> 27 28 #include <cassert> 29 #include <cstddef> 30 #include <cstdint> 31 #include <cstring> 32 33 #if 0 34 #include <cinttypes> 35 #include <cstdio> 36 37 // Quite useful macro for debugging. Left here for convenience. 38 inline void PrintReg(const __m128i r, const char* const name, int size) { 39 int n; 40 union { 41 __m128i r; 42 uint8_t i8[16]; 43 uint16_t i16[8]; 44 uint32_t i32[4]; 45 uint64_t i64[2]; 46 } tmp; 47 tmp.r = r; 48 fprintf(stderr, "%s\t: ", name); 49 if (size == 8) { 50 for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]); 51 } else if (size == 16) { 52 for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]); 53 } else if (size == 32) { 54 for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]); 55 } else { 56 for (n = 0; n < 2; ++n) 57 fprintf(stderr, "%.16" PRIx64 " ", static_cast<uint64_t>(tmp.i64[n])); 58 } 59 fprintf(stderr, "\n"); 60 } 61 62 inline void PrintReg(const int r, const char* const name) { 63 fprintf(stderr, "%s: %d\n", name, r); 64 } 65 66 inline void PrintRegX(const int r, const char* const name) { 67 fprintf(stderr, "%s: %.8x\n", name, r); 68 } 69 70 #define PR(var, N) PrintReg(var, #var, N) 71 #define PD(var) PrintReg(var, #var); 72 #define PX(var) PrintRegX(var, #var); 73 74 #if LIBGAV1_MSAN 75 #include <sanitizer/msan_interface.h> 76 77 inline void PrintShadow(const void* r, const char* const name, 78 const size_t size) { 79 fprintf(stderr, "Shadow for %s:\n", name); 80 __msan_print_shadow(r, size); 81 } 82 #define PS(var, N) PrintShadow(var, #var, N) 83 84 #endif // LIBGAV1_MSAN 85 86 #endif // 0 87 88 namespace libgav1 { 89 namespace dsp { 90 namespace sse4 { 91 92 #include "src/dsp/x86/common_sse4.inc" 93 94 } // namespace sse4 95 96 // NOLINTBEGIN(misc-unused-using-decls) 97 // These function aliases shall not be visible to external code. They are 98 // restricted to x86/*_sse4.cc files only. This scheme exists to distinguish two 99 // possible implementations of common functions, which may differ based on 100 // whether the compiler is permitted to use avx2 instructions. 101 using sse4::Load2; 102 using sse4::Load2x2; 103 using sse4::Load4; 104 using sse4::Load4x2; 105 using sse4::LoadAligned16; 106 using sse4::LoadAligned16Msan; 107 using sse4::LoadHi8; 108 using sse4::LoadHi8Msan; 109 using sse4::LoadLo8; 110 using sse4::LoadLo8Msan; 111 using sse4::LoadUnaligned16; 112 using sse4::LoadUnaligned16Msan; 113 using sse4::MaskHighNBytes; 114 using sse4::RightShiftWithRounding_S16; 115 using sse4::RightShiftWithRounding_S32; 116 using sse4::RightShiftWithRounding_U16; 117 using sse4::RightShiftWithRounding_U32; 118 using sse4::Store2; 119 using sse4::Store4; 120 using sse4::StoreAligned16; 121 using sse4::StoreHi8; 122 using sse4::StoreLo8; 123 using sse4::StoreUnaligned16; 124 // NOLINTEND 125 126 } // namespace dsp 127 } // namespace libgav1 128 129 #endif // LIBGAV1_TARGETING_SSE4_1 130 #endif // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_ 131