xref: /aosp_15_r20/external/libgav1/src/dsp/x86/common_sse4.h (revision 095378508e87ed692bf8dfeb34008b65b3735891)
1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_
18 #define LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_
19 
20 #include "src/utils/compiler_attributes.h"
21 #include "src/utils/cpu.h"
22 
23 #if LIBGAV1_TARGETING_SSE4_1
24 
25 #include <emmintrin.h>
26 #include <smmintrin.h>
27 
28 #include <cassert>
29 #include <cstddef>
30 #include <cstdint>
31 #include <cstring>
32 
33 #if 0
34 #include <cinttypes>
35 #include <cstdio>
36 
// Debugging helpers, compiled out by the surrounding #if 0. Left here for
// convenience.

// Dumps the 128-bit register |r| to stderr as "<name>\t: " followed by its
// lanes in hexadecimal (in memory order) and a trailing newline. |size| is the
// lane width in bits: 8, 16 and 32 print 16, 8 and 4 lanes respectively; any
// other value prints two 64-bit lanes.
inline void PrintReg(const __m128i r, const char* const name, int size) {
  // The lanes are copied out with memcpy() instead of being read through an
  // inactive union member, which is undefined behavior in C++.
  std::fprintf(stderr, "%s\t: ", name);
  if (size == 8) {
    uint8_t lanes[16];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    std::memcpy(lanes, &r, sizeof(lanes));
    for (const uint8_t lane : lanes) std::fprintf(stderr, "%.2x ", lane);
  } else if (size == 16) {
    uint16_t lanes[8];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    std::memcpy(lanes, &r, sizeof(lanes));
    for (const uint16_t lane : lanes) std::fprintf(stderr, "%.4x ", lane);
  } else if (size == 32) {
    uint32_t lanes[4];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    std::memcpy(lanes, &r, sizeof(lanes));
    for (const uint32_t lane : lanes) std::fprintf(stderr, "%.8x ", lane);
  } else {
    uint64_t lanes[2];
    static_assert(sizeof(lanes) == sizeof(r), "lane array must cover r");
    std::memcpy(lanes, &r, sizeof(lanes));
    for (const uint64_t lane : lanes) {
      std::fprintf(stderr, "%.16" PRIx64 " ", lane);
    }
  }
  std::fprintf(stderr, "\n");
}
61 
// Writes "<name>: <r>" to stderr, with |r| formatted as a signed decimal
// integer, followed by a newline.
inline void PrintReg(const int r, const char* const name) {
  std::fprintf(stderr, "%s: %d\n", name, r);
}
65 
// Writes "<name>: " to stderr followed by |r| as at least eight hexadecimal
// digits and a newline.
inline void PrintRegX(const int r, const char* const name) {
  // %x consumes an unsigned int, so convert explicitly rather than passing a
  // (possibly negative) int through the variadic argument list.
  std::fprintf(stderr, "%s: %.8x\n", name, static_cast<unsigned int>(r));
}
69 
// Convenience wrappers that label the output with the spelling of |var|.
// None of them ends in a semicolon, so each one expands to a single expression
// and can be used like a function call, e.g. in an unbraced if/else.
//   PR(var, N): dump the __m128i |var| as lanes of N bits.
//   PD(var):    print the int |var| in decimal.
//   PX(var):    print the int |var| in hexadecimal.
#define PR(var, N) PrintReg(var, #var, N)
#define PD(var) PrintReg(var, #var)
#define PX(var) PrintRegX(var, #var)
73 
74 #if LIBGAV1_MSAN
75 #include <sanitizer/msan_interface.h>
76 
77 inline void PrintShadow(const void* r, const char* const name,
78                         const size_t size) {
79   fprintf(stderr, "Shadow for %s:\n", name);
80   __msan_print_shadow(r, size);
81 }
82 #define PS(var, N) PrintShadow(var, #var, N)
83 
84 #endif  // LIBGAV1_MSAN
85 
86 #endif  // 0
87 
namespace libgav1 {
namespace dsp {
namespace sse4 {

// The shared helper definitions are textually included inside this namespace,
// so everything the .inc file defines becomes a member of
// libgav1::dsp::sse4.
#include "src/dsp/x86/common_sse4.inc"

}  // namespace sse4

// NOLINTBEGIN(misc-unused-using-decls)
// These function aliases shall not be visible to external code. They are
// restricted to x86/*_sse4.cc files only. This scheme exists to distinguish two
// possible implementations of common functions, which may differ based on
// whether the compiler is permitted to use avx2 instructions.
//
// The using-declarations below re-export the sse4 helpers into libgav1::dsp so
// that code in that namespace can call them unqualified. The suppression
// markers surrounding this list must name the same check; clang-tidy reports
// the block as unmatched otherwise.
using sse4::Load2;
using sse4::Load2x2;
using sse4::Load4;
using sse4::Load4x2;
using sse4::LoadAligned16;
using sse4::LoadAligned16Msan;
using sse4::LoadHi8;
using sse4::LoadHi8Msan;
using sse4::LoadLo8;
using sse4::LoadLo8Msan;
using sse4::LoadUnaligned16;
using sse4::LoadUnaligned16Msan;
using sse4::MaskHighNBytes;
using sse4::RightShiftWithRounding_S16;
using sse4::RightShiftWithRounding_S32;
using sse4::RightShiftWithRounding_U16;
using sse4::RightShiftWithRounding_U32;
using sse4::Store2;
using sse4::Store4;
using sse4::StoreAligned16;
using sse4::StoreHi8;
using sse4::StoreLo8;
using sse4::StoreUnaligned16;
// NOLINTEND(misc-unused-using-decls)

}  // namespace dsp
}  // namespace libgav1
128 
129 #endif  // LIBGAV1_TARGETING_SSE4_1
130 #endif  // LIBGAV1_SRC_DSP_X86_COMMON_SSE4_H_
131