1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_INTRINSICS_SIMD_REGISTER_H_
18 #define BERBERIS_INTRINSICS_SIMD_REGISTER_H_
19 
20 #include <cstdint>
21 #include <cstring>
22 #include <tuple>
23 
24 #include "berberis/base/bit_util.h"
25 #include "berberis/intrinsics/common/intrinsics_float.h"
26 
27 namespace berberis {
28 
29 class SIMD128Register;
30 
/*
 * We want to use partial specialization for SIMD128Register::[GS]et, but
 * it's not allowed for class members.  Use helper functions instead.
 */
// Reads element `index` of type T from `reg`. The primary template is deleted, so only types
// that have an explicit specialization (defined later in this header) can be read.
template <typename T>
[[nodiscard]] constexpr T SIMD128RegisterGet(const SIMD128Register* reg, int index) = delete;
// Writes `elem` into element `index` of `reg`. The primary template is deleted, so only types
// that have an explicit specialization (defined later in this header) can be written.
template <typename T>
constexpr T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index) = delete;
39 
// Forward declarations of the whole-register comparison and bitwise operators. They are needed
// before the class body because SIMD128Register declares them as friends and uses them in its
// compound-assignment operators; the definitions follow at the end of this header.
[[nodiscard]] constexpr bool operator==(SIMD128Register lhs, SIMD128Register rhs);
[[nodiscard]] constexpr bool operator!=(SIMD128Register lhs, SIMD128Register rhs);
[[nodiscard]] constexpr SIMD128Register operator&(SIMD128Register lhs, SIMD128Register rhs);
[[nodiscard]] constexpr SIMD128Register operator|(SIMD128Register lhs, SIMD128Register rhs);
[[nodiscard]] constexpr SIMD128Register operator^(SIMD128Register lhs, SIMD128Register rhs);
[[nodiscard]] constexpr SIMD128Register operator~(SIMD128Register lhs);
46 
#if defined(__GNUC__)
// 16-byte GNU vector types, one alias per lane layout. Every alias carries may_alias so that
// objects of these types may be used to access storage of other types.
// Note: Int8x16 is built on plain `char`, whose signedness is implementation-defined.
using Int8x16 = char __attribute__((__vector_size__(16), may_alias));
using UInt8x16 = unsigned char __attribute__((__vector_size__(16), may_alias));
using Int16x8 = short __attribute__((__vector_size__(16), may_alias));
using UInt16x8 = unsigned short __attribute__((__vector_size__(16), may_alias));
using Int32x4 = int __attribute__((__vector_size__(16), may_alias));
using UInt32x4 = unsigned int __attribute__((__vector_size__(16), may_alias));
using UInt64x2 = unsigned long long __attribute__((__vector_size__(16), may_alias));
using Float64x2 = double __attribute__((__vector_size__(16), may_alias));
// NOTE(review): only the two aliases below spell out __aligned__(16) explicitly — confirm whether
// the difference from the aliases above is intentional.
using Int64x2 = long long __attribute__((__vector_size__(16), __aligned__(16), may_alias));
using Float32x4 = float __attribute__((__vector_size__(16), __aligned__(16), may_alias));

// Tuples holding one unsigned value per lane (16, 8, 4 or 2 lanes). SIMD128Register has a
// constructor for each of them.
using UInt8x16Tuple =
    std::tuple<uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
               uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t>;
using UInt16x8Tuple =
    std::tuple<uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t>;
using UInt32x4Tuple = std::tuple<uint32_t, uint32_t, uint32_t, uint32_t>;
using UInt64x2Tuple = std::tuple<uint64_t, uint64_t>;
#endif
67 
68 class SIMD128Register {
69  public:
70   // TODO(b/260725458): use explicit(sizeof(T) == 16) instead of three constructors when C++20 would
71   // be available.
72   template <typename T, typename = std::enable_if_t<sizeof(T) < 16>>
73   explicit SIMD128Register(T elem) : int8{} {
74     Set<T>(elem, 0);
75   }
76   SIMD128Register() = default;
77   SIMD128Register(const SIMD128Register&) = default;
78   SIMD128Register(SIMD128Register&&) = default;
79 
SIMD128Register(UInt8x16Tuple uint8x16_tuple)80   SIMD128Register(UInt8x16Tuple uint8x16_tuple) noexcept
81       : uint8{[&uint8x16_tuple] {
82           auto [x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15] =
83               uint8x16_tuple;
84           uint8_t result[16] = {
85               x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15};
86           return std::bit_cast<Int8x16>(result);
87         }()} {}
SIMD128Register(UInt16x8Tuple uint16x8_tuple)88   SIMD128Register(UInt16x8Tuple uint16x8_tuple) noexcept
89       : uint8{[&uint16x8_tuple] {
90           auto [x0, x1, x2, x3, x4, x5, x6, x7] = uint16x8_tuple;
91           uint16_t result[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
92           return std::bit_cast<Int16x8>(result);
93         }()} {}
SIMD128Register(UInt32x4Tuple uint32x4_tuple)94   SIMD128Register(UInt32x4Tuple uint32x4_tuple) noexcept
95       : uint8{[&uint32x4_tuple] {
96           auto [x0, x1, x2, x3] = uint32x4_tuple;
97           uint32_t result[4] = {x0, x1, x2, x3};
98           return std::bit_cast<Int32x4>(result);
99         }()} {}
SIMD128Register(UInt64x2Tuple uint64x2_tuple)100   SIMD128Register(UInt64x2Tuple uint64x2_tuple) noexcept
101       : uint8{[&uint64x2_tuple] {
102           auto [x0, x1] = uint64x2_tuple;
103           uint64_t result[2] = {x0, x1};
104           return std::bit_cast<Int64x2>(result);
105         }()} {}
106 
107   SIMD128Register& operator=(const SIMD128Register&) = default;
108   SIMD128Register& operator=(SIMD128Register&&) = default;
109   // Note that all other constructos are not constexpr because they not compatible with notion of
110   // “active union member”.
111   // Attribute gnu::may_alias prevents UB at runtime, but doesn't make it possible to make “active
112   // union member” diffused in constexpr.
113 #if defined(__LP64__)
SIMD128Register(__int128_t elem)114   constexpr SIMD128Register(__int128_t elem) : int128{(elem)} {}
SIMD128Register(Int128 elem)115   constexpr SIMD128Register(Int128 elem) : int128{(elem.value)} {}
SIMD128Register(SatInt128 elem)116   constexpr SIMD128Register(SatInt128 elem) : int128{(elem.value)} {}
SIMD128Register(__uint128_t elem)117   constexpr SIMD128Register(__uint128_t elem) : uint128{(elem)} {}
SIMD128Register(UInt128 elem)118   constexpr SIMD128Register(UInt128 elem) : uint128{(elem.value)} {}
SIMD128Register(SatUInt128 elem)119   constexpr SIMD128Register(SatUInt128 elem) : uint128{(elem.value)} {}
120 #endif
121 #if defined(__GNUC__)
122   // Note: we couldn't use elem's below to directly initialize SIMD128Register (even if it works
123   // fine with __int128_t and __uint128_t), but Set works correctly if we pick correct “active
124   // union member” first.
SIMD128Register(Int8x16 elem)125   constexpr SIMD128Register(Int8x16 elem) : int8{} { Set(elem); }
SIMD128Register(UInt8x16 elem)126   constexpr SIMD128Register(UInt8x16 elem) : uint8{} { Set(elem); }
SIMD128Register(Int16x8 elem)127   constexpr SIMD128Register(Int16x8 elem) : int16{} { Set(elem); }
SIMD128Register(UInt16x8 elem)128   constexpr SIMD128Register(UInt16x8 elem) : uint16{} { Set(elem); }
SIMD128Register(Int32x4 elem)129   constexpr SIMD128Register(Int32x4 elem) : int32{} { Set(elem); }
SIMD128Register(UInt32x4 elem)130   constexpr SIMD128Register(UInt32x4 elem) : uint32{} { Set(elem); }
SIMD128Register(UInt64x2 elem)131   constexpr SIMD128Register(UInt64x2 elem) : uint64{} { Set(elem); }
SIMD128Register(Float64x2 elem)132   constexpr SIMD128Register(Float64x2 elem) : float64{} { Set(elem); }
SIMD128Register(Int64x2 elem)133   constexpr SIMD128Register(Int64x2 elem) : int64{} { Set(elem); }
SIMD128Register(Float32x4 elem)134   constexpr SIMD128Register(Float32x4 elem) : float32{} { Set(elem); }
135 #endif
136 
137   // Generates optimal assembly for x86 and riscv.
138   template <typename T>
compareVectors(T x,T y)139   static bool compareVectors(T x, T y) {
140     T res = x == y;
141     bool result = true;
142     for (int i = 0; i < int{sizeof(SIMD128Register) / sizeof(T)}; ++i) {
143       result &= res[i];
144     }
145     return result;
146   }
147 
148   template <typename T>
149   [[nodiscard]] constexpr auto Get(int index) const
150       -> std::enable_if_t<sizeof(T) < 16, std::decay_t<T>> {
151     return SIMD128RegisterGet<std::decay_t<T>>(this, index);
152   }
153   template <typename T>
154   constexpr auto Set(T elem, int index) -> std::enable_if_t<sizeof(T) < 16, std::decay_t<T>> {
155     return SIMD128RegisterSet<T>(this, elem, index);
156   }
157   template <typename T>
158   [[nodiscard]] constexpr auto Get() const -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> {
159     return SIMD128RegisterGet<std::decay_t<T>>(this, 0);
160   }
161   template <typename T>
162   [[nodiscard]] constexpr auto Get(int index) const
163       -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> {
164     CHECK_EQ(index, 0);
165     return SIMD128RegisterGet<std::decay_t<T>>(this, 0);
166   }
167   template <typename T>
168   constexpr auto Set(T elem) -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> {
169     return SIMD128RegisterSet<std::decay_t<T>>(this, elem, 0);
170   }
171   template <typename T>
172   constexpr auto Set(T elem, int index) -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> {
173     CHECK_EQ(index, 0);
174     return SIMD128RegisterSet<std::decay_t<T>>(this, elem, 0);
175   }
176   template <typename T>
177   friend bool operator==(T lhs, SIMD128Register rhs) {
178     // Note comparison of two vectors return vector of the same type. In such a case we need to
179     // merge many bools that we got.
180     if constexpr (sizeof(decltype(lhs == rhs.template Get<T>())) == sizeof(SIMD128Register)) {
181       return compareVectors(lhs, rhs.template Get<T>());
182     } else {
183       return lhs == rhs.Get<T>();
184     }
185   }
186   template <typename T>
187   friend bool operator!=(T lhs, SIMD128Register rhs) {
188     // Note comparison of two vectors return vector of the same type. In such a case we need to
189     // merge many bools that we got.
190     if constexpr (sizeof(decltype(lhs != rhs.template Get<T>())) == sizeof(SIMD128Register)) {
191       return !compareVectors(lhs, rhs.template Get<T>());
192     } else {
193       return lhs != rhs.Get<T>();
194     }
195   }
196   template <typename T>
197   friend bool operator==(SIMD128Register lhs, T rhs) {
198     // Note comparison of two vectors return vector of the same type. In such a case we need to
199     // merge many bools that we got.
200     if constexpr (sizeof(decltype(lhs.template Get<T>() == rhs)) == sizeof(SIMD128Register)) {
201       // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power)
202       // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true.
203       return compareVectors(lhs.template Get<T>(), rhs);
204     } else {
205       return lhs.Get<T>() == rhs;
206     }
207   }
208   template <typename T>
209   friend bool operator!=(SIMD128Register lhs, T rhs) {
210     // Note comparison of two vectors return vector of the same type. In such a case we need to
211     // merge many bools that we got.
212     if constexpr (sizeof(decltype(lhs.template Get<T>() == rhs)) == sizeof(SIMD128Register)) {
213       // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power)
214       // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true.
215       return !compareVectors(lhs.template Get<T>(), rhs);
216     } else {
217       return lhs.Get<T>() != rhs;
218     }
219   }
220 #if defined(__GNUC__)
221   friend constexpr bool operator==(SIMD128Register lhs, SIMD128Register rhs);
222   friend constexpr bool operator!=(SIMD128Register lhs, SIMD128Register rhs);
223   friend constexpr SIMD128Register operator&(SIMD128Register lhs, SIMD128Register rhs);
224   constexpr SIMD128Register& operator&=(SIMD128Register other) { return *this = *this & other; }
225   friend constexpr SIMD128Register operator|(SIMD128Register lhs, SIMD128Register rhs);
226   constexpr SIMD128Register& operator|=(SIMD128Register other) { return *this = *this | other; }
227   friend constexpr SIMD128Register operator^(SIMD128Register lhs, SIMD128Register rhs);
228   constexpr SIMD128Register& operator^=(SIMD128Register other) { return *this = *this ^ other; }
229   friend constexpr SIMD128Register operator~(SIMD128Register lhs);
230 #endif
231 
232  private:
233   union {
234 #ifdef __GNUC__
235     // Note: we are violating strict aliasing rules in the code below (Get and Set function) thus we
236     // need to mask these fields "may_alias". Unknown attributes could be silently ignored by the
237     // compiler. We protect definitions with #ifdef __GNU__ to make sure may_alias is not ignored.
238     [[gnu::vector_size(16), gnu::may_alias]] int8_t int8;
239     [[gnu::vector_size(16), gnu::may_alias]] uint8_t uint8;
240     [[gnu::vector_size(16), gnu::may_alias]] int16_t int16;
241     [[gnu::vector_size(16), gnu::may_alias]] uint16_t uint16;
242     [[gnu::vector_size(16), gnu::may_alias]] int32_t int32;
243     [[gnu::vector_size(16), gnu::may_alias]] uint32_t uint32;
244     [[gnu::vector_size(16), gnu::may_alias]] int64_t int64;
245     [[gnu::vector_size(16), gnu::may_alias]] uint64_t uint64;
246 #if defined(__LP64__)
247     [[gnu::vector_size(16), gnu::may_alias]] __int128_t int128;
248     [[gnu::vector_size(16), gnu::may_alias]] __uint128_t uint128;
249 #endif
250     // Note: we couldn't use Float32/Float64 here because [[gnu::vector]] only works with
251     // raw integer or FP-types.
252     [[gnu::vector_size(16), gnu::may_alias]] float float32;
253     [[gnu::vector_size(16), gnu::may_alias]] double float64;
254 #else
255 #error Unsupported compiler.
256 #endif
257   };
258   template <typename T>
259   friend constexpr T SIMD128RegisterGet(const SIMD128Register* reg, int index);
260   template <typename T>
261   friend constexpr T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index);
262 };
263 
// The register must occupy exactly 16 bytes (128 bits).
static_assert(sizeof(SIMD128Register) == 16, "Unexpected size of SIMD128Register");

// Only the architectures listed below are supported; on each of them the register is required
// to be 16-byte aligned. Any other architecture is rejected at compile time.
#if defined(__i386__) || defined(__x86_64__) || defined(__riscv) || defined(__aarch64__)
static_assert(alignof(SIMD128Register) == 16, "Unexpected align of SIMD128Register");
#else
#error Unsupported architecture
#endif
271 
272 /*
273  * Partial specializations of SIMD128Register getters/setters for most types
274  *
275  * GNU C makes it possible to use unions to quickly and efficiently
276  * operate with subvalues of different types:
277  *   http://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type-punning
278  * Unfortunately it's not a valid ANSI C code thus we always do that via
279  * Get<type>(index) and Set<type>(value, index) accessors.
280  *
281  * For other compilers one will need to use memcpy to guarantee safety.
282  */
#ifdef __GNUC__
// Generates SIMD128RegisterGet/Set specializations for a plain integer TYPE stored in the union
// member MEMBER. The element is read or written by direct vector subscripting. `index` is
// converted to unsigned before the bounds check, so a negative index becomes a large value and
// also fails CHECK_LT. Set returns the result of the assignment expression.
#define SIMD_128_STDINT_REGISTER_GETTER_SETTER(TYPE, MEMBER)                          \
  template <>                                                                         \
  inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) {       \
    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
    return reg->MEMBER[index];                                                        \
  }                                                                                   \
  template <>                                                                         \
  inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
    return reg->MEMBER[index] = elem;                                                 \
  }
// Same as above for wrapper integer types: the raw lane value is wrapped into TYPE with brace
// initialization on the way out, and `elem` is assigned to the raw lane on the way in.
// NOTE(review): this presumably relies on TYPE being convertible to the lane's element type —
// confirm against the wrapper type definitions.
#define SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(TYPE, MEMBER)                         \
  template <>                                                                         \
  inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) {       \
    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
    return {reg->MEMBER[index]};                                                      \
  }                                                                                   \
  template <>                                                                         \
  inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
    return {reg->MEMBER[index] = elem};                                               \
  }
// Generates accessors for a floating-point wrapper TYPE whose lanes are stored as MEMBER_TYPE
// (a raw float or double) in MEMBER. The two types must have the same size; the bits are moved
// between them with memcpy. Set returns the `elem` it was given.
#define SIMD_128_FLOAT_REGISTER_GETTER_SETTER(TYPE, MEMBER_TYPE, MEMBER)              \
  template <>                                                                         \
  inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) {       \
    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
    static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE));                               \
    /* Don't use bit_cast because it's unsafe if -O0 is used. */                      \
    /* See intrinsics_float.h for explanation. */                                     \
    TYPE elem;                                                                        \
    MEMBER_TYPE melem;                                                                \
    melem = reg->MEMBER[index];                                                       \
    memcpy(&elem, &melem, sizeof(TYPE));                                              \
    return elem;                                                                      \
  }                                                                                   \
  template <>                                                                         \
  inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
    static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE));                               \
    /* Don't use bit_cast because it's unsafe if -O0 is used. */                      \
    /* See intrinsics_float.h for explanation. */                                     \
    MEMBER_TYPE melem;                                                                \
    memcpy(&melem, &elem, sizeof(TYPE));                                              \
    reg->MEMBER[index] = melem;                                                       \
    return elem;                                                                      \
  }
// Generates constexpr accessors for a full 16-byte vector TYPE: the whole union member MEMBER is
// returned or overwritten, and the only accepted index is 0.
#define SIMD_128_FULL_REGISTER_GETTER_SETTER(TYPE, MEMBER)                               \
  template <>                                                                            \
  constexpr TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) {       \
    CHECK_EQ(index, 0);                                                                  \
    return reg->MEMBER;                                                                  \
  }                                                                                      \
  template <>                                                                            \
  constexpr TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
    CHECK_EQ(index, 0);                                                                  \
    return reg->MEMBER = elem;                                                           \
  }
#endif
// Accessor specializations for every supported element type. For each lane width the plain
// signed/unsigned integer and its wrapper variants are instantiated; the Raw* wrappers are backed
// by the unsigned member of the matching width, the signed wrappers by the signed member.
// 8-bit lanes.
SIMD_128_STDINT_REGISTER_GETTER_SETTER(int8_t, int8);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt8, uint8);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int8, int8);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt8, int8);
SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint8_t, uint8);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt8, uint8);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt8, uint8);
// 16-bit lanes.
SIMD_128_STDINT_REGISTER_GETTER_SETTER(int16_t, int16);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt16, uint16);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int16, int16);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt16, int16);
SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint16_t, uint16);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt16, uint16);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt16, uint16);
// 32-bit lanes.
SIMD_128_STDINT_REGISTER_GETTER_SETTER(int32_t, int32);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt32, uint32);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int32, int32);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt32, int32);
SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint32_t, uint32);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt32, uint32);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt32, uint32);
// 64-bit lanes.
SIMD_128_STDINT_REGISTER_GETTER_SETTER(int64_t, int64);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt64, uint64);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int64, int64);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt64, int64);
SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint64_t, uint64);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt64, uint64);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt64, uint64);
// 128-bit lanes exist only where the union has int128/uint128 members (LP64 targets).
#if defined(__LP64__)
SIMD_128_STDINT_REGISTER_GETTER_SETTER(__int128_t, int128);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt128, uint128);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int128, int128);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt128, int128);
SIMD_128_STDINT_REGISTER_GETTER_SETTER(__uint128_t, uint128);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt128, uint128);
SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt128, uint128);
#endif
// Whole-register access through each of the 16-byte vector aliases.
#if defined(__GNUC__)
SIMD_128_FULL_REGISTER_GETTER_SETTER(Int8x16, int8);
SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt8x16, uint8);
SIMD_128_FULL_REGISTER_GETTER_SETTER(Int16x8, int16);
SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt16x8, uint16);
SIMD_128_FULL_REGISTER_GETTER_SETTER(Int32x4, int32);
SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt32x4, uint32);
SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt64x2, uint64);
SIMD_128_FULL_REGISTER_GETTER_SETTER(Float64x2, float64);
SIMD_128_FULL_REGISTER_GETTER_SETTER(Int64x2, int64);
SIMD_128_FULL_REGISTER_GETTER_SETTER(Float32x4, float32);
#endif
// Floating-point wrapper types, stored as raw float/double lanes.
SIMD_128_FLOAT_REGISTER_GETTER_SETTER(intrinsics::Float32, float, float32);
SIMD_128_FLOAT_REGISTER_GETTER_SETTER(intrinsics::Float64, double, float64);
// The generator macros are implementation details of this header: undefine every one of them so
// that none remains visible to files that include it. The names must match the #define
// spellings exactly — #undef of a misspelled name is silently accepted and removes nothing.
#undef SIMD_128_FULL_REGISTER_GETTER_SETTER
#undef SIMD_128_FLOAT_REGISTER_GETTER_SETTER
#undef SIMD_128_SAFEINT_REGISTER_GETTER_SETTER
#undef SIMD_128_STDINT_REGISTER_GETTER_SETTER
397 
398 #if defined(__GNUC__)
399 [[nodiscard]] constexpr bool operator==(SIMD128Register lhs, SIMD128Register rhs) {
400   // Note comparison of two vectors return vector of the same type. In such a case we need to
401   // merge many bools that we got.
402   // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power)
403   // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true.
404   return SIMD128Register::compareVectors(lhs.Get<Int64x2>(), rhs.Get<Int64x2>());
405 }
406 [[nodiscard]] constexpr bool operator!=(SIMD128Register lhs, SIMD128Register rhs) {
407   // Note comparison of two vectors return vector of the same type. In such a case we need to
408   // merge many bools that we got.
409   // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power)
410   // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true.
411   return !SIMD128Register::compareVectors(lhs.Get<Int64x2>(), rhs.Get<Int64x2>());
412 }
413 [[nodiscard]] constexpr SIMD128Register operator&(SIMD128Register lhs, SIMD128Register rhs) {
414   return lhs.Get<Int64x2>() & rhs.Get<Int64x2>();
415 }
416 [[nodiscard]] constexpr SIMD128Register operator|(SIMD128Register lhs, SIMD128Register rhs) {
417   return lhs.Get<Int64x2>() | rhs.Get<Int64x2>();
418 }
419 [[nodiscard]] constexpr SIMD128Register operator^(SIMD128Register lhs, SIMD128Register rhs) {
420   return lhs.Get<Int64x2>() ^ rhs.Get<Int64x2>();
421 }
422 [[nodiscard]] constexpr SIMD128Register operator~(SIMD128Register lhs) {
423   return ~lhs.Get<Int64x2>();
424 }
425 #endif
426 
427 }  // namespace berberis
428 
429 #endif  // BERBERIS_INTRINSICS_SIMD_REGISTER_H_
430