1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef BERBERIS_INTRINSICS_SIMD_REGISTER_H_ 18 #define BERBERIS_INTRINSICS_SIMD_REGISTER_H_ 19 20 #include <cstdint> 21 #include <cstring> 22 #include <tuple> 23 24 #include "berberis/base/bit_util.h" 25 #include "berberis/intrinsics/common/intrinsics_float.h" 26 27 namespace berberis { 28 29 class SIMD128Register; 30 31 /* 32 * We want to use partial specialization for SIMD128Register::[GS]et, but it's 33 * it's not allowed for class members. Use helper functions instead. 34 */ 35 template <typename T> 36 [[nodiscard]] constexpr T SIMD128RegisterGet(const SIMD128Register* reg, int index) = delete; 37 template <typename T> 38 constexpr T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index) = delete; 39 40 [[nodiscard]] constexpr bool operator==(SIMD128Register lhs, SIMD128Register rhs); 41 [[nodiscard]] constexpr bool operator!=(SIMD128Register lhs, SIMD128Register rhs); 42 [[nodiscard]] constexpr SIMD128Register operator&(SIMD128Register lhs, SIMD128Register rhs); 43 [[nodiscard]] constexpr SIMD128Register operator|(SIMD128Register lhs, SIMD128Register rhs); 44 [[nodiscard]] constexpr SIMD128Register operator^(SIMD128Register lhs, SIMD128Register rhs); 45 [[nodiscard]] constexpr SIMD128Register operator~(SIMD128Register lhs); 46 47 #if defined(__GNUC__) 48 using Int8x16 = char __attribute__((__vector_size__(16), may_alias)); 49 using UInt8x16 = unsigned char __attribute__((__vector_size__(16), may_alias)); 50 using Int16x8 = short __attribute__((__vector_size__(16), may_alias)); 51 using UInt16x8 = unsigned short __attribute__((__vector_size__(16), may_alias)); 52 using Int32x4 = int __attribute__((__vector_size__(16), may_alias)); 53 using UInt32x4 = unsigned int __attribute__((__vector_size__(16), may_alias)); 54 using UInt64x2 = unsigned long long __attribute__((__vector_size__(16), may_alias)); 55 using Float64x2 = double __attribute__((__vector_size__(16), may_alias)); 56 using Int64x2 = long long __attribute__((__vector_size__(16), __aligned__(16), may_alias)); 57 using Float32x4 = float __attribute__((__vector_size__(16), __aligned__(16), may_alias)); 58 59 using UInt8x16Tuple = 60 std::tuple<uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, 61 uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t>; 62 using UInt16x8Tuple = 63 std::tuple<uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t>; 64 using UInt32x4Tuple = std::tuple<uint32_t, uint32_t, uint32_t, uint32_t>; 65 using UInt64x2Tuple = std::tuple<uint64_t, uint64_t>; 66 #endif 67 68 class SIMD128Register { 69 public: 70 // TODO(b/260725458): use explicit(sizeof(T) == 16) instead of three constructors when C++20 would 71 // be available. 72 template <typename T, typename = std::enable_if_t<sizeof(T) < 16>> 73 explicit SIMD128Register(T elem) : int8{} { 74 Set<T>(elem, 0); 75 } 76 SIMD128Register() = default; 77 SIMD128Register(const SIMD128Register&) = default; 78 SIMD128Register(SIMD128Register&&) = default; 79 SIMD128Register(UInt8x16Tuple uint8x16_tuple)80 SIMD128Register(UInt8x16Tuple uint8x16_tuple) noexcept 81 : uint8{[&uint8x16_tuple] { 82 auto [x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15] = 83 uint8x16_tuple; 84 uint8_t result[16] = { 85 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15}; 86 return std::bit_cast<Int8x16>(result); 87 }()} {} SIMD128Register(UInt16x8Tuple uint16x8_tuple)88 SIMD128Register(UInt16x8Tuple uint16x8_tuple) noexcept 89 : uint8{[&uint16x8_tuple] { 90 auto [x0, x1, x2, x3, x4, x5, x6, x7] = uint16x8_tuple; 91 uint16_t result[8] = {x0, x1, x2, x3, x4, x5, x6, x7}; 92 return std::bit_cast<Int16x8>(result); 93 }()} {} SIMD128Register(UInt32x4Tuple uint32x4_tuple)94 SIMD128Register(UInt32x4Tuple uint32x4_tuple) noexcept 95 : uint8{[&uint32x4_tuple] { 96 auto [x0, x1, x2, x3] = uint32x4_tuple; 97 uint32_t result[4] = {x0, x1, x2, x3}; 98 return std::bit_cast<Int32x4>(result); 99 }()} {} SIMD128Register(UInt64x2Tuple uint64x2_tuple)100 SIMD128Register(UInt64x2Tuple uint64x2_tuple) noexcept 101 : uint8{[&uint64x2_tuple] { 102 auto [x0, x1] = uint64x2_tuple; 103 uint64_t result[2] = {x0, x1}; 104 return std::bit_cast<Int64x2>(result); 105 }()} {} 106 107 SIMD128Register& operator=(const SIMD128Register&) = default; 108 SIMD128Register& operator=(SIMD128Register&&) = default; 109 // Note that all other constructos are not constexpr because they not compatible with notion of 110 // “active union member”. 111 // Attribute gnu::may_alias prevents UB at runtime, but doesn't make it possible to make “active 112 // union member” diffused in constexpr. 113 #if defined(__LP64__) SIMD128Register(__int128_t elem)114 constexpr SIMD128Register(__int128_t elem) : int128{(elem)} {} SIMD128Register(Int128 elem)115 constexpr SIMD128Register(Int128 elem) : int128{(elem.value)} {} SIMD128Register(SatInt128 elem)116 constexpr SIMD128Register(SatInt128 elem) : int128{(elem.value)} {} SIMD128Register(__uint128_t elem)117 constexpr SIMD128Register(__uint128_t elem) : uint128{(elem)} {} SIMD128Register(UInt128 elem)118 constexpr SIMD128Register(UInt128 elem) : uint128{(elem.value)} {} SIMD128Register(SatUInt128 elem)119 constexpr SIMD128Register(SatUInt128 elem) : uint128{(elem.value)} {} 120 #endif 121 #if defined(__GNUC__) 122 // Note: we couldn't use elem's below to directly initialize SIMD128Register (even if it works 123 // fine with __int128_t and __uint128_t), but Set works correctly if we pick correct “active 124 // union member” first. SIMD128Register(Int8x16 elem)125 constexpr SIMD128Register(Int8x16 elem) : int8{} { Set(elem); } SIMD128Register(UInt8x16 elem)126 constexpr SIMD128Register(UInt8x16 elem) : uint8{} { Set(elem); } SIMD128Register(Int16x8 elem)127 constexpr SIMD128Register(Int16x8 elem) : int16{} { Set(elem); } SIMD128Register(UInt16x8 elem)128 constexpr SIMD128Register(UInt16x8 elem) : uint16{} { Set(elem); } SIMD128Register(Int32x4 elem)129 constexpr SIMD128Register(Int32x4 elem) : int32{} { Set(elem); } SIMD128Register(UInt32x4 elem)130 constexpr SIMD128Register(UInt32x4 elem) : uint32{} { Set(elem); } SIMD128Register(UInt64x2 elem)131 constexpr SIMD128Register(UInt64x2 elem) : uint64{} { Set(elem); } SIMD128Register(Float64x2 elem)132 constexpr SIMD128Register(Float64x2 elem) : float64{} { Set(elem); } SIMD128Register(Int64x2 elem)133 constexpr SIMD128Register(Int64x2 elem) : int64{} { Set(elem); } SIMD128Register(Float32x4 elem)134 constexpr SIMD128Register(Float32x4 elem) : float32{} { Set(elem); } 135 #endif 136 137 // Generates optimal assembly for x86 and riscv. 138 template <typename T> compareVectors(T x,T y)139 static bool compareVectors(T x, T y) { 140 T res = x == y; 141 bool result = true; 142 for (int i = 0; i < int{sizeof(SIMD128Register) / sizeof(T)}; ++i) { 143 result &= res[i]; 144 } 145 return result; 146 } 147 148 template <typename T> 149 [[nodiscard]] constexpr auto Get(int index) const 150 -> std::enable_if_t<sizeof(T) < 16, std::decay_t<T>> { 151 return SIMD128RegisterGet<std::decay_t<T>>(this, index); 152 } 153 template <typename T> 154 constexpr auto Set(T elem, int index) -> std::enable_if_t<sizeof(T) < 16, std::decay_t<T>> { 155 return SIMD128RegisterSet<T>(this, elem, index); 156 } 157 template <typename T> 158 [[nodiscard]] constexpr auto Get() const -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> { 159 return SIMD128RegisterGet<std::decay_t<T>>(this, 0); 160 } 161 template <typename T> 162 [[nodiscard]] constexpr auto Get(int index) const 163 -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> { 164 CHECK_EQ(index, 0); 165 return SIMD128RegisterGet<std::decay_t<T>>(this, 0); 166 } 167 template <typename T> 168 constexpr auto Set(T elem) -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> { 169 return SIMD128RegisterSet<std::decay_t<T>>(this, elem, 0); 170 } 171 template <typename T> 172 constexpr auto Set(T elem, int index) -> std::enable_if_t<sizeof(T) == 16, std::decay_t<T>> { 173 CHECK_EQ(index, 0); 174 return SIMD128RegisterSet<std::decay_t<T>>(this, elem, 0); 175 } 176 template <typename T> 177 friend bool operator==(T lhs, SIMD128Register rhs) { 178 // Note comparison of two vectors return vector of the same type. In such a case we need to 179 // merge many bools that we got. 180 if constexpr (sizeof(decltype(lhs == rhs.template Get<T>())) == sizeof(SIMD128Register)) { 181 return compareVectors(lhs, rhs.template Get<T>()); 182 } else { 183 return lhs == rhs.Get<T>(); 184 } 185 } 186 template <typename T> 187 friend bool operator!=(T lhs, SIMD128Register rhs) { 188 // Note comparison of two vectors return vector of the same type. In such a case we need to 189 // merge many bools that we got. 190 if constexpr (sizeof(decltype(lhs != rhs.template Get<T>())) == sizeof(SIMD128Register)) { 191 return !compareVectors(lhs, rhs.template Get<T>()); 192 } else { 193 return lhs != rhs.Get<T>(); 194 } 195 } 196 template <typename T> 197 friend bool operator==(SIMD128Register lhs, T rhs) { 198 // Note comparison of two vectors return vector of the same type. In such a case we need to 199 // merge many bools that we got. 200 if constexpr (sizeof(decltype(lhs.template Get<T>() == rhs)) == sizeof(SIMD128Register)) { 201 // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power) 202 // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true. 203 return compareVectors(lhs.template Get<T>(), rhs); 204 } else { 205 return lhs.Get<T>() == rhs; 206 } 207 } 208 template <typename T> 209 friend bool operator!=(SIMD128Register lhs, T rhs) { 210 // Note comparison of two vectors return vector of the same type. In such a case we need to 211 // merge many bools that we got. 212 if constexpr (sizeof(decltype(lhs.template Get<T>() == rhs)) == sizeof(SIMD128Register)) { 213 // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power) 214 // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true. 215 return !compareVectors(lhs.template Get<T>(), rhs); 216 } else { 217 return lhs.Get<T>() != rhs; 218 } 219 } 220 #if defined(__GNUC__) 221 friend constexpr bool operator==(SIMD128Register lhs, SIMD128Register rhs); 222 friend constexpr bool operator!=(SIMD128Register lhs, SIMD128Register rhs); 223 friend constexpr SIMD128Register operator&(SIMD128Register lhs, SIMD128Register rhs); 224 constexpr SIMD128Register& operator&=(SIMD128Register other) { return *this = *this & other; } 225 friend constexpr SIMD128Register operator|(SIMD128Register lhs, SIMD128Register rhs); 226 constexpr SIMD128Register& operator|=(SIMD128Register other) { return *this = *this | other; } 227 friend constexpr SIMD128Register operator^(SIMD128Register lhs, SIMD128Register rhs); 228 constexpr SIMD128Register& operator^=(SIMD128Register other) { return *this = *this ^ other; } 229 friend constexpr SIMD128Register operator~(SIMD128Register lhs); 230 #endif 231 232 private: 233 union { 234 #ifdef __GNUC__ 235 // Note: we are violating strict aliasing rules in the code below (Get and Set function) thus we 236 // need to mask these fields "may_alias". Unknown attributes could be silently ignored by the 237 // compiler. We protect definitions with #ifdef __GNU__ to make sure may_alias is not ignored. 238 [[gnu::vector_size(16), gnu::may_alias]] int8_t int8; 239 [[gnu::vector_size(16), gnu::may_alias]] uint8_t uint8; 240 [[gnu::vector_size(16), gnu::may_alias]] int16_t int16; 241 [[gnu::vector_size(16), gnu::may_alias]] uint16_t uint16; 242 [[gnu::vector_size(16), gnu::may_alias]] int32_t int32; 243 [[gnu::vector_size(16), gnu::may_alias]] uint32_t uint32; 244 [[gnu::vector_size(16), gnu::may_alias]] int64_t int64; 245 [[gnu::vector_size(16), gnu::may_alias]] uint64_t uint64; 246 #if defined(__LP64__) 247 [[gnu::vector_size(16), gnu::may_alias]] __int128_t int128; 248 [[gnu::vector_size(16), gnu::may_alias]] __uint128_t uint128; 249 #endif 250 // Note: we couldn't use Float32/Float64 here because [[gnu::vector]] only works with 251 // raw integer or FP-types. 252 [[gnu::vector_size(16), gnu::may_alias]] float float32; 253 [[gnu::vector_size(16), gnu::may_alias]] double float64; 254 #else 255 #error Unsupported compiler. 256 #endif 257 }; 258 template <typename T> 259 friend constexpr T SIMD128RegisterGet(const SIMD128Register* reg, int index); 260 template <typename T> 261 friend constexpr T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index); 262 }; 263 264 static_assert(sizeof(SIMD128Register) == 16, "Unexpected size of SIMD128Register"); 265 266 #if defined(__i386__) || defined(__x86_64__) || defined(__riscv) || defined(__aarch64__) 267 static_assert(alignof(SIMD128Register) == 16, "Unexpected align of SIMD128Register"); 268 #else 269 #error Unsupported architecture 270 #endif 271 272 /* 273 * Partial specializations of SIMD128Register getters/setters for most types 274 * 275 * GNU C makes it possible to use unions to quickly and efficiently 276 * operate with subvalues of different types: 277 * http://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type-punning 278 * Unfortunately it's not a valid ANSI C code thus we always do that via 279 * Get<type>(index) and Set<type>(value, index) accessors. 280 * 281 * For other compilers one will need to use memcpy to guarantee safety. 282 */ 283 #ifdef __GNUC__ 284 #define SIMD_128_STDINT_REGISTER_GETTER_SETTER(TYPE, MEMBER) \ 285 template <> \ 286 inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) { \ 287 CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \ 288 return reg->MEMBER[index]; \ 289 } \ 290 template <> \ 291 inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \ 292 CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \ 293 return reg->MEMBER[index] = elem; \ 294 } 295 #define SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(TYPE, MEMBER) \ 296 template <> \ 297 inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) { \ 298 CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \ 299 return {reg->MEMBER[index]}; \ 300 } \ 301 template <> \ 302 inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \ 303 CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \ 304 return {reg->MEMBER[index] = elem}; \ 305 } 306 #define SIMD_128_FLOAT_REGISTER_GETTER_SETTER(TYPE, MEMBER_TYPE, MEMBER) \ 307 template <> \ 308 inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) { \ 309 CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \ 310 static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE)); \ 311 /* Don't use bit_cast because it's unsafe if -O0 is used. */ \ 312 /* See intrinsics_float.h for explanation. */ \ 313 TYPE elem; \ 314 MEMBER_TYPE melem; \ 315 melem = reg->MEMBER[index]; \ 316 memcpy(&elem, &melem, sizeof(TYPE)); \ 317 return elem; \ 318 } \ 319 template <> \ 320 inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \ 321 CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE)); \ 322 static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE)); \ 323 /* Don't use bit_cast because it's unsafe if -O0 is used. */ \ 324 /* See intrinsics_float.h for explanation. */ \ 325 MEMBER_TYPE melem; \ 326 memcpy(&melem, &elem, sizeof(TYPE)); \ 327 reg->MEMBER[index] = melem; \ 328 return elem; \ 329 } 330 #define SIMD_128_FULL_REGISTER_GETTER_SETTER(TYPE, MEMBER) \ 331 template <> \ 332 constexpr TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) { \ 333 CHECK_EQ(index, 0); \ 334 return reg->MEMBER; \ 335 } \ 336 template <> \ 337 constexpr TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \ 338 CHECK_EQ(index, 0); \ 339 return reg->MEMBER = elem; \ 340 } 341 #endif 342 SIMD_128_STDINT_REGISTER_GETTER_SETTER(int8_t, int8); 343 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt8, uint8); 344 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int8, int8); 345 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt8, int8); 346 SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint8_t, uint8); 347 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt8, uint8); 348 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt8, uint8); 349 SIMD_128_STDINT_REGISTER_GETTER_SETTER(int16_t, int16); 350 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt16, uint16); 351 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int16, int16); 352 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt16, int16); 353 SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint16_t, uint16); 354 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt16, uint16); 355 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt16, uint16); 356 SIMD_128_STDINT_REGISTER_GETTER_SETTER(int32_t, int32); 357 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt32, uint32); 358 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int32, int32); 359 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt32, int32); 360 SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint32_t, uint32); 361 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt32, uint32); 362 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt32, uint32); 363 SIMD_128_STDINT_REGISTER_GETTER_SETTER(int64_t, int64); 364 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt64, uint64); 365 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int64, int64); 366 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt64, int64); 367 SIMD_128_STDINT_REGISTER_GETTER_SETTER(uint64_t, uint64); 368 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt64, uint64); 369 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt64, uint64); 370 #if defined(__LP64__) 371 SIMD_128_STDINT_REGISTER_GETTER_SETTER(__int128_t, int128); 372 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(RawInt128, uint128); 373 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(Int128, int128); 374 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatInt128, int128); 375 SIMD_128_STDINT_REGISTER_GETTER_SETTER(__uint128_t, uint128); 376 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(UInt128, uint128); 377 SIMD_128_SAFEINT_REGISTER_GETTER_SETTER(SatUInt128, uint128); 378 #endif 379 #if defined(__GNUC__) 380 SIMD_128_FULL_REGISTER_GETTER_SETTER(Int8x16, int8); 381 SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt8x16, uint8); 382 SIMD_128_FULL_REGISTER_GETTER_SETTER(Int16x8, int16); 383 SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt16x8, uint16); 384 SIMD_128_FULL_REGISTER_GETTER_SETTER(Int32x4, int32); 385 SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt32x4, uint32); 386 SIMD_128_FULL_REGISTER_GETTER_SETTER(UInt64x2, uint64); 387 SIMD_128_FULL_REGISTER_GETTER_SETTER(Float64x2, float64); 388 SIMD_128_FULL_REGISTER_GETTER_SETTER(Int64x2, int64); 389 SIMD_128_FULL_REGISTER_GETTER_SETTER(Float32x4, float32); 390 #endif 391 SIMD_128_FLOAT_REGISTER_GETTER_SETTER(intrinsics::Float32, float, float32); 392 SIMD_128_FLOAT_REGISTER_GETTER_SETTER(intrinsics::Float64, double, float64); 393 #undef SIMD_128_FULL_REGISTER_GETTER_SETTER 394 #undef SIMD_128_fLOAT_REGISTER_GETTER_SETTER 395 #undef SIMD_128_SAFEINT_REGISTER_GETTER_SETTER 396 #undef SIMD_128_STDINT_REGISTER_GETTER_SETTER 397 398 #if defined(__GNUC__) 399 [[nodiscard]] constexpr bool operator==(SIMD128Register lhs, SIMD128Register rhs) { 400 // Note comparison of two vectors return vector of the same type. In such a case we need to 401 // merge many bools that we got. 402 // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power) 403 // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true. 404 return SIMD128Register::compareVectors(lhs.Get<Int64x2>(), rhs.Get<Int64x2>()); 405 } 406 [[nodiscard]] constexpr bool operator!=(SIMD128Register lhs, SIMD128Register rhs) { 407 // Note comparison of two vectors return vector of the same type. In such a case we need to 408 // merge many bools that we got. 409 // On CPUs with _mm_movemask_epi8 (native, like on x86, or emulated, like on Power) 410 // _mm_movemask_epi8 return 0xffff if and only if all comparisons returned true. 411 return !SIMD128Register::compareVectors(lhs.Get<Int64x2>(), rhs.Get<Int64x2>()); 412 } 413 [[nodiscard]] constexpr SIMD128Register operator&(SIMD128Register lhs, SIMD128Register rhs) { 414 return lhs.Get<Int64x2>() & rhs.Get<Int64x2>(); 415 } 416 [[nodiscard]] constexpr SIMD128Register operator|(SIMD128Register lhs, SIMD128Register rhs) { 417 return lhs.Get<Int64x2>() | rhs.Get<Int64x2>(); 418 } 419 [[nodiscard]] constexpr SIMD128Register operator^(SIMD128Register lhs, SIMD128Register rhs) { 420 return lhs.Get<Int64x2>() ^ rhs.Get<Int64x2>(); 421 } 422 [[nodiscard]] constexpr SIMD128Register operator~(SIMD128Register lhs) { 423 return ~lhs.Get<Int64x2>(); 424 } 425 #endif 426 427 } // namespace berberis 428 429 #endif // BERBERIS_INTRINSICS_SIMD_REGISTER_H_ 430