xref: /aosp_15_r20/external/executorch/kernels/optimized/vec/intrinsics.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #pragma once
10 #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
11 /* GCC or clang-compatible compiler, targeting x86/x86-64 */
12 #include <x86intrin.h>
13 #elif defined(__clang__) && (defined(__ARM_NEON__) || defined(__aarch64__))
14 /* Clang-compatible compiler, targeting arm neon */
15 #include <arm_neon.h>
16 #elif defined(_MSC_VER)
17 /* Microsoft C/C++-compatible compiler */
18 #include <intrin.h>
19 #if _MSC_VER <= 1900
/*
 * Fallback definitions of the 256-bit element-extract intrinsics for the
 * old MSVC toolchains selected by the enclosing _MSC_VER check.
 *
 * Each macro splits the element index Y into:
 *   - the 128-bit half to pull out with _mm256_extractf128_si256
 *     (Y divided by the number of elements per 128-bit half), and
 *   - the element position inside that half
 *     (Y modulo the number of elements per 128-bit half),
 * and then forwards to the matching 128-bit _mm_extract_* intrinsic.
 *
 * Every use of X and Y is parenthesized so that an index written as an
 * expression (for example `BASE + 1` or `HI | LO`) is evaluated as a whole
 * before `>>` / `%` are applied, instead of being re-associated by operator
 * precedence. Y is expanded twice, so it must be free of side effects.
 */
#define _mm256_extract_epi64(X, Y) (_mm_extract_epi64(_mm256_extractf128_si256((X), (Y) >> 1), (Y) % 2))
#define _mm256_extract_epi32(X, Y) (_mm_extract_epi32(_mm256_extractf128_si256((X), (Y) >> 2), (Y) % 4))
#define _mm256_extract_epi16(X, Y) (_mm_extract_epi16(_mm256_extractf128_si256((X), (Y) >> 3), (Y) % 8))
#define _mm256_extract_epi8(X, Y) (_mm_extract_epi8(_mm256_extractf128_si256((X), (Y) >> 4), (Y) % 16))
24 #endif
25 #elif defined(__GNUC__) && (defined(__ARM_NEON__) || defined(__aarch64__))
26 /* GCC-compatible compiler, targeting ARM with NEON */
27 #include <arm_neon.h>
28 #if defined (MISSING_ARM_VLD1)
29 #include <executorch/kernels/optimized/vec/vec256/missing_vld1_neon.h>
30 #elif defined (MISSING_ARM_VST1)
31 #include <executorch/kernels/optimized/vec/vec256/missing_vst1_neon.h>
32 #endif
33 #elif defined(__GNUC__) && defined(__IWMMXT__)
34 /* GCC-compatible compiler, targeting ARM with WMMX */
35 #include <mmintrin.h>
36 #elif defined(__s390x__)
// Targets IBM z/Architecture (s390x).
// <vecintrin.h> is intentionally not included here; it is included later.
39 #elif (defined(__GNUC__) || defined(__xlC__)) &&                               \
40         (defined(__VEC__) || defined(__ALTIVEC__))
41 /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
42 #include <altivec.h>
43 /* We need to undef those tokens defined by <altivec.h> to avoid conflicts
44    with the C++ types. => Can still use __bool/__vector */
45 #undef bool
46 #undef vector
47 #undef pixel
48 #elif defined(__GNUC__) && defined(__SPE__)
49 /* GCC-compatible compiler, targeting PowerPC with SPE */
50 #include <spe.h>
51 #endif
52