xref: /aosp_15_r20/external/executorch/kernels/optimized/vec/vec.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #pragma once
10 
11 #include <executorch/kernels/optimized/vec/vec256/vec256.h>
12 
13 namespace executorch {
14 namespace vec {
15 
16 // See Note [CPU_CAPABILITY namespace]
17 inline namespace CPU_CAPABILITY {
18 
convert_to_bool(Vectorized<int8_t> x)19 inline Vectorized<bool> convert_to_bool(Vectorized<int8_t> x) {
20   __at_align__ bool buffer[x.size()];
21   x.ne(Vectorized<int8_t>(0)).store(buffer);
22 
23   Vectorized<bool> ret;
24   static_assert(x.size() == ret.size(), "");
25   std::memcpy(ret, buffer, ret.size() * sizeof(bool));
26   return ret;
27 }
28 
29 template <>
loadu(const void * ptr)30 inline Vectorized<bool> Vectorized<bool>::loadu(const void* ptr) {
31   // See NOTE [Loading boolean values]
32   return convert_to_bool(Vectorized<int8_t>::loadu(ptr));
33 }
34 
35 template <>
loadu(const void * ptr,int64_t count)36 inline Vectorized<bool> Vectorized<bool>::loadu(const void* ptr, int64_t count) {
37   // See NOTE [Loading boolean values]
38   return convert_to_bool(Vectorized<int8_t>::loadu(ptr, count));
39 }
40 
41 } // namespace CPU_CAPABILITY
42 
43 } // namespace vec
44 } // namespace executorch
45