xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/CpuContext.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2021-2022 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #include "src/cpu/CpuContext.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "arm_compute/core/CPP/CPPTypes.h"
27*c217d954SCole Faust #include "src/cpu/CpuQueue.h"
28*c217d954SCole Faust #include "src/cpu/CpuTensor.h"
29*c217d954SCole Faust 
30*c217d954SCole Faust #include <cstdlib>
31*c217d954SCole Faust #if !defined(__APPLE__) && !defined(__OpenBSD__)
32*c217d954SCole Faust #include <malloc.h>
33*c217d954SCole Faust 
34*c217d954SCole Faust #if defined(_WIN64)
35*c217d954SCole Faust #define posix_memalign _aligned_realloc
36*c217d954SCole Faust #define posix_memalign_free _aligned_free
37*c217d954SCole Faust #endif // defined(_WIN64)
38*c217d954SCole Faust #endif // !defined(__APPLE__) && !defined(__OpenBSD__)
39*c217d954SCole Faust 
40*c217d954SCole Faust namespace arm_compute
41*c217d954SCole Faust {
42*c217d954SCole Faust namespace cpu
43*c217d954SCole Faust {
44*c217d954SCole Faust namespace
45*c217d954SCole Faust {
default_allocate(void * user_data,size_t size)46*c217d954SCole Faust void *default_allocate(void *user_data, size_t size)
47*c217d954SCole Faust {
48*c217d954SCole Faust     ARM_COMPUTE_UNUSED(user_data);
49*c217d954SCole Faust     return ::operator new(size);
50*c217d954SCole Faust }
default_free(void * user_data,void * ptr)51*c217d954SCole Faust void default_free(void *user_data, void *ptr)
52*c217d954SCole Faust {
53*c217d954SCole Faust     ARM_COMPUTE_UNUSED(user_data);
54*c217d954SCole Faust     ::operator delete(ptr);
55*c217d954SCole Faust }
default_aligned_allocate(void * user_data,size_t size,size_t alignment)56*c217d954SCole Faust void *default_aligned_allocate(void *user_data, size_t size, size_t alignment)
57*c217d954SCole Faust {
58*c217d954SCole Faust     ARM_COMPUTE_UNUSED(user_data);
59*c217d954SCole Faust     void *ptr = nullptr;
60*c217d954SCole Faust #if defined(BARE_METAL)
61*c217d954SCole Faust     size_t rem       = size % alignment;
62*c217d954SCole Faust     size_t real_size = (rem) ? (size + alignment - rem) : size;
63*c217d954SCole Faust     ptr              = memalign(alignment, real_size);
64*c217d954SCole Faust #else  /* defined(BARE_METAL) */
65*c217d954SCole Faust     if(posix_memalign(&ptr, alignment, size) != 0)
66*c217d954SCole Faust     {
67*c217d954SCole Faust         // posix_memalign returns non-zero on failures, the return values will be
68*c217d954SCole Faust         // - EINVAL: wrong alignment
69*c217d954SCole Faust         // - ENOMEM: insufficient memory
70*c217d954SCole Faust         ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid");
71*c217d954SCole Faust     }
72*c217d954SCole Faust #endif /* defined(BARE_METAL) */
73*c217d954SCole Faust     return ptr;
74*c217d954SCole Faust }
default_aligned_free(void * user_data,void * ptr)75*c217d954SCole Faust void default_aligned_free(void *user_data, void *ptr)
76*c217d954SCole Faust {
77*c217d954SCole Faust     ARM_COMPUTE_UNUSED(user_data);
78*c217d954SCole Faust     free(ptr);
79*c217d954SCole Faust }
80*c217d954SCole Faust static AclAllocator default_allocator = { &default_allocate,
81*c217d954SCole Faust                                           &default_free,
82*c217d954SCole Faust                                           &default_aligned_allocate,
83*c217d954SCole Faust                                           &default_aligned_free,
84*c217d954SCole Faust                                           nullptr
85*c217d954SCole Faust                                         };
86*c217d954SCole Faust 
populate_allocator(AclAllocator * external_allocator)87*c217d954SCole Faust AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
88*c217d954SCole Faust {
89*c217d954SCole Faust     bool is_valid = (external_allocator != nullptr);
90*c217d954SCole Faust     if(is_valid)
91*c217d954SCole Faust     {
92*c217d954SCole Faust         is_valid = is_valid && (external_allocator->alloc != nullptr);
93*c217d954SCole Faust         is_valid = is_valid && (external_allocator->free != nullptr);
94*c217d954SCole Faust         is_valid = is_valid && (external_allocator->aligned_alloc != nullptr);
95*c217d954SCole Faust         is_valid = is_valid && (external_allocator->aligned_free != nullptr);
96*c217d954SCole Faust     }
97*c217d954SCole Faust     return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
98*c217d954SCole Faust }
99*c217d954SCole Faust 
populate_capabilities_flags(AclTargetCapabilities external_caps)100*c217d954SCole Faust cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps)
101*c217d954SCole Faust {
102*c217d954SCole Faust     cpuinfo::CpuIsaInfo isa_caps;
103*c217d954SCole Faust 
104*c217d954SCole Faust     // Extract SIMD extension
105*c217d954SCole Faust     isa_caps.neon = external_caps & AclCpuCapabilitiesNeon;
106*c217d954SCole Faust     isa_caps.sve  = external_caps & AclCpuCapabilitiesSve;
107*c217d954SCole Faust     isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
108*c217d954SCole Faust 
109*c217d954SCole Faust     // Extract data-type support
110*c217d954SCole Faust     isa_caps.fp16    = external_caps & AclCpuCapabilitiesFp16;
111*c217d954SCole Faust     isa_caps.bf16    = external_caps & AclCpuCapabilitiesBf16;
112*c217d954SCole Faust     isa_caps.svebf16 = isa_caps.bf16;
113*c217d954SCole Faust 
114*c217d954SCole Faust     // Extract ISA extensions
115*c217d954SCole Faust     isa_caps.dot      = external_caps & AclCpuCapabilitiesDot;
116*c217d954SCole Faust     isa_caps.i8mm     = external_caps & AclCpuCapabilitiesMmlaInt8;
117*c217d954SCole Faust     isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp;
118*c217d954SCole Faust 
119*c217d954SCole Faust     return isa_caps;
120*c217d954SCole Faust }
121*c217d954SCole Faust 
populate_capabilities(AclTargetCapabilities external_caps,int32_t max_threads)122*c217d954SCole Faust CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps,
123*c217d954SCole Faust                                       int32_t               max_threads)
124*c217d954SCole Faust {
125*c217d954SCole Faust     CpuCapabilities caps;
126*c217d954SCole Faust 
127*c217d954SCole Faust     // Populate capabilities with system information
128*c217d954SCole Faust     caps.cpu_info = cpuinfo::CpuInfo::build();
129*c217d954SCole Faust     if(external_caps != AclCpuCapabilitiesAuto)
130*c217d954SCole Faust     {
131*c217d954SCole Faust         cpuinfo::CpuIsaInfo isa  = populate_capabilities_flags(external_caps);
132*c217d954SCole Faust         auto                cpus = caps.cpu_info.cpus();
133*c217d954SCole Faust 
134*c217d954SCole Faust         caps.cpu_info = cpuinfo::CpuInfo(isa, cpus);
135*c217d954SCole Faust     }
136*c217d954SCole Faust 
137*c217d954SCole Faust     // Set max number of threads
138*c217d954SCole Faust #if defined(BARE_METAL)
139*c217d954SCole Faust     ARM_COMPUTE_UNUSED(max_threads);
140*c217d954SCole Faust     caps.max_threads = 1;
141*c217d954SCole Faust #else  /* defined(BARE_METAL) */
142*c217d954SCole Faust     caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
143*c217d954SCole Faust #endif /* defined(BARE_METAL) */
144*c217d954SCole Faust 
145*c217d954SCole Faust     return caps;
146*c217d954SCole Faust }
147*c217d954SCole Faust } // namespace
148*c217d954SCole Faust 
CpuContext(const AclContextOptions * options)149*c217d954SCole Faust CpuContext::CpuContext(const AclContextOptions *options)
150*c217d954SCole Faust     : IContext(Target::Cpu),
151*c217d954SCole Faust       _allocator(default_allocator),
152*c217d954SCole Faust       _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1))
153*c217d954SCole Faust {
154*c217d954SCole Faust     if(options != nullptr)
155*c217d954SCole Faust     {
156*c217d954SCole Faust         _allocator = populate_allocator(options->allocator);
157*c217d954SCole Faust         _caps      = populate_capabilities(options->capabilities, options->max_compute_units);
158*c217d954SCole Faust     }
159*c217d954SCole Faust }
160*c217d954SCole Faust 
capabilities() const161*c217d954SCole Faust const CpuCapabilities &CpuContext::capabilities() const
162*c217d954SCole Faust {
163*c217d954SCole Faust     return _caps;
164*c217d954SCole Faust }
165*c217d954SCole Faust 
allocator()166*c217d954SCole Faust AllocatorWrapper &CpuContext::allocator()
167*c217d954SCole Faust {
168*c217d954SCole Faust     return _allocator;
169*c217d954SCole Faust }
170*c217d954SCole Faust 
create_tensor(const AclTensorDescriptor & desc,bool allocate)171*c217d954SCole Faust ITensorV2 *CpuContext::create_tensor(const AclTensorDescriptor &desc, bool allocate)
172*c217d954SCole Faust {
173*c217d954SCole Faust     CpuTensor *tensor = new CpuTensor(this, desc);
174*c217d954SCole Faust     if(tensor != nullptr && allocate)
175*c217d954SCole Faust     {
176*c217d954SCole Faust         tensor->allocate();
177*c217d954SCole Faust     }
178*c217d954SCole Faust     return tensor;
179*c217d954SCole Faust }
180*c217d954SCole Faust 
create_queue(const AclQueueOptions * options)181*c217d954SCole Faust IQueue *CpuContext::create_queue(const AclQueueOptions *options)
182*c217d954SCole Faust {
183*c217d954SCole Faust     return new CpuQueue(this, options);
184*c217d954SCole Faust }
185*c217d954SCole Faust } // namespace cpu
186*c217d954SCole Faust } // namespace arm_compute
187