xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/CpuContext.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
#include "src/cpu/CpuContext.h"

#include "arm_compute/core/CPP/CPPTypes.h"
#include "src/cpu/CpuQueue.h"
#include "src/cpu/CpuTensor.h"

#include <cstdlib>
#include <memory>
#include <new>
#if !defined(__APPLE__) && !defined(__OpenBSD__)
#include <malloc.h>

#if defined(_WIN64)
#define posix_memalign _aligned_realloc
#define posix_memalign_free _aligned_free
#endif // defined(_WIN64)
#endif // !defined(__APPLE__) && !defined(__OpenBSD__)
39 
40 namespace arm_compute
41 {
42 namespace cpu
43 {
44 namespace
45 {
default_allocate(void * user_data,size_t size)46 void *default_allocate(void *user_data, size_t size)
47 {
48     ARM_COMPUTE_UNUSED(user_data);
49     return ::operator new(size);
50 }
default_free(void * user_data,void * ptr)51 void default_free(void *user_data, void *ptr)
52 {
53     ARM_COMPUTE_UNUSED(user_data);
54     ::operator delete(ptr);
55 }
default_aligned_allocate(void * user_data,size_t size,size_t alignment)56 void *default_aligned_allocate(void *user_data, size_t size, size_t alignment)
57 {
58     ARM_COMPUTE_UNUSED(user_data);
59     void *ptr = nullptr;
60 #if defined(BARE_METAL)
61     size_t rem       = size % alignment;
62     size_t real_size = (rem) ? (size + alignment - rem) : size;
63     ptr              = memalign(alignment, real_size);
64 #else  /* defined(BARE_METAL) */
65     if(posix_memalign(&ptr, alignment, size) != 0)
66     {
67         // posix_memalign returns non-zero on failures, the return values will be
68         // - EINVAL: wrong alignment
69         // - ENOMEM: insufficient memory
70         ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid");
71     }
72 #endif /* defined(BARE_METAL) */
73     return ptr;
74 }
default_aligned_free(void * user_data,void * ptr)75 void default_aligned_free(void *user_data, void *ptr)
76 {
77     ARM_COMPUTE_UNUSED(user_data);
78     free(ptr);
79 }
80 static AclAllocator default_allocator = { &default_allocate,
81                                           &default_free,
82                                           &default_aligned_allocate,
83                                           &default_aligned_free,
84                                           nullptr
85                                         };
86 
populate_allocator(AclAllocator * external_allocator)87 AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
88 {
89     bool is_valid = (external_allocator != nullptr);
90     if(is_valid)
91     {
92         is_valid = is_valid && (external_allocator->alloc != nullptr);
93         is_valid = is_valid && (external_allocator->free != nullptr);
94         is_valid = is_valid && (external_allocator->aligned_alloc != nullptr);
95         is_valid = is_valid && (external_allocator->aligned_free != nullptr);
96     }
97     return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
98 }
99 
populate_capabilities_flags(AclTargetCapabilities external_caps)100 cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps)
101 {
102     cpuinfo::CpuIsaInfo isa_caps;
103 
104     // Extract SIMD extension
105     isa_caps.neon = external_caps & AclCpuCapabilitiesNeon;
106     isa_caps.sve  = external_caps & AclCpuCapabilitiesSve;
107     isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
108 
109     // Extract data-type support
110     isa_caps.fp16    = external_caps & AclCpuCapabilitiesFp16;
111     isa_caps.bf16    = external_caps & AclCpuCapabilitiesBf16;
112     isa_caps.svebf16 = isa_caps.bf16;
113 
114     // Extract ISA extensions
115     isa_caps.dot      = external_caps & AclCpuCapabilitiesDot;
116     isa_caps.i8mm     = external_caps & AclCpuCapabilitiesMmlaInt8;
117     isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp;
118 
119     return isa_caps;
120 }
121 
populate_capabilities(AclTargetCapabilities external_caps,int32_t max_threads)122 CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps,
123                                       int32_t               max_threads)
124 {
125     CpuCapabilities caps;
126 
127     // Populate capabilities with system information
128     caps.cpu_info = cpuinfo::CpuInfo::build();
129     if(external_caps != AclCpuCapabilitiesAuto)
130     {
131         cpuinfo::CpuIsaInfo isa  = populate_capabilities_flags(external_caps);
132         auto                cpus = caps.cpu_info.cpus();
133 
134         caps.cpu_info = cpuinfo::CpuInfo(isa, cpus);
135     }
136 
137     // Set max number of threads
138 #if defined(BARE_METAL)
139     ARM_COMPUTE_UNUSED(max_threads);
140     caps.max_threads = 1;
141 #else  /* defined(BARE_METAL) */
142     caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
143 #endif /* defined(BARE_METAL) */
144 
145     return caps;
146 }
147 } // namespace
148 
CpuContext(const AclContextOptions * options)149 CpuContext::CpuContext(const AclContextOptions *options)
150     : IContext(Target::Cpu),
151       _allocator(default_allocator),
152       _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1))
153 {
154     if(options != nullptr)
155     {
156         _allocator = populate_allocator(options->allocator);
157         _caps      = populate_capabilities(options->capabilities, options->max_compute_units);
158     }
159 }
160 
capabilities() const161 const CpuCapabilities &CpuContext::capabilities() const
162 {
163     return _caps;
164 }
165 
allocator()166 AllocatorWrapper &CpuContext::allocator()
167 {
168     return _allocator;
169 }
170 
create_tensor(const AclTensorDescriptor & desc,bool allocate)171 ITensorV2 *CpuContext::create_tensor(const AclTensorDescriptor &desc, bool allocate)
172 {
173     CpuTensor *tensor = new CpuTensor(this, desc);
174     if(tensor != nullptr && allocate)
175     {
176         tensor->allocate();
177     }
178     return tensor;
179 }
180 
create_queue(const AclQueueOptions * options)181 IQueue *CpuContext::create_queue(const AclQueueOptions *options)
182 {
183     return new CpuQueue(this, options);
184 }
185 } // namespace cpu
186 } // namespace arm_compute
187