//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <aclCommon/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

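// Tensor handle for GPU (CL) workloads that wraps externally allocated memory.
// Instead of allocating its own device buffer, it imports client memory (host malloc,
// dma_buf, protected dma_buf or a gralloc-backed cl_mem) into the underlying
// arm_compute::CLTensor, so the data can be consumed without an extra copy.
//
// Minimal usage sketch for the host-pointer path (illustrative only, not part of this
// header; 'info' and 'hostBuffer' are hypothetical, and the allocation is assumed to be
// suitably aligned and at least as large as the tensor):
//
//     armnn::TensorInfo info({1, 16}, armnn::DataType::Float32);
//     ClImportTensorHandle handle(info, static_cast<MemorySourceFlags>(MemorySource::Malloc));
//     void* hostBuffer = /* aligned allocation of at least info.GetNumBytes() */;
//     if (handle.CanBeImported(hostBuffer, MemorySource::Malloc))
//     {
//         handle.Import(hostBuffer, MemorySource::Malloc); // wraps the allocation via clImportMemoryARM
//     }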
class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

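    // Imports externally allocated memory into the underlying CLTensor.
    // Malloc, DmaBuf and DmaBufProtected sources are wrapped through the Arm OpenCL import
    // extension (clImportMemoryARM) via ClImport(); a Gralloc source is expected to already
    // be a cl_mem and is imported into the tensor allocator directly. Throws
    // MemoryImportException if the source is not enabled in m_ImportFlags or the import fails.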
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };
                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBuf)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory);

            }
            if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory, true);

            }
            // Case for importing memory allocated by OpenCl externally directly into the tensor
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet Allocated
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Importing memory allocated by OpenCl into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }

                // m_Tensor.buffer() initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // m_Tensor.buffer() previously imported.
                else if (m_Imported)
                {
                    // Importing memory allocated by OpenCl into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

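    // Only the Malloc source is pre-validated here: it returns true and defers the real
    // check to ClImport(). Other sources enabled in m_ImportFlags return false, and sources
    // that are not enabled cause a MemoryImportException.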
    virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                // Returning true as ClImport() function will decide if memory can be imported or not
                return true;
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
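    // Wraps 'memory' with clImportMemoryARM and imports the resulting cl_mem into the tensor
    // allocator. The size of the mapping is rounded up to the device's
    // CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE; protected imports are created with
    // CL_MEM_HOST_NO_ACCESS, all others with CL_MEM_READ_WRITE.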
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
        // This does not change the size of the buffer, only the size of the mapping created over it
        auto cachelineAlignment =
                arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);
        // Check the error code of the Status returned by import_memory(); if it is not OK,
        // throw an exception with the Status error message
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};

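// Sub-tensor view over a region of a parent IClTensorHandle. It shares the parent's
// (imported) storage and therefore never allocates or imports memory itself.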
class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
    : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};

} // namespace armnn