//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <aclCommon/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
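                // The cl_arm_import_memory extension takes a zero-terminated list of
                // <property, value> pairs. CL_IMPORT_TYPE_ARM selects the kind of allocation
                // being imported; CL_IMPORT_TYPE_HOST_ARM means ordinary host (malloc'd) memory.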
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };
                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBuf)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory, true);
            }
            // Case for importing memory allocated externally by OpenCL directly into the tensor.
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet allocated: import the OpenCL memory straight into the tensor.
                if (!m_Imported && !m_Tensor.buffer())
                {
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                // m_Tensor.buffer() was already allocated by Allocate(): importing is not allowed.
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }
                // m_Tensor.buffer() was previously imported: import the new OpenCL memory.
                else if (m_Imported)
                {
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

    virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                // Return true here: ClImport() will decide whether the memory can actually be imported.
                return true;
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to.
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }
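
        // clImportMemoryARM() (from the cl_arm_import_memory extension) wraps the external
        // allocation in a cl_mem without copying it. Protected (DRM) content must not be
        // mapped on the host, hence CL_MEM_HOST_NO_ACCESS instead of CL_MEM_READ_WRITE.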
        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the error code of the returned Status; if the import failed, throw an
        // exception carrying the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }

    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};
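
// ClImportSubTensorHandle exposes an arm_compute::CLSubTensor view into the buffer owned
// (or imported) by its parent handle; it performs no allocation or import of its own.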
class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }
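
    // m_Tensor is the ACL view into the parent's buffer. It is declared mutable so the
    // const member functions above can still work with the underlying CLSubTensor.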
    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};

} // namespace armnn
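
// A minimal usage sketch (illustrative only, not part of this header's API): importing a
// malloc'd host buffer. Assumes the CL backend/context is already initialised and that
// 'buffer' meets the alignment requirements of the cl_arm_import_memory extension.
//
//     armnn::TensorInfo info(armnn::TensorShape({ 1, 16 }), armnn::DataType::Float32);
//     armnn::ClImportTensorHandle handle(
//         info, static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::Malloc));
//     void* buffer = /* suitably aligned host allocation */;
//     handle.Import(buffer, armnn::MemorySource::Malloc);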