xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/cl/tensor.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
17 
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <utility>
22 #include <vector>
23 
24 #include "absl/strings/str_cat.h"
25 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
26 #include "tensorflow/lite/delegates/gpu/cl/cl_image_format.h"
27 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
28 #include "tensorflow/lite/delegates/gpu/common/status.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
30 
31 namespace tflite {
32 namespace gpu {
33 namespace cl {
34 namespace {
AllocateTensorMemoryInternal(const CLContext & context,const TensorDescriptor & descriptor,CLMemory * result)35 absl::Status AllocateTensorMemoryInternal(const CLContext& context,
36                                           const TensorDescriptor& descriptor,
37                                           CLMemory* result) {
38   cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
39   const uint8_t* data_ptr = nullptr;
40   if (!descriptor.GetData().empty()) {
41     data_ptr = descriptor.GetData().data();
42     mem_flags |= CL_MEM_COPY_HOST_PTR;
43   }
44   std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
45   switch (descriptor.GetStorageType()) {
46     case TensorStorageType::BUFFER:
47     case TensorStorageType::IMAGE_BUFFER: {
48       const size_t data_size = storage_dims[0] * descriptor.GetElementSize() *
49                                SizeOf(descriptor.GetDataType());
50       cl_int error_code;
51       cl_mem memory =
52           clCreateBuffer(context.context(), mem_flags, data_size,
53                          const_cast<uint8_t*>(data_ptr), &error_code);
54       if (!memory) {
55         return absl::UnknownError(
56             absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
57                          CLErrorCodeToString(error_code)));
58       }
59       *result = CLMemory(memory, true);
60       return absl::OkStatus();
61     }
62     case TensorStorageType::TEXTURE_2D: {
63       cl_image_desc desc;
64       desc.image_type = CL_MEM_OBJECT_IMAGE2D;
65       desc.image_width = storage_dims[0];
66       desc.image_height = storage_dims[1];
67       desc.image_depth = 0;
68       desc.image_row_pitch = 0;
69       desc.image_slice_pitch = 0;
70       desc.num_mip_levels = 0;
71       desc.num_samples = 0;
72       desc.buffer = nullptr;
73 
74       cl_image_format format;
75       format.image_channel_order = CL_RGBA;
76       format.image_channel_data_type =
77           DataTypeToChannelType(descriptor.GetDataType());
78 
79       cl_int error_code;
80       cl_mem memory =
81           CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
82                               const_cast<uint8_t*>(data_ptr), &error_code);
83       if (error_code != CL_SUCCESS) {
84         return absl::UnknownError(
85             absl::StrCat("Failed to create 2D texture (clCreateImage): ",
86                          CLErrorCodeToString(error_code)));
87       }
88 
89       *result = CLMemory(memory, true);
90       return absl::OkStatus();
91     }
92     case TensorStorageType::TEXTURE_3D: {
93       cl_image_desc desc;
94       desc.image_type = CL_MEM_OBJECT_IMAGE3D;
95       desc.image_width = storage_dims[0];
96       desc.image_height = storage_dims[1];
97       desc.image_depth = storage_dims[2];
98       desc.image_row_pitch = 0;
99       desc.image_slice_pitch = 0;
100       desc.num_mip_levels = 0;
101       desc.num_samples = 0;
102       desc.buffer = nullptr;
103 
104       cl_image_format format;
105       format.image_channel_order = CL_RGBA;
106       format.image_channel_data_type =
107           DataTypeToChannelType(descriptor.GetDataType());
108 
109       cl_int error_code;
110       cl_mem memory =
111           CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
112                               const_cast<uint8_t*>(data_ptr), &error_code);
113       if (error_code != CL_SUCCESS) {
114         return absl::UnknownError(
115             absl::StrCat("Failed to create 3D texture (clCreateImage): ",
116                          CLErrorCodeToString(error_code)));
117       }
118 
119       *result = CLMemory(memory, true);
120       return absl::OkStatus();
121     }
122     case TensorStorageType::TEXTURE_ARRAY: {
123       cl_image_desc desc;
124       desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
125       desc.image_width = storage_dims[0];
126       desc.image_height = storage_dims[1];
127       desc.image_depth = 0;
128       desc.image_array_size = storage_dims[2];
129       desc.image_row_pitch = 0;
130       desc.image_slice_pitch = 0;
131       desc.num_mip_levels = 0;
132       desc.num_samples = 0;
133       desc.buffer = nullptr;
134 
135       cl_image_format format;
136       format.image_channel_order = CL_RGBA;
137       format.image_channel_data_type =
138           DataTypeToChannelType(descriptor.GetDataType());
139 
140       cl_int error_code;
141       cl_mem memory =
142           clCreateImage(context.context(), mem_flags, &format, &desc,
143                         const_cast<uint8_t*>(data_ptr), &error_code);
144       if (error_code != CL_SUCCESS) {
145         return absl::UnknownError(
146             absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
147                          CLErrorCodeToString(error_code)));
148       }
149 
150       *result = CLMemory(memory, true);
151       return absl::OkStatus();
152     }
153 
154     case TensorStorageType::SINGLE_TEXTURE_2D: {
155       const int element_size = descriptor.GetElementSize();
156       if (element_size > 4) {
157         return absl::InvalidArgumentError(absl::StrCat(
158             "SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
159             element_size, "was provided"));
160       }
161       cl_image_desc desc;
162       desc.image_type = CL_MEM_OBJECT_IMAGE2D;
163       desc.image_width = storage_dims[0];
164       desc.image_height = storage_dims[1];
165       desc.image_depth = 0;
166       desc.image_row_pitch = 0;
167       desc.image_slice_pitch = 0;
168       desc.num_mip_levels = 0;
169       desc.num_samples = 0;
170       desc.buffer = nullptr;
171 
172       cl_image_format format;
173       if (context.IsFloatTexture2DSupported(element_size,
174                                             descriptor.GetDataType())) {
175         format.image_channel_order = ToChannelOrder(element_size);
176         format.image_channel_data_type =
177             DataTypeToChannelType(descriptor.GetDataType());
178       } else {
179         return absl::InvalidArgumentError(
180             absl::StrCat("This device doesn't support ", element_size,
181                          "-channel textures."));
182       }
183 
184       cl_int error_code;
185       cl_mem memory =
186           CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
187                               const_cast<uint8_t*>(data_ptr), &error_code);
188       if (error_code != CL_SUCCESS) {
189         return absl::UnknownError(
190             absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
191                          CLErrorCodeToString(error_code)));
192       }
193 
194       *result = CLMemory(memory, true);
195       return absl::OkStatus();
196     }
197 
198     default:
199       return absl::InternalError("Unsupported tensor storage type");
200   }
201 }
202 
CreateImageBufferFromBuffer(const CLContext & context,cl_mem memory,DataType data_type,int width,cl_mem * result)203 absl::Status CreateImageBufferFromBuffer(const CLContext& context,
204                                          cl_mem memory, DataType data_type,
205                                          int width, cl_mem* result) {
206   cl_image_format format;
207   cl_image_desc desc;
208   std::memset(&desc, 0, sizeof(desc));
209   desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
210   desc.image_width = width;
211   desc.mem_object = memory;
212 
213   format.image_channel_data_type = DataTypeToChannelType(data_type);
214   format.image_channel_order = CL_RGBA;
215 
216   cl_int error_code;
217   *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc,
218                           nullptr, &error_code);
219   if (error_code != CL_SUCCESS) {
220     return absl::UnknownError(
221         absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
222                      CLErrorCodeToString(error_code)));
223   }
224   return absl::OkStatus();
225 }
226 
CreateImage2DFromBuffer(const CLContext & context,cl_mem memory,DataType data_type,int width,int height,int channels,int width_pixel_alignment,cl_mem * result)227 absl::Status CreateImage2DFromBuffer(const CLContext& context, cl_mem memory,
228                                      DataType data_type, int width, int height,
229                                      int channels, int width_pixel_alignment,
230                                      cl_mem* result) {
231   if (!context.IsFloatTexture2DSupported(channels, data_type)) {
232     return absl::InvalidArgumentError(absl::StrCat(
233         "This device doesn't support ", channels, "-channel textures."));
234   }
235 
236   cl_image_desc desc;
237   desc.image_type = CL_MEM_OBJECT_IMAGE2D;
238   desc.image_width = width;
239   desc.image_height = height;
240   desc.image_depth = 0;
241   const size_t width_aligned = AlignByN(width, width_pixel_alignment);
242   desc.image_row_pitch = width_aligned * channels * SizeOf(data_type);
243   desc.image_slice_pitch = 0;
244   desc.num_mip_levels = 0;
245   desc.num_samples = 0;
246   desc.mem_object = memory;
247 
248   cl_image_format format;
249   format.image_channel_order = ToChannelOrder(channels);
250   format.image_channel_data_type = DataTypeToChannelType(data_type);
251 
252   cl_int error_code;
253   *result = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE, &format,
254                                 &desc, nullptr, &error_code);
255   if (error_code != CL_SUCCESS) {
256     return absl::UnknownError(
257         absl::StrCat("Failed to create Image2D from Buffer (clCreateImage): ",
258                      CLErrorCodeToString(error_code)));
259   }
260   return absl::OkStatus();
261 }
262 }  // namespace
263 
Tensor(cl_mem memory,bool memory_owner,const TensorDescriptor & descriptor)264 Tensor::Tensor(cl_mem memory, bool memory_owner,
265                const TensorDescriptor& descriptor)
266     : memory_(memory),
267       image_buffer_memory_(nullptr),
268       memory_owner_(memory_owner),
269       descriptor_(descriptor) {}
270 
Tensor(cl_mem memory,bool memory_owner,cl_mem image_buffer_memory,const TensorDescriptor & descriptor)271 Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
272                const TensorDescriptor& descriptor)
273     : memory_(memory),
274       image_buffer_memory_(image_buffer_memory),
275       memory_owner_(memory_owner),
276       descriptor_(descriptor) {
277   if (image_buffer_memory &&
278       (descriptor.GetStorageType() == TensorStorageType::TEXTURE_2D ||
279        descriptor.GetStorageType() == TensorStorageType::SINGLE_TEXTURE_2D)) {
280     buffer_based_ = true;
281   }
282 }
283 
Tensor(Tensor && tensor)284 Tensor::Tensor(Tensor&& tensor)
285     : memory_(tensor.memory_),
286       image_buffer_memory_(tensor.image_buffer_memory_),
287       memory_owner_(tensor.memory_owner_),
288       buffer_based_(tensor.buffer_based_),
289       descriptor_(std::move(tensor.descriptor_)),
290       aligned_texture_width_(tensor.aligned_texture_width_) {
291   tensor.memory_ = nullptr;
292   tensor.image_buffer_memory_ = nullptr;
293 }
294 
operator =(Tensor && tensor)295 Tensor& Tensor::operator=(Tensor&& tensor) {
296   if (this != &tensor) {
297     Release();
298     std::swap(memory_, tensor.memory_);
299     std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
300     std::swap(memory_owner_, tensor.memory_owner_);
301     std::swap(buffer_based_, tensor.buffer_based_);
302     descriptor_ = std::move(tensor.descriptor_);
303     std::swap(aligned_texture_width_, tensor.aligned_texture_width_);
304   }
305   return *this;
306 }
307 
Release()308 void Tensor::Release() {
309   // image_buffer_memory_ always owned by object
310   if (image_buffer_memory_) {
311     clReleaseMemObject(image_buffer_memory_);
312     image_buffer_memory_ = nullptr;
313   }
314   if (memory_owner_ && memory_) {
315     clReleaseMemObject(memory_);
316     memory_ = nullptr;
317   }
318 }
319 
GetGPUResources(const GPUObjectDescriptor * obj_ptr,GPUResourcesWithValue * resources) const320 absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
321                                      GPUResourcesWithValue* resources) const {
322   const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(obj_ptr);
323   if (buffer_desc) {
324     if (descriptor_.GetStorageType() != TensorStorageType::BUFFER &&
325         descriptor_.GetStorageType() != TensorStorageType::IMAGE_BUFFER) {
326       return absl::InvalidArgumentError(
327           "Tensor can be used with BufferDescriptor only with "
328           "TensorStorageType::BUFFER/TensorStorageType::IMAGE_BUFFER.");
329     }
330     resources->buffers.push_back({"buffer", memory_});
331     return absl::OkStatus();
332   }
333   const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(obj_ptr);
334   if (!tensor_desc) {
335     return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
336   }
337   tensor_desc->GetGpuResources(descriptor_.GetBHWDCShape(),
338                                &resources->generic);
339 
340   if (descriptor_.GetStorageType() == TensorStorageType::BUFFER) {
341     resources->buffers.push_back({"buffer", memory_});
342   } else if (descriptor_.GetStorageType() == TensorStorageType::TEXTURE_2D ||
343              descriptor_.GetStorageType() ==
344                  TensorStorageType::SINGLE_TEXTURE_2D) {
345     if (obj_ptr->GetAccess() == AccessType::WRITE &&
346         tensor_desc->GetUseBufferForWriteOnlyTexture2d()) {
347       resources->AddInt("aligned_texture_width", aligned_texture_width_);
348       resources->buffers.push_back({"buffer", memory_});
349     } else {
350       cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
351       resources->images2d.push_back({"image2d", mem});
352     }
353   } else if (descriptor_.GetStorageType() == TensorStorageType::TEXTURE_ARRAY) {
354     resources->image2d_arrays.push_back({"image2d_array", memory_});
355   } else if (descriptor_.GetStorageType() == TensorStorageType::TEXTURE_3D) {
356     resources->images3d.push_back({"image3d", memory_});
357   } else if (descriptor_.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
358     if (obj_ptr->GetAccess() == AccessType::WRITE &&
359         tensor_desc->GetUseBufferForWriteOnlyImageBuffer()) {
360       resources->buffers.push_back({"buffer", memory_});
361     } else {
362       resources->image_buffers.push_back(
363           {"image_buffer", image_buffer_memory_});
364     }
365   }
366 
367   return absl::OkStatus();
368 }
369 
GetMemoryPtr() const370 cl_mem Tensor::GetMemoryPtr() const {
371   if (buffer_based_) {
372     return image_buffer_memory_;
373   } else {
374     return descriptor_.GetStorageType() == TensorStorageType::IMAGE_BUFFER
375                ? image_buffer_memory_
376                : memory_;
377   }
378 }
379 
GetMemoryPtrForWriting() const380 cl_mem Tensor::GetMemoryPtrForWriting() const {
381   if (buffer_based_) {
382     return image_buffer_memory_;
383   } else {
384     return memory_;
385   }
386 }
387 
CreateFromDescriptor(const TensorDescriptor & desc,CLContext * context)388 absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
389                                           CLContext* context) {
390   desc.CopyWithoutData(&descriptor_);
391   memory_owner_ = true;
392   CLMemory memory;
393   RETURN_IF_ERROR(AllocateTensorMemoryInternal(*context, desc, &memory));
394   memory_ = memory.Release();
395   if (desc.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
396     std::vector<uint64_t> storage_dims = descriptor_.GetStorageDims();
397     RETURN_IF_ERROR(
398         CreateImageBufferFromBuffer(*context, memory_, desc.GetDataType(),
399                                     storage_dims[0], &image_buffer_memory_));
400   }
401   return absl::OkStatus();
402 }
403 
UploadDescriptorData(const TensorDescriptor & desc,CLCommandQueue * queue)404 absl::Status Tensor::UploadDescriptorData(const TensorDescriptor& desc,
405                                           CLCommandQueue* queue) {
406   return WriteData(desc.GetData().data(), queue);
407 }
408 
ToDescriptor(TensorDescriptor * desc,CLCommandQueue * queue) const409 absl::Status Tensor::ToDescriptor(TensorDescriptor* desc,
410                                   CLCommandQueue* queue) const {
411   *desc = descriptor_;
412   std::vector<uint8_t> data(GetMemorySizeInBytes());
413   RETURN_IF_ERROR(ReadData(data.data(), queue));
414   desc->SetData(std::move(data));
415   return absl::OkStatus();
416 }
417 
WriteData(const void * ptr,CLCommandQueue * queue)418 absl::Status Tensor::WriteData(const void* ptr, CLCommandQueue* queue) {
419   switch (descriptor_.GetStorageType()) {
420     case TensorStorageType::BUFFER:
421     case TensorStorageType::IMAGE_BUFFER:
422       RETURN_IF_ERROR(
423           queue->EnqueueWriteBuffer(memory_, GetMemorySizeInBytes(), ptr));
424       break;
425     case TensorStorageType::TEXTURE_ARRAY:
426     case TensorStorageType::TEXTURE_2D:
427     case TensorStorageType::TEXTURE_3D:
428     case TensorStorageType::SINGLE_TEXTURE_2D: {
429       cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
430       RETURN_IF_ERROR(queue->EnqueueWriteImage(
431           mem, descriptor_.GetFullTensorRegion(), ptr));
432       break;
433     }
434     default:
435       return absl::InternalError("Unsupported tensor storage type");
436   }
437   return absl::OkStatus();
438 }
439 
ReadData(void * ptr,CLCommandQueue * queue) const440 absl::Status Tensor::ReadData(void* ptr, CLCommandQueue* queue) const {
441   switch (descriptor_.GetStorageType()) {
442     case TensorStorageType::BUFFER:
443     case TensorStorageType::IMAGE_BUFFER:
444       RETURN_IF_ERROR(
445           queue->EnqueueReadBuffer(memory_, GetMemorySizeInBytes(), ptr));
446       break;
447     case TensorStorageType::TEXTURE_ARRAY:
448     case TensorStorageType::TEXTURE_2D:
449     case TensorStorageType::TEXTURE_3D:
450     case TensorStorageType::SINGLE_TEXTURE_2D: {
451       cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
452       RETURN_IF_ERROR(
453           queue->EnqueueReadImage(mem, descriptor_.GetFullTensorRegion(), ptr));
454       break;
455     }
456     default:
457       return absl::InternalError("Unsupported tensor storage type");
458   }
459   return absl::OkStatus();
460 }
461 
CreateTensor(const CLContext & context,const TensorDescriptor & descriptor,Tensor * result)462 absl::Status CreateTensor(const CLContext& context,
463                           const TensorDescriptor& descriptor, Tensor* result) {
464   CLMemory mem;
465   RETURN_IF_ERROR(AllocateTensorMemoryInternal(context, descriptor, &mem));
466   cl_mem memory = mem.Release();
467   if (descriptor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
468     std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
469     cl_mem image_memory;
470     RETURN_IF_ERROR(
471         CreateImageBufferFromBuffer(context, memory, descriptor.GetDataType(),
472                                     storage_dims[0], &image_memory));
473     *result = Tensor(memory, /*memory_owner*/ true, image_memory, descriptor);
474   } else {
475     *result = Tensor(memory, /*memory_owner*/ true, descriptor);
476   }
477   return absl::OkStatus();
478 }
479 
CreateTensorShared(const CLContext & context,cl_mem memory,const TensorDescriptor & descriptor,Tensor * result)480 absl::Status CreateTensorShared(const CLContext& context, cl_mem memory,
481                                 const TensorDescriptor& descriptor,
482                                 Tensor* result) {
483   const bool memory_owner = false;
484   if (descriptor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
485     std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
486     cl_mem image_memory;
487     RETURN_IF_ERROR(
488         CreateImageBufferFromBuffer(context, memory, descriptor.GetDataType(),
489                                     storage_dims[0], &image_memory));
490     *result = Tensor(memory, memory_owner, image_memory, descriptor);
491   } else {
492     *result = Tensor(memory, memory_owner, descriptor);
493   }
494   return absl::OkStatus();
495 }
496 
CreateTensorSharedImage2DBuffer(const CLContext & context,cl_mem memory,const TensorDescriptor & descriptor,int width_pixel_alignment,Tensor * result)497 absl::Status CreateTensorSharedImage2DBuffer(const CLContext& context,
498                                              cl_mem memory,
499                                              const TensorDescriptor& descriptor,
500                                              int width_pixel_alignment,
501                                              Tensor* result) {
502   std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
503   const int width = storage_dims[0];
504   const int height = storage_dims[1];
505   const int channels = descriptor.GetElementSize();
506   cl_mem image_memory;
507   RETURN_IF_ERROR(CreateImage2DFromBuffer(
508       context, memory, descriptor.GetDataType(), width, height, channels,
509       width_pixel_alignment, &image_memory));
510   *result = Tensor(memory, false, image_memory, descriptor);
511   result->aligned_texture_width_ = AlignByN(width, width_pixel_alignment);
512   return absl::OkStatus();
513 }
514 
AllocateTensorMemory(const CLContext & context,const TensorDescriptor & descriptor,CLMemory * result)515 absl::Status AllocateTensorMemory(const CLContext& context,
516                                   const TensorDescriptor& descriptor,
517                                   CLMemory* result) {
518   return AllocateTensorMemoryInternal(context, descriptor, result);
519 }
520 
521 }  // namespace cl
522 }  // namespace gpu
523 }  // namespace tflite
524