1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
17
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <utility>
22 #include <vector>
23
24 #include "absl/strings/str_cat.h"
25 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
26 #include "tensorflow/lite/delegates/gpu/cl/cl_image_format.h"
27 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
28 #include "tensorflow/lite/delegates/gpu/common/status.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
30
31 namespace tflite {
32 namespace gpu {
33 namespace cl {
34 namespace {
AllocateTensorMemoryInternal(const CLContext & context,const TensorDescriptor & descriptor,CLMemory * result)35 absl::Status AllocateTensorMemoryInternal(const CLContext& context,
36 const TensorDescriptor& descriptor,
37 CLMemory* result) {
38 cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
39 const uint8_t* data_ptr = nullptr;
40 if (!descriptor.GetData().empty()) {
41 data_ptr = descriptor.GetData().data();
42 mem_flags |= CL_MEM_COPY_HOST_PTR;
43 }
44 std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
45 switch (descriptor.GetStorageType()) {
46 case TensorStorageType::BUFFER:
47 case TensorStorageType::IMAGE_BUFFER: {
48 const size_t data_size = storage_dims[0] * descriptor.GetElementSize() *
49 SizeOf(descriptor.GetDataType());
50 cl_int error_code;
51 cl_mem memory =
52 clCreateBuffer(context.context(), mem_flags, data_size,
53 const_cast<uint8_t*>(data_ptr), &error_code);
54 if (!memory) {
55 return absl::UnknownError(
56 absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
57 CLErrorCodeToString(error_code)));
58 }
59 *result = CLMemory(memory, true);
60 return absl::OkStatus();
61 }
62 case TensorStorageType::TEXTURE_2D: {
63 cl_image_desc desc;
64 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
65 desc.image_width = storage_dims[0];
66 desc.image_height = storage_dims[1];
67 desc.image_depth = 0;
68 desc.image_row_pitch = 0;
69 desc.image_slice_pitch = 0;
70 desc.num_mip_levels = 0;
71 desc.num_samples = 0;
72 desc.buffer = nullptr;
73
74 cl_image_format format;
75 format.image_channel_order = CL_RGBA;
76 format.image_channel_data_type =
77 DataTypeToChannelType(descriptor.GetDataType());
78
79 cl_int error_code;
80 cl_mem memory =
81 CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
82 const_cast<uint8_t*>(data_ptr), &error_code);
83 if (error_code != CL_SUCCESS) {
84 return absl::UnknownError(
85 absl::StrCat("Failed to create 2D texture (clCreateImage): ",
86 CLErrorCodeToString(error_code)));
87 }
88
89 *result = CLMemory(memory, true);
90 return absl::OkStatus();
91 }
92 case TensorStorageType::TEXTURE_3D: {
93 cl_image_desc desc;
94 desc.image_type = CL_MEM_OBJECT_IMAGE3D;
95 desc.image_width = storage_dims[0];
96 desc.image_height = storage_dims[1];
97 desc.image_depth = storage_dims[2];
98 desc.image_row_pitch = 0;
99 desc.image_slice_pitch = 0;
100 desc.num_mip_levels = 0;
101 desc.num_samples = 0;
102 desc.buffer = nullptr;
103
104 cl_image_format format;
105 format.image_channel_order = CL_RGBA;
106 format.image_channel_data_type =
107 DataTypeToChannelType(descriptor.GetDataType());
108
109 cl_int error_code;
110 cl_mem memory =
111 CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
112 const_cast<uint8_t*>(data_ptr), &error_code);
113 if (error_code != CL_SUCCESS) {
114 return absl::UnknownError(
115 absl::StrCat("Failed to create 3D texture (clCreateImage): ",
116 CLErrorCodeToString(error_code)));
117 }
118
119 *result = CLMemory(memory, true);
120 return absl::OkStatus();
121 }
122 case TensorStorageType::TEXTURE_ARRAY: {
123 cl_image_desc desc;
124 desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
125 desc.image_width = storage_dims[0];
126 desc.image_height = storage_dims[1];
127 desc.image_depth = 0;
128 desc.image_array_size = storage_dims[2];
129 desc.image_row_pitch = 0;
130 desc.image_slice_pitch = 0;
131 desc.num_mip_levels = 0;
132 desc.num_samples = 0;
133 desc.buffer = nullptr;
134
135 cl_image_format format;
136 format.image_channel_order = CL_RGBA;
137 format.image_channel_data_type =
138 DataTypeToChannelType(descriptor.GetDataType());
139
140 cl_int error_code;
141 cl_mem memory =
142 clCreateImage(context.context(), mem_flags, &format, &desc,
143 const_cast<uint8_t*>(data_ptr), &error_code);
144 if (error_code != CL_SUCCESS) {
145 return absl::UnknownError(
146 absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
147 CLErrorCodeToString(error_code)));
148 }
149
150 *result = CLMemory(memory, true);
151 return absl::OkStatus();
152 }
153
154 case TensorStorageType::SINGLE_TEXTURE_2D: {
155 const int element_size = descriptor.GetElementSize();
156 if (element_size > 4) {
157 return absl::InvalidArgumentError(absl::StrCat(
158 "SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
159 element_size, "was provided"));
160 }
161 cl_image_desc desc;
162 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
163 desc.image_width = storage_dims[0];
164 desc.image_height = storage_dims[1];
165 desc.image_depth = 0;
166 desc.image_row_pitch = 0;
167 desc.image_slice_pitch = 0;
168 desc.num_mip_levels = 0;
169 desc.num_samples = 0;
170 desc.buffer = nullptr;
171
172 cl_image_format format;
173 if (context.IsFloatTexture2DSupported(element_size,
174 descriptor.GetDataType())) {
175 format.image_channel_order = ToChannelOrder(element_size);
176 format.image_channel_data_type =
177 DataTypeToChannelType(descriptor.GetDataType());
178 } else {
179 return absl::InvalidArgumentError(
180 absl::StrCat("This device doesn't support ", element_size,
181 "-channel textures."));
182 }
183
184 cl_int error_code;
185 cl_mem memory =
186 CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
187 const_cast<uint8_t*>(data_ptr), &error_code);
188 if (error_code != CL_SUCCESS) {
189 return absl::UnknownError(
190 absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
191 CLErrorCodeToString(error_code)));
192 }
193
194 *result = CLMemory(memory, true);
195 return absl::OkStatus();
196 }
197
198 default:
199 return absl::InternalError("Unsupported tensor storage type");
200 }
201 }
202
CreateImageBufferFromBuffer(const CLContext & context,cl_mem memory,DataType data_type,int width,cl_mem * result)203 absl::Status CreateImageBufferFromBuffer(const CLContext& context,
204 cl_mem memory, DataType data_type,
205 int width, cl_mem* result) {
206 cl_image_format format;
207 cl_image_desc desc;
208 std::memset(&desc, 0, sizeof(desc));
209 desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
210 desc.image_width = width;
211 desc.mem_object = memory;
212
213 format.image_channel_data_type = DataTypeToChannelType(data_type);
214 format.image_channel_order = CL_RGBA;
215
216 cl_int error_code;
217 *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc,
218 nullptr, &error_code);
219 if (error_code != CL_SUCCESS) {
220 return absl::UnknownError(
221 absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
222 CLErrorCodeToString(error_code)));
223 }
224 return absl::OkStatus();
225 }
226
CreateImage2DFromBuffer(const CLContext & context,cl_mem memory,DataType data_type,int width,int height,int channels,int width_pixel_alignment,cl_mem * result)227 absl::Status CreateImage2DFromBuffer(const CLContext& context, cl_mem memory,
228 DataType data_type, int width, int height,
229 int channels, int width_pixel_alignment,
230 cl_mem* result) {
231 if (!context.IsFloatTexture2DSupported(channels, data_type)) {
232 return absl::InvalidArgumentError(absl::StrCat(
233 "This device doesn't support ", channels, "-channel textures."));
234 }
235
236 cl_image_desc desc;
237 desc.image_type = CL_MEM_OBJECT_IMAGE2D;
238 desc.image_width = width;
239 desc.image_height = height;
240 desc.image_depth = 0;
241 const size_t width_aligned = AlignByN(width, width_pixel_alignment);
242 desc.image_row_pitch = width_aligned * channels * SizeOf(data_type);
243 desc.image_slice_pitch = 0;
244 desc.num_mip_levels = 0;
245 desc.num_samples = 0;
246 desc.mem_object = memory;
247
248 cl_image_format format;
249 format.image_channel_order = ToChannelOrder(channels);
250 format.image_channel_data_type = DataTypeToChannelType(data_type);
251
252 cl_int error_code;
253 *result = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE, &format,
254 &desc, nullptr, &error_code);
255 if (error_code != CL_SUCCESS) {
256 return absl::UnknownError(
257 absl::StrCat("Failed to create Image2D from Buffer (clCreateImage): ",
258 CLErrorCodeToString(error_code)));
259 }
260 return absl::OkStatus();
261 }
262 } // namespace
263
Tensor(cl_mem memory,bool memory_owner,const TensorDescriptor & descriptor)264 Tensor::Tensor(cl_mem memory, bool memory_owner,
265 const TensorDescriptor& descriptor)
266 : memory_(memory),
267 image_buffer_memory_(nullptr),
268 memory_owner_(memory_owner),
269 descriptor_(descriptor) {}
270
Tensor(cl_mem memory,bool memory_owner,cl_mem image_buffer_memory,const TensorDescriptor & descriptor)271 Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
272 const TensorDescriptor& descriptor)
273 : memory_(memory),
274 image_buffer_memory_(image_buffer_memory),
275 memory_owner_(memory_owner),
276 descriptor_(descriptor) {
277 if (image_buffer_memory &&
278 (descriptor.GetStorageType() == TensorStorageType::TEXTURE_2D ||
279 descriptor.GetStorageType() == TensorStorageType::SINGLE_TEXTURE_2D)) {
280 buffer_based_ = true;
281 }
282 }
283
Tensor(Tensor && tensor)284 Tensor::Tensor(Tensor&& tensor)
285 : memory_(tensor.memory_),
286 image_buffer_memory_(tensor.image_buffer_memory_),
287 memory_owner_(tensor.memory_owner_),
288 buffer_based_(tensor.buffer_based_),
289 descriptor_(std::move(tensor.descriptor_)),
290 aligned_texture_width_(tensor.aligned_texture_width_) {
291 tensor.memory_ = nullptr;
292 tensor.image_buffer_memory_ = nullptr;
293 }
294
operator =(Tensor && tensor)295 Tensor& Tensor::operator=(Tensor&& tensor) {
296 if (this != &tensor) {
297 Release();
298 std::swap(memory_, tensor.memory_);
299 std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
300 std::swap(memory_owner_, tensor.memory_owner_);
301 std::swap(buffer_based_, tensor.buffer_based_);
302 descriptor_ = std::move(tensor.descriptor_);
303 std::swap(aligned_texture_width_, tensor.aligned_texture_width_);
304 }
305 return *this;
306 }
307
Release()308 void Tensor::Release() {
309 // image_buffer_memory_ always owned by object
310 if (image_buffer_memory_) {
311 clReleaseMemObject(image_buffer_memory_);
312 image_buffer_memory_ = nullptr;
313 }
314 if (memory_owner_ && memory_) {
315 clReleaseMemObject(memory_);
316 memory_ = nullptr;
317 }
318 }
319
GetGPUResources(const GPUObjectDescriptor * obj_ptr,GPUResourcesWithValue * resources) const320 absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
321 GPUResourcesWithValue* resources) const {
322 const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(obj_ptr);
323 if (buffer_desc) {
324 if (descriptor_.GetStorageType() != TensorStorageType::BUFFER &&
325 descriptor_.GetStorageType() != TensorStorageType::IMAGE_BUFFER) {
326 return absl::InvalidArgumentError(
327 "Tensor can be used with BufferDescriptor only with "
328 "TensorStorageType::BUFFER/TensorStorageType::IMAGE_BUFFER.");
329 }
330 resources->buffers.push_back({"buffer", memory_});
331 return absl::OkStatus();
332 }
333 const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(obj_ptr);
334 if (!tensor_desc) {
335 return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
336 }
337 tensor_desc->GetGpuResources(descriptor_.GetBHWDCShape(),
338 &resources->generic);
339
340 if (descriptor_.GetStorageType() == TensorStorageType::BUFFER) {
341 resources->buffers.push_back({"buffer", memory_});
342 } else if (descriptor_.GetStorageType() == TensorStorageType::TEXTURE_2D ||
343 descriptor_.GetStorageType() ==
344 TensorStorageType::SINGLE_TEXTURE_2D) {
345 if (obj_ptr->GetAccess() == AccessType::WRITE &&
346 tensor_desc->GetUseBufferForWriteOnlyTexture2d()) {
347 resources->AddInt("aligned_texture_width", aligned_texture_width_);
348 resources->buffers.push_back({"buffer", memory_});
349 } else {
350 cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
351 resources->images2d.push_back({"image2d", mem});
352 }
353 } else if (descriptor_.GetStorageType() == TensorStorageType::TEXTURE_ARRAY) {
354 resources->image2d_arrays.push_back({"image2d_array", memory_});
355 } else if (descriptor_.GetStorageType() == TensorStorageType::TEXTURE_3D) {
356 resources->images3d.push_back({"image3d", memory_});
357 } else if (descriptor_.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
358 if (obj_ptr->GetAccess() == AccessType::WRITE &&
359 tensor_desc->GetUseBufferForWriteOnlyImageBuffer()) {
360 resources->buffers.push_back({"buffer", memory_});
361 } else {
362 resources->image_buffers.push_back(
363 {"image_buffer", image_buffer_memory_});
364 }
365 }
366
367 return absl::OkStatus();
368 }
369
GetMemoryPtr() const370 cl_mem Tensor::GetMemoryPtr() const {
371 if (buffer_based_) {
372 return image_buffer_memory_;
373 } else {
374 return descriptor_.GetStorageType() == TensorStorageType::IMAGE_BUFFER
375 ? image_buffer_memory_
376 : memory_;
377 }
378 }
379
GetMemoryPtrForWriting() const380 cl_mem Tensor::GetMemoryPtrForWriting() const {
381 if (buffer_based_) {
382 return image_buffer_memory_;
383 } else {
384 return memory_;
385 }
386 }
387
CreateFromDescriptor(const TensorDescriptor & desc,CLContext * context)388 absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
389 CLContext* context) {
390 desc.CopyWithoutData(&descriptor_);
391 memory_owner_ = true;
392 CLMemory memory;
393 RETURN_IF_ERROR(AllocateTensorMemoryInternal(*context, desc, &memory));
394 memory_ = memory.Release();
395 if (desc.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
396 std::vector<uint64_t> storage_dims = descriptor_.GetStorageDims();
397 RETURN_IF_ERROR(
398 CreateImageBufferFromBuffer(*context, memory_, desc.GetDataType(),
399 storage_dims[0], &image_buffer_memory_));
400 }
401 return absl::OkStatus();
402 }
403
UploadDescriptorData(const TensorDescriptor & desc,CLCommandQueue * queue)404 absl::Status Tensor::UploadDescriptorData(const TensorDescriptor& desc,
405 CLCommandQueue* queue) {
406 return WriteData(desc.GetData().data(), queue);
407 }
408
ToDescriptor(TensorDescriptor * desc,CLCommandQueue * queue) const409 absl::Status Tensor::ToDescriptor(TensorDescriptor* desc,
410 CLCommandQueue* queue) const {
411 *desc = descriptor_;
412 std::vector<uint8_t> data(GetMemorySizeInBytes());
413 RETURN_IF_ERROR(ReadData(data.data(), queue));
414 desc->SetData(std::move(data));
415 return absl::OkStatus();
416 }
417
WriteData(const void * ptr,CLCommandQueue * queue)418 absl::Status Tensor::WriteData(const void* ptr, CLCommandQueue* queue) {
419 switch (descriptor_.GetStorageType()) {
420 case TensorStorageType::BUFFER:
421 case TensorStorageType::IMAGE_BUFFER:
422 RETURN_IF_ERROR(
423 queue->EnqueueWriteBuffer(memory_, GetMemorySizeInBytes(), ptr));
424 break;
425 case TensorStorageType::TEXTURE_ARRAY:
426 case TensorStorageType::TEXTURE_2D:
427 case TensorStorageType::TEXTURE_3D:
428 case TensorStorageType::SINGLE_TEXTURE_2D: {
429 cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
430 RETURN_IF_ERROR(queue->EnqueueWriteImage(
431 mem, descriptor_.GetFullTensorRegion(), ptr));
432 break;
433 }
434 default:
435 return absl::InternalError("Unsupported tensor storage type");
436 }
437 return absl::OkStatus();
438 }
439
ReadData(void * ptr,CLCommandQueue * queue) const440 absl::Status Tensor::ReadData(void* ptr, CLCommandQueue* queue) const {
441 switch (descriptor_.GetStorageType()) {
442 case TensorStorageType::BUFFER:
443 case TensorStorageType::IMAGE_BUFFER:
444 RETURN_IF_ERROR(
445 queue->EnqueueReadBuffer(memory_, GetMemorySizeInBytes(), ptr));
446 break;
447 case TensorStorageType::TEXTURE_ARRAY:
448 case TensorStorageType::TEXTURE_2D:
449 case TensorStorageType::TEXTURE_3D:
450 case TensorStorageType::SINGLE_TEXTURE_2D: {
451 cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
452 RETURN_IF_ERROR(
453 queue->EnqueueReadImage(mem, descriptor_.GetFullTensorRegion(), ptr));
454 break;
455 }
456 default:
457 return absl::InternalError("Unsupported tensor storage type");
458 }
459 return absl::OkStatus();
460 }
461
CreateTensor(const CLContext & context,const TensorDescriptor & descriptor,Tensor * result)462 absl::Status CreateTensor(const CLContext& context,
463 const TensorDescriptor& descriptor, Tensor* result) {
464 CLMemory mem;
465 RETURN_IF_ERROR(AllocateTensorMemoryInternal(context, descriptor, &mem));
466 cl_mem memory = mem.Release();
467 if (descriptor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
468 std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
469 cl_mem image_memory;
470 RETURN_IF_ERROR(
471 CreateImageBufferFromBuffer(context, memory, descriptor.GetDataType(),
472 storage_dims[0], &image_memory));
473 *result = Tensor(memory, /*memory_owner*/ true, image_memory, descriptor);
474 } else {
475 *result = Tensor(memory, /*memory_owner*/ true, descriptor);
476 }
477 return absl::OkStatus();
478 }
479
CreateTensorShared(const CLContext & context,cl_mem memory,const TensorDescriptor & descriptor,Tensor * result)480 absl::Status CreateTensorShared(const CLContext& context, cl_mem memory,
481 const TensorDescriptor& descriptor,
482 Tensor* result) {
483 const bool memory_owner = false;
484 if (descriptor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
485 std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
486 cl_mem image_memory;
487 RETURN_IF_ERROR(
488 CreateImageBufferFromBuffer(context, memory, descriptor.GetDataType(),
489 storage_dims[0], &image_memory));
490 *result = Tensor(memory, memory_owner, image_memory, descriptor);
491 } else {
492 *result = Tensor(memory, memory_owner, descriptor);
493 }
494 return absl::OkStatus();
495 }
496
CreateTensorSharedImage2DBuffer(const CLContext & context,cl_mem memory,const TensorDescriptor & descriptor,int width_pixel_alignment,Tensor * result)497 absl::Status CreateTensorSharedImage2DBuffer(const CLContext& context,
498 cl_mem memory,
499 const TensorDescriptor& descriptor,
500 int width_pixel_alignment,
501 Tensor* result) {
502 std::vector<uint64_t> storage_dims = descriptor.GetStorageDims();
503 const int width = storage_dims[0];
504 const int height = storage_dims[1];
505 const int channels = descriptor.GetElementSize();
506 cl_mem image_memory;
507 RETURN_IF_ERROR(CreateImage2DFromBuffer(
508 context, memory, descriptor.GetDataType(), width, height, channels,
509 width_pixel_alignment, &image_memory));
510 *result = Tensor(memory, false, image_memory, descriptor);
511 result->aligned_texture_width_ = AlignByN(width, width_pixel_alignment);
512 return absl::OkStatus();
513 }
514
AllocateTensorMemory(const CLContext & context,const TensorDescriptor & descriptor,CLMemory * result)515 absl::Status AllocateTensorMemory(const CLContext& context,
516 const TensorDescriptor& descriptor,
517 CLMemory* result) {
518 return AllocateTensorMemoryInternal(context, descriptor, result);
519 }
520
521 } // namespace cl
522 } // namespace gpu
523 } // namespace tflite
524