/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_OPERATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_OPERATION_H_

#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
#include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"

namespace tflite {
namespace gpu {
namespace cl {

struct CreationContext {
  const CLDevice* device;
  CLContext* context;
  CLCommandQueue* queue;
  ProgramCache* cache;

  const GpuInfo& GetGpuInfo() const { return device->info_; }
};

class ClOperation {
 public:
  ClOperation() = default;
  virtual ~ClOperation() = default;
  // Move only
  ClOperation(ClOperation&& operation) = default;
  ClOperation& operator=(ClOperation&& operation) = default;
  ClOperation(const ClOperation&) = delete;
  ClOperation& operator=(const ClOperation&) = delete;

  void Init(std::unique_ptr<GPUOperation>&& gpu_operation) {
    operation_ = std::move(gpu_operation);
  }

  GPUOperation& GetGpuOperation() { return *operation_; }
  const GPUOperation& GetGpuOperation() const { return *operation_; }
  uint64_t GetKernelFingerprint() const { return kernel_fingerprint_; }

  const OperationDef& GetDefinition() const {
    return operation_->GetDefinition();
  }

  // Should be called after changes to inputs/outputs.
  absl::Status UpdateParams();

  absl::Status SetSrcTensor(int index, Tensor* tensor);
  absl::Status SetDstTensor(int index, Tensor* tensor);

  absl::Status AddToQueue(CLCommandQueue* queue) {
    RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
    return queue->Dispatch(kernel_, operation_->GetWorkGroupsCount(),
                           operation_->work_group_size_);
  }

  absl::Status AddToCommanBuffer(cl_command_buffer_khr cb) {
    RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
    std::array<size_t, 3> local;
    std::array<size_t, 3> global;
    for (int i = 0; i < 3; ++i) {
      local[i] = operation_->work_group_size_[i];
      global[i] =
          operation_->GetWorkGroupsCount()[i] * operation_->work_group_size_[i];
    }
    const int error_code = clCommandNDRangeKernelKHR(
        cb, nullptr, nullptr, kernel_.kernel(), 3, nullptr, global.data(),
        local.data(), 0, nullptr, nullptr, nullptr);
    if (error_code != CL_SUCCESS) {
      return absl::UnknownError(
          absl::StrCat("Failed to clCommandNDRangeKernelKHR - ",
                       CLErrorCodeToString(error_code)));
    }
    return absl::OkStatus();
  }

  absl::Status AddToQueue(ProfilingCommandQueue* queue, CLEvent* event) {
    RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
    return queue->CLCommandQueue::Dispatch(kernel_,
                                           operation_->GetWorkGroupsCount(),
                                           operation_->work_group_size_, event);
  }

  // Dispatches the kernel n times for more precise profiling.
  absl::Status AddToQueueNTimes(ProfilingCommandQueue* queue, int n,
                                int flush_period = 0) {
    RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
    return queue->DispatchNTimes(kernel_, operation_->GetWorkGroupsCount(),
                                 operation_->work_group_size_, n, flush_period);
  }

  absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info,
                    ProfilingCommandQueue* profiling_queue);

  absl::Status Compile(const CreationContext& creation_context);

  absl::Status RestoreDeserialized(const ProgramCache& program_cache,
                                   uint64_t fingerprint, const GpuInfo& gpu_info,
                                   const int3& work_group_size,
                                   CLContext* context);

  int3 GetWorkGroupSize() const { return operation_->work_group_size_; }

 private:
  std::unique_ptr<GPUOperation> operation_;
  CLKernel kernel_;
  uint64_t kernel_fingerprint_;
  CLArguments cl_args_;
};

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_OPERATION_H_
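
// A minimal usage sketch (illustrative only, not part of this header): the
// typical lifecycle of a ClOperation. It assumes a prepared
// std::unique_ptr<GPUOperation> op, a populated CreationContext cc, and
// Tensor objects src/dst that match the operation definition.
//
//   ClOperation cl_op;
//   cl_op.Init(std::move(op));
//   RETURN_IF_ERROR(cl_op.Compile(cc));
//   RETURN_IF_ERROR(cl_op.SetSrcTensor(0, &src));
//   RETURN_IF_ERROR(cl_op.SetDstTensor(0, &dst));
//   RETURN_IF_ERROR(cl_op.UpdateParams());  // after any input/output change
//   RETURN_IF_ERROR(cl_op.AddToQueue(cc.queue));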