/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_plugin.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
constexpr int32_t kMinSdkVersionForNNAPI13 = 30;
constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31;
constexpr int32_t kNNAPIRuntimeFeatureLevel6 = 1000006;
constexpr int32_t kNNAPIRuntimeFeatureLevel7 = 1000007;
constexpr int32_t kNNAPIRuntimeFeatureLevel8 = 1000008;

class NNAPIOpBuilder;

// Bundle of arguments passed to NNAPIDelegateKernel::Map when translating a
// TFLite node into its NNAPI operands.
47 struct NNAPIOpMappingArgs { 48 TfLiteContext* context; 49 NNAPIOpBuilder* builder; 50 TfLiteNode* node; 51 int node_index; 52 std::vector<int>* model_state_outputs; 53 std::vector<int>* model_state_tfl_inputs; 54 std::vector<std::tuple<int, int>>* feedback_loops; 55 int* nnapi_errno; 56 }; 57 58 // RAII NN API Model Destructor for use with std::unique_ptr 59 class NNFreeModel { 60 public: NNFreeModel(const NnApi * nnapi)61 explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {} operator()62 void operator()(ANeuralNetworksModel* model) { 63 nnapi_->ANeuralNetworksModel_free(model); 64 } 65 66 private: 67 // NnApi instance to use. Not owned by this object. 68 const NnApi* nnapi_; 69 }; 70 // RAII NN API Compilation Destructor for use with std::unique_ptr 71 class NNFreeCompilation { 72 public: NNFreeCompilation(const NnApi * nnapi)73 explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {} operator()74 void operator()(ANeuralNetworksCompilation* model) { 75 nnapi_->ANeuralNetworksCompilation_free(model); 76 } 77 78 private: 79 // NnApi instance to use. Not owned by this object. 80 const NnApi* nnapi_; 81 }; 82 // RAII NN API Execution Destructor for use with std::unique_ptr 83 class NNFreeExecution { 84 public: NNFreeExecution(const NnApi * nnapi)85 explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {} operator()86 void operator()(ANeuralNetworksExecution* execution) { 87 nnapi_->ANeuralNetworksExecution_free(execution); 88 } 89 90 private: 91 // NnApi instance to use. Not owned by this object. 92 const NnApi* nnapi_; 93 }; 94 // RAII NN API Burst Destructor for use with std::unique_ptr 95 class NNFreeBurst { 96 public: NNFreeBurst(const NnApi * nnapi)97 explicit NNFreeBurst(const NnApi* nnapi) : nnapi_(nnapi) {} operator()98 void operator()(ANeuralNetworksBurst* model) { 99 nnapi_->ANeuralNetworksBurst_free(model); 100 } 101 102 private: 103 // NnApi instance to use. Not owned by this object. 
104 const NnApi* nnapi_; 105 }; 106 107 using UniqueExecution = 108 std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>; 109 110 // RAII NN API MappingUtil Destructor for use with std::unique_ptr 111 class NNFreeMappingUtil { 112 public: 113 void operator()(NnapiMappingUtilCInterface* mapping_util); 114 }; 115 116 // Manage NNAPI shared memory handle 117 class NNMemory { 118 public: 119 NNMemory(const NnApi* nnapi, const char* name, size_t size); 120 121 ~NNMemory(); 122 get_handle()123 ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; } get_data_ptr()124 uint8_t* get_data_ptr() { return data_ptr_; } get_byte_size()125 size_t get_byte_size() { return byte_size_; } 126 127 private: 128 // NnApi instance to use. Not owned by this object. 129 const NnApi* nnapi_; 130 int fd_ = 0; 131 size_t byte_size_ = 0; 132 uint8_t* data_ptr_ = nullptr; 133 ANeuralNetworksMemory* nn_memory_handle_ = nullptr; 134 #ifndef __ANDROID__ 135 std::string shm_region_name_; 136 #endif 137 }; 138 139 // LINT.IfChange 140 enum class NNAPIValidationFailureType : int { 141 // The operator is not supported by either NNAPI or the NNAPI Delegate. 142 kUnsupportedOperator = 0, 143 // The given operation or operands are not supported on the specified 144 // Android SDK version. The min supported version is specified in the 145 // validation failure message. 146 kUnsupportedAndroidVersion = 1, 147 // The version of the operator (value of TfLiteRegistration::version) 148 // for the given op is not supported. The max supported version 149 // is specified in the validation failure message. 150 // For more details on each operator version see 151 // the GetBuiltinOperatorVersion function in 152 // third_party/tensorflow/lite/tools/versioning/op_version.cc. 153 kUnsupportedOperatorVersion = 2, 154 // The given input operand type is not supported for the current combination 155 // of operator type and sdk version. 
156 kUnsupportedInputType = 3, 157 // When using NN API version 1.0 or 1.1, the condition 158 // input_scale * filter_scale < output_scale 159 // must be true for quantized versions of the following ops: 160 // * CONV_2D 161 // * DEPTHWISE_CONV_2D 162 // * FULLY_CONNECTED (where filter actually stands for weights) 163 // The condition is relaxed and no longer required since version 1.2. 164 kNotRestrictedScaleCompliant = 4, 165 // The given output operand type is not supported for the current combination 166 // of operator type and sdk version. 167 kUnsupportedOutputType = 5, 168 // The size of the operand tensor is too large. 169 kUnsupportedOperandSize = 6, 170 // The value of one of the operands or of a combination of operands is 171 // not supported. Details are provided in the failure message. 172 kUnsupportedOperandValue = 7, 173 // The combination of float inputs and quantized weights or filters 174 // is not supported 175 kUnsupportedHybridOperator = 8, 176 // The quantization type (for example per-channel quantization) is not 177 // supported. 178 kUnsupportedQuantizationType = 9, 179 // The accelerated version of operation requires a specific operand to be 180 // specified. 181 kMissingRequiredOperand = 10, 182 // The rank of the operand is not supported. Details in the failure message. 183 kUnsupportedOperandRank = 11, 184 // The input tensor cannot be dynamically-sized. 185 kInputTensorShouldHaveConstantShape = 12, 186 // The operator has a different number of inputs of the one or ones that 187 // are supported by NNAPI. 188 kUnsupportedOperatorVariant = 13, 189 // The accelerated version of the operator cannot specify an activation 190 // function. 191 kNoActivationExpected = 14, 192 // Quantization scale and/or zero point are not in the supported value(s) 193 // for the accelerated operation. 
194 kUnsupportedQuantizationParameters = 15, 195 }; 196 // LINT.ThenChange(nnapi_linter/linter.proto) 197 198 struct NNAPIValidationFailure { 199 NNAPIValidationFailureType type; 200 std::string message; 201 NNAPIValidationFailureNNAPIValidationFailure202 NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message) 203 : type(type), message(message) {} 204 }; 205 206 // LRU cache of reusable NNAPI executions. 207 class NNAPIExecutionCache { 208 public: 209 // The cache signature. Uniquely identifies an execution request. 210 struct Signature { 211 std::vector<uint64_t> tensor_handle_timestamps; 212 std::vector<int> dynamic_dimensions; 213 214 bool operator==(const Signature& other) const; 215 struct Hasher { 216 std::size_t operator()(const Signature& signature) const; 217 }; 218 }; 219 NNAPIExecutionCache(uint32_t max_cache_size)220 explicit NNAPIExecutionCache(uint32_t max_cache_size) 221 : max_cache_size_(max_cache_size) {} 222 223 // Gets the cached execution by signature. 224 // On cache hit, the target execution is set to be the most recently used one. 225 // On cache miss, nullptr is returned. 226 ANeuralNetworksExecution* Get(const Signature& signature); 227 228 // Puts the execution in cache and set it to be the most recently used one. 229 // If the cache is full, the least recently used entry will be released. 230 void Put(const Signature& signature, UniqueExecution execution); 231 232 // Clears all cache entries. 233 void Clear(); 234 235 // Resets the max cache size. 236 void SetMaxCacheSize(uint32_t max_cache_size); 237 238 private: 239 // Releases the least recently used cache. 240 void ReleaseLRU(); 241 242 // The maximum number of reusable executions to cache. 243 uint32_t max_cache_size_; 244 245 // Cache signatures in the order of most recent use. The most recently used 246 // signature is at the front of the list. 247 std::list<Signature> order_; 248 249 // A hash map to lookup a managed execution by its signature. 
250 std::unordered_map<Signature, 251 std::pair<std::list<Signature>::iterator, UniqueExecution>, 252 Signature::Hasher> 253 lookup_; 254 }; 255 256 // The kernel that represents the node sub set of TF Lite being run on NN API. 257 class NNAPIDelegateKernel { 258 public: 259 explicit NNAPIDelegateKernel( 260 const NnApi* nnapi, NnapiDelegateVendorPlugin* vendor_plugin = nullptr) initialised_(false)261 : initialised_(false), 262 nnapi_(nnapi), 263 nn_model_(nullptr, NNFreeModel(nnapi_)), 264 nn_compilation_(nullptr, NNFreeCompilation(nnapi_)), 265 nn_burst_(nullptr, NNFreeBurst(nnapi_)), 266 nn_execution_cache_(/*max_cache_size=*/4), 267 mapping_util_(NnapiMappingUtilCInterfaceCreate(), NNFreeMappingUtil()), 268 vendor_plugin_(vendor_plugin) {} NNAPIDelegateKernel()269 NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {} ~NNAPIDelegateKernel()270 ~NNAPIDelegateKernel() { 271 for (auto content : allocation_memory_mapping_) { 272 nnapi_->ANeuralNetworksMemory_free(content.second); 273 } 274 } 275 276 static NnapiMappingUtilCInterface* NnapiMappingUtilCInterfaceCreate(); 277 278 // Translate a node into its operands 279 // It assumes that the call to Validate for has been successful for 280 // the operation. 281 // In case of success it returns kTfLiteOk and stores in n_op_type the 282 // NNAPI Operation code. 283 // Returns kTfLiteError in case of failures during mapping. 284 static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version, 285 int android_sdk_version, 286 const NNAPIOpMappingArgs& mapping_args, 287 ANeuralNetworksOperationType* nn_op_type, 288 NnapiDelegateVendorPlugin* vendor_plugin = nullptr); 289 290 // Returns true if the node can be accelerated with NNAPI. 
291 static bool Validate( 292 const TfLiteContext* context, const TfLiteRegistration* registration, 293 int android_sdk_version, const TfLiteNode* node, 294 bool is_accelerator_specified, 295 NnapiDelegateVendorPlugin* vendor_plugin = nullptr, 296 // Collects lists of failures collected during 297 // the validation of the possibility of accelerating 298 // the given node 299 std::vector<NNAPIValidationFailure>* map_failures = nullptr); 300 301 // Initialize the kernel (a NN model) and builds the NN Model. 302 // Any NNAPI Related error causing this method to fail will have the 303 // associated error number stored in nnapi_errno 304 TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params, 305 int* nnapi_errno); 306 307 // Creates the NNAPI Compilation for the NN model. It assumes that Init has 308 // been called and completed successfully. 309 // Any NNAPI Related error causing this method to fail will have the 310 // associated error number stored in nnapi_errno 311 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node, 312 int* nnapi_errno); 313 314 // Invoke the NN Model. Expects Init and Prepare to have been completed 315 // successfully. 316 // Any NNAPI Related error causing this method to fail will have the 317 // associated error number stored in nnapi_errno 318 TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node, 319 int* nnapi_errno); 320 321 // Returns the list of operations supported by the current NNAPI model as 322 // built in Prepare. Every operation is identified by the index as provided 323 // in the delegate parameters given to the delegate during the Init call. 324 // It expects the Init method has been called and completed successfully and 325 // returns kTfLiteError if not. Returns an error if any of the NNAPI 326 // operations fails or if the 327 // ANeuralNetworksModel_getSupportedOperationsForDevices function is not 328 // available in the NnApi object. 
329 TfLiteStatus GetOperationsSupportedByTargetNnApiDevices( 330 TfLiteContext* context, std::vector<int>* supported_nodes, 331 int* nnapi_errno); 332 333 private: 334 // True if initialization has been completed successfully 335 bool initialised_; 336 // Access to NNApi. 337 const NnApi* nnapi_; 338 // ANN device handle. 339 std::vector<ANeuralNetworksDevice*> nnapi_devices_; 340 // Name of the nnapi device, empty if nnapi_devices_ is empty; 341 std::string device_name_; 342 // ANN API state. 343 std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_; 344 std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation> 345 nn_compilation_; 346 std::unique_ptr<ANeuralNetworksBurst, NNFreeBurst> nn_burst_; 347 NNAPIExecutionCache nn_execution_cache_; 348 // The mappings of tenor id to BufferHandle. Needed to track BufferHandle 349 // change and alter nn_reusable_execution_ if necessary. 350 std::vector<int> tensor_handle_map_; 351 // Node indices that this delegate is responsible for. Indices here 352 // indexes into the nodes array in the TfLiteContext. 353 std::vector<int> nodes_; 354 // Track indices we use 355 std::unique_ptr<NnapiMappingUtilCInterface, NNFreeMappingUtil> mapping_util_; 356 357 std::map<const MMAPAllocation*, ANeuralNetworksMemory*> 358 allocation_memory_mapping_; 359 // Track memory map 360 const std::vector<StatefulNnApiDelegate::MemoryRegistration>* 361 tensor_memory_map_; 362 std::vector<int> model_state_outputs_; 363 std::vector<int> model_state_tfl_inputs_; 364 // This is the equivalent of the pair model_state_outputs_, 365 // model_state_tfl_inputs_ for all tensors where we have to keep the output 366 // data available for TFLite model users 367 std::vector<std::tuple<int, int>> feedback_loops_; 368 // The mappings of tenor id to max size in bytes. If the hint is not provided 369 // for a tensor, it is set to 0. 
370 std::vector<size_t> tensor_max_size_hints_; 371 372 std::unique_ptr<NNMemory> nn_input_memory_; 373 std::unique_ptr<NNMemory> nn_output_memory_; 374 375 std::vector<uint8_t> nn_compilation_cache_token_; 376 377 // Map of DENSIFY output tensor id to node id. 378 std::vector<int> densify_output_to_node_mapping_; 379 // Map of DEQUANTIZE output tensor id to node id. 380 // Only contains DEQUANTIZE nodes with non-const input. 381 std::vector<int> non_const_dequantize_output_to_node_mapping_; 382 383 NnapiDelegateVendorPlugin* vendor_plugin_ = nullptr; 384 385 // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors 386 int target_feature_level_ = 27; // kMinSdkVersionForNNAPI10 387 388 void AddDequantizeOperatorsWhereNeeded( 389 const TfLiteContext* context, int builtin_code, const TfLiteNode* node, 390 int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno); 391 392 TfLiteStatus DensifyAndDequantizeConstTensor(TfLiteContext* context, 393 int densify_node_id, 394 bool should_dequantize, 395 NNAPIOpBuilder& builder); 396 397 TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno, 398 bool allow_dynamic_dimensions); 399 400 TfLiteStatus BuildGraph(TfLiteContext* context, 401 const StatefulNnApiDelegate::Options& options, 402 const TfLiteIntArray* input_tensors, 403 const TfLiteIntArray* output_tensors, 404 int* nnapi_errno); 405 406 // Log the compilation info provided by the support library at the end of 407 // a compilation (failed or successful). 408 // To avoid output spamming, logging is done only once, on the first call to 409 // this method, subsequent runs will only retrieve the information but not 410 // log it. 411 // 412 // This method is registered as a callback with the SL which calls it. 
413 static void LogCompilationInfoOnce( 414 const NnApi* nnapi, const ANeuralNetworksDiagnosticCompilationInfo* info); 415 416 // Log the execution info provided by the support library at the end of 417 // an execution (failed or successful). 418 // To avoid output spamming, logging is done only once, on the first call to 419 // this method, subsequent runs will only retrieve the information but not 420 // log it. 421 // 422 // This method is registered as a callback with the SL which calls it. 423 static void LogExecutionInfoOnce( 424 const NnApi* nnapi, const ANeuralNetworksDiagnosticExecutionInfo* info); 425 }; 426 427 } // namespace nnapi 428 } // namespace delegate 429 } // namespace tflite 430 431 #endif // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 432