xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
17 #define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
18 
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
24 
25 #include "tensorflow/lite/allocation.h"
26 #include "tensorflow/lite/c/common.h"
27 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
28 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_plugin.h"
29 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
30 
31 namespace tflite {
32 namespace delegate {
33 namespace nnapi {
34 
35 constexpr int32_t kMinSdkVersionForNNAPI = 27;
36 constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
37 constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
38 constexpr int32_t kMinSdkVersionForNNAPI13 = 30;
39 constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31;
40 constexpr int32_t kNNAPIRuntimeFeatureLevel6 = 1000006;
41 constexpr int32_t kNNAPIRuntimeFeatureLevel7 = 1000007;
42 constexpr int32_t kNNAPIRuntimeFeatureLevel8 = 1000008;
43 
44 class NNAPIOpBuilder;
45 
46 // The kernel that represents the node sub set of TF Lite being run on NN API.
47 struct NNAPIOpMappingArgs {
48   TfLiteContext* context;
49   NNAPIOpBuilder* builder;
50   TfLiteNode* node;
51   int node_index;
52   std::vector<int>* model_state_outputs;
53   std::vector<int>* model_state_tfl_inputs;
54   std::vector<std::tuple<int, int>>* feedback_loops;
55   int* nnapi_errno;
56 };
57 
58 // RAII NN API Model Destructor for use with std::unique_ptr
59 class NNFreeModel {
60  public:
NNFreeModel(const NnApi * nnapi)61   explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {}
operator()62   void operator()(ANeuralNetworksModel* model) {
63     nnapi_->ANeuralNetworksModel_free(model);
64   }
65 
66  private:
67   // NnApi instance to use. Not owned by this object.
68   const NnApi* nnapi_;
69 };
70 // RAII NN API Compilation Destructor for use with std::unique_ptr
71 class NNFreeCompilation {
72  public:
NNFreeCompilation(const NnApi * nnapi)73   explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {}
operator()74   void operator()(ANeuralNetworksCompilation* model) {
75     nnapi_->ANeuralNetworksCompilation_free(model);
76   }
77 
78  private:
79   // NnApi instance to use. Not owned by this object.
80   const NnApi* nnapi_;
81 };
82 // RAII NN API Execution Destructor for use with std::unique_ptr
83 class NNFreeExecution {
84  public:
NNFreeExecution(const NnApi * nnapi)85   explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {}
operator()86   void operator()(ANeuralNetworksExecution* execution) {
87     nnapi_->ANeuralNetworksExecution_free(execution);
88   }
89 
90  private:
91   // NnApi instance to use. Not owned by this object.
92   const NnApi* nnapi_;
93 };
94 // RAII NN API Burst Destructor for use with std::unique_ptr
95 class NNFreeBurst {
96  public:
NNFreeBurst(const NnApi * nnapi)97   explicit NNFreeBurst(const NnApi* nnapi) : nnapi_(nnapi) {}
operator()98   void operator()(ANeuralNetworksBurst* model) {
99     nnapi_->ANeuralNetworksBurst_free(model);
100   }
101 
102  private:
103   // NnApi instance to use. Not owned by this object.
104   const NnApi* nnapi_;
105 };
106 
107 using UniqueExecution =
108     std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>;
109 
110 // RAII NN API MappingUtil Destructor for use with std::unique_ptr
111 class NNFreeMappingUtil {
112  public:
113   void operator()(NnapiMappingUtilCInterface* mapping_util);
114 };
115 
116 // Manage NNAPI shared memory handle
117 class NNMemory {
118  public:
119   NNMemory(const NnApi* nnapi, const char* name, size_t size);
120 
121   ~NNMemory();
122 
get_handle()123   ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
get_data_ptr()124   uint8_t* get_data_ptr() { return data_ptr_; }
get_byte_size()125   size_t get_byte_size() { return byte_size_; }
126 
127  private:
128   // NnApi instance to use. Not owned by this object.
129   const NnApi* nnapi_;
130   int fd_ = 0;
131   size_t byte_size_ = 0;
132   uint8_t* data_ptr_ = nullptr;
133   ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
134 #ifndef __ANDROID__
135   std::string shm_region_name_;
136 #endif
137 };
138 
// LINT.IfChange
140 enum class NNAPIValidationFailureType : int {
141   // The operator is not supported by either NNAPI or the NNAPI Delegate.
142   kUnsupportedOperator = 0,
143   // The given operation or operands are not supported on the specified
144   // Android SDK version. The min supported version is specified in the
145   // validation failure message.
146   kUnsupportedAndroidVersion = 1,
147   // The version of the operator (value of TfLiteRegistration::version)
148   // for the given op is not supported. The max supported version
149   // is specified in the validation failure message.
150   // For more details on each operator version see
151   // the GetBuiltinOperatorVersion function in
152   // third_party/tensorflow/lite/tools/versioning/op_version.cc.
153   kUnsupportedOperatorVersion = 2,
154   // The given input operand type is not supported for the current combination
155   // of operator type and sdk version.
156   kUnsupportedInputType = 3,
157   // When using NN API version 1.0 or 1.1, the condition
158   //   input_scale * filter_scale < output_scale
159   // must be true for quantized versions of the following ops:
160   // * CONV_2D
161   // * DEPTHWISE_CONV_2D
162   // * FULLY_CONNECTED (where filter actually stands for weights)
163   // The condition is relaxed and no longer required since version 1.2.
164   kNotRestrictedScaleCompliant = 4,
165   // The given output operand type is not supported for the current combination
166   // of operator type and sdk version.
167   kUnsupportedOutputType = 5,
168   // The size of the operand tensor is too large.
169   kUnsupportedOperandSize = 6,
170   // The value of one of the operands or of a combination of operands is
171   // not supported. Details are provided in the failure message.
172   kUnsupportedOperandValue = 7,
173   // The combination of float inputs and quantized weights or filters
174   // is not supported
175   kUnsupportedHybridOperator = 8,
176   // The quantization type (for example per-channel quantization) is not
177   // supported.
178   kUnsupportedQuantizationType = 9,
179   // The accelerated version of operation requires a specific operand to be
180   // specified.
181   kMissingRequiredOperand = 10,
182   // The rank of the operand is not supported. Details in the failure message.
183   kUnsupportedOperandRank = 11,
184   // The input tensor cannot be dynamically-sized.
185   kInputTensorShouldHaveConstantShape = 12,
186   // The operator has a different number of inputs of the one or ones that
187   // are supported by NNAPI.
188   kUnsupportedOperatorVariant = 13,
189   // The accelerated version of the operator cannot specify an activation
190   // function.
191   kNoActivationExpected = 14,
192   // Quantization scale and/or zero point are not in the supported value(s)
193   // for the accelerated operation.
194   kUnsupportedQuantizationParameters = 15,
195 };
// LINT.ThenChange(nnapi_linter/linter.proto)
197 
198 struct NNAPIValidationFailure {
199   NNAPIValidationFailureType type;
200   std::string message;
201 
NNAPIValidationFailureNNAPIValidationFailure202   NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message)
203       : type(type), message(message) {}
204 };
205 
206 // LRU cache of reusable NNAPI executions.
207 class NNAPIExecutionCache {
208  public:
209   // The cache signature. Uniquely identifies an execution request.
210   struct Signature {
211     std::vector<uint64_t> tensor_handle_timestamps;
212     std::vector<int> dynamic_dimensions;
213 
214     bool operator==(const Signature& other) const;
215     struct Hasher {
216       std::size_t operator()(const Signature& signature) const;
217     };
218   };
219 
NNAPIExecutionCache(uint32_t max_cache_size)220   explicit NNAPIExecutionCache(uint32_t max_cache_size)
221       : max_cache_size_(max_cache_size) {}
222 
223   // Gets the cached execution by signature.
224   // On cache hit, the target execution is set to be the most recently used one.
225   // On cache miss, nullptr is returned.
226   ANeuralNetworksExecution* Get(const Signature& signature);
227 
228   // Puts the execution in cache and set it to be the most recently used one.
229   // If the cache is full, the least recently used entry will be released.
230   void Put(const Signature& signature, UniqueExecution execution);
231 
232   // Clears all cache entries.
233   void Clear();
234 
235   // Resets the max cache size.
236   void SetMaxCacheSize(uint32_t max_cache_size);
237 
238  private:
239   // Releases the least recently used cache.
240   void ReleaseLRU();
241 
242   // The maximum number of reusable executions to cache.
243   uint32_t max_cache_size_;
244 
245   // Cache signatures in the order of most recent use. The most recently used
246   // signature is at the front of the list.
247   std::list<Signature> order_;
248 
249   // A hash map to lookup a managed execution by its signature.
250   std::unordered_map<Signature,
251                      std::pair<std::list<Signature>::iterator, UniqueExecution>,
252                      Signature::Hasher>
253       lookup_;
254 };
255 
256 // The kernel that represents the node sub set of TF Lite being run on NN API.
257 class NNAPIDelegateKernel {
258  public:
259   explicit NNAPIDelegateKernel(
260       const NnApi* nnapi, NnapiDelegateVendorPlugin* vendor_plugin = nullptr)
initialised_(false)261       : initialised_(false),
262         nnapi_(nnapi),
263         nn_model_(nullptr, NNFreeModel(nnapi_)),
264         nn_compilation_(nullptr, NNFreeCompilation(nnapi_)),
265         nn_burst_(nullptr, NNFreeBurst(nnapi_)),
266         nn_execution_cache_(/*max_cache_size=*/4),
267         mapping_util_(NnapiMappingUtilCInterfaceCreate(), NNFreeMappingUtil()),
268         vendor_plugin_(vendor_plugin) {}
NNAPIDelegateKernel()269   NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
~NNAPIDelegateKernel()270   ~NNAPIDelegateKernel() {
271     for (auto content : allocation_memory_mapping_) {
272       nnapi_->ANeuralNetworksMemory_free(content.second);
273     }
274   }
275 
276   static NnapiMappingUtilCInterface* NnapiMappingUtilCInterfaceCreate();
277 
278   // Translate a node into its operands
279   // It assumes that the call to Validate for has been successful for
280   // the operation.
281   // In case of success it returns kTfLiteOk and stores in n_op_type the
282   // NNAPI Operation code.
283   // Returns kTfLiteError in case of failures during mapping.
284   static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version,
285                           int android_sdk_version,
286                           const NNAPIOpMappingArgs& mapping_args,
287                           ANeuralNetworksOperationType* nn_op_type,
288                           NnapiDelegateVendorPlugin* vendor_plugin = nullptr);
289 
290   // Returns true if the node can be accelerated with NNAPI.
291   static bool Validate(
292       const TfLiteContext* context, const TfLiteRegistration* registration,
293       int android_sdk_version, const TfLiteNode* node,
294       bool is_accelerator_specified,
295       NnapiDelegateVendorPlugin* vendor_plugin = nullptr,
296       // Collects lists of failures collected during
297       // the validation of the possibility of accelerating
298       // the given node
299       std::vector<NNAPIValidationFailure>* map_failures = nullptr);
300 
301   // Initialize the kernel (a NN model) and builds the NN Model.
302   // Any NNAPI Related error causing this method to fail will have the
303   // associated error number stored in nnapi_errno
304   TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
305                     int* nnapi_errno);
306 
307   // Creates the NNAPI Compilation for the NN model. It assumes that Init has
308   // been called and completed successfully.
309   // Any NNAPI Related error causing this method to fail will have the
310   // associated error number stored in nnapi_errno
311   TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
312                        int* nnapi_errno);
313 
314   // Invoke the NN Model. Expects Init and Prepare to have been completed
315   // successfully.
316   // Any NNAPI Related error causing this method to fail will have the
317   // associated error number stored in nnapi_errno
318   TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
319                       int* nnapi_errno);
320 
321   // Returns the list of operations supported by the current NNAPI model as
322   // built in Prepare. Every operation is identified by the index as provided
323   // in the delegate parameters given to the delegate during the Init call.
324   // It expects the Init method has been called and completed successfully and
325   // returns kTfLiteError if not. Returns an error if any of the NNAPI
326   // operations fails or if the
327   // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
328   // available in the NnApi object.
329   TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
330       TfLiteContext* context, std::vector<int>* supported_nodes,
331       int* nnapi_errno);
332 
333  private:
334   // True if initialization has been completed successfully
335   bool initialised_;
336   // Access to NNApi.
337   const NnApi* nnapi_;
338   // ANN device handle.
339   std::vector<ANeuralNetworksDevice*> nnapi_devices_;
340   // Name of the nnapi device, empty if nnapi_devices_ is empty;
341   std::string device_name_;
342   // ANN API state.
343   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
344   std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
345       nn_compilation_;
346   std::unique_ptr<ANeuralNetworksBurst, NNFreeBurst> nn_burst_;
347   NNAPIExecutionCache nn_execution_cache_;
348   // The mappings of tenor id to BufferHandle. Needed to track BufferHandle
349   // change and alter nn_reusable_execution_ if necessary.
350   std::vector<int> tensor_handle_map_;
351   // Node indices that this delegate is responsible for. Indices here
352   // indexes into the nodes array in the TfLiteContext.
353   std::vector<int> nodes_;
354   // Track indices we use
355   std::unique_ptr<NnapiMappingUtilCInterface, NNFreeMappingUtil> mapping_util_;
356 
357   std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
358       allocation_memory_mapping_;
359   // Track memory map
360   const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
361       tensor_memory_map_;
362   std::vector<int> model_state_outputs_;
363   std::vector<int> model_state_tfl_inputs_;
364   // This is the equivalent of the pair model_state_outputs_,
365   // model_state_tfl_inputs_ for all tensors where we have to keep the output
366   // data available for TFLite model users
367   std::vector<std::tuple<int, int>> feedback_loops_;
368   // The mappings of tenor id to max size in bytes. If the hint is not provided
369   // for a tensor, it is set to 0.
370   std::vector<size_t> tensor_max_size_hints_;
371 
372   std::unique_ptr<NNMemory> nn_input_memory_;
373   std::unique_ptr<NNMemory> nn_output_memory_;
374 
375   std::vector<uint8_t> nn_compilation_cache_token_;
376 
377   // Map of DENSIFY output tensor id to node id.
378   std::vector<int> densify_output_to_node_mapping_;
379   // Map of DEQUANTIZE output tensor id to node id.
380   // Only contains DEQUANTIZE nodes with non-const input.
381   std::vector<int> non_const_dequantize_output_to_node_mapping_;
382 
383   NnapiDelegateVendorPlugin* vendor_plugin_ = nullptr;
384 
385   // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors
386   int target_feature_level_ = 27;  // kMinSdkVersionForNNAPI10
387 
388   void AddDequantizeOperatorsWhereNeeded(
389       const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
390       int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);
391 
392   TfLiteStatus DensifyAndDequantizeConstTensor(TfLiteContext* context,
393                                                int densify_node_id,
394                                                bool should_dequantize,
395                                                NNAPIOpBuilder& builder);
396 
397   TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno,
398                                 bool allow_dynamic_dimensions);
399 
400   TfLiteStatus BuildGraph(TfLiteContext* context,
401                           const StatefulNnApiDelegate::Options& options,
402                           const TfLiteIntArray* input_tensors,
403                           const TfLiteIntArray* output_tensors,
404                           int* nnapi_errno);
405 
406   // Log the compilation info provided by the support library at the end of
407   // a compilation (failed or successful).
408   // To avoid output spamming, logging is done only once, on the first call to
409   // this method, subsequent runs will only retrieve the information but not
410   // log it.
411   //
412   // This method is registered as a callback with the SL which calls it.
413   static void LogCompilationInfoOnce(
414       const NnApi* nnapi, const ANeuralNetworksDiagnosticCompilationInfo* info);
415 
416   // Log the execution info provided by the support library at the end of
417   // an execution (failed or successful).
418   // To avoid output spamming, logging is done only once, on the first call to
419   // this method, subsequent runs will only retrieve the information but not
420   // log it.
421   //
422   // This method is registered as a callback with the SL which calls it.
423   static void LogExecutionInfoOnce(
424       const NnApi* nnapi, const ANeuralNetworksDiagnosticExecutionInfo* info);
425 };
426 
427 }  // namespace nnapi
428 }  // namespace delegate
429 }  // namespace tflite
430 
431 #endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
432